diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index f6cf749..4111f6b 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -27,7 +27,7 @@ jobs: - uses: astral-sh/setup-uv@cec208311dfd045dd5311c1add060b2062131d57 # v8.0.0 - - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 + - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: python-version: '3.12' @@ -46,7 +46,7 @@ jobs: CI: true - name: Upload artifact - uses: actions/upload-pages-artifact@56afc609e74202658d3ffba0e8f6dda462b719fa # v3.0.1 + uses: actions/upload-pages-artifact@7b1f4a764d45c48632c6b24a0339c27f5614fb0b # v4.0.0 with: path: docs-site/site @@ -59,4 +59,4 @@ jobs: steps: - name: Deploy to GitHub Pages id: deployment - uses: actions/deploy-pages@d6db90164ac5ed86f2b6aed7e0febac5b3c0c03e # v4.0.5 + uses: actions/deploy-pages@cd2ce8fcbc39b97be8ca5fce6e763baed58fa128 # v5.0.0 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 8fb9341..7d7ee6a 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -411,7 +411,7 @@ jobs: provenance: name: Generate SLSA provenance - needs: [hash] + needs: [hash, release] permissions: actions: read id-token: write @@ -419,28 +419,8 @@ jobs: uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@f7dd8c54c2067bafc12ca7a55595d5ee9b75204a # v2.1.0 with: base64-subjects: "${{ needs.hash.outputs.hashes }}" - upload-assets: false - - upload-provenance: - name: Upload provenance to release - needs: [provenance, release] - runs-on: ubuntu-latest - permissions: - contents: write - steps: - - name: Download provenance artifact - uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0 - with: - name: ${{ needs.provenance.outputs.provenance-name }} - - name: Upload to release - env: - GH_TOKEN: ${{ github.token }} - TAG: ${{ needs.release.outputs.tag }} - PROVENANCE_FILE: 
${{ needs.provenance.outputs.provenance-name }} - run: | - gh release upload "${TAG}" "${PROVENANCE_FILE}" \ - --repo "${{ github.repository }}" \ - --clobber + upload-assets: true + upload-tag-name: "${{ needs.release.outputs.tag }}" # ──────────────── Publish Docker image ──────────────── publish-docker: diff --git a/.gitignore b/.gitignore index 1fa6b8b..1f27a8f 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,8 @@ *.json !webserver/static/sample-report.json !docs/access-map-viewer/sample-report.json +!testdata/parsers/context_verifier_golden.json +!testdata/parsers/scan_findings_baseline.json !testdata/parsers/tree_sitter_capture_baseline.json *.jsonl *.bson @@ -17,7 +19,10 @@ logs/* *.orig *.rej *.html +!testdata/html_vulnerable.html +!testdata/html_embedded_vulnerable.html !docs/access-map-viewer/index.html +!docs-site/overrides/*.html *.dot fuzz/* !fuzz/Cargo.toml diff --git a/AGENTS.md b/AGENTS.md index 0f6d292..4d39dbc 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -22,16 +22,16 @@ Key capabilities: ## Repository Structure - `src/`: main binary source - `src/cli/commands/`: CLI command implementations -- `src/validation/`: provider-specific credential validators - `src/matcher/`: pattern matching engine - `src/scanner/`: core scanning logic -- `src/parser/`: language-aware parsing (`tree-sitter`) +- `src/parser/`: language-aware context verification (lightweight lexers, `tl` for HTML, `cssparser` for CSS) - `src/reporter/`: TOON/JSON/SARIF/HTML report generation - `src/access_map/`: access mapping analysis - `crates/kingfisher-core/`: shared types and core logic - `crates/kingfisher-rules/`: rule loading and rule data - `crates/kingfisher-rules/data/rules/`: YAML detection rules - `crates/kingfisher-scanner/`: embeddable high-level scanning API +- `crates/kingfisher-scanner/src/validation/`: shared typed and raw credential validators - `tests/`: integration/e2e tests - `testdata/`: test fixtures - `docs/`: user and developer docs @@ -81,18 +81,21 @@ Key 
capabilities: - `use-mimalloc` (default) - `use-jemalloc` - `system-alloc` -- Validation modules live in `crates/kingfisher-scanner/src/validation/`; optional validation feature sets are defined in `crates/kingfisher-scanner/Cargo.toml` (e.g., `validation-aws`, `validation-gcp`, `validation-database`, `validation-all`). +- Validation modules live in `crates/kingfisher-scanner/src/validation/`; optional validation feature sets are defined in `crates/kingfisher-scanner/Cargo.toml` (e.g., `validation-raw`, `validation-aws`, `validation-gcp`, `validation-database`, `validation-all`). ## Validation and Revocation Policy -- Default rule: define validation logic in rule YAML (`validation:` block), not Rust code. -- Code-based validation in `crates/kingfisher-scanner/src/validation/` is an exception path for cases that cannot be expressed reliably in YAML alone (for example AWS, GCP, Coinbase, MongoDB, and similar complex/provider-specific flows). +- Default rule: define validation logic in rule YAML (`validation:` block), especially `Http` or `Grpc`, not Rust code. +- Typed validators are first-class schema variants (`AWS`, `AzureStorage`, `Coinbase`, `GCP`, `MongoDB`, `MySQL`, `Postgres`, `Jdbc`, `JWT`) for stable, reusable validation families. +- Raw validators use `validation: { type: Raw, content: VALIDATOR_NAME }` and are the ad-hoc exception path for provider-specific or protocol-specific validation that cannot be expressed reliably in YAML alone. Implement them in `crates/kingfisher-scanner/src/validation/raw.rs`. - Treat Rust validation additions as rare; prefer extending YAML-based validation first. +- If a Rust exception path is required, prefer adding a raw validator before introducing a new typed validator. Add a new typed validator only when it represents a reusable schema-level validation family. +- Do not convert existing typed validators to `Raw` just for consistency. 
- For rules that include validation, add a `revocation:` section whenever the third-party API safely supports revocation. ## Common Development Tasks - Add a detection rule: follow the workflow below and validate with relevant tests. - Add a CLI command: implement under `src/cli/commands/` and register in the CLI command wiring. -- Add a validator (rare exception path): implement in `crates/kingfisher-scanner/src/validation/` and wire feature flags/dependencies in `crates/kingfisher-scanner/Cargo.toml` only when YAML validation cannot express the required logic. +- Add a validator (rare exception path): implement it in `crates/kingfisher-scanner/src/validation/`, prefer `raw.rs` for one-off provider flows, and wire the narrowest feature/dependencies in `crates/kingfisher-scanner/Cargo.toml` only when YAML validation cannot express the required logic. ## Rule Authoring Workflow Use this when creating or updating rules in `crates/kingfisher-rules/data/rules/`. @@ -105,7 +108,7 @@ Use this when creating or updating rules in `crates/kingfisher-rules/data/rules/ - `pattern_requirements` (e.g., `min_digits`, `min_uppercase`, `min_lowercase`, `min_special_chars`, `ignore_if_contains`) when format constraints are known. - `pattern_requirements.checksum` when provider formats include check digits/signatures. 5. Add `validation` only when a reliable provider/API check exists. -6. Put validation in YAML by default; only use Rust validator logic for rare, justified exceptions. +6. Put validation in YAML by default. If YAML cannot express the check, use an existing typed validator or `type: Raw` exception path; add new Rust validator logic only for rare, justified cases. 7. Add `revocation` when the provider API supports safe revocation and the flow is well understood. 8. If a rule needs context from another match (for example ID + secret pair), use `depends_on_rule` and consider `visible: false` on the helper rule. 9. 
Verify locally: diff --git a/CHANGELOG.md b/CHANGELOG.md index 6921720..94ec62d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,12 @@ All notable changes to this project will be documented in this file. +## [v1.95.0] +- Added 80+ built-in rules, bringing the bundled ruleset to 820 total. New coverage includes Amazon OAuth, Asaas, multiple Azure credential families, Bitrise, Canva, CockroachDB, eBay, Elastic, hCaptcha, Highnote, Lichess, MailerSend, Onfido, Paddle, Pangea, Persona, Pinterest, Proof, Rootly, Runpod, Telnyx, Thunderstore, Valtown, Volcengine, and more. +- Replaced tree-sitter with a lighter parser-based context verifier built from handwritten lexers plus `tl`/`cssparser`, preserving context-dependent matching while cutting about 19 MB from the release binary. +- Added a `validation: type: Raw` exception path for provider-specific checks, with new raw validators for Azure Batch, FTP, Kraken, LDAP, RabbitMQ, and Redis. Also added stable request-scoped template values plus new Liquid filters for HMAC-SHA384 hex output and timestamp generation. +- Expanded live validation coverage for several built-in rules, including Agora, Bitfinex, DocuSign, Dwolla, GitLab, KuCoin, RingCentral, Snowflake, Tableau, Trello, and Webex. Also tightened newly added helper regex to avoid high-match scan regressions, and made preflight-blocked raw validations report as skipped/not attempted instead of failed. + ## [v1.94.0] - Updated vendored `vectorscan-rs` from v0.0.5 (Vectorscan 5.4.11) to v0.0.6 (Vectorscan 5.4.12). The upstream crate now ships pre-extracted sources instead of a tarball+patch, and fixes the `cpu_native` feature flag. Local Windows and musl build patches have been re-applied. 
- Added more built-in rules diff --git a/Cargo.lock b/Cargo.lock index 475da1d..b5ccddf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -191,6 +191,45 @@ dependencies = [ "wax", ] +[[package]] +name = "asn1-rs" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f6fd5ddaf0351dff5b8da21b2fb4ff8e08ddd02857f0bf69c47639106c0fff0" +dependencies = [ + "asn1-rs-derive", + "asn1-rs-impl", + "displaydoc", + "nom 7.1.3", + "num-traits", + "rusticata-macros", + "thiserror 1.0.69", + "time", +] + +[[package]] +name = "asn1-rs-derive" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "726535892e8eae7e70657b4c8ea93d26b8553afb1ce617caee529ef96d7dee6c" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", + "synstructure 0.12.6", +] + +[[package]] +name = "asn1-rs-impl" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2777730b2039ac0f95f093556e61b6d26cebed5393ca6f152717777cec3a42ed" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "assert-json-diff" version = "2.0.2" @@ -875,11 +914,11 @@ dependencies = [ "hyper-rustls", "hyper-util", "pin-project-lite", - "rustls", - "rustls-native-certs", + "rustls 0.23.37", + "rustls-native-certs 0.8.3", "rustls-pki-types", "tokio", - "tokio-rustls", + "tokio-rustls 0.26.4", "tower", "tracing", ] @@ -1215,8 +1254,8 @@ dependencies = [ "num", "pin-project-lite", "rand 0.9.2", - "rustls", - "rustls-native-certs", + "rustls 0.23.37", + "rustls-native-certs 0.8.3", "rustls-pki-types", "serde", "serde_derive", @@ -1572,9 +1611,9 @@ checksum = "de0758edba32d61d1fd9f4d69491b47604b91ee2f7e6b33de7e54ca4ebe55dc3" [[package]] name = "color-backtrace" -version = "0.7.2" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "308329d5d62e877ba02943db3a8e8c052de9fde7ab48283395ba0e6494efbabd" +checksum = 
"83c39683d44e712e45134c852c21c2f60139c3846047c9dde39cddf7066c78c6" dependencies = [ "backtrace", "termcolor", @@ -1768,6 +1807,16 @@ dependencies = [ "url", ] +[[package]] +name = "core-foundation" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "core-foundation" version = "0.10.1" @@ -1826,7 +1875,7 @@ dependencies = [ "crc", "digest 0.10.7", "rustversion", - "spin", + "spin 0.10.0", ] [[package]] @@ -1945,6 +1994,17 @@ dependencies = [ "hybrid-array", ] +[[package]] +name = "cssparser" +version = "0.37.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c9cdaae01d5ed7882b04d795e7f752f46ff52d2fa3b50a20d28c464510bba98" +dependencies = [ + "dtoa-short", + "itoa", + "smallvec", +] + [[package]] name = "ctutils" version = "0.4.0" @@ -2143,6 +2203,20 @@ dependencies = [ "zeroize", ] +[[package]] +name = "der-parser" +version = "8.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbd676fbbab537128ef0278adb5576cf363cff6aa22a7b24effe97347cfab61e" +dependencies = [ + "asn1-rs", + "displaydoc", + "nom 7.1.3", + "num-bigint", + "num-traits", + "rusticata-macros", +] + [[package]] name = "der_derive" version = "0.7.3" @@ -2355,6 +2429,21 @@ dependencies = [ "litrs", ] +[[package]] +name = "dtoa" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c3cf4824e2d5f025c7b531afcb2325364084a16806f6d47fbc1f5fbd9960590" + +[[package]] +name = "dtoa-short" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd1511a7b6a56299bd043a9c167a6d2bfb37bf84a6dfceaba651168adfb43c87" +dependencies = [ + "dtoa", +] + [[package]] name = "dunce" version = "1.0.5" @@ -2675,6 +2764,15 @@ version = "0.2.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" +[[package]] +name = "foreign-types" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" +dependencies = [ + "foreign-types-shared 0.1.1", +] + [[package]] name = "foreign-types" version = "0.5.0" @@ -2682,7 +2780,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d737d9aa519fb7b749cbc3b962edcf310a8dd1f4b67c91c4f83975dbdd17d965" dependencies = [ "foreign-types-macros", - "foreign-types-shared", + "foreign-types-shared 0.3.1", ] [[package]] @@ -2696,6 +2794,12 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "foreign-types-shared" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" + [[package]] name = "foreign-types-shared" version = "0.3.1" @@ -2863,7 +2967,7 @@ dependencies = [ "regex", "reqwest 0.13.2", "reqwest-middleware 0.5.1", - "ring", + "ring 0.17.14", "serde", "serde_json", "sha2 0.10.9", @@ -4197,7 +4301,7 @@ dependencies = [ "ipnet", "once_cell", "rand 0.9.2", - "ring", + "ring 0.17.14", "thiserror 2.0.18", "tinyvec", "tokio", @@ -4415,11 +4519,11 @@ dependencies = [ "http 1.4.0", "hyper", "hyper-util", - "rustls", - "rustls-native-certs", + "rustls 0.23.37", + "rustls-native-certs 0.8.3", "rustls-pki-types", "tokio", - "tokio-rustls", + "tokio-rustls 0.26.4", "tower-service", "webpki-roots 1.0.6", ] @@ -4959,7 +5063,7 @@ dependencies = [ "base64 0.22.1", "js-sys", "pem", - "ring", + "ring 0.17.14", "serde", "serde_json", "simple_asn1", @@ -4993,7 +5097,7 @@ dependencies = [ [[package]] name = "kingfisher" -version = "1.94.0" +version = "1.95.0" dependencies = [ "anyhow", "asar", @@ -5034,6 +5138,7 @@ dependencies = [ "crc32fast", "crossbeam-channel", 
"crossbeam-skiplist", + "cssparser", "dashmap", "ed25519-dalek", "fixedbitset", @@ -5089,12 +5194,12 @@ dependencies = [ "reqwest 0.12.28", "reqwest-middleware 0.4.2", "reqwest-middleware 0.5.1", - "ring", + "ring 0.17.14", "roaring", "rusqlite", "rustc-hash", - "rustls", - "rustls-native-certs", + "rustls 0.23.37", + "rustls-native-certs 0.8.3", "schemars 0.8.22", "self_update", "semver", @@ -5105,7 +5210,6 @@ dependencies = [ "sha1 0.11.0", "sha2 0.11.0", "smallvec", - "streaming-iterator", "strum 0.28.0", "strum_macros 0.28.0", "sysinfo", @@ -5118,33 +5222,16 @@ dependencies = [ "thread_local", "tikv-jemallocator", "time", + "tl", "tokei", "tokio", "tokio-postgres", "tokio-postgres-rustls", - "tokio-rustls", + "tokio-rustls 0.26.4", "toon-format", "tracing", "tracing-core", "tracing-subscriber", - "tree-sitter", - "tree-sitter-bash", - "tree-sitter-c", - "tree-sitter-c-sharp", - "tree-sitter-cpp", - "tree-sitter-css", - "tree-sitter-go", - "tree-sitter-html", - "tree-sitter-java", - "tree-sitter-javascript", - "tree-sitter-php", - "tree-sitter-python", - "tree-sitter-regex", - "tree-sitter-ruby", - "tree-sitter-rust", - "tree-sitter-toml-ng", - "tree-sitter-typescript", - "tree-sitter-yaml", "tree_magic_mini", "url", "uuid", @@ -5241,6 +5328,7 @@ dependencies = [ "jsonwebtoken 10.3.0", "kingfisher-core", "kingfisher-rules", + "ldap3", "liquid", "liquid-core", "mongodb", @@ -5255,10 +5343,10 @@ dependencies = [ "rand 0.10.0", "regex", "reqwest 0.12.28", - "ring", + "ring 0.17.14", "rustc-hash", - "rustls", - "rustls-native-certs", + "rustls 0.23.37", + "rustls-native-certs 0.8.3", "schemars 0.8.22", "serde", "serde_json", @@ -5268,9 +5356,11 @@ dependencies = [ "tempfile", "thiserror 2.0.18", "thread_local", + "time", "tokio", "tokio-postgres", "tokio-postgres-rustls", + "tokio-rustls 0.26.4", "tracing", "url", "vectorscan-rs", @@ -5293,6 +5383,45 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" +[[package]] +name = "lber" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2df7f9fd9f64cf8f59e1a4a0753fe7d575a5b38d3d7ac5758dcee9357d83ef0a" +dependencies = [ + "bytes", + "nom 7.1.3", +] + +[[package]] +name = "ldap3" +version = "0.11.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "166199a8207874a275144c8a94ff6eed5fcbf5c52303e4d9b4d53a0c7ac76554" +dependencies = [ + "async-trait", + "bytes", + "futures", + "futures-util", + "lazy_static", + "lber", + "log", + "native-tls", + "nom 7.1.3", + "percent-encoding", + "ring 0.16.20", + "rustls 0.21.12", + "rustls-native-certs 0.6.3", + "thiserror 1.0.69", + "tokio", + "tokio-native-tls", + "tokio-rustls 0.24.1", + "tokio-stream", + "tokio-util", + "url", + "x509-parser", +] + [[package]] name = "leb128fmt" version = "0.1.0" @@ -5710,7 +5839,7 @@ dependencies = [ "percent-encoding", "rand 0.9.2", "rustc_version_runtime", - "rustls", + "rustls 0.23.37", "rustversion", "serde", "serde_bytes", @@ -5723,7 +5852,7 @@ dependencies = [ "take_mut", "thiserror 2.0.18", "tokio", - "tokio-rustls", + "tokio-rustls 0.26.4", "tokio-util", "typed-builder", "uuid", @@ -5778,14 +5907,14 @@ dependencies = [ "pem", "percent-encoding", "rand 0.9.2", - "rustls", - "rustls-pemfile", + "rustls 0.23.37", + "rustls-pemfile 2.2.0", "serde", "serde_json", "socket2 0.5.10", "thiserror 2.0.18", "tokio", - "tokio-rustls", + "tokio-rustls 0.26.4", "tokio-util", "twox-hash", "url", @@ -5819,6 +5948,23 @@ dependencies = [ "uuid", ] +[[package]] +name = "native-tls" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "465500e14ea162429d264d44189adc38b199b62b1c21eea9f69e4b73cb03bbf2" +dependencies = [ + "libc", + "log", + "openssl", + "openssl-probe 0.2.1", + "openssl-sys", + "schannel", + "security-framework 3.7.0", + "security-framework-sys", + "tempfile", +] 
+ [[package]] name = "ndk-context" version = "0.1.1" @@ -6103,7 +6249,7 @@ dependencies = [ "reqwest-middleware 0.4.2", "reqwest-retry", "reqwest-tracing", - "ring", + "ring 0.17.14", "schemars 0.8.22", "serde", "serde_json", @@ -6114,6 +6260,15 @@ dependencies = [ "uuid", ] +[[package]] +name = "oid-registry" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9bedf36ffb6ba96c2eb7144ef6270557b52e54b20c0a8e1eb2ff99a6c6959bff" +dependencies = [ + "asn1-rs", +] + [[package]] name = "olpc-cjson" version = "0.1.4" @@ -6141,12 +6296,56 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" +[[package]] +name = "openssl" +version = "0.10.76" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "951c002c75e16ea2c65b8c7e4d3d51d5530d8dfa7d060b4776828c88cfb18ecf" +dependencies = [ + "bitflags 2.11.0", + "cfg-if", + "foreign-types 0.3.2", + "libc", + "once_cell", + "openssl-macros", + "openssl-sys", +] + +[[package]] +name = "openssl-macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "openssl-probe" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" + [[package]] name = "openssl-probe" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe" +[[package]] +name = "openssl-sys" +version = "0.9.112" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57d55af3b3e226502be1526dfdba67ab0e9c96fc293004e79576b2b9edb0dbdb" +dependencies = [ + "cc", + "libc", + 
"pkg-config", + "vcpkg", +] + [[package]] name = "outref" version = "0.5.2" @@ -6759,7 +6958,7 @@ dependencies = [ "quinn-proto", "quinn-udp", "rustc-hash", - "rustls", + "rustls 0.23.37", "socket2 0.6.3", "thiserror 2.0.18", "tokio", @@ -6778,9 +6977,9 @@ dependencies = [ "getrandom 0.3.4", "lru-slab", "rand 0.9.2", - "ring", + "ring 0.17.14", "rustc-hash", - "rustls", + "rustls 0.23.37", "rustls-pki-types", "slab", "thiserror 2.0.18", @@ -7038,7 +7237,6 @@ dependencies = [ "futures-channel", "futures-core", "futures-util", - "h2", "http 1.4.0", "http-body 1.0.1", "http-body-util", @@ -7051,15 +7249,15 @@ dependencies = [ "percent-encoding", "pin-project-lite", "quinn", - "rustls", - "rustls-native-certs", + "rustls 0.23.37", + "rustls-native-certs 0.8.3", "rustls-pki-types", "serde", "serde_json", "serde_urlencoded", "sync_wrapper", "tokio", - "tokio-rustls", + "tokio-rustls 0.26.4", "tokio-util", "tower", "tower-http", @@ -7084,6 +7282,7 @@ dependencies = [ "futures-channel", "futures-core", "futures-util", + "h2", "http 1.4.0", "http-body 1.0.1", "http-body-util", @@ -7097,7 +7296,7 @@ dependencies = [ "percent-encoding", "pin-project-lite", "quinn", - "rustls", + "rustls 0.23.37", "rustls-pki-types", "rustls-platform-verifier", "serde", @@ -7105,7 +7304,7 @@ dependencies = [ "serde_urlencoded", "sync_wrapper", "tokio", - "tokio-rustls", + "tokio-rustls 0.26.4", "tokio-util", "tower", "tower-http", @@ -7222,6 +7421,21 @@ dependencies = [ "subtle", ] +[[package]] +name = "ring" +version = "0.16.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3053cf52e236a3ed746dfc745aa9cacf1b791d846bdaf412f60a8d7d6e17c8fc" +dependencies = [ + "cc", + "libc", + "once_cell", + "spin 0.5.2", + "untrusted 0.7.1", + "web-sys", + "winapi", +] + [[package]] name = "ring" version = "0.17.14" @@ -7302,6 +7516,15 @@ dependencies = [ "semver", ] +[[package]] +name = "rusticata-macros" +version = "4.1.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "faf0c4a6ece9950b9abdb62b1cfcf2a68b3b67a10ba445b3bb85be2a293d0632" +dependencies = [ + "nom 7.1.3", +] + [[package]] name = "rustix" version = "1.1.4" @@ -7315,6 +7538,18 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "rustls" +version = "0.21.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f56a14d1f48b391359b22f731fd4bd7e43c97f3c50eee276f3aa09c94784d3e" +dependencies = [ + "log", + "ring 0.17.14", + "rustls-webpki 0.101.7", + "sct", +] + [[package]] name = "rustls" version = "0.23.37" @@ -7324,23 +7559,44 @@ dependencies = [ "aws-lc-rs", "log", "once_cell", - "ring", + "ring 0.17.14", "rustls-pki-types", - "rustls-webpki", + "rustls-webpki 0.103.10", "subtle", "zeroize", ] +[[package]] +name = "rustls-native-certs" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9aace74cb666635c918e9c12bc0d348266037aa8eb599b5cba565709a8dff00" +dependencies = [ + "openssl-probe 0.1.6", + "rustls-pemfile 1.0.4", + "schannel", + "security-framework 2.11.1", +] + [[package]] name = "rustls-native-certs" version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "612460d5f7bea540c490b2b6395d8e34a953e52b491accd6c86c8164c5932a63" dependencies = [ - "openssl-probe", + "openssl-probe 0.2.1", "rustls-pki-types", "schannel", - "security-framework", + "security-framework 3.7.0", +] + +[[package]] +name = "rustls-pemfile" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c" +dependencies = [ + "base64 0.21.7", ] [[package]] @@ -7368,16 +7624,16 @@ version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d99feebc72bae7ab76ba994bb5e121b8d83d910ca40b36e0921f53becc41784" dependencies = [ - "core-foundation", + "core-foundation 0.10.1", 
"core-foundation-sys", "jni 0.21.1", "log", "once_cell", - "rustls", - "rustls-native-certs", + "rustls 0.23.37", + "rustls-native-certs 0.8.3", "rustls-platform-verifier-android", - "rustls-webpki", - "security-framework", + "rustls-webpki 0.103.10", + "security-framework 3.7.0", "security-framework-sys", "webpki-root-certs", "windows-sys 0.61.2", @@ -7389,6 +7645,16 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f87165f0995f63a9fbeea62b64d10b4d9d8e78ec6d7d51fb2125fda7bb36788f" +[[package]] +name = "rustls-webpki" +version = "0.101.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" +dependencies = [ + "ring 0.17.14", + "untrusted 0.9.0", +] + [[package]] name = "rustls-webpki" version = "0.103.10" @@ -7396,7 +7662,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df33b2b81ac578cabaf06b89b0631153a3f416b0a886e8a7a1707fb51abbd1ef" dependencies = [ "aws-lc-rs", - "ring", + "ring 0.17.14", "rustls-pki-types", "untrusted 0.9.0", ] @@ -7534,6 +7800,16 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "sct" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" +dependencies = [ + "ring 0.17.14", + "untrusted 0.9.0", +] + [[package]] name = "sec1" version = "0.7.3" @@ -7548,6 +7824,19 @@ dependencies = [ "zeroize", ] +[[package]] +name = "security-framework" +version = "2.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" +dependencies = [ + "bitflags 2.11.0", + "core-foundation 0.9.4", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + 
[[package]] name = "security-framework" version = "3.7.0" @@ -7555,7 +7844,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b7f4bc775c73d9a02cde8bf7b2ec4c9d12743edf609006c7facc23998404cd1d" dependencies = [ "bitflags 2.11.0", - "core-foundation", + "core-foundation 0.10.1", "core-foundation-sys", "libc", "security-framework-sys", @@ -7584,9 +7873,9 @@ dependencies = [ [[package]] name = "self_update" -version = "0.43.1" +version = "0.44.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6644febaa58f323b28f7321d04e24d0020d117c27619ab869d6abdf76be9aac6" +checksum = "2e79722b5a505d4ddc77527455a97244e9e8c4c07533ff44cf4421cce7bb6d17" dependencies = [ "either", "flate2", @@ -7595,7 +7884,7 @@ dependencies = [ "log", "quick-xml 0.38.4", "regex", - "reqwest 0.12.28", + "reqwest 0.13.2", "self-replace", "semver", "serde", @@ -7610,9 +7899,9 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.27" +version = "1.0.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2" +checksum = "8a7852d02fc848982e0c167ef163aaff9cd91dc640ba85e263cb1ce46fae51cd" dependencies = [ "serde", "serde_core", @@ -7995,6 +8284,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "spin" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" + [[package]] name = "spin" version = "0.10.0" @@ -8035,12 +8330,6 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" -[[package]] -name = "streaming-iterator" -version = "0.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b2231b7c3057d5e4ad0156fb3dc807d900806020c5ffa3ee6ff2c8c76fb8520" - [[package]] name = "stringprep" version = "0.1.5" @@ 
-8182,6 +8471,18 @@ dependencies = [ "futures-core", ] +[[package]] +name = "synstructure" +version = "0.12.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f36bdaa60a83aca3921b5259d5400cbf5e90fc51931376a9bd4a0eb79aa7210f" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", + "unicode-xid", +] + [[package]] name = "synstructure" version = "0.13.2" @@ -8498,6 +8799,12 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" +[[package]] +name = "tl" +version = "0.7.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b130bd8a58c163224b44e217b4239ca7b927d82bf6cc2fea1fc561d15056e3f7" + [[package]] name = "tls_codec" version = "0.4.2" @@ -8563,9 +8870,9 @@ dependencies = [ [[package]] name = "tokio" -version = "1.50.0" +version = "1.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "27ad5e34374e03cfffefc301becb44e9dc3c17584f414349ebe29ed26661822d" +checksum = "2bd1c4c0fc4a7ab90fc15ef6daaa3ec3b893f004f915f2392557ed23237820cd" dependencies = [ "bytes", "libc", @@ -8580,15 +8887,25 @@ dependencies = [ [[package]] name = "tokio-macros" -version = "2.6.1" +version = "2.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c55a2eff8b69ce66c84f85e1da1c233edc36ceb85a2058d11b0d6a3c7e7569c" +checksum = "385a6cb71ab9ab790c5fe8d67f1645e6c450a7ce006a33de03daa956cf70a496" dependencies = [ "proc-macro2", "quote", "syn 2.0.117", ] +[[package]] +name = "tokio-native-tls" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2" +dependencies = [ + "native-tls", + "tokio", +] + [[package]] name = "tokio-postgres" version = "0.7.17" @@ -8622,21 +8939,31 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"27d684bad428a0f2481f42241f821db42c54e2dc81d8c00db8536c506b0a0144" dependencies = [ "const-oid 0.9.6", - "ring", - "rustls", + "ring 0.17.14", + "rustls 0.23.37", "tokio", "tokio-postgres", - "tokio-rustls", + "tokio-rustls 0.26.4", "x509-cert", ] +[[package]] +name = "tokio-rustls" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" +dependencies = [ + "rustls 0.21.12", + "tokio", +] + [[package]] name = "tokio-rustls" version = "0.26.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61" dependencies = [ - "rustls", + "rustls 0.23.37", "tokio", ] @@ -8914,196 +9241,6 @@ dependencies = [ "tracing-log", ] -[[package]] -name = "tree-sitter" -version = "0.26.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "887bd495d0582c5e3e0d8ece2233666169fa56a9644d172fc22ad179ab2d0538" -dependencies = [ - "cc", - "regex", - "regex-syntax", - "serde_json", - "streaming-iterator", - "tree-sitter-language", -] - -[[package]] -name = "tree-sitter-bash" -version = "0.25.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e5ec769279cc91b561d3df0d8a5deb26b0ad40d183127f409494d6d8fc53062" -dependencies = [ - "cc", - "tree-sitter-language", -] - -[[package]] -name = "tree-sitter-c" -version = "0.24.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a3aad8f0129083a59fe8596157552d2bb7148c492d44c21558d68ca1c722707" -dependencies = [ - "cc", - "tree-sitter-language", -] - -[[package]] -name = "tree-sitter-c-sharp" -version = "0.23.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67f06accca7b45351758663b8215089e643d53bd9a660ce0349314263737fcb0" -dependencies = [ - "cc", - "tree-sitter-language", -] - -[[package]] -name = "tree-sitter-cpp" -version = "0.23.4" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "df2196ea9d47b4ab4a31b9297eaa5a5d19a0b121dceb9f118f6790ad0ab94743" -dependencies = [ - "cc", - "tree-sitter-language", -] - -[[package]] -name = "tree-sitter-css" -version = "0.25.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5cbc5e18f29a2c6d6435891f42569525cf95435a3e01c2f1947abcde178686f" -dependencies = [ - "cc", - "tree-sitter-language", -] - -[[package]] -name = "tree-sitter-go" -version = "0.25.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8560a4d2f835cc0d4d2c2e03cbd0dde2f6114b43bc491164238d333e28b16ea" -dependencies = [ - "cc", - "tree-sitter-language", -] - -[[package]] -name = "tree-sitter-html" -version = "0.23.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "261b708e5d92061ede329babaaa427b819329a9d427a1d710abb0f67bbef63ee" -dependencies = [ - "cc", - "tree-sitter-language", -] - -[[package]] -name = "tree-sitter-java" -version = "0.23.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0aa6cbcdc8c679b214e616fd3300da67da0e492e066df01bcf5a5921a71e90d6" -dependencies = [ - "cc", - "tree-sitter-language", -] - -[[package]] -name = "tree-sitter-javascript" -version = "0.25.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68204f2abc0627a90bdf06e605f5c470aa26fdcb2081ea553a04bdad756693f5" -dependencies = [ - "cc", - "tree-sitter-language", -] - -[[package]] -name = "tree-sitter-language" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "009994f150cc0cd50ff54917d5bc8bffe8cad10ca10d81c34da2ec421ae61782" - -[[package]] -name = "tree-sitter-php" -version = "0.24.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d8c17c3ab69052c5eeaa7ff5cd972dd1bc25d1b97ee779fec391ad3b5df5592" -dependencies = [ - "cc", - "tree-sitter-language", -] - -[[package]] -name = 
"tree-sitter-python" -version = "0.25.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6bf85fd39652e740bf60f46f4cda9492c3a9ad75880575bf14960f775cb74a1c" -dependencies = [ - "cc", - "tree-sitter-language", -] - -[[package]] -name = "tree-sitter-regex" -version = "0.25.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd8a59be9f0ac131fd8f062eaaba14882b2fa5a6a7882a20134cb1d60df2e625" -dependencies = [ - "cc", - "tree-sitter-language", -] - -[[package]] -name = "tree-sitter-ruby" -version = "0.23.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be0484ea4ef6bb9c575b4fdabde7e31340a8d2dbc7d52b321ac83da703249f95" -dependencies = [ - "cc", - "tree-sitter-language", -] - -[[package]] -name = "tree-sitter-rust" -version = "0.24.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "439e577dbe07423ec2582ac62c7531120dbfccfa6e5f92406f93dd271a120e45" -dependencies = [ - "cc", - "tree-sitter-language", -] - -[[package]] -name = "tree-sitter-toml-ng" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9adc2c898ae49730e857d75be403da3f92bb81d8e37a2f918a08dd10de5ebb1" -dependencies = [ - "cc", - "tree-sitter-language", -] - -[[package]] -name = "tree-sitter-typescript" -version = "0.23.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c5f76ed8d947a75cc446d5fccd8b602ebf0cde64ccf2ffa434d873d7a575eff" -dependencies = [ - "cc", - "tree-sitter-language", -] - -[[package]] -name = "tree-sitter-yaml" -version = "0.7.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53c223db85f05e34794f065454843b0668ebc15d240ada63e2b5939f43ce7c97" -dependencies = [ - "cc", - "tree-sitter-language", -] - [[package]] name = "tree_magic_mini" version = "3.2.2" @@ -9273,7 +9410,7 @@ dependencies = [ "flate2", "log", "percent-encoding", - "rustls", + "rustls 0.23.37", 
"rustls-pki-types", "serde", "serde_json", @@ -9371,7 +9508,7 @@ name = "vectorscan-rs" version = "0.0.6" dependencies = [ "bitflags 2.11.0", - "foreign-types", + "foreign-types 0.5.0", "libc", "thiserror 1.0.69", "vectorscan-rs-sys", @@ -9637,7 +9774,7 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fe985f41e291eecef5e5c0770a18d28390addb03331c043964d9e916453d6f16" dependencies = [ - "core-foundation", + "core-foundation 0.10.1", "jni 0.22.4", "log", "ndk-context", @@ -10307,6 +10444,23 @@ dependencies = [ "tls_codec", ] +[[package]] +name = "x509-parser" +version = "0.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7069fba5b66b9193bd2c5d3d4ff12b839118f6bcbef5328efafafb5395cf63da" +dependencies = [ + "asn1-rs", + "data-encoding", + "der-parser", + "lazy_static", + "nom 7.1.3", + "oid-registry", + "rusticata-macros", + "thiserror 1.0.69", + "time", +] + [[package]] name = "xattr" version = "1.6.1" @@ -10355,7 +10509,7 @@ dependencies = [ "proc-macro2", "quote", "syn 2.0.117", - "synstructure", + "synstructure 0.13.2", ] [[package]] @@ -10396,7 +10550,7 @@ dependencies = [ "proc-macro2", "quote", "syn 2.0.117", - "synstructure", + "synstructure 0.13.2", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 37e6d19..a9cbb83 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -48,7 +48,7 @@ http = "1.4" [package] name = "kingfisher" -version = "1.94.0" +version = "1.95.0" description = "MongoDB's blazingly fast and accurate secret scanning and validation tool" edition.workspace = true rust-version.workspace = true @@ -168,28 +168,11 @@ reqwest-middleware = "0.5.1" reqwest-middleware-octorust = { package = "reqwest-middleware", version = "0.4.2" } tracing-subscriber = {version = "0.3.22", features = ["env-filter"] } tracing-core = "0.1.35" -tree-sitter = "0.26.5" aws-smithy-http-client = "1.1.10" aws-smithy-runtime-api = "1.11.4" aws-smithy-types = "1.4.4" -tree-sitter-bash = "0.25.1" 
-tree-sitter-c = "0.24.1" -tree-sitter-c-sharp = "0.23.1" -tree-sitter-cpp = "0.23.4" -tree-sitter-css = "0.25.0" -tree-sitter-go = "0.25.0" -tree-sitter-html = "0.23.2" -tree-sitter-java = "0.23.5" -tree-sitter-javascript = "0.25.0" -tree-sitter-php = "0.24.2" -tree-sitter-python = "0.25.0" -tree-sitter-ruby = "0.23.1" -tree-sitter-rust = "0.24.0" -tree-sitter-toml-ng = "0.7.0" -tree-sitter-typescript = "0.23.2" -tree-sitter-yaml = "0.7.2" -streaming-iterator = "0.1.9" -tree-sitter-regex = "0.25.0" +cssparser = { version = "0.37.0", default-features = false } +tl = "0.7.8" tree_magic_mini = "3.2" content_inspector = "0.2.4" rustc-hash = "2.1.1" @@ -223,10 +206,10 @@ bloomfilter = "3.0.1" uuid = "1.19.0" rand = "0.10.0" percent-encoding = "2.3.2" -self_update = { version = "0.43.1", default-features = false, features = ["reqwest", "rustls", "archive-tar", "archive-zip", "compression-flate2"] } +self_update = { version = "0.44.0", default-features = false, features = ["reqwest", "rustls", "archive-tar", "archive-zip", "compression-flate2"] } semver = "1.0.27" globset = "0.4.18" -jsonwebtoken = { version = "10.2.0", features = ["aws-lc-rs"] } +jsonwebtoken = { version = "10.3.0", features = ["aws-lc-rs"] } ipnet = "2.11.0" gouqi = { version = "0.20.0", features = ["async"] } oci-client = { version = "0.16", default-features = false, features = ["rustls-tls"] } diff --git a/README.md b/README.md index decb35a..d2ddc61 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ License - Detection Rules + Detection Rules
@@ -17,7 +17,7 @@ Kingfisher is an open source secret scanner and **live secret validation** tool built in Rust. -It combines Intel's SIMD-accelerated regex engine (Hyperscan) with language-aware parsing to achieve high accuracy at massive scale, and **ships with 700+ built-in rules** to detect, **validate**, and triage leaked API keys, tokens, and credentials before they ever reach production. +It combines Intel's SIMD-accelerated regex engine (Hyperscan) with language-aware parsing to achieve high accuracy at massive scale, and **ships with 800+ built-in rules** to detect, **validate**, and triage leaked API keys, tokens, and credentials before they ever reach production. Designed for offensive security engineers and blue-team defenders alike, Kingfisher helps you scan repositories, cloud storage, chat, docs, and CI pipelines to find and verify exposed secrets quickly. @@ -49,9 +49,9 @@ Kingfisher is a high-performance, open source secret detection tool for source c -### Performance, Accuracy, and 700+ Rules +### Performance, Accuracy, and 800+ Rules - **Performance**: multithreaded, Hyperscan‑powered scanning built for huge codebases -- **Extensible rules**: 700+ built-in rules plus YAML-defined custom rules ([docs/RULES.md](/docs/RULES.md)) +- **Extensible rules**: 800+ built-in rules plus YAML-defined custom rules ([docs/RULES.md](/docs/RULES.md)) - **Validate & Revoke**: live validation of discovered secrets, plus direct revocation for supported platforms (GitHub, GitLab, Slack, AWS, GCP, and more) ([docs/USAGE.md](/docs/USAGE.md)) - **Revocation support matrix**: current built-in revocation coverage across providers and rule IDs ([docs/REVOCATION_PROVIDERS.md](/docs/REVOCATION_PROVIDERS.md)) - **Blast Radius Mapping**: instantly map leaked keys to their effective cloud identities and exposed resources with `--access-map`. Supports 39 providers (see table below). 
@@ -345,7 +345,7 @@ gh attestation verify kingfisher-linux-x64.tgz --repo mongodb/kingfisher # Detection Rules -Kingfisher ships with [700+ built-in rules](crates/kingfisher-rules/data/rules/) covering cloud keys, AI tokens, CI/CD secrets, database credentials, and SaaS API keys. Below is an overview — see the full list in [crates/kingfisher-rules/data/rules/](crates/kingfisher-rules/data/rules/): +Kingfisher ships with [800+ built-in rules](crates/kingfisher-rules/data/rules/) covering cloud keys, AI tokens, CI/CD secrets, database credentials, and SaaS API keys. Below is an overview — see the full list in [crates/kingfisher-rules/data/rules/](crates/kingfisher-rules/data/rules/): | Category | What we catch | |----------|---------------| @@ -362,7 +362,7 @@ Kingfisher ships with [700+ built-in rules](crates/kingfisher-rules/data/rules/) ## Write Custom Rules -Kingfisher ships with 700+ rules with HTTP and service‑specific validation checks (AWS, Azure, GCP, etc.) to confirm if a detected string is a live credential. +Kingfisher ships with 800+ rules with HTTP and service‑specific validation checks (AWS, Azure, GCP, etc.) to confirm if a detected string is a live credential. However, you may want to add your own custom rules, or modify a detection to better suit your needs / environment. 
@@ -401,7 +401,7 @@ kingfisher scan /path/to/code kingfisher scan ~/src/myrepo --no-validate # Turbo mode: run as fast as possible by disabling Git commit metadata, Base64 decoding, -# MIME sniffing, language detection, and tree-sitter parsing +# MIME sniffing, language detection, and parser-based context verification # (findings omit commit context, Base64-only matches, MIME type, and language metadata) kingfisher scan ~/src/myrepo --turbo @@ -510,7 +510,7 @@ cat /path/to/file.py | kingfisher scan - kingfisher scan /some/file --max-file-size 500 # Turbo mode: equivalent to --commit-metadata=false --no-base64 and disables MIME sniffing, -# language detection/tree-sitter parsing for maximum speed +# language detection/parser-based context verification for maximum speed # No Git commit metadata (author, date, hash), Base64 decoding, MIME, or language metadata in findings kingfisher scan /path/to/repo --turbo @@ -725,7 +725,7 @@ kingfisher scan /tmp/repo --branch feature-1 \ | [FINGERPRINT.md](docs/FINGERPRINT.md) | Understanding finding fingerprints and deduplication | | [COMPARISON.md](docs/COMPARISON.md) | Benchmark results and performance comparisons | | [PARSING.md](docs/PARSING.md) | Language-aware parsing details | -| [TREE_SITTER.md](docs/TREE_SITTER.md) | Tree-sitter scanning flow, verification gates, and fallback behavior | +| [CONTEXT_VERIFICATION.md](docs/CONTEXT_VERIFICATION.md) | Context-verification flow, gates, and parser backends | # Library Usage @@ -751,7 +751,7 @@ Since then it has evolved far beyond that starting point, introducing live valid - **Live validation** of detected secrets directly within rules - **Hundreds of new built-in rules** and an expanded YAML rule schema - **Baseline management** to suppress known findings over time -- **Tree-sitter parsing** layered on Hyperscan for language-aware detection +- **Parser-based context verification** layered on Hyperscan for language-aware detection - **More scan targets** (GitLab, Bitbucket, 
Gitea, Jira, Confluence, Slack, Microsoft Teams, S3, GCS, Docker, Hugging Face, etc.) - **Compressed Files**, **SQLite database**, and **Python bytecode (.pyc)** scanning support - **New storage model** (in-memory + Bloom filter, replacing SQLite) diff --git a/crates/kingfisher-rules/data/rules/AGENTS.md b/crates/kingfisher-rules/data/rules/AGENTS.md index cfc2b77..0287d66 100644 --- a/crates/kingfisher-rules/data/rules/AGENTS.md +++ b/crates/kingfisher-rules/data/rules/AGENTS.md @@ -30,6 +30,7 @@ Strongly recommended fields: ## Pattern Quality Rules - Prefer specific anchors/prefixes and provider context over broad generic regex. +- Keep helper/context regex narrow. Avoid patterns that match generic URLs, hostnames, query params, or assignments without strong provider-specific constraints; broad helpers can create huge match counts and cause major memory/time regressions on large repos and git history. - When the token format is generic or common-looking (for example bare 32-hex keys), prefer contextual patterns of the form: provider keyword -> short flexible gap -> key/secret label -> short flexible gap -> token. A good default is: - `\b` - provider identifier (for example `amplitude`, `azure`, `speech`, `translator`) @@ -57,8 +58,11 @@ Strongly recommended fields: ## Validation Policy (Important) - Default: define validation logic in YAML under `validation:`. - Do not move validation logic into Rust unless YAML cannot reliably express it. -- Code-backed validation types (for example AWS, GCP, Coinbase, MongoDB) are notable exceptions and should remain rare. - For new rules, first attempt `Http`/`Grpc` YAML validation before considering exception paths. +- Typed validation kinds such as `AWS`, `AzureStorage`, `Coinbase`, `GCP`, `MongoDB`, `MySQL`, `Postgres`, `Jdbc`, and `JWT` are schema-level validator families. Use them when an existing typed validator already matches the problem. 
+- `validation: { type: Raw, content: }` is the ad-hoc exception path for provider-specific or protocol-specific flows that cannot be expressed cleanly in YAML. Raw implementations live in `crates/kingfisher-scanner/src/validation/raw.rs`. +- When Rust validation is unavoidable for a one-off provider, prefer adding a raw validator instead of inventing a new typed validator. +- Do not convert existing typed validators to `Raw` just for consistency. ## Revocation Policy - If a rule has validation and the provider API safely supports revocation, add `revocation:` in the same YAML rule. @@ -70,7 +74,7 @@ Strongly recommended fields: 1. Choose the target provider file (or add a new provider file if no suitable file exists). 2. Copy a structurally similar rule from this directory. 3. Implement/adjust `pattern`, `examples`, and filtering (`pattern_requirements`, `min_entropy`). -4. Add YAML `validation` (default path). +4. Add YAML `validation` (default path). Prefer `Http`/`Grpc`; if that fails, use an existing typed validator or `type: Raw` only when justified. 5. Add YAML `revocation` when supported. 6. Add `references` for token format/API behavior. 7. Verify locally (below). @@ -80,6 +84,9 @@ Strongly recommended fields: - `cargo test -p kingfisher-rules` - Broader regression check: - `cargo test --workspace --all-targets` +- Match-volume check on a realistic large target: + - `kingfisher scan --rule-stats` + - Review unexpected high-match helper/generic rules before submitting. - **Warning-free build**: `cargo check` (or `make darwin` / `make linux`) must produce zero warnings. Address all `dead_code`, `unused_*`, and other warnings before submitting. Use `#[allow(dead_code)]` on individual struct fields kept for deserialization completeness, and remove truly unused code. 
- Behavioral check against sample content: - `kingfisher scan ./testdata --rule --rule-stats` diff --git a/crates/kingfisher-rules/data/rules/adobe.yml b/crates/kingfisher-rules/data/rules/adobe.yml index ec72a85..02ea4cb 100644 --- a/crates/kingfisher-rules/data/rules/adobe.yml +++ b/crates/kingfisher-rules/data/rules/adobe.yml @@ -70,7 +70,58 @@ rules: examples: - | { - "client_credentials": { - "client_id": "a65b0146769d433a835f36660881db50", - "client_secret": "p8e-ibndcvsmAp9ZgPBZ606FSlYIZVlsZ-g5" - }, \ No newline at end of file + "adobe_client_credentials": { + "client_id": "a65b0146769d433a835f36660881db50", + "client_secret": "p8e-ibndcvsmAp9ZgPBZ606FSlYIZVlsZ-g5" + } + } + depends_on_rule: + - rule_id: "kingfisher.adobe.4" + variable: ADOBE_CLIENT_ID + validation: + type: Http + content: + request: + method: POST + url: https://ims-na1.adobelogin.com/ims/token/v3 + headers: + Authorization: 'Basic {{ ADOBE_CLIENT_ID | append: ":" | append: TOKEN | b64enc }}' + Content-Type: application/x-www-form-urlencoded + Accept: application/json + body: 'code=invalid_code&grant_type=authorization_code' + response_matcher: + - report_response: true + - type: StatusMatch + status: [400] + - type: WordMatch + words: + - invalid_client + negative: true + # Revocation not added: Adobe documents revocation for access and refresh + # tokens, not for the OAuth client secret itself. + references: + - https://developer.adobe.com/developer-console/docs/guides/authentication/UserAuthentication/ims + + - name: Adobe OAuth Client ID + id: kingfisher.adobe.4 + pattern: | + (?xi) + \b + adobe + (?:.|[\n\r]){0,64}? + client_id + (?:.|[\n\r]){0,16}? 
+ ( + [a-f0-9]{32} + ) + \b + min_entropy: 3.0 + visible: false + examples: + - | + { + "adobe_client_credentials": { + "client_id": "a65b0146769d433a835f36660881db50", + "client_secret": "p8e-ibndcvsmAp9ZgPBZ606FSlYIZVlsZ-g5" + } + } diff --git a/crates/kingfisher-rules/data/rules/agora.yml b/crates/kingfisher-rules/data/rules/agora.yml index 61e1edf..2a328dc 100644 --- a/crates/kingfisher-rules/data/rules/agora.yml +++ b/crates/kingfisher-rules/data/rules/agora.yml @@ -47,6 +47,28 @@ rules: examples: - "agora.app_certificate=397a3af3db1950bdbd84f4e4ec18ebef" - "agora.app_secret = \"127a3af3db1950b8dbd4fe440c28ebef\"" + validation: + type: Http + content: + request: + method: GET + url: https://api.agora.io/dev/v1/projects + headers: + Accept: application/json + Authorization: "Basic {{ AGORA_ID | append: ':' | append: TOKEN | b64enc }}" + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: JsonValid + - type: WordMatch + match_all_words: false + words: + - '"projects"' + - '"vendor_key"' + depends_on_rule: + - rule_id: kingfisher.agora.1 + variable: AGORA_ID references: + - https://docs.agora.io/en/voice-calling/reference/agora-console-rest-api - https://docs.agora.io/en/rtc/restfulapi - - https://docs.agora.io/en/video-calling/reference/authentication-workflow diff --git a/crates/kingfisher-rules/data/rules/amazonoauth.yml b/crates/kingfisher-rules/data/rules/amazonoauth.yml new file mode 100644 index 0000000..b83242b --- /dev/null +++ b/crates/kingfisher-rules/data/rules/amazonoauth.yml @@ -0,0 +1,19 @@ +rules: + - name: Login with Amazon OAuth Client ID + id: kingfisher.amazonoauth.1 + pattern: | + (?x) + \b + ( + amzn1\.application-oa2-client\.[a-f0-9]{20,40} + ) + \b + pattern_requirements: + min_digits: 4 + min_entropy: 3.0 + confidence: high + categories: [api, key] + examples: + - 'AMAZON_CLIENT_ID=amzn1.application-oa2-client.1a2b3c4d5e6f7890abcdef1234567890' + references: + - 
https://developer.amazon.com/docs/login-with-amazon/authorization-code-grant.html diff --git a/crates/kingfisher-rules/data/rules/asaas.yml b/crates/kingfisher-rules/data/rules/asaas.yml new file mode 100644 index 0000000..1746083 --- /dev/null +++ b/crates/kingfisher-rules/data/rules/asaas.yml @@ -0,0 +1,47 @@ +rules: + - name: Asaas API Token + id: kingfisher.asaas.1 + pattern: | + (?x) + ( + \$aact_(?:prod|hmlg)_[a-zA-Z0-9_-]{20,100} + ) + pattern_requirements: + min_digits: 2 + min_entropy: 3.5 + confidence: high + categories: [api, key] + examples: + - 'ASAAS_API_KEY=$aact_prod_abcdefghijklmnop1234567890ABCDEF' + - 'api_token: $aact_hmlg_abcdefghijklmnop1234567890ABCDEF' + validation: + type: Http + content: + request: + method: GET + url: > + {%- if TOKEN contains "$aact_hmlg_" -%} + https://api-sandbox.asaas.com/v3/myAccount/commercialInfo/ + {%- else -%} + https://api.asaas.com/v3/myAccount/commercialInfo/ + {%- endif -%} + headers: + Accept: application/json + User-Agent: kingfisher + access_token: "{{ TOKEN }}" + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: JsonValid + - type: WordMatch + match_all_words: true + words: + - '"object"' + - '"commercialInfo"' + # Revocation not added: Asaas documents key deletion in the dashboard and + # parent-driven sub-account key management, but not a self-revoke endpoint + # for the current access_token alone. 
+ references: + - https://docs.asaas.com/docs/authentication-2 + - https://docs.asaas.com/docs/change-the-name-of-a-business-subaccount-via-api diff --git a/crates/kingfisher-rules/data/rules/asana.yml b/crates/kingfisher-rules/data/rules/asana.yml index 0def56b..0c767d9 100644 --- a/crates/kingfisher-rules/data/rules/asana.yml +++ b/crates/kingfisher-rules/data/rules/asana.yml @@ -41,6 +41,32 @@ rules: examples: - "asana :'20c2F0d03201af478ca1aBE9515A1A4FEfb'" - ASANA_PAT = 1234567890abcdef1234567890abcdef12 + depends_on_rule: + - rule_id: kingfisher.asana.1 + variable: ASANA_CLIENT_ID + validation: + type: Http + content: + request: + method: POST + url: https://app.asana.com/-/oauth_token + headers: + Content-Type: application/x-www-form-urlencoded + Accept: application/json + body: > + grant_type=authorization_code&client_id={{ ASANA_CLIENT_ID | url_encode }}&client_secret={{ TOKEN | url_encode }}&redirect_uri={{ "https://example.com/oauth/callback" | url_encode }}&code=invalid_code + response_matcher: + - report_response: true + - type: StatusMatch + status: [400] + - type: WordMatch + words: + - invalid_client + negative: true + # Revocation not added: Asana's revoke endpoint deauthorizes refresh tokens, + # not OAuth client secrets. 
+ references: + - https://developers.asana.com/docs/oauth - name: Asana OAuth / Personal Access Token (Legacy) id: kingfisher.asana.3 diff --git a/crates/kingfisher-rules/data/rules/azure.yml b/crates/kingfisher-rules/data/rules/azure.yml index 45fb88d..dd782e1 100644 --- a/crates/kingfisher-rules/data/rules/azure.yml +++ b/crates/kingfisher-rules/data/rules/azure.yml @@ -70,6 +70,30 @@ rules: - | if __name__ == "__main__": ado_pat = "iyfmob6xjrfmit67anxbot64umfx2clwx7dz5ynxi4q2z3uqegvq" + validation: + type: Http + content: + request: + method: GET + url: https://app.vssps.visualstudio.com/_apis/profile/profiles/me?api-version=7.1 + headers: + Authorization: 'Basic {{ ":" | append: TOKEN | b64enc }}' + Accept: application/json + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: JsonValid + - type: WordMatch + match_all_words: true + words: + - '"id"' + - '"displayName"' + # Revocation not added: Azure DevOps PAT lifecycle management is documented + # separately and is not a self-revoke flow driven solely by the PAT itself. 
+ references: + - https://learn.microsoft.com/en-us/azure/devops/organizations/accounts/use-personal-access-tokens-to-authenticate?view=azure-devops + - https://learn.microsoft.com/en-us/rest/api/azure/devops/profile/profiles/get?view=azure-devops-rest-7.1 - name: Azure Container Registry URL id: kingfisher.azure.4 pattern: | @@ -114,3 +138,25 @@ rules: variable: ACR_USERNAME references: - https://learn.microsoft.com/en-us/azure/container-registry/container-registry-authentication + + - name: Azure AD Client Secret (Microsoft Entra ID) + id: kingfisher.azure.6 + pattern: | + (?x) + (?:^|['"\x60\s>=:(,]) + ( + [a-zA-Z0-9_~.]{3} + \d + Q~ + [a-zA-Z0-9_~.\-]{31,34} + ) + (?:$|['"\x60\s<),]) + pattern_requirements: + min_digits: 1 + min_entropy: 3.5 + confidence: medium + examples: + - '"aBc4Q~xY9kLmNpQrStUvWxYz01234567890abcd"' + - 'AZURE_CLIENT_SECRET=xY14Q~abcdefghijklmnopqrstuvwxyz01234' + references: + - https://learn.microsoft.com/en-us/entra/identity-platform/quickstart-register-app diff --git a/crates/kingfisher-rules/data/rules/azureapim.yml b/crates/kingfisher-rules/data/rules/azureapim.yml new file mode 100644 index 0000000..8ebe0da --- /dev/null +++ b/crates/kingfisher-rules/data/rules/azureapim.yml @@ -0,0 +1,64 @@ +rules: + - name: Azure API Management Subscription Key + id: kingfisher.azureapim.1 + pattern: | + (?x) + \b + (?: + (?i:(?:apim|api[_\s-]*management)[_\s-]*(?:subscription[_\s-]*key|key)) + | + (?i:Ocp-Apim-Subscription-Key) + ) + (?:.|[\n\r]){0,16}? 
+ ( + [a-f0-9]{32} + ) + \b + pattern_requirements: + min_digits: 2 + min_lowercase: 2 + min_entropy: 3.5 + confidence: medium + categories: [api, key] + examples: + - 'Ocp-Apim-Subscription-Key: 1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6d' + - 'APIM_SUBSCRIPTION_KEY=abcdef0123456789abcdef0123456789' + validation: + type: Http + content: + request: + method: GET + url: "{{ APIM_URL }}" + headers: + Ocp-Apim-Subscription-Key: "{{ TOKEN }}" + response_is_html: true + response_matcher: + - report_response: true + - type: StatusMatch + status: [200, 201, 202, 204, 400, 404, 405] + - type: StatusMatch + status: [401, 403] + negative: true + depends_on_rule: + - rule_id: kingfisher.azureapim.2 + variable: APIM_URL + references: + - https://learn.microsoft.com/en-us/azure/api-management/api-management-subscriptions + + - name: Azure API Management Gateway URL + id: kingfisher.azureapim.2 + pattern: | + (?xi) + \b + ( + https://[a-z0-9-]+(?:\.developer)?\.azure-api\.net(?:/[^\s"'<>]{0,200})? + ) + min_entropy: 1.0 + confidence: medium + visible: false + examples: + - https://contoso.azure-api.net/echo + - APIM_URL=https://contoso.developer.azure-api.net/api + references: + - https://learn.microsoft.com/en-us/azure/api-management/api-management-subscriptions + - https://learn.microsoft.com/en-us/troubleshoot/azure/api-mgmt/availability/unauthorized-errors-invoke-apis diff --git a/crates/kingfisher-rules/data/rules/azurebatch.yml b/crates/kingfisher-rules/data/rules/azurebatch.yml new file mode 100644 index 0000000..df34ba2 --- /dev/null +++ b/crates/kingfisher-rules/data/rules/azurebatch.yml @@ -0,0 +1,71 @@ +rules: + - name: Azure Batch Account Key + id: kingfisher.azurebatch.1 + pattern: | + (?x) + \b + (?: + (?i:azure[_\s-]*batch[_\s-]*(?:key|account[_\s-]*key|access[_\s-]*key)) + | + (?i:batch[_\s-]*account[_\s-]*key) + ) + (?:.|[\n\r]){0,16}? 
+ ( + [A-Za-z0-9+/]{86}== + ) + pattern_requirements: + min_digits: 2 + min_uppercase: 2 + min_lowercase: 2 + min_special_chars: 1 + min_entropy: 4.0 + confidence: medium + categories: [api, key] + examples: + - 'AZURE_BATCH_KEY=oqb4TdY9T0hphvktd5fJnMiHuQqzVy1jd5sSuOpAbGkaoqTlrHl0BOJN2okcasinVLOJzfDbZo1L+ASt68RAhA==' + validation: + type: Http + content: + request: + method: GET + url: '{{ BATCH_URL }}/applications?api-version=2020-09-01.12.0' + headers: + Accept: application/json + Content-Type: application/json + Date: '{{ REQUEST_RFC1123_DATE }}' + Authorization: | + {%- assign host = BATCH_URL | split: "://" | last | split: "/" | first -%} + {%- assign account_name = host | split: "." | first -%} + {%- assign resource_path = "/" | append: account_name | append: "/applications" | downcase -%} + {%- assign string_to_sign = "GET\n\n\n\n\napplication/json\n" | append: REQUEST_RFC1123_DATE | append: "\n\n\n\n\n\n" | append: resource_path | append: "\napi-version:2020-09-01.12.0" -%} + {%- assign signature = string_to_sign | hmac_sha256_b64key: TOKEN -%} + SharedKey {{ account_name }}:{{ signature }} + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: JsonValid + depends_on_rule: + - rule_id: kingfisher.azurebatch.2 + variable: BATCH_URL + references: + - https://learn.microsoft.com/en-us/azure/batch/batch-account-create-portal + - https://learn.microsoft.com/en-us/rest/api/batchservice/authenticate-requests-to-the-azure-batch-service + + - name: Azure Batch Account Endpoint + id: kingfisher.azurebatch.2 + pattern: | + (?xi) + \b + ( + https://[a-z0-9-]+\.[a-z0-9-]+\.batch\.azure\.com + ) + \b + min_entropy: 1.0 + confidence: medium + visible: false + examples: + - BATCH_URL=https://mybatch.westus.batch.azure.com + - batchAccountUrl="https://contoso-prod.eastus.batch.azure.com" + references: + - https://learn.microsoft.com/en-us/rest/api/batchservice/authenticate-requests-to-the-azure-batch-service diff --git 
a/crates/kingfisher-rules/data/rules/azurecognitive.yml b/crates/kingfisher-rules/data/rules/azurecognitive.yml new file mode 100644 index 0000000..41812a5 --- /dev/null +++ b/crates/kingfisher-rules/data/rules/azurecognitive.yml @@ -0,0 +1,46 @@ +rules: + - name: Azure Cognitive Services / AI Services Key + id: kingfisher.azurecognitive.1 + pattern: | + (?x) + \b + (?: + (?i:azure[_\s-]*(?:cognitive|ai)[_\s-]*(?:service|key)) + | + (?i:cognitive[_\s-]*service[_\s-]*(?:key|secret|subscription)) + | + (?i: + (?:azure[_\s-]*)?(?:anomaly[_\s-]*detector|computer[_\s-]*vision|content[_\s-]*moderator|content[_\s-]*safety + |custom[_\s-]*vision|face[_\s-]*(?:api)?|form[_\s-]*recognizer|document[_\s-]*intelligence + |immersive[_\s-]*reader|language[_\s-]*understanding|luis + |personalizer|qna[_\s-]*maker|text[_\s-]*analytics|video[_\s-]*indexer + |metrics[_\s-]*advisor|health[_\s-]*insights|cognitive[_\s-]*service|ai[_\s-]*service) + [_\s-]*(?:key|api[_\s-]*key|subscription[_\s-]*key|secret) + ) + ) + \b + (?:.|[\n\r]){0,16}? 
+ ( + [a-f0-9]{32} + ) + \b + pattern_requirements: + min_digits: 2 + min_lowercase: 2 + min_entropy: 3.5 + confidence: medium + categories: [api, key] + examples: + - AZURE_COGNITIVE_SERVICE_KEY=1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6d + - AZURE_COMPUTER_VISION_KEY=abcdef0123456789abcdef0123456789 + - content_moderator_key="1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6d" + - AZURE_FACE_KEY=abcdef0123456789abcdef0123456789 + - AZURE_FORM_RECOGNIZER_KEY=1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6d + - AZURE_LUIS_KEY=abcdef0123456789abcdef0123456789 + - AZURE_QNA_MAKER_KEY=1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6d + - AZURE_TEXT_ANALYTICS_KEY=abcdef0123456789abcdef0123456789 + references: + - https://learn.microsoft.com/en-us/azure/ai-services/ + - https://learn.microsoft.com/en-us/azure/ai-services/computer-vision/ + - https://learn.microsoft.com/en-us/azure/ai-services/content-moderator/ + - https://learn.microsoft.com/en-us/azure/ai-services/luis/ diff --git a/crates/kingfisher-rules/data/rules/azurecommunication.yml b/crates/kingfisher-rules/data/rules/azurecommunication.yml new file mode 100644 index 0000000..ce44bbf --- /dev/null +++ b/crates/kingfisher-rules/data/rules/azurecommunication.yml @@ -0,0 +1,20 @@ +rules: + - name: Azure Communication Services Connection String + id: kingfisher.azurecommunication.1 + pattern: | + (?x) + (?i:endpoint=https://(?:[a-z0-9-]+\.)?(?:communication|comm)\.azure\.com/;accesskey=) + ( + [A-Za-z0-9+/]{40,90}={0,2} + ) + pattern_requirements: + min_digits: 2 + min_uppercase: 2 + min_special_chars: 1 + min_entropy: 3.5 + confidence: high + categories: [api, key] + examples: + - 'endpoint=https://myresource.communication.azure.com/;accesskey=AbCdEfGhIjKlMnOpQrStUvWxYz1234567890ABCDEFGHIJKLMNOPQRS+/==' + references: + - https://learn.microsoft.com/en-us/azure/communication-services/ diff --git a/crates/kingfisher-rules/data/rules/azurecosmosdb.yml b/crates/kingfisher-rules/data/rules/azurecosmosdb.yml new file mode 100644 index 0000000..c1cf6e5 --- /dev/null +++ 
b/crates/kingfisher-rules/data/rules/azurecosmosdb.yml @@ -0,0 +1,77 @@ +rules: + - name: Azure CosmosDB Account Key + id: kingfisher.azurecosmosdb.1 + pattern: | + (?x) + \b + (?: + (?i:cosmos(?:db)?[_\s-]*(?:key|account[_\s-]*key|primary[_\s-]*key|secondary[_\s-]*key|master[_\s-]*key)) + | + (?i:azure[_\s-]*cosmos(?:db)?[_\s-]*(?:key|account_key|primary_key|master_key)) + | + (?i:documentdb(?:authkey|key)) + ) + (?:.|[\n\r]){0,16}? + ( + [A-Za-z0-9+/]{86}== + ) + pattern_requirements: + min_digits: 2 + min_uppercase: 2 + min_lowercase: 2 + min_special_chars: 1 + min_entropy: 4.0 + confidence: medium + categories: [api, key] + examples: + - AZURE_COSMOSDB_KEY=oqb4TdY9T0hphvktd5fJnMiHuQqzVy1jd5sSuOpAbGkaoqTlrHl0BOJN2okcasinVLOJzfDbZo1L+ASt68RAhA== + - 'DocumentDbAuthKey=B/1EVX2Ui47X09tqU3GI/j+Nko9r5COPm0Hea9tfzitF9MQX9lZZiNO3tYQckWnt+rtlGIWS+sCx+AStkq8ZLg==' + references: + - https://learn.microsoft.com/en-us/azure/cosmos-db/how-to-obtain-account-keys + + - name: Azure CosmosDB Connection String + id: kingfisher.azurecosmosdb.2 + pattern: | + (?x) + (?i:AccountEndpoint=(?Phttps://[a-z0-9-]+\.documents\.azure\.com(?::\d+)?)/?;) + AccountKey= + (?P + (?P + [A-Za-z0-9+/]{86}== + ) + ) + pattern_requirements: + min_digits: 2 + min_uppercase: 2 + min_special_chars: 1 + min_entropy: 4.0 + confidence: high + categories: [api, key] + examples: + - 'AccountEndpoint=https://myaccount.documents.azure.com:443;AccountKey=oqb4TdY9T0hphvktd5fJnMiHuQqzVy1jd5sSuOpAbGkaoqTlrHl0BOJN2okcasinVLOJzfDbZo1L+ASt68RAhA==;' + validation: + type: Http + content: + request: + method: GET + url: "{{ COSMOS_ENDPOINT }}/dbs" + headers: + Accept: application/json + x-ms-date: '{{ REQUEST_RFC1123_DATE | downcase }}' + x-ms-version: "2018-12-31" + Authorization: | + {%- assign x_ms_date = REQUEST_RFC1123_DATE | downcase -%} + {%- assign string_to_sign = "get\ndbs\n\n" | append: x_ms_date | append: "\n\n" -%} + {%- assign signature = string_to_sign | hmac_sha256_b64key: TOKEN | url_encode -%} + 
type=master&ver=1.0&sig={{ signature }} + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: JsonValid + - type: WordMatch + words: + - '"Databases"' + references: + - https://learn.microsoft.com/en-us/azure/cosmos-db/how-to-obtain-account-keys + - https://learn.microsoft.com/en-us/rest/api/cosmos-db/access-control-on-cosmosdb-resources diff --git a/crates/kingfisher-rules/data/rules/azureeventgrid.yml b/crates/kingfisher-rules/data/rules/azureeventgrid.yml new file mode 100644 index 0000000..12b1f19 --- /dev/null +++ b/crates/kingfisher-rules/data/rules/azureeventgrid.yml @@ -0,0 +1,28 @@ +rules: + - name: Azure Event Grid Key + id: kingfisher.azureeventgrid.1 + pattern: | + (?x) + \b + (?: + (?i:event[_\s-]*grid[_\s-]*(?:key|access[_\s-]*key|topic[_\s-]*key)) + | + (?i:azure_event_grid[_\s-]*(?:key|access_key|topic_key)) + | + (?i:aeg-sas-key) + ) + (?:.|[\n\r]){0,16}? + ( + [A-Za-z0-9+/]{40,50}={0,2} + ) + pattern_requirements: + min_digits: 2 + min_uppercase: 2 + min_entropy: 3.5 + confidence: medium + categories: [api, key] + examples: + - 'AZURE_EVENT_GRID_KEY=AbCdEfGhIjKlMnOpQrStUvWxYz1234567890ABCDEFG==' + - 'aeg-sas-key: AbCdEfGhIjKlMnOpQrStUvWxYz1234567890ABCDEFG==' + references: + - https://learn.microsoft.com/en-us/azure/event-grid/ diff --git a/crates/kingfisher-rules/data/rules/azurefunctionkey.yml b/crates/kingfisher-rules/data/rules/azurefunctionkey.yml new file mode 100644 index 0000000..f0eddaf --- /dev/null +++ b/crates/kingfisher-rules/data/rules/azurefunctionkey.yml @@ -0,0 +1,56 @@ +rules: + - name: Azure Function Key in URL + id: kingfisher.azurefunctionkey.1 + pattern: | + (?x) + ( + (?i:https://[a-z0-9-]+\.azurewebsites\.net/api/)[a-zA-Z0-9_-]+ + (?i:\?code=)[a-zA-Z0-9_/+=-]{20,100} + ) + pattern_requirements: + min_digits: 2 + min_entropy: 3.5 + confidence: medium + categories: [api, key] + examples: + - 
'https://myfunc.azurewebsites.net/api/HttpTrigger1?code=AbCdEfGhIjKlMnOpQrStUvWxYz1234567890/+==' + validation: + type: Http + content: + request: + method: GET + url: "{{ TOKEN }}" + response_matcher: + - report_response: true + - type: StatusMatch + status: [200, 202, 204, 400, 404, 405] + - type: StatusMatch + status: [401, 403] + negative: true + references: + - https://learn.microsoft.com/en-us/azure/azure-functions/functions-bindings-http-webhook-trigger + + - name: Azure Function Master/Host Key + id: kingfisher.azurefunctionkey.2 + pattern: | + (?x) + \b + (?: + (?i:azure[_\s-]*function[_\s-]*(?:key|master[_\s-]*key|host[_\s-]*key)) + | + (?i:x-functions-key) + ) + (?:.|[\n\r]){0,16}? + ( + [a-zA-Z0-9_/+=-]{40,100} + ) + pattern_requirements: + min_digits: 2 + min_entropy: 3.5 + confidence: medium + categories: [api, key] + examples: + - 'AZURE_FUNCTION_KEY=AbCdEfGhIjKlMnOpQrStUvWxYz1234567890ABCDEFGH/+==' + - 'x-functions-key: AbCdEfGhIjKlMnOpQrStUvWxYz1234567890ABCDEFGH' + references: + - https://learn.microsoft.com/en-us/azure/azure-functions/functions-bindings-http-webhook-trigger diff --git a/crates/kingfisher-rules/data/rules/azurelogicapps.yml b/crates/kingfisher-rules/data/rules/azurelogicapps.yml new file mode 100644 index 0000000..380fca8 --- /dev/null +++ b/crates/kingfisher-rules/data/rules/azurelogicapps.yml @@ -0,0 +1,22 @@ +rules: + - name: Azure Logic Apps SAS URL + id: kingfisher.azurelogicapps.1 + pattern: | + (?x) + ( + (?i:https://(?:[a-z0-9-]+\.)+logic\.azure\.com) + (?::\d+)?/workflows/[A-Fa-f0-9]+/triggers/[a-zA-Z0-9_-]+/paths/invoke + (?i:\?api-version=)[0-9-]+ + (?i:&sp=)[%a-zA-Z0-9/]+ + (?i:&sv=)[0-9.]+ + (?i:&sig=)[a-zA-Z0-9_/+=-]+ + ) + pattern_requirements: + min_digits: 4 + min_entropy: 3.0 + confidence: high + categories: [api, key] + examples: + - 
'https://prod-00.eastus.logic.azure.com/workflows/abcdef1234567890/triggers/manual/paths/invoke?api-version=2016-10-01&sp=%2Ftriggers%2Fmanual%2Frun&sv=1.0&sig=AbCdEfGhIjKlMnOpQrStUvWxYz123456' + references: + - https://learn.microsoft.com/en-us/azure/logic-apps/logic-apps-securing-a-logic-app diff --git a/crates/kingfisher-rules/data/rules/azuremaps.yml b/crates/kingfisher-rules/data/rules/azuremaps.yml new file mode 100644 index 0000000..762e41e --- /dev/null +++ b/crates/kingfisher-rules/data/rules/azuremaps.yml @@ -0,0 +1,42 @@ +rules: + - name: Azure Maps Subscription Key + id: kingfisher.azuremaps.1 + pattern: | + (?x) + \b + (?i:azure[_\s-]*maps[_\s-]*(?:key|subscription[_\s-]*key|api[_\s-]*key|secret)) + (?:.|[\n\r]){0,16}? + ( + [a-zA-Z0-9_-]{32,44} + ) + \b + pattern_requirements: + min_digits: 2 + min_entropy: 3.5 + confidence: medium + categories: [api, key] + examples: + - AZURE_MAPS_KEY=AbCdEfGhIjKlMnOpQrStUvWxYz123456 + validation: + type: Http + content: + request: + method: GET + url: https://atlas.microsoft.com/geocode?api-version=2025-01-01&addressLine=15127%20NE%2024th%20Street%20Redmond%20WA&countryRegion=US&subscription-key={{ TOKEN }} + headers: + Accept: application/geo+json, application/json + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: JsonValid + - type: WordMatch + match_all_words: true + words: + - '"FeatureCollection"' + - '"features"' + # Revocation not added: Azure Maps shared-key docs cover rotation and + # authentication, but I did not find a token self-revoke API. 
+ references: + - https://learn.microsoft.com/en-us/azure/azure-maps/how-to-manage-authentication + - https://learn.microsoft.com/en-us/rest/api/maps/search/get-geocoding diff --git a/crates/kingfisher-rules/data/rules/azuremixedreality.yml b/crates/kingfisher-rules/data/rules/azuremixedreality.yml new file mode 100644 index 0000000..6694b91 --- /dev/null +++ b/crates/kingfisher-rules/data/rules/azuremixedreality.yml @@ -0,0 +1,28 @@ +rules: + - name: Azure Mixed Reality / Spatial Anchors Key + id: kingfisher.azuremixedreality.1 + pattern: | + (?x) + \b + (?: + (?i: + (?:azure[_\s-]*)?(?:mixed[_\s-]*reality|spatial[_\s-]*anchors?|remote[_\s-]*rendering) + [_\s-]*(?:key|account[_\s-]*key|access[_\s-]*key) + ) + ) + (?:.|[\n\r]){0,16}? + ( + [a-f0-9]{32} + ) + \b + pattern_requirements: + min_digits: 2 + min_lowercase: 2 + min_entropy: 3.5 + confidence: medium + categories: [api, key] + examples: + - 'AZURE_SPATIAL_ANCHORS_KEY=1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6d' + - 'AZURE_REMOTE_RENDERING_KEY=abcdef0123456789abcdef0123456789' + references: + - https://learn.microsoft.com/en-us/azure/spatial-anchors/ diff --git a/crates/kingfisher-rules/data/rules/azuresastoken.yml b/crates/kingfisher-rules/data/rules/azuresastoken.yml new file mode 100644 index 0000000..448849e --- /dev/null +++ b/crates/kingfisher-rules/data/rules/azuresastoken.yml @@ -0,0 +1,50 @@ +rules: + - name: Azure SAS Token + id: kingfisher.azuresastoken.1 + pattern: | + (?x) + ( + (?i:(?:sv|SharedAccessSignature\s+sr))=[0-9]{4}-[0-9]{2}-[0-9]{2} + (?:&(?i:[a-z]{2,4})=[^&\s"']{1,200}){2,10} + (?i:&sig=)[a-zA-Z0-9%+/=]{20,100} + ) + pattern_requirements: + min_digits: 4 + min_entropy: 3.0 + confidence: medium + categories: [api, key] + examples: + - 'sv=2021-06-08&ss=bfqt&srt=sco&sp=rwdlacupiytfx&se=2024-12-31&st=2024-01-01&spr=https&sig=AbCdEfGhIjKlMnOpQrStUvWxYz1234567890%2BABCDE%3D' + references: + - https://learn.microsoft.com/en-us/azure/storage/common/storage-sas-overview + + - name: Azure SAS Token in 
URL + id: kingfisher.azuresastoken.2 + pattern: | + (?x) + ( + (?i:https://[a-z0-9-]+\.(?:blob|queue|table|file|dfs)\.core\.windows\.net/)[^\s"']* + \?[^\s"']*(?i:sig=)[a-zA-Z0-9%+/=]{20,100}[^\s"']* + ) + pattern_requirements: + min_digits: 4 + min_entropy: 3.0 + confidence: medium + categories: [api, key] + examples: + - 'https://mystorageaccount.blob.core.windows.net/mycontainer/myblob?sv=2021-06-08&st=2024-01-01&se=2024-12-31&sr=b&sp=r&sig=AbCdEfGhIjKlMnOp%2BQrStUvWxYz%3D' + validation: + type: Http + content: + request: + method: HEAD + url: "{{ TOKEN }}" + response_matcher: + - report_response: true + - type: StatusMatch + status: [200, 206, 404] + - type: StatusMatch + status: [401, 403] + negative: true + references: + - https://learn.microsoft.com/en-us/azure/storage/common/storage-sas-overview diff --git a/crates/kingfisher-rules/data/rules/azuresignalr.yml b/crates/kingfisher-rules/data/rules/azuresignalr.yml new file mode 100644 index 0000000..3b952cc --- /dev/null +++ b/crates/kingfisher-rules/data/rules/azuresignalr.yml @@ -0,0 +1,20 @@ +rules: + - name: Azure SignalR Connection String + id: kingfisher.azuresignalr.1 + pattern: | + (?x) + (?i:Endpoint=https://(?:[a-z0-9-]+\.service\.signalr\.net);AccessKey=) + ( + [A-Za-z0-9+/]{40,90}={0,2} + ) + pattern_requirements: + min_digits: 2 + min_uppercase: 2 + min_special_chars: 1 + min_entropy: 3.5 + confidence: high + categories: [api, key] + examples: + - 'Endpoint=https://myservice.service.signalr.net;AccessKey=AbCdEfGhIjKlMnOpQrStUvWxYz1234567890ABCDEFGHIJKLMNOPQRS+/==;Version=1.0;' + references: + - https://learn.microsoft.com/en-us/azure/azure-signalr/ diff --git a/crates/kingfisher-rules/data/rules/azuresql.yml b/crates/kingfisher-rules/data/rules/azuresql.yml new file mode 100644 index 0000000..e4cb7ba --- /dev/null +++ b/crates/kingfisher-rules/data/rules/azuresql.yml @@ -0,0 +1,46 @@ +rules: + - name: Azure SQL Connection String + id: kingfisher.azuresql.1 + pattern: | + (?x) + 
(?i:Server=tcp:(?:[a-z0-9-]+\.database\.windows\.net),\d+;) + (?:.|[\n\r]){0,100}? + (?i:Password=)( + [^;'"]{8,128} + ) + pattern_requirements: + min_digits: 2 + min_entropy: 3.0 + confidence: medium + categories: [api, key] + examples: + - 'Server=tcp:myserver.database.windows.net,1433;Initial Catalog=mydb;Persist Security Info=False;User ID=admin;Password=MyP@ssw0rd!123;' + references: + - https://learn.microsoft.com/en-us/azure/azure-sql/database/connect-query-content-reference-guide + + - name: Azure SQL Password Assignment + id: kingfisher.azuresql.2 + pattern: | + (?x) + \b + (?: + (?i:azure[_\s-]*sql[_\s-]*password) + | + (?i:sql_admin_password) + | + (?i:mssql_sa_password) + ) + \b + (?:.|[\n\r]){0,8}? + [=:] + \s*["']? + ([^\s"']{8,128}) + ["']? + min_entropy: 3.0 + confidence: medium + categories: [password] + examples: + - 'AZURE_SQL_PASSWORD=MyStr0ngP@ssword!' + - 'SQL_ADMIN_PASSWORD="Compl3x!Pass#2024"' + references: + - https://learn.microsoft.com/en-us/azure/azure-sql/database/connect-query-content-reference-guide diff --git a/crates/kingfisher-rules/data/rules/azurewebpubsub.yml b/crates/kingfisher-rules/data/rules/azurewebpubsub.yml new file mode 100644 index 0000000..713c46c --- /dev/null +++ b/crates/kingfisher-rules/data/rules/azurewebpubsub.yml @@ -0,0 +1,20 @@ +rules: + - name: Azure Web PubSub Connection String + id: kingfisher.azurewebpubsub.1 + pattern: | + (?x) + (?i:Endpoint=https://(?:[a-z0-9-]+\.webpubsub\.azure\.com);AccessKey=) + ( + [A-Za-z0-9+/]{40,90}={0,2} + ) + pattern_requirements: + min_digits: 2 + min_uppercase: 2 + min_special_chars: 1 + min_entropy: 3.5 + confidence: high + categories: [api, key] + examples: + - 'Endpoint=https://myservice.webpubsub.azure.com;AccessKey=AbCdEfGhIjKlMnOpQrStUvWxYz1234567890ABCDEFGHIJKLMNOPQRS+/==;Version=1.0;' + references: + - https://learn.microsoft.com/en-us/azure/azure-web-pubsub/ diff --git a/crates/kingfisher-rules/data/rules/bitfinex.yml 
b/crates/kingfisher-rules/data/rules/bitfinex.yml index d78769f..b79baed 100644 --- a/crates/kingfisher-rules/data/rules/bitfinex.yml +++ b/crates/kingfisher-rules/data/rules/bitfinex.yml @@ -47,9 +47,30 @@ rules: examples: - "bitfinex\nsecret = 8d7c3965318b8d20f7648dbda96fbfa23f4d1c449aa" - "bitfinex\napi-secret = 1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6d7e8f9a0b1d2" + validation: + type: Http + content: + request: + method: POST + url: https://api.bitfinex.com/v2/auth/r/wallets + headers: + Accept: application/json + Content-Type: application/json + bfx-apikey: "{{ BITFINEX_KEY }}" + bfx-nonce: "{{ REQUEST_UNIX_MILLIS }}" + bfx-signature: | + {%- assign request_path = "/v2/auth/r/wallets" -%} + {%- assign request_body = "{}" -%} + {%- assign signature_payload = "/api" | append: request_path | append: REQUEST_UNIX_MILLIS | append: request_body -%} + {{ signature_payload | hmac_sha384_hex: TOKEN }} + body: "{}" + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: JsonValid + depends_on_rule: + - rule_id: kingfisher.bitfinex.1 + variable: BITFINEX_KEY references: - - https://docs.bitfinex.com/docs - https://docs.bitfinex.com/docs/rest-auth - # No simple validation: Bitfinex REST API v2 uses HMAC-SHA384 - # request signing with a nonce and payload. Cannot validate with - # a static Bearer/API-key style header. diff --git a/crates/kingfisher-rules/data/rules/bitrise.yml b/crates/kingfisher-rules/data/rules/bitrise.yml new file mode 100644 index 0000000..b27ce22 --- /dev/null +++ b/crates/kingfisher-rules/data/rules/bitrise.yml @@ -0,0 +1,35 @@ +rules: + - name: Bitrise Personal Access Token + id: kingfisher.bitrise.1 + pattern: | + (?x) + \b + (?i:bitrise) + (?:.|[\n\r]){0,24}? + (?i:token|pat|personal[_\s-]*access) + (?:.|[\n\r]){0,16}? 
+ ( + [a-zA-Z0-9_-]{60,120} + ) + \b + pattern_requirements: + min_digits: 2 + min_entropy: 3.5 + confidence: medium + categories: [api, key] + examples: + - 'BITRISE_TOKEN=AbCdEfGhIjKlMnOpQrStUvWxYz1234567890AbCdEfGhIjKlMnOpQrStUvWxYz12' + validation: + type: Http + content: + request: + method: GET + url: https://api.bitrise.io/v0.1/me + headers: + Authorization: '{{ TOKEN }}' + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + references: + - https://devcenter.bitrise.io/en/api/authenticating-with-the-bitrise-api.html diff --git a/crates/kingfisher-rules/data/rules/blockprotocol.yml b/crates/kingfisher-rules/data/rules/blockprotocol.yml new file mode 100644 index 0000000..0797b1e --- /dev/null +++ b/crates/kingfisher-rules/data/rules/blockprotocol.yml @@ -0,0 +1,19 @@ +rules: + - name: Block Protocol API Key + id: kingfisher.blockprotocol.1 + pattern: | + (?x) + \b + ( + b10ck5\.[a-zA-Z0-9]{28,36}\.[a-zA-Z0-9]{32,40} + ) + \b + pattern_requirements: + min_digits: 2 + min_entropy: 3.5 + confidence: high + categories: [api, key] + examples: + - 'BLOCK_PROTOCOL_API_KEY=b10ck5.AbCdEfGhIjKlMnOpQrStUvWxYz1234.AbCdEfGhIjKlMnOpQrStUvWxYz12345678' + references: + - https://blockprotocol.org/docs/hub/api diff --git a/crates/kingfisher-rules/data/rules/branchio.yml b/crates/kingfisher-rules/data/rules/branchio.yml index 04cf378..7a0a655 100644 --- a/crates/kingfisher-rules/data/rules/branchio.yml +++ b/crates/kingfisher-rules/data/rules/branchio.yml @@ -45,6 +45,20 @@ rules: - 'branch.init("key_test_plqYW3Aq9Xija1cobGMieipndBzO5y7J");' references: - https://help.branch.io/developers-hub/docs/deep-linking-api + - https://help.branch.io/apidocs/app-api + depends_on_rule: + - rule_id: kingfisher.branchio.3 + variable: BRANCH_SECRET + validation: + type: Http + content: + request: + method: GET + url: "https://api2.branch.io/v1/app/{{ TOKEN }}?branch_secret={{ BRANCH_SECRET }}" + response_matcher: + - report_response: true + - type: 
StatusMatch + status: [200] - name: Branch.io Secret id: kingfisher.branchio.3 diff --git a/crates/kingfisher-rules/data/rules/canva.yml b/crates/kingfisher-rules/data/rules/canva.yml new file mode 100644 index 0000000..e8d84cb --- /dev/null +++ b/crates/kingfisher-rules/data/rules/canva.yml @@ -0,0 +1,20 @@ +rules: + - name: Canva Connect API Client Secret + id: kingfisher.canva.1 + pattern: | + (?x) + \b + ( + cnvca[a-zA-Z0-9_-]{20,80} + ) + \b + pattern_requirements: + min_digits: 2 + min_entropy: 3.5 + confidence: high + categories: [api, key] + examples: + - 'CANVA_CLIENT_SECRET=cnvcaAbCdEfGhIjKlMnOpQrStUvWxYz123456' + references: + - https://www.canva.dev/docs/connect/authentication/ + - https://www.canva.dev/docs/connect/guidelines/security/ diff --git a/crates/kingfisher-rules/data/rules/cfxre.yml b/crates/kingfisher-rules/data/rules/cfxre.yml new file mode 100644 index 0000000..f0325d2 --- /dev/null +++ b/crates/kingfisher-rules/data/rules/cfxre.yml @@ -0,0 +1,19 @@ +rules: + - name: Cfx.re FiveM Server Key + id: kingfisher.cfxre.1 + pattern: | + (?x) + \b + ( + cfxk_[a-zA-Z0-9_-]{20,100} + ) + \b + pattern_requirements: + min_digits: 2 + min_entropy: 3.5 + confidence: high + categories: [api, key] + examples: + - 'sv_licenseKey "cfxk_AbCdEfGhIjKlMnOpQrStUvWxYz1234567890_abcdef"' + references: + - https://docs.fivem.net/docs/server-manual/setting-up-a-server/ diff --git a/crates/kingfisher-rules/data/rules/cockroachlabs.yml b/crates/kingfisher-rules/data/rules/cockroachlabs.yml new file mode 100644 index 0000000..a4b8d7e --- /dev/null +++ b/crates/kingfisher-rules/data/rules/cockroachlabs.yml @@ -0,0 +1,51 @@ +rules: + - name: CockroachDB Cloud API Key + id: kingfisher.cockroachlabs.1 + pattern: | + (?x) + \b + (?: + (?i:cockroach(?:db)?(?:cloud)?) + (?:.|[\n\r]){0,24}? + (?i:api[_\s-]*key|secret|token) + | + (?i:CC_API_KEY) + ) + (?:.|[\n\r]){0,16}? 
+ ( + [A-Z0-9_]{20,60} + ) + \b + pattern_requirements: + min_digits: 2 + min_uppercase: 4 + min_entropy: 3.0 + confidence: medium + categories: [api, key] + examples: + - 'COCKROACHDB_API_KEY=B81649_8F7D11A_92BCE13_56782D_C53' + validation: + type: Http + content: + request: + method: GET + url: https://cockroachlabs.cloud/api/v1/clusters?show_inactive=true + headers: + Authorization: Bearer {{ TOKEN }} + Accept: application/json + Cc-Version: "2024-09-16" + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: JsonValid + - type: WordMatch + match_all_words: true + words: + - '"clusters"' + - '"pagination"' + # Revocation not added: the public Cloud API docs describe bearer-token + # authentication for service-account secret keys, but not a documented + # self-revocation endpoint for the current secret key value. + references: + - https://www.cockroachlabs.com/docs/cockroachcloud/cloud-api diff --git a/crates/kingfisher-rules/data/rules/databricks.yml b/crates/kingfisher-rules/data/rules/databricks.yml index 2405d5c..4190d22 100644 --- a/crates/kingfisher-rules/data/rules/databricks.yml +++ b/crates/kingfisher-rules/data/rules/databricks.yml @@ -22,6 +22,26 @@ rules: - secret references: - https://docs.databricks.com/dev-tools/api/latest/authentication.html + - https://docs.databricks.com/en/dev-tools/auth/pat.html + validation: + type: Http + content: + request: + headers: + Authorization: Bearer {{ TOKEN }} + method: GET + response_matcher: + - report_response: true + - status: + - 200 + type: StatusMatch + url: https://{{ DOMAIN }}/api/2.0/clusters/list + depends_on_rule: + - rule_id: "kingfisher.databricks.3" + variable: DOMAIN + # Revocation not added: Databricks PAT docs describe token creation and + # use, but I did not find a PAT-only self-revoke endpoint suitable for YAML + # revocation here. 
- name: Databricks API Token id: kingfisher.databricks.2 @@ -51,7 +71,7 @@ rules: type: StatusMatch url: https://{{ DOMAIN }}/api/2.0/clusters/list depends_on_rule: - - rule_id: "kingfisher.databricks.2" + - rule_id: "kingfisher.databricks.3" variable: DOMAIN - name: Databricks Domain @@ -83,4 +103,4 @@ rules: references: - https://docs.databricks.com/workspace/workspace-details.html - https://docs.gcp.databricks.com/workspace/workspace-details.html - - https://docs.microsoft.com/en-us/azure/databricks/scenarios/what-is-azure-databricks \ No newline at end of file + - https://docs.microsoft.com/en-us/azure/databricks/scenarios/what-is-azure-databricks diff --git a/crates/kingfisher-rules/data/rules/docker.yml b/crates/kingfisher-rules/data/rules/docker.yml index 888e717..2ec5055 100644 --- a/crates/kingfisher-rules/data/rules/docker.yml +++ b/crates/kingfisher-rules/data/rules/docker.yml @@ -45,4 +45,42 @@ rules: response_matcher: - report_response: true - type: StatusMatch - status: [200] \ No newline at end of file + status: [200] + + - name: Docker Swarm Join Token + id: kingfisher.docker.2 + pattern: | + (?x) + \b + ( + SWMTKN-1-[a-z0-9]{50,60}-[a-z0-9]{24,30} + ) + \b + pattern_requirements: + min_digits: 4 + min_entropy: 3.5 + confidence: high + categories: [api, key] + examples: + - 'docker swarm join --token SWMTKN-1-3pu6hszjas19xyp7ghgosyx9k8atbfcr8p2is99znpy26u2lkl-1awxwuwd3z9j1z3puu7rcgdbx 192.168.99.100:2377' + references: + - https://docs.docker.com/engine/swarm/join-nodes/ + + - name: Docker Swarm Unlock Key + id: kingfisher.docker.3 + pattern: | + (?x) + \b + ( + SWMKEY-1-[A-Za-z0-9+/]{40,50} + ) + \b + pattern_requirements: + min_digits: 2 + min_entropy: 3.5 + confidence: high + categories: [api, key] + examples: + - 'docker swarm unlock --key SWMKEY-1-AbCdEfGhIjKlMnOpQrStUvWxYz1234567890ABCDEFG' + references: + - https://docs.docker.com/engine/swarm/swarm_manager_locking/ diff --git a/crates/kingfisher-rules/data/rules/docusign.yml 
b/crates/kingfisher-rules/data/rules/docusign.yml index 207bdc2..56cf8a1 100644 --- a/crates/kingfisher-rules/data/rules/docusign.yml +++ b/crates/kingfisher-rules/data/rules/docusign.yml @@ -25,7 +25,107 @@ rules: examples: - "docusign.secret_key = 7a39ce6d-94cf-4bf6-9e9e-9213373c15f4" - "docusign\nds_secret = 3d2f18c9-2075-4e78-834b-64f57f8757d0" + validation: + type: Http + content: + request: + method: POST + url: "https://{{ DOCUSIGN_AUTH_HOST }}/oauth/token" + headers: + Accept: application/json + Content-Type: application/x-www-form-urlencoded + body: > + grant_type=authorization_code&code=INVALID_AUTH_CODE&client_id={{ DOCUSIGN_CLIENT_ID | url_encode }}&client_secret={{ TOKEN | url_encode }}&redirect_uri={{ REDIRECT_URI | url_encode }} + response_matcher: + - report_response: true + - type: StatusMatch + status: [400] + - type: WordMatch + match_all_words: false + words: + - invalid_grant + - invalid authorization code + - type: WordMatch + words: + - invalid_client + negative: true + depends_on_rule: + - rule_id: kingfisher.docusign.2 + variable: DOCUSIGN_CLIENT_ID + - rule_id: kingfisher.docusign.3 + variable: DOCUSIGN_AUTH_HOST + - rule_id: kingfisher.docusign.4 + variable: REDIRECT_URI + references: + - https://developers.docusign.com/platform/auth/ + - https://developers.docusign.com/platform/build-integration/ + + - name: DocuSign Integration Key + id: kingfisher.docusign.2 + pattern: | + (?xi) + \b + docusign + (?:.|[\n\r]){0,64}? + (?:integration[_-]?key|client[_-]?id|app[_-]?id)\b + (?:.|[\n\r]){0,16}? 
+ [=:"'\s] + ['"]* + ( + [a-f0-9]{8}- + [a-f0-9]{4}- + [a-f0-9]{4}- + [a-f0-9]{4}- + [a-f0-9]{12} + ) + \b + pattern_requirements: + min_digits: 6 + min_entropy: 3.0 + confidence: medium + visible: false + examples: + - DOCUSIGN_CLIENT_ID=7a39ce6d-94cf-4bf6-9e9e-9213373c15f4 + - 'docusign.integration_key = "3d2f18c9-2075-4e78-834b-64f57f8757d0"' references: - https://developers.docusign.com/platform/build-integration/ - # No public validation endpoint: DocuSign OAuth secret keys cannot be - # validated without a full Authorization Code Grant flow. + + - name: DocuSign Auth Host + id: kingfisher.docusign.3 + pattern: | + (?xi) + \b + ( + account(?:-d)?\.docusign\.com + ) + \b + min_entropy: 1.0 + confidence: medium + visible: false + examples: + - account.docusign.com + - account-d.docusign.com + references: + - https://developers.docusign.com/platform/auth/ + + - name: DocuSign Redirect URI + id: kingfisher.docusign.4 + pattern: | + (?xi) + \b + docusign + (?:.|[\n\r]){0,64}? + (?:redirect[_-]?uri|oauth[_-]?redirect)\b + (?:.|[\n\r]){0,16}? 
+ [=:"'\s] + ( + https?://[^\s"'<>]{6,200} + ) + min_entropy: 1.5 + confidence: medium + visible: false + examples: + - DOCUSIGN_REDIRECT_URI=https://example.com/docusign/callback + - 'docusign.redirect_uri = "https://localhost:3000/oauth/docusign"' + references: + - https://developers.docusign.com/platform/auth/ diff --git a/crates/kingfisher-rules/data/rules/dropbox.yml b/crates/kingfisher-rules/data/rules/dropbox.yml index 9dd5aca..051047a 100644 --- a/crates/kingfisher-rules/data/rules/dropbox.yml +++ b/crates/kingfisher-rules/data/rules/dropbox.yml @@ -33,5 +33,40 @@ rules: - '"account_id":' - '"email":' url: https://api.dropboxapi.com/2/users/get_current_account + references: + - https://www.dropbox.com/developers/documentation/http/documentation#auth + + - name: Dropbox Long-Lived API Token + id: kingfisher.dropbox.2 + pattern: | + (?x) + \b + ( + [a-z0-9]{11} + AAAAAAAAAA + [a-z0-9\-_=]{43} + ) + \b + pattern_requirements: + min_digits: 2 + min_entropy: 3.0 + confidence: medium + examples: + - 'ab3cd5ef7g9AAAAAAAAAAbcdefghij-klmnopqrstuvwxyz01234567890abcdef' + validation: + type: Http + content: + request: + headers: + Authorization: Bearer {{ TOKEN }} + method: POST + response_matcher: + - report_response: true + - match_all_words: true + type: WordMatch + words: + - '"account_id":' + - '"email":' + url: https://api.dropboxapi.com/2/users/get_current_account references: - https://www.dropbox.com/developers/documentation/http/documentation#auth \ No newline at end of file diff --git a/crates/kingfisher-rules/data/rules/dwolla.yml b/crates/kingfisher-rules/data/rules/dwolla.yml index e1509e1..7c66a3b 100644 --- a/crates/kingfisher-rules/data/rules/dwolla.yml +++ b/crates/kingfisher-rules/data/rules/dwolla.yml @@ -47,7 +47,48 @@ rules: examples: - "dwolla secret = 4d1d407752bfd562bZ=9b7c21f8862fdfc57bc1e45\n" - "dwolla\nclient_secret = 7e4e5297691a673cA=0c7d43b997ec1h8dcaf56\n" + validation: + type: Http + content: + request: + method: POST + url: "{{ 
DWOLLA_API_BASE }}/token" + headers: + Accept: application/json + Content-Type: application/x-www-form-urlencoded + Authorization: "Basic {{ CLIENT_ID | append: ':' | append: TOKEN | b64enc }}" + body: grant_type=client_credentials + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: JsonValid + - type: WordMatch + words: + - '"access_token"' + depends_on_rule: + - rule_id: kingfisher.dwolla.1 + variable: CLIENT_ID + - rule_id: kingfisher.dwolla.3 + variable: DWOLLA_API_BASE references: - - https://developers.dwolla.com/ - # No simple validation: Dwolla OAuth2 requires both client_id and - # client_secret together for the token endpoint. + - https://developers.dwolla.com/docs/api-reference/tokens/create-an-application-access-token + - https://developers.dwolla.com/docs/balance/auth/application-access-tokens + + - name: Dwolla API Base URL + id: kingfisher.dwolla.3 + visible: false + pattern: | + (?xi) + \b + ( + https://api(?:-sandbox)?\.dwolla\.com + ) + \b + min_entropy: 1.0 + confidence: medium + examples: + - DWOLLA_API_BASE=https://api.dwolla.com + - DWOLLA_BASE_URL="https://api-sandbox.dwolla.com" + references: + - https://developers.dwolla.com/docs/api-reference/tokens/create-an-application-access-token diff --git a/crates/kingfisher-rules/data/rules/ebay.yml b/crates/kingfisher-rules/data/rules/ebay.yml new file mode 100644 index 0000000..3e71f77 --- /dev/null +++ b/crates/kingfisher-rules/data/rules/ebay.yml @@ -0,0 +1,58 @@ +rules: + - name: eBay Production Client ID + id: kingfisher.ebay.1 + pattern: | + (?x) + \b + ( + [a-zA-Z0-9_-]+-[a-zA-Z0-9_-]+-PRD-[a-f0-9]{8,12}-[a-f0-9]{8,12} + ) + \b + pattern_requirements: + min_digits: 4 + min_entropy: 3.0 + confidence: medium + categories: [api, key] + examples: + - 'EBAY_CLIENT_ID=MyApp-MyApp-PRD-1a2b3c4d-567890ab' + references: + - https://developer.ebay.com/api-docs/static/oauth-credentials.html + + - name: eBay Sandbox Client ID + id: kingfisher.ebay.2 + pattern: | 
+ (?x) + \b + ( + [a-zA-Z0-9_-]+-[a-zA-Z0-9_-]+-SBX-[a-f0-9]{8,12}-[a-f0-9]{8,12} + ) + \b + pattern_requirements: + min_digits: 4 + min_entropy: 3.0 + confidence: medium + categories: [api, key] + examples: + - 'EBAY_SANDBOX_CLIENT_ID=MyApp-MyApp-SBX-1a2b3c4d-567890ab' + references: + - https://developer.ebay.com/api-docs/static/oauth-credentials.html + + - name: eBay Client Secret + id: kingfisher.ebay.3 + pattern: | + (?x) + \b + ( + (?:PRD|SBX)-[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4,12} + ) + \b + pattern_requirements: + min_digits: 8 + min_entropy: 3.0 + confidence: medium + categories: [api, key] + examples: + - 'EBAY_CLIENT_SECRET=PRD-1a2b3c4d-5e6f-7a8b-9c0d-1e2f3a4b' + - 'EBAY_SANDBOX_SECRET=SBX-1a2b3c4d-5e6f-7a8b-9c0d-1e2f3a4b' + references: + - https://developer.ebay.com/api-docs/static/oauth-credentials.html diff --git a/crates/kingfisher-rules/data/rules/elastic.yml b/crates/kingfisher-rules/data/rules/elastic.yml new file mode 100644 index 0000000..e4040d7 --- /dev/null +++ b/crates/kingfisher-rules/data/rules/elastic.yml @@ -0,0 +1,48 @@ +rules: + - name: Elastic Cloud API Key + id: kingfisher.elastic.1 + pattern: | + (?x) + \b + (?: + (?i:elastic(?:[_\s-]*(?:search|cloud))?) + (?:.|[\n\r]){0,24}? + (?i:api[_\s-]*key|apikey) + | + (?i:EC_API_KEY) + ) + (?:.|[\n\r]){0,16}? 
+ ( + [A-Za-z0-9+/=]{40,120} + ) + pattern_requirements: + min_digits: 2 + min_uppercase: 2 + min_lowercase: 2 + min_entropy: 3.5 + confidence: medium + categories: [api, key] + examples: + - 'ELASTIC_CLOUD_API_KEY=VnVhQ2ZHY0JDZGJrUW0tZTVhT3g6dWkybHAyYXhUTm1zeWFrdzl0dk5udw==' + references: + - https://www.elastic.co/docs/deploy-manage/api-keys/elastic-cloud-api-keys + + - name: Elasticsearch API Key with Prefix + id: kingfisher.elastic.2 + pattern: | + (?x) + \b + (?i:Authorization:\s*ApiKey\s+) + ( + [A-Za-z0-9+/=]{40,120} + ) + pattern_requirements: + min_digits: 2 + min_uppercase: 2 + min_entropy: 3.5 + confidence: high + categories: [api, key] + examples: + - 'Authorization: ApiKey VnVhQ2ZHY0JDZGJrUW0tZTVhT3g6dWkybHAyYXhUTm1zeWFrdzl0dk5udw==' + references: + - https://www.elastic.co/guide/en/elasticsearch/reference/current/security-api-create-api-key.html diff --git a/crates/kingfisher-rules/data/rules/flutterwave.yml b/crates/kingfisher-rules/data/rules/flutterwave.yml index ae0819d..da74a8c 100644 --- a/crates/kingfisher-rules/data/rules/flutterwave.yml +++ b/crates/kingfisher-rules/data/rules/flutterwave.yml @@ -38,6 +38,27 @@ rules: - FLW_SECRET_KEY=FLWSECK_TEST-a514d8f1abd080db1502a144f22954dc-X - 'Authorization: Bearer FLWSECK_TEST-5b1f0a33de9c41748c2a7e9b51d3c6af-X' - seckey=FLWSECK-e6db11d1f8a6208de8cb2f94e293450e-X + validation: + type: Http + content: + request: + method: POST + url: https://idp.flutterwave.com/realms/flutterwave/protocol/openid-connect/token + headers: + Accept: application/json + Content-Type: application/x-www-form-urlencoded + body: > + client_id={{ CLIENT_ID | url_encode }}&client_secret={{ TOKEN | url_encode }}&grant_type=client_credentials + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: JsonValid + - type: WordMatch + words: + - '"access_token"' + depends_on_rule: + - rule_id: kingfisher.flutterwave.1 + variable: CLIENT_ID references: - 
https://developer.flutterwave.com/docs/authentication - - https://developer.flutterwave.com/v2.0/reference/api-request-and-response-standards diff --git a/crates/kingfisher-rules/data/rules/ftp.yml b/crates/kingfisher-rules/data/rules/ftp.yml index 7371036..a239e6f 100644 --- a/crates/kingfisher-rules/data/rules/ftp.yml +++ b/crates/kingfisher-rules/data/rules/ftp.yml @@ -4,12 +4,16 @@ rules: pattern: | (?xi) \b - ftps?:// - [^:@\s]{1,64} - : - ([^@\s]{6,128}) - @ - [^\s/"']{4,128} + (?P + (?Pftps?):// + (?P[^:@\s]{1,64}) + : + (?P[^@\s]{6,128}) + @ + (?P[^\s/"':]{4,128}) + (?::(?P\d{2,5}))? + (?:/[^\s"']*)? + ) pattern_requirements: min_digits: 2 min_entropy: 2.5 @@ -17,6 +21,9 @@ rules: examples: - "ftp://johndoe:pg9stqu2018@files.example.edu.cn" - "BACKUP_URL=ftps://backupuser:S5ec4rePassWord@ftp.corp.example.com" + validation: + type: Raw + content: ftp references: - https://datatracker.ietf.org/doc/html/rfc959 - # No public validation endpoint: FTP servers are instance-specific. + - https://datatracker.ietf.org/doc/html/rfc4217 diff --git a/crates/kingfisher-rules/data/rules/gitlab.yml b/crates/kingfisher-rules/data/rules/gitlab.yml index 1f49d88..2ac05bb 100644 --- a/crates/kingfisher-rules/data/rules/gitlab.yml +++ b/crates/kingfisher-rules/data/rules/gitlab.yml @@ -191,3 +191,285 @@ rules: - type: StatusMatch status: [204] url: https://gitlab.com/api/v4/personal_access_tokens/self + + - name: GitLab CI/CD Job Token + id: kingfisher.gitlab.5 + pattern: | + (?x) + \b + ( + glcbt- + [0-9a-zA-Z]{1,5} + _ + [0-9a-zA-Z_-]{20} + ) + \b + pattern_requirements: + min_digits: 2 + min_entropy: 3.0 + confidence: medium + examples: + - glcbt-ab12_xY9kLmNpQrStUvWxYz01 + - 'CI_JOB_TOKEN=glcbt-a1b2c_3dEfGhIjKlMnOpQrStUv' + references: + - https://docs.gitlab.com/ci/jobs/ci_job_token/ + - https://docs.gitlab.com/api/jobs/ + validation: + type: Http + content: + request: + method: GET + url: https://gitlab.com/api/v4/job + headers: + JOB-TOKEN: '{{ TOKEN }}' + 
response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: JsonValid + - type: WordMatch + match_all_words: true + words: + - '"id"' + - '"status"' + # Revocation not added: CI/CD job tokens are short-lived and automatically + # invalidated when the job finishes. + + - name: GitLab Deploy Token + id: kingfisher.gitlab.6 + pattern: | + (?x) + \b + ( + gldt- + [0-9a-zA-Z_-]{20} + ) + \b + pattern_requirements: + min_digits: 2 + min_entropy: 3.0 + confidence: medium + examples: + - gldt-xY9kLmNpQrStUvWxYz01 + - 'DEPLOY_TOKEN=gldt-3dEfGhIjK4MnOpQrStUv' + references: + - https://docs.gitlab.com/user/project/deploy_tokens/ + + - name: GitLab Feature Flag Client Token + id: kingfisher.gitlab.7 + pattern: | + (?x) + \b + ( + glffct- + [0-9a-zA-Z_-]{20} + ) + \b + pattern_requirements: + min_digits: 2 + min_entropy: 3.0 + confidence: medium + examples: + - glffct-xY9kLmNpQrStUvWxYz01 + references: + - https://docs.gitlab.com/operations/feature_flags/ + + - name: GitLab Feed Token + id: kingfisher.gitlab.8 + pattern: | + (?x) + \b + ( + glft- + [0-9a-zA-Z_-]{20} + ) + \b + pattern_requirements: + min_digits: 2 + min_entropy: 3.0 + confidence: medium + examples: + - glft-xY9kLmNpQrStUvWxYz01 + references: + - https://docs.gitlab.com/user/profile/contributions_calendar/#feed-token + + - name: GitLab Incoming Mail Token + id: kingfisher.gitlab.9 + pattern: | + (?x) + \b + ( + glimt- + [0-9a-zA-Z_-]{25} + ) + \b + pattern_requirements: + min_digits: 2 + min_entropy: 3.0 + confidence: medium + examples: + - glimt-xY9kLmNpQrStUvWxYz0123456 + references: + - https://docs.gitlab.com/administration/incoming_email/ + + - name: GitLab Kubernetes Agent Token + id: kingfisher.gitlab.10 + pattern: | + (?x) + \b + ( + glagent- + [0-9a-zA-Z_-]{50} + ) + \b + pattern_requirements: + min_digits: 2 + min_entropy: 3.0 + confidence: medium + examples: + - glagent-xY9kLmNpQrStUvWxYz01234567890abcdefghijklmnopqrstu + references: + - 
https://docs.gitlab.com/user/clusters/agent/ + + - name: GitLab OAuth Application Secret + id: kingfisher.gitlab.11 + pattern: | + (?x) + \b + ( + gloas- + [0-9a-zA-Z_-]{64} + ) + \b + pattern_requirements: + min_digits: 2 + min_entropy: 3.0 + confidence: medium + examples: + - gloas-xY9kLmNpQrStUvWxYz01234567890abcdefghijklmnopqrstuvwxyz012345678 + references: + - https://docs.gitlab.com/integration/oauth_provider/ + + - name: GitLab Runner Authentication Token + id: kingfisher.gitlab.12 + pattern: | + (?x) + \b + ( + glrt- + [0-9a-zA-Z_-]{20} + ) + \b + pattern_requirements: + min_digits: 2 + min_entropy: 3.0 + confidence: medium + examples: + - glrt-xY9kLmNpQrStUvWxYz01 + - | + gitlab-runner register \ + --url https://gitlab.com \ + --token glrt-3dEfGhIjK4MnOpQrStUv + references: + - https://docs.gitlab.com/runner/register/ + validation: + type: Http + content: + request: + method: POST + headers: + Content-Type: application/x-www-form-urlencoded + Accept: application/json + body: token={{ TOKEN }} + response_matcher: + - report_response: true + - type: StatusMatch + status: 200 + - type: WordMatch + words: + - '"token is missing"' + - '"403 Forbidden"' + negative: true + url: https://gitlab.com/api/v4/runners/verify + + - name: GitLab Runner Authentication Token - Routable Format + id: kingfisher.gitlab.13 + pattern: | + (?x) + \b + ( + glrt-t + \d + _ + [0-9a-zA-Z_-]{27,300} + \. 
+ [0-9a-z]{2} + [0-9a-z]{7} + ) + \b + pattern_requirements: + min_digits: 2 + min_entropy: 4.0 + confidence: medium + examples: + - glrt-t1_xY9kLmNpQrStUvWxYz01234567890.01abc1234 + references: + - https://docs.gitlab.com/runner/register/ + validation: + type: Http + content: + request: + method: POST + headers: + Content-Type: application/x-www-form-urlencoded + Accept: application/json + body: token={{ TOKEN }} + response_matcher: + - report_response: true + - type: StatusMatch + status: 200 + - type: WordMatch + words: + - '"token is missing"' + - '"403 Forbidden"' + negative: true + url: https://gitlab.com/api/v4/runners/verify + + - name: GitLab SCIM Token + id: kingfisher.gitlab.14 + pattern: | + (?x) + \b + ( + glsoat- + [0-9a-zA-Z_-]{20} + ) + \b + pattern_requirements: + min_digits: 2 + min_entropy: 3.0 + confidence: medium + examples: + - glsoat-xY9kLmNpQrStUvWxYz01 + references: + - https://docs.gitlab.com/api/scim/ + + - name: GitLab Session Cookie + id: kingfisher.gitlab.15 + pattern: | + (?x) + (?:^|[;\s]) + _gitlab_session= + ( + [0-9a-f]{32} + ) + \b + pattern_requirements: + min_digits: 2 + min_entropy: 3.0 + confidence: medium + examples: + - '_gitlab_session=a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6' + - 'Cookie: _gitlab_session=0f1e2d3c4b5a69788796a5b4c3d2e1f0' + references: + - https://docs.gitlab.com/ diff --git a/crates/kingfisher-rules/data/rules/google.yml b/crates/kingfisher-rules/data/rules/google.yml index c6c9e66..4cfbde6 100644 --- a/crates/kingfisher-rules/data/rules/google.yml +++ b/crates/kingfisher-rules/data/rules/google.yml @@ -23,6 +23,32 @@ rules: confidence: medium examples: - 'const CLIENTSECRET = "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"' + depends_on_rule: + - rule_id: "kingfisher.google.1" + variable: GOOGLE_CLIENT_ID + validation: + type: Http + content: + request: + method: POST + url: https://oauth2.googleapis.com/token + headers: + Content-Type: application/x-www-form-urlencoded + Accept: application/json + body: > + 
code=invalid_code&client_id={{ GOOGLE_CLIENT_ID | url_encode }}&client_secret={{ TOKEN | url_encode }}&redirect_uri={{ "https://example.com/oauth/callback" | url_encode }}&grant_type=authorization_code + response_matcher: + - report_response: true + - type: StatusMatch + status: [400] + - type: WordMatch + words: + - invalid_client + negative: true + # Revocation not added: Google's OAuth revocation endpoint revokes tokens, + # not client secrets. + references: + - https://developers.google.com/identity/protocols/oauth2/web-server - name: Google OAuth Client Secret id: kingfisher.google.3 @@ -36,6 +62,32 @@ rules: examples: - " //$google_client_secret = 'fnhqAakzWrX-mtFQ4PRdMoy0';" - " 'clientSecret' : 'Ufvuj-d6alhwGKvvLh_8Nq0K'" + depends_on_rule: + - rule_id: "kingfisher.google.1" + variable: GOOGLE_CLIENT_ID + validation: + type: Http + content: + request: + method: POST + url: https://oauth2.googleapis.com/token + headers: + Content-Type: application/x-www-form-urlencoded + Accept: application/json + body: > + code=invalid_code&client_id={{ GOOGLE_CLIENT_ID | url_encode }}&client_secret={{ TOKEN | url_encode }}&redirect_uri={{ "https://example.com/oauth/callback" | url_encode }}&grant_type=authorization_code + response_matcher: + - report_response: true + - type: StatusMatch + status: [400] + - type: WordMatch + words: + - invalid_client + negative: true + # Revocation not added: Google's OAuth revocation endpoint revokes tokens, + # not client secrets. + references: + - https://developers.google.com/identity/protocols/oauth2/web-server - name: Google OAuth Access Token id: kingfisher.google.4 @@ -61,6 +113,42 @@ rules: - | -- Clear login if it's a new connection. 
--propertyTable.access_token = 'ya29.Ci_UA7aEsvT6-oVI8f96kvB6i8oO13WgdZUviLaCVtpEPYZqhQcQycR-u2X9xtmYGA' + validation: + type: Http + content: + request: + method: GET + url: https://www.googleapis.com/oauth2/v3/tokeninfo?access_token={{ TOKEN | url_encode }} + headers: + Accept: application/json + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: JsonValid + - type: WordMatch + match_all_words: true + words: + - '"aud"' + - '"expires_in"' + revocation: + type: Http + content: + request: + method: POST + url: https://oauth2.googleapis.com/revoke + headers: + Content-Type: application/x-www-form-urlencoded + Accept: application/json + body: token={{ TOKEN | url_encode }} + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + references: + - https://developers.google.com/identity/openid-connect/openid-connect + - https://developers.google.com/data-portability/user-guide/quickstart + - https://developers.google.com/identity/protocols/oauth2/web-server - name: Google OAuth Credentials id: kingfisher.google.6 @@ -118,4 +206,4 @@ rules: match_all_words: true words: - '"models"' - - '"name"' \ No newline at end of file + - '"name"' diff --git a/crates/kingfisher-rules/data/rules/googleoauth2.yml b/crates/kingfisher-rules/data/rules/googleoauth2.yml index ddbd412..be4664b 100644 --- a/crates/kingfisher-rules/data/rules/googleoauth2.yml +++ b/crates/kingfisher-rules/data/rules/googleoauth2.yml @@ -30,5 +30,20 @@ rules: - type: WordMatch words: - '"email":' + revocation: + type: Http + content: + request: + method: POST + url: https://oauth2.googleapis.com/revoke + headers: + Content-Type: application/x-www-form-urlencoded + Accept: application/json + body: token={{ TOKEN | url_encode }} + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] references: - - https://developers.google.com/identity/protocols/oauth2 \ No newline at end of file + - 
https://developers.google.com/identity/protocols/oauth2 + - https://developers.google.com/identity/protocols/oauth2/web-server diff --git a/crates/kingfisher-rules/data/rules/hcaptcha.yml b/crates/kingfisher-rules/data/rules/hcaptcha.yml new file mode 100644 index 0000000..cc1a129 --- /dev/null +++ b/crates/kingfisher-rules/data/rules/hcaptcha.yml @@ -0,0 +1,24 @@ +rules: + - name: hCaptcha Site Verify Secret Key + id: kingfisher.hcaptcha.1 + pattern: | + (?x) + \b + ( + (?: + 0x[a-fA-F0-9]{40} + | + ES_[a-fA-F0-9]{32} + ) + ) + \b + pattern_requirements: + min_digits: 4 + min_entropy: 3.5 + confidence: medium + categories: [api, key] + examples: + - 'HCAPTCHA_SECRET=0x0000000000000000000000000000000000000000' + - 'hcaptcha_secret: ES_abcdef1234567890abcdef1234567890' + references: + - https://docs.hcaptcha.com/ diff --git a/crates/kingfisher-rules/data/rules/highnote.yml b/crates/kingfisher-rules/data/rules/highnote.yml new file mode 100644 index 0000000..c8ddf35 --- /dev/null +++ b/crates/kingfisher-rules/data/rules/highnote.yml @@ -0,0 +1,53 @@ +rules: + - name: Highnote API Key + id: kingfisher.highnote.1 + pattern: | + (?x) + \b + (?i:highnote) + (?:.|[\n\r]){0,24}? 
+ \b + ( + (?:sk|rk)_(?:live|test)_[a-zA-Z0-9]{20,60} + ) + \b + pattern_requirements: + min_digits: 2 + min_entropy: 3.5 + confidence: medium + categories: [api, key] + examples: + - 'HIGHNOTE_API_KEY=sk_live_AbCdEfGhIjKlMnOpQrStUvWxYz1234' + - 'highnote_key: rk_test_AbCdEfGhIjKlMnOpQrStUvWxYz1234' + validation: + type: Http + content: + request: + method: POST + url: > + {%- if TOKEN contains "_test_" -%} + https://api.us.test.highnote.com/graphql + {%- else -%} + https://api.us.highnote.com/graphql + {%- endif -%} + headers: + Authorization: "Basic {{ TOKEN | b64enc }}" + Content-Type: application/json + Accept: application/json + body: '{"query":"query { ping }"}' + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: JsonValid + - type: WordMatch + match_all_words: true + words: + - '"data"' + - '"ping"' + - '"pong"' + # Revocation not added: the public Highnote docs describe API key + # usage and rotation guidance, but not an API endpoint to revoke the + # current key directly.
+ references: + - https://docs.highnote.com/docs/developers/api/using-the-api diff --git a/crates/kingfisher-rules/data/rules/hop.yml b/crates/kingfisher-rules/data/rules/hop.yml new file mode 100644 index 0000000..06fbd0b --- /dev/null +++ b/crates/kingfisher-rules/data/rules/hop.yml @@ -0,0 +1,38 @@ +rules: + - name: HOP Project Token + id: kingfisher.hop.1 + pattern: | + (?x) + \b + ( + ptk_[a-zA-Z0-9_-]{20,80} + ) + \b + pattern_requirements: + min_digits: 2 + min_entropy: 3.5 + confidence: high + categories: [api, key] + examples: + - 'HOP_TOKEN=ptk_AbCdEfGhIjKlMnOpQrStUvWxYz123456' + references: + - https://docs.hop.io/reference/rest-api + + - name: HOP Personal Access Token + id: kingfisher.hop.2 + pattern: | + (?x) + (?:^|['"\x60\s>=:(,]) + ( + hop_pat_[a-zA-Z0-9_-]{20,80} + ) + (?:$|['"\x60\s<),]) + pattern_requirements: + min_digits: 2 + min_entropy: 3.5 + confidence: high + categories: [api, key] + examples: + - 'HOP_PAT="hop_pat_AbCdEfGhIjKlMnOpQrStUvWxYz123456"' + references: + - https://docs.hop.io/reference/rest-api diff --git a/crates/kingfisher-rules/data/rules/iterative.yml b/crates/kingfisher-rules/data/rules/iterative.yml new file mode 100644 index 0000000..9d132ae --- /dev/null +++ b/crates/kingfisher-rules/data/rules/iterative.yml @@ -0,0 +1,19 @@ +rules: + - name: Iterative DVC Studio Access Token + id: kingfisher.iterative.1 + pattern: | + (?x) + \b + ( + isat_[a-zA-Z0-9_-]{20,80} + ) + \b + pattern_requirements: + min_digits: 2 + min_entropy: 3.5 + confidence: medium + categories: [api, key] + examples: + - 'DVC_STUDIO_TOKEN=isat_AbCdEfGhIjKlMnOpQrStUvWxYz123456' + references: + - https://dvc.org/doc/command-reference/studio/token diff --git a/crates/kingfisher-rules/data/rules/kraken.yml b/crates/kingfisher-rules/data/rules/kraken.yml index bd3fc2e..2f547cd 100644 --- a/crates/kingfisher-rules/data/rules/kraken.yml +++ b/crates/kingfisher-rules/data/rules/kraken.yml @@ -26,6 +26,45 @@ rules: examples: - 
KRAKEN_API_SECRET=dGhpcy1sb29rcy1saWtlLWEtYmFzZTY0LWtyYWtlbi1zZWNyZXQtdGhhdC1pcy1sb25nLWVub3VnaA== - kraken_secret="Aq1Bq2Cr3Ds4Et5Fu6Gv7Hw8Ix9Jy0Kz1La2Mb3Nc4Od5Pe6Qf7Rg8Sh9Ti0Uj1Vk2Wm3Xn4Yo5Za6Bc7" + validation: + type: Raw + content: kraken + depends_on_rule: + - rule_id: kingfisher.kraken.2 + variable: KRAKEN_API_KEY references: - https://docs.kraken.com/api/docs/guides/spot-rest-auth/ - https://docs.kraken.com/api/docs/rest-api/get-account-balance/ + + - name: Kraken API Key + id: kingfisher.kraken.2 + pattern: | + (?xi) + \b + kraken + (?:.|[\n\r]){0,32}? + (?: + api[_-]?key | + key | + public[_-]?key + ) + (?:.|[\n\r]){0,12}? + ( + [A-Za-z0-9]{16,64} + ) + \b + pattern_requirements: + min_digits: 2 + min_uppercase: 1 + min_lowercase: 4 + ignore_if_contains: + - your_api_key + - xxxxxx + min_entropy: 3.0 + confidence: medium + visible: false + examples: + - KRAKEN_API_KEY=Ab12Cd34Ef56Gh78Ij90Kl12Mn34Op56 + - 'kraken_key: 5A6b7C8d9E0f1G2h3I4J5K6L7M8N9P0Q' + references: + - https://docs.kraken.com/api/docs/guides/spot-rest-auth/ diff --git a/crates/kingfisher-rules/data/rules/kucoin.yml b/crates/kingfisher-rules/data/rules/kucoin.yml index c2d92ed..dbf142d 100644 --- a/crates/kingfisher-rules/data/rules/kucoin.yml +++ b/crates/kingfisher-rules/data/rules/kucoin.yml @@ -57,6 +57,63 @@ rules: examples: - KUCOIN_API_SECRET=7d70f6c7-42e9-4261-8a8d-8ca2d5028d4f - 'kucoin_secret: a1b2c3d4-e5f6-7890-abcd-ef1234567890' + validation: + type: Http + content: + request: + method: GET + url: https://api.kucoin.com/api/v1/accounts + headers: + Accept: application/json + Content-Type: application/json + KC-API-KEY: "{{ KUCOIN_KEY }}" + KC-API-TIMESTAMP: "{{ REQUEST_UNIX_MILLIS }}" + KC-API-KEY-VERSION: "2" + KC-API-PASSPHRASE: '{%- assign passphrase = KUCOIN_PASSPHRASE | hmac_sha256: TOKEN -%}{{ passphrase }}' + KC-API-SIGN: '{%- assign prehash = REQUEST_UNIX_MILLIS | append: "GET" | append: "/api/v1/accounts" -%}{{ prehash | hmac_sha256: TOKEN }}' + response_matcher: + - 
report_response: true + - type: StatusMatch + status: [200] + - type: JsonValid + - type: WordMatch + match_all_words: false + words: + - '"data"' + - '"code":"200000"' + depends_on_rule: + - rule_id: kingfisher.kucoin.1 + variable: KUCOIN_KEY + - rule_id: kingfisher.kucoin.3 + variable: KUCOIN_PASSPHRASE references: - https://www.kucoin.com/docs-new/authentication + - name: KuCoin API Passphrase + id: kingfisher.kucoin.3 + pattern: | + (?xi) + \b + kucoin + (?:.|[\n\r]){0,32}? + (?: + api[_-]?passphrase | + passphrase + ) + (?:.|[\n\r]){0,12}? + ( + [A-Za-z0-9!@\#$%^&*()_+=./:-]{6,64} + ) + \b + pattern_requirements: + ignore_if_contains: + - your_passphrase + - xxxxxx + min_entropy: 2.5 + confidence: medium + visible: false + examples: + - KUCOIN_API_PASSPHRASE=my-strong-passphrase + - 'kucoin_passphrase: S3cur3Passphrase123' + references: + - https://www.kucoin.com/docs-new/authentication diff --git a/crates/kingfisher-rules/data/rules/langfuse.yml b/crates/kingfisher-rules/data/rules/langfuse.yml index 3a010d8..845e6cb 100644 --- a/crates/kingfisher-rules/data/rules/langfuse.yml +++ b/crates/kingfisher-rules/data/rules/langfuse.yml @@ -2,10 +2,10 @@ rules: - name: Langfuse Secret Key id: kingfisher.langfuse.1 pattern: | - (?xi) + (?x) \b ( - sk-lf-[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12} + sk-lf-[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12} ) \b pattern_requirements: @@ -42,10 +42,10 @@ rules: - name: Langfuse Public Key id: kingfisher.langfuse.2 pattern: | - (?xi) + (?x) \b ( - pk-lf-[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12} + pk-lf-[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12} ) \b pattern_requirements: @@ -57,9 +57,6 @@ rules: examples: - pk-lf-a1b2c3d4-e5f6-7890-abcd-ef1234567890 - 'LANGFUSE_PUBLIC_KEY="pk-lf-9f8e7d6c-5b4a-3210-fedc-ba0987654321"' - negative_examples: - - pk-lf-test - - pk-lf- references: - https://langfuse.com/docs/sdk/typescript - 
https://langfuse.com/docs/get-started diff --git a/crates/kingfisher-rules/data/rules/ldap.yml b/crates/kingfisher-rules/data/rules/ldap.yml index 4176173..719a099 100644 --- a/crates/kingfisher-rules/data/rules/ldap.yml +++ b/crates/kingfisher-rules/data/rules/ldap.yml @@ -1,4 +1,33 @@ rules: + - name: LDAP Bind URI Credentials + id: kingfisher.ldap.2 + pattern: | + (?xi) + \b + (?P + (?Pldaps?):// + (?P[^:@\s]{1,128}) + : + (?P[^@\s]{6,128}) + @ + (?P[^\s/"':]{4,128}) + (?::(?P\d{2,5}))? + (?:/[^\s"']*)? + ) + pattern_requirements: + min_digits: 1 + min_entropy: 2.5 + confidence: medium + examples: + - ldap://cn=admin,dc=example,dc=com:Tr0ub4dor%263!@ldap.example.com:389 + - ldaps://uid=svc-reader,ou=people,dc=corp,dc=example,dc=com:S3cur3BindPass@directory.corp.example.com + validation: + type: Raw + content: ldap + references: + - https://datatracker.ietf.org/doc/html/rfc4511 + - https://datatracker.ietf.org/doc/html/rfc4516 + - name: LDAP Credentials id: kingfisher.ldap.1 pattern: | @@ -22,6 +51,4 @@ rules: - "ldap_pwd = 'Tr0ub4dor&3!'" - "LDAP_PASSWORD=s3cur3P@ssw0rd\n" references: - - https://tools.ietf.org/html/rfc2251 - # No public validation endpoint: LDAP servers are self-hosted; - # the host, port, and DN are instance-specific. 
+ - https://datatracker.ietf.org/doc/html/rfc4511 diff --git a/crates/kingfisher-rules/data/rules/lichess.yml b/crates/kingfisher-rules/data/rules/lichess.yml new file mode 100644 index 0000000..efe968c --- /dev/null +++ b/crates/kingfisher-rules/data/rules/lichess.yml @@ -0,0 +1,31 @@ +rules: + - name: Lichess Personal Access Token + id: kingfisher.lichess.1 + pattern: | + (?x) + \b + ( + lip_[a-zA-Z0-9_]{16,60} + ) + \b + pattern_requirements: + min_digits: 2 + min_entropy: 3.5 + confidence: medium + categories: [api, key] + examples: + - 'LICHESS_TOKEN=lip_AbCdEfGhIjKlMnOpQr12' + validation: + type: Http + content: + request: + method: GET + url: https://lichess.org/api/account + headers: + Authorization: Bearer {{ TOKEN }} + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + references: + - https://lichess.org/api diff --git a/crates/kingfisher-rules/data/rules/localstack.yml b/crates/kingfisher-rules/data/rules/localstack.yml new file mode 100644 index 0000000..7f610d4 --- /dev/null +++ b/crates/kingfisher-rules/data/rules/localstack.yml @@ -0,0 +1,21 @@ +rules: + - name: LocalStack Simulated AWS Access Key + id: kingfisher.localstack.1 + pattern: | + (?x) + \b + ( + (?:LSIA|LKIA)[A-Z0-9]{16,} + ) + \b + pattern_requirements: + min_digits: 2 + min_uppercase: 4 + min_entropy: 3.0 + confidence: medium + categories: [api, key] + examples: + - 'AWS_ACCESS_KEY_ID=LSIAQAAAAAAVNCBMPN59' + - 'aws_access_key=LKIAQAAAAAAVNCBMPN59' + references: + - https://docs.localstack.cloud/aws/capabilities/config/credentials/ diff --git a/crates/kingfisher-rules/data/rules/mailersend.yml b/crates/kingfisher-rules/data/rules/mailersend.yml new file mode 100644 index 0000000..7401624 --- /dev/null +++ b/crates/kingfisher-rules/data/rules/mailersend.yml @@ -0,0 +1,32 @@ +rules: + - name: MailerSend API Token + id: kingfisher.mailersend.1 + pattern: | + (?x) + \b + ( + mlsn\.[a-zA-Z0-9]{30,100} + ) + \b + pattern_requirements: + min_digits: 2 + 
min_entropy: 3.5 + confidence: high + categories: [api, key] + examples: + - 'MAILERSEND_API_TOKEN=mlsn.AbCdEfGhIjKlMnOpQrStUvWxYz1234567890AbCdEfGhIj' + validation: + type: Http + content: + request: + method: GET + url: https://api.mailersend.com/v1/api-quota + headers: + Authorization: Bearer {{ TOKEN }} + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + references: + - https://www.mailersend.com/help/managing-api-tokens + - https://developers.mailersend.com/ diff --git a/crates/kingfisher-rules/data/rules/onfido.yml b/crates/kingfisher-rules/data/rules/onfido.yml new file mode 100644 index 0000000..6f84055 --- /dev/null +++ b/crates/kingfisher-rules/data/rules/onfido.yml @@ -0,0 +1,32 @@ +rules: + - name: Onfido API Token + id: kingfisher.onfido.1 + pattern: | + (?x) + \b + ( + api_(?:live|sandbox)\.[a-zA-Z0-9_-]{20,80} + ) + \b + pattern_requirements: + min_digits: 2 + min_entropy: 3.5 + confidence: high + categories: [api, key] + examples: + - 'ONFIDO_API_TOKEN=api_live.AbCdEfGhIjKlMnOpQrStUvWxYz123456' + - 'onfido_token: api_sandbox.AbCdEfGhIjKlMnOpQrStUvWxYz123456' + validation: + type: Http + content: + request: + method: GET + url: https://api.eu.onfido.com/v3.6/ping + headers: + Authorization: Token token={{ TOKEN }} + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + references: + - https://documentation.identity.entrust.com/api/latest/ diff --git a/crates/kingfisher-rules/data/rules/openvsx.yml b/crates/kingfisher-rules/data/rules/openvsx.yml new file mode 100644 index 0000000..981ea9c --- /dev/null +++ b/crates/kingfisher-rules/data/rules/openvsx.yml @@ -0,0 +1,19 @@ +rules: + - name: OpenVSX Access Token + id: kingfisher.openvsx.1 + pattern: | + (?x) + \b + ( + (?:ovsxat|ovsxp)_[a-zA-Z0-9_-]{20,80} + ) + \b + pattern_requirements: + min_digits: 2 + min_entropy: 3.5 + confidence: medium + categories: [api, key] + examples: + - 'OVSX_PAT=ovsxat_AbCdEfGhIjKlMnOpQrStUvWxYz123456' + 
references: + - https://github.com/eclipse/openvsx/wiki/Publishing-Extensions diff --git a/crates/kingfisher-rules/data/rules/paddle.yml b/crates/kingfisher-rules/data/rules/paddle.yml new file mode 100644 index 0000000..17851c0 --- /dev/null +++ b/crates/kingfisher-rules/data/rules/paddle.yml @@ -0,0 +1,34 @@ +rules: + - name: Paddle API Key + id: kingfisher.paddle.1 + pattern: | + (?x) + \b + ( + pdl_(?:live|sdbx)_apikey_[a-z0-9_]{30,60} + ) + \b + pattern_requirements: + min_digits: 2 + min_lowercase: 4 + min_entropy: 3.5 + confidence: high + categories: [api, key] + examples: + - 'PADDLE_API_KEY=pdl_live_apikey_01gtgztp8f4kek3yd4g1wrksa3_q6tgtjyvoiz7ldtxt65bx7_aqo' + - 'paddle_key: pdl_sdbx_apikey_01h9fjk2z4qqw8n3m7xr5bc6y1_p3rstuvwxyz' + validation: + type: Http + content: + request: + method: GET + url: https://api.paddle.com/event-types + headers: + Authorization: Bearer {{ TOKEN }} + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + references: + - https://developer.paddle.com/api-reference/about/api-keys + - https://developer.paddle.com/api-reference/about/authentication diff --git a/crates/kingfisher-rules/data/rules/pangea.yml b/crates/kingfisher-rules/data/rules/pangea.yml new file mode 100644 index 0000000..0300170 --- /dev/null +++ b/crates/kingfisher-rules/data/rules/pangea.yml @@ -0,0 +1,34 @@ +rules: + - name: Pangea Service Token + id: kingfisher.pangea.1 + pattern: | + (?x) + \b + ( + pts_[a-z0-9]{20,60} + ) + \b + pattern_requirements: + min_digits: 2 + min_entropy: 3.5 + confidence: high + categories: [api, key] + examples: + - 'PANGEA_TOKEN=pts_gqmqvvxk4yhirapuhw6bs7nswu' + - 'pangea_token: pts_hpbc3klkkq54tigu4osc5eygthxps6vf' + validation: + type: Http + content: + request: + method: POST + url: https://audit.aws.us.pangea.cloud/v1/log + headers: + Authorization: Bearer {{ TOKEN }} + Content-Type: application/json + body: '{"event":{"message":"test"}}' + response_matcher: + - report_response: true + - type: 
StatusMatch + status: [200, 401, 403] + references: + - https://pangea.cloud/docs/ diff --git a/crates/kingfisher-rules/data/rules/persona.yml b/crates/kingfisher-rules/data/rules/persona.yml new file mode 100644 index 0000000..db610d9 --- /dev/null +++ b/crates/kingfisher-rules/data/rules/persona.yml @@ -0,0 +1,34 @@ +rules: + - name: Persona API Key + id: kingfisher.persona.1 + pattern: | + (?x) + \b + ( + persona_(?:production|sandbox)_[a-z0-9_-]{20,80} + ) + \b + pattern_requirements: + min_digits: 2 + min_lowercase: 4 + min_entropy: 3.5 + confidence: high + categories: [api, key] + examples: + - 'PERSONA_API_KEY=persona_production_abc123def456ghi789jkl012mno345pqr' + - 'api_key: persona_sandbox_abc123def456ghi789jkl012mno345pqr' + validation: + type: Http + content: + request: + method: GET + url: https://withpersona.com/api/v1/accounts + headers: + Authorization: Bearer {{ TOKEN }} + Persona-Version: "2023-01-05" + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + references: + - https://docs.withpersona.com/api-keys diff --git a/crates/kingfisher-rules/data/rules/pinterest.yml b/crates/kingfisher-rules/data/rules/pinterest.yml new file mode 100644 index 0000000..04a815f --- /dev/null +++ b/crates/kingfisher-rules/data/rules/pinterest.yml @@ -0,0 +1,50 @@ +rules: + - name: Pinterest Access Token + id: kingfisher.pinterest.1 + pattern: | + (?x) + \b + ( + pina_[a-zA-Z0-9_-]{20,200} + ) + \b + pattern_requirements: + min_digits: 2 + min_entropy: 3.5 + confidence: high + categories: [api, key] + examples: + - 'PINTEREST_ACCESS_TOKEN=pina_AbCdEfGhIjKlMnOpQrStUvWxYz1234567890' + validation: + type: Http + content: + request: + method: GET + url: https://api.pinterest.com/v5/user_account + headers: + Authorization: Bearer {{ TOKEN }} + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + references: + - https://developers.pinterest.com/docs/api/v5/ + + - name: Pinterest Refresh Token + id: 
kingfisher.pinterest.2 + pattern: | + (?x) + \b + ( + pinr_[a-zA-Z0-9._-]{20,500} + ) + \b + pattern_requirements: + min_digits: 2 + min_entropy: 3.5 + confidence: high + categories: [api, key] + examples: + - 'PINTEREST_REFRESH_TOKEN=pinr_eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.abc123' + references: + - https://developers.pinterest.com/docs/api/v5/oauth-token/ diff --git a/crates/kingfisher-rules/data/rules/polar.yml b/crates/kingfisher-rules/data/rules/polar.yml new file mode 100644 index 0000000..15b3eee --- /dev/null +++ b/crates/kingfisher-rules/data/rules/polar.yml @@ -0,0 +1,20 @@ +rules: + - name: Polar Personal Access Token + id: kingfisher.polar.1 + pattern: | + (?x) + \b + ( + polar_(?:at|oat|rt)_[a-zA-Z0-9_-]{20,100} + ) + \b + pattern_requirements: + min_digits: 2 + min_entropy: 3.5 + confidence: high + categories: [api, key] + examples: + - 'POLAR_TOKEN=polar_at_AbCdEfGhIjKlMnOpQrStUvWxYz1234' + - 'polar_org_token: polar_oat_AbCdEfGhIjKlMnOpQrStUvWx12' + references: + - https://docs.polar.sh/api/authentication diff --git a/crates/kingfisher-rules/data/rules/posthog.yml b/crates/kingfisher-rules/data/rules/posthog.yml index 904c202..6f65b0e 100644 --- a/crates/kingfisher-rules/data/rules/posthog.yml +++ b/crates/kingfisher-rules/data/rules/posthog.yml @@ -57,6 +57,22 @@ rules: examples: - "pha_XgrXUnvwyoPLmjwHES5lc8scZUtheBpa1QV1qmssutB" - "pha_35kHVLA1E068nvrwUTgabkh8xvGGTpSpsVjGcpVNfis" + validation: + type: Http + content: + request: + method: GET + url: https://app.posthog.com/api/users/@me/ + headers: + Authorization: "Bearer {{ TOKEN }}" + Content-Type: "application/json" + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: JsonValid + # Revocation not added: the public PostHog API docs do not document a + # token self-revoke endpoint for OAuth access tokens.
references: - https://posthog.com/docs/api - https://github.com/PostHog/posthog/blob/e408aac5debe02b39a6a67cfd028f16a2ca7bc90/posthog/models/utils.py#L260-L290 diff --git a/crates/kingfisher-rules/data/rules/proof.yml b/crates/kingfisher-rules/data/rules/proof.yml new file mode 100644 index 0000000..6fddb22 --- /dev/null +++ b/crates/kingfisher-rules/data/rules/proof.yml @@ -0,0 +1,49 @@ +rules: + - name: Proof API Key + id: kingfisher.proof.1 + pattern: | + (?x) + \b + ( + prf_(?:(?:cli_)?test_)?[a-zA-Z0-9_-]{20,80} + ) + \b + pattern_requirements: + min_digits: 2 + min_entropy: 3.5 + confidence: high + categories: [api, key] + examples: + - 'PROOF_API_KEY=prf_AbCdEfGhIjKlMnOpQrStUvWxYz123456' + - 'proof_key: prf_test_AbCdEfGhIjKlMnOpQrStUvWxYz123456' + - 'proof_key: prf_cli_AbCdEfGhIjKlMnOpQrStUvWxYz123456' + - 'proof_key: prf_cli_test_AbCdEfGhIjKlMnOpQrStUvWxYz123456' + validation: + type: Http + content: + request: + method: POST + url: > + {%- if TOKEN contains "_test_" -%} + https://api.fairfax.proof.com/v1/transactions + {%- else -%} + https://api.proof.com/v1/transactions + {%- endif -%} + headers: + ApiKey: "{{ TOKEN }}" + Content-Type: application/json + Accept: application/json + body: '{}' + response_matcher: + - report_response: true + - type: StatusMatch + status: [422] + - type: WordMatch + words: + - signer + # Revocation not added: the public Proof docs describe dashboard key + # management and secret-scanning guidance, but not a self-revoke API. + references: + - https://dev.proof.com/docs/api-keys + - https://dev.proof.com/docs/environments + - https://dev.proof.com/reference/createtransaction diff --git a/crates/kingfisher-rules/data/rules/rabbitmq.yml b/crates/kingfisher-rules/data/rules/rabbitmq.yml index 293ec81..9545676 100644 --- a/crates/kingfisher-rules/data/rules/rabbitmq.yml +++ b/crates/kingfisher-rules/data/rules/rabbitmq.yml @@ -3,22 +3,25 @@ rules: id: kingfisher.rabbitmq.1 pattern: | (?xi) - (?: - amqps? + (?P + (?Pamqps?) 
+ :\/\/ + (?P[\S]{3,50}) + : + (?P[\S]{3,50}) + @ + (?P[-.%\w]+) + (?::(?P\d{2,5}))? + (?:\/(?P[-.%\w\/]+))? ) - :\/\/ - [\S]{3,50} - : - ( - [\S]{3,50} - ) - @ - [-.%\w\/:]+ \b min_entropy: 3.5 confidence: medium examples: - amqp://user:password@rabbitmq.example.com/queue - amqps://admin:3eCa3P@192.168.1.10:5671/vhost + validation: + type: Raw + content: rabbitmq references: - https://www.rabbitmq.com/uri-spec.html diff --git a/crates/kingfisher-rules/data/rules/rainforestpay.yml b/crates/kingfisher-rules/data/rules/rainforestpay.yml new file mode 100644 index 0000000..1376c76 --- /dev/null +++ b/crates/kingfisher-rules/data/rules/rainforestpay.yml @@ -0,0 +1,21 @@ +rules: + - name: Rainforest Pay API Key + id: kingfisher.rainforestpay.1 + pattern: | + (?x) + \b + ( + (?:sbx_)?apikey_[a-f0-9]{64} + ) + \b + pattern_requirements: + min_digits: 4 + min_entropy: 3.5 + confidence: high + categories: [api, key] + examples: + - 'RAINFOREST_API_KEY=apikey_1ad1c535b0c0093e7b9bf093d7e3444cd0e2ddefab36199216f555c3efa65d63' + - 'api_key: sbx_apikey_2bd2c646e7e1194f8c9cf194e8f4555de1f3eefbbc472003276666d4efb76e74' + references: + - https://docs.rainforestpay.com/docs/api-keys + - https://docs.rainforestpay.com/reference/authentication diff --git a/crates/kingfisher-rules/data/rules/redis.yml b/crates/kingfisher-rules/data/rules/redis.yml index 3d6d5b8..3c52e5f 100644 --- a/crates/kingfisher-rules/data/rules/redis.yml +++ b/crates/kingfisher-rules/data/rules/redis.yml @@ -5,13 +5,15 @@ rules: # Host supports hostnames, IPv4, and IPv6 in brackets pattern: | (?xi) - (?: redis | rediss | redis\+sentinel ) :// - (?: (?P[a-zA-Z0-9%;._~!$&'()*+,;=-]*) - : - )? - (?P[a-zA-Z0-9%;._~!$&'()*+,;:=/+-]{8,}) - @ (?P(?:\[[0-9a-fA-F:.]+\]|[a-zA-Z0-9_.-]{1,})) (?: :(?P\d{1,5}))? - (?: / (?P\d{1,2}))? + (?P + (?: redis | rediss | redis\+sentinel ) :// + (?: (?P[a-zA-Z0-9%;._~!$&'()*+,;=-]*) + : + )? 
+ (?P[a-zA-Z0-9%;._~!$&'()*+,;:=/+-]{8,}) + @ (?P(?:\[[0-9a-fA-F:.]+\]|[a-zA-Z0-9_.-]{1,})) (?: :(?P\d{1,5}))? + (?: / (?P\d{1,2}))? + ) \b pattern_requirements: @@ -42,6 +44,9 @@ rules: - https://redis.io/docs/latest/develop/clients/redis-py/connect/ - https://redis.io/docs/latest/commands/auth/ - https://github.com/redis/redis-py/blob/master/redis/client.py + validation: + type: Raw + content: redis - id: kingfisher.redis.2 name: Python Redis Client Debug Output diff --git a/crates/kingfisher-rules/data/rules/ringcentral.yml b/crates/kingfisher-rules/data/rules/ringcentral.yml index 170089f..9244dc2 100644 --- a/crates/kingfisher-rules/data/rules/ringcentral.yml +++ b/crates/kingfisher-rules/data/rules/ringcentral.yml @@ -51,5 +51,77 @@ rules: - 'RINGCENTRAL_CLIENT_SECRET="xY9zW8vU7tS6rQ5pO4nM3l"' negative_examples: - 'RINGCENTRAL_URL="https://platform.ringcentral.com"' + validation: + type: Http + content: + request: + method: POST + url: "{{ RINGCENTRAL_BASE_URL }}/restapi/oauth/token" + headers: + Accept: application/json + Content-Type: application/x-www-form-urlencoded + Authorization: "Basic {{ CLIENT_ID | append: ':' | append: TOKEN | b64enc }}" + body: > + grant_type=authorization_code&code=INVALID_AUTH_CODE&redirect_uri={{ REDIRECT_URI | url_encode }} + response_matcher: + - report_response: true + - type: StatusMatch + status: [400] + - type: WordMatch + match_all_words: false + words: + - invalid_grant + - authentication_error + - type: WordMatch + words: + - invalid_client + negative: true + depends_on_rule: + - rule_id: kingfisher.ringcentral.1 + variable: CLIENT_ID + - rule_id: kingfisher.ringcentral.3 + variable: RINGCENTRAL_BASE_URL + - rule_id: kingfisher.ringcentral.4 + variable: REDIRECT_URI references: - - https://developers.ringcentral.com/api-reference/ + - https://developers.ringcentral.com/guide/authentication/auth-code-flow + + - name: RingCentral OAuth Base URL + id: kingfisher.ringcentral.3 + pattern: | + (?xi) + \b + ( + 
https://platform(?:\.devtest)?\.ringcentral\.com + ) + \b + min_entropy: 1.0 + confidence: medium + visible: false + examples: + - RINGCENTRAL_BASE_URL=https://platform.ringcentral.com + - RINGCENTRAL_SANDBOX_URL=https://platform.devtest.ringcentral.com + references: + - https://developers.ringcentral.com/guide/authentication/auth-code-flow + + - name: RingCentral Redirect URI + id: kingfisher.ringcentral.4 + pattern: | + (?xi) + \b + ring.?central + (?:.|[\n\r]){0,64}? + (?:redirect[_-]?uri|oauth[_-]?redirect)\b + (?:.|[\n\r]){0,16}? + [=:"'\s] + ( + https?://[^\s"'<>]{6,200} + ) + min_entropy: 1.5 + confidence: medium + visible: false + examples: + - RINGCENTRAL_REDIRECT_URI=https://example.com/ringcentral/callback + - 'ringcentral.redirect_uri = "https://localhost:8080/oauth/ringcentral"' + references: + - https://developers.ringcentral.com/guide/authentication/auth-code-flow diff --git a/crates/kingfisher-rules/data/rules/rootly.yml b/crates/kingfisher-rules/data/rules/rootly.yml new file mode 100644 index 0000000..8fe48ac --- /dev/null +++ b/crates/kingfisher-rules/data/rules/rootly.yml @@ -0,0 +1,31 @@ +rules: + - name: Rootly API Key + id: kingfisher.rootly.1 + pattern: | + (?x) + \b + ( + rootly_[a-f0-9]{64} + ) + \b + pattern_requirements: + min_digits: 4 + min_entropy: 3.5 + confidence: high + categories: [api, key] + examples: + - 'ROOTLY_API_KEY=rootly_abcdef1234567890abcdef1234567890abcdef1234567890abcdef1234567890' + validation: + type: Http + content: + request: + method: GET + url: https://api.rootly.com/v1/users/me + headers: + Authorization: Bearer {{ TOKEN }} + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + references: + - https://rootly.com/api diff --git a/crates/kingfisher-rules/data/rules/runpod.yml b/crates/kingfisher-rules/data/rules/runpod.yml new file mode 100644 index 0000000..f8e913a --- /dev/null +++ b/crates/kingfisher-rules/data/rules/runpod.yml @@ -0,0 +1,36 @@ +rules: + - name: RunPod API Key + 
id: kingfisher.runpod.1 + pattern: | + (?x) + \b + ( + rpa_[a-zA-Z0-9]{20,60} + ) + \b + pattern_requirements: + min_digits: 2 + min_entropy: 3.5 + confidence: high + categories: [api, key] + examples: + - 'RUNPOD_API_KEY=rpa_ABC123DEF456GHI789JKL012MNO345PQR678' + validation: + type: Http + content: + request: + method: POST + url: https://api.runpod.io/graphql + headers: + Authorization: Bearer {{ TOKEN }} + Content-Type: application/json + body: '{"query":"{ myself { id } }"}' + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: WordMatch + match_all_words: true + words: ['"myself"'] + references: + - https://docs.runpod.io/get-started/api-keys diff --git a/crates/kingfisher-rules/data/rules/snowflake.yml b/crates/kingfisher-rules/data/rules/snowflake.yml index 5f266af..7585c8d 100644 --- a/crates/kingfisher-rules/data/rules/snowflake.yml +++ b/crates/kingfisher-rules/data/rules/snowflake.yml @@ -24,3 +24,76 @@ rules: - https://docs.snowflake.com/en/ - https://docs.snowflake.com/en/developer-guide/python-connector/python-connector-api # Snowflake credentials are endpoint-specific; no public REST endpoint for standalone validation. + + - name: Snowflake Programmatic Access Token + id: kingfisher.snowflake.2 + pattern: | + (?x) + \b + (?: + (?i:snowflake[_\s-]*(?:programmatic[_\s-]*)?(?:access[_\s-]*)?token) + | + (?i:SF_TOKEN) + ) + \b + (?:.|[\n\r]){0,16}? + [=:] + \s*["']? + ( + [a-zA-Z0-9_-]{100,500} + ) + ["']? 
+ pattern_requirements: + min_digits: 2 + min_entropy: 3.5 + confidence: medium + categories: [api, key] + examples: + - 'SNOWFLAKE_TOKEN=AbCdEfGhIjKlMnOpQrStUvWxYz1234567890AbCdEfGhIjKlMnOpQrStUvWxYz1234567890AbCdEfGhIjKlMnOpQrStUvWxYz12' + validation: + type: Http + content: + request: + method: POST + url: "https://{{ SNOWFLAKE_HOST }}/api/v2/statements" + headers: + Accept: application/json + Content-Type: application/json + Authorization: "Bearer {{ TOKEN }}" + X-Snowflake-Authorization-Token-Type: PROGRAMMATIC_ACCESS_TOKEN + body: '{"statement":"select 1","timeout":5}' + response_matcher: + - report_response: true + - type: StatusMatch + status: [200, 202] + - type: JsonValid + - type: WordMatch + match_all_words: false + words: + - '"statementHandle"' + - '"resultSetMetaData"' + depends_on_rule: + - rule_id: kingfisher.snowflake.3 + variable: SNOWFLAKE_HOST + references: + - https://docs.snowflake.com/en/user-guide/programmatic-access-tokens + - https://docs.snowflake.com/en/developer-guide/sql-api/submitting-requests + + - name: Snowflake Account Host + id: kingfisher.snowflake.3 + pattern: | + (?xi) + \b + ( + [a-z0-9_-]+(?:\.[a-z0-9_-]+)*\.snowflakecomputing\.com + ) + \b + min_entropy: 1.0 + confidence: medium + visible: false + examples: + - account = "xy12345.us-east-1.snowflakecomputing.com" + - host=acme-prod.eu-west-1.aws.snowflakecomputing.com + references: + - https://docs.snowflake.com/en/user-guide/programmatic-access-tokens + - https://docs.snowflake.com/en/developer-guide/sql-api/submitting-requests diff --git a/crates/kingfisher-rules/data/rules/tableau.yml b/crates/kingfisher-rules/data/rules/tableau.yml index d47df68..57127c5 100644 --- a/crates/kingfisher-rules/data/rules/tableau.yml +++ b/crates/kingfisher-rules/data/rules/tableau.yml @@ -13,11 +13,12 @@ rules: X-Tableau-Auth (?:.|[\n\r]){0,16}? ) - ( - [A-Za-z0-9+/]{12,24} + (?: + (?P[A-Za-z0-9+/]{12,24} (?:={1,2})? 
+ ) : - [A-Za-z0-9+/=_-]{24,48} + (?P[A-Za-z0-9+/=_-]{24,48}) ) pattern_requirements: min_digits: 2 @@ -28,7 +29,87 @@ rules: examples: - "tableau_auth = TSC.PersonalAccessTokenAuth('prod_svc', 'WLQKWBs1TnuBx4G7gIzz/w==:yDwZ74EWDPIgU6cSlz8RDJHp7CV2rtFP', 'companysite')" - 'curl -H "X-Tableau-Auth:oJzK8bqwPTnmSl1/E2+aXw==:ZvTsRqFmKpWuLdNhYcBjXiGe" https://tableau.example.com/api/3.17/sites' + validation: + type: Http + content: + request: + method: POST + url: "{{ TABLEAU_SERVER }}/api/3.28/auth/signin" + headers: + Accept: application/json + Content-Type: application/json + body: > + {"credentials":{"personalAccessTokenName":"{{ TABLEAU_PAT_NAME }}","personalAccessTokenSecret":"{{ TOKEN }}","site":{"contentUrl":"{{ TABLEAU_SITE | default: "" }}"}}} + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: JsonValid + - type: WordMatch + match_all_words: false + words: + - '"token"' + - '"site"' + depends_on_rule: + - rule_id: kingfisher.tableau.2 + variable: TABLEAU_SERVER + - rule_id: kingfisher.tableau.3 + variable: TABLEAU_SITE references: - - https://help.tableau.com/current/api/rest_api/en-us/REST/rest_api_ref.htm + - https://help.tableau.com/current/api/rest_api/en-us/REST/rest_api_ref_authentication.htm - https://help.tableau.com/current/server/en-us/security_personal_access_tokens.htm - # Tableau PATs are instance-specific; no universal public endpoint for standalone validation. + + - name: Tableau Server URL + id: kingfisher.tableau.2 + pattern: | + (?xi) + \b + ( + https://(?: + (?:[a-z0-9-]+\.)?online\.tableau\.com + | + (?:[a-z0-9-]+\.)*tableau(?:\.[a-z0-9-]+)+ + ) + ) + (?: + /api/\d+\.\d+ + )? + (?: + /[^\s"'<>]{0,120} + )? 
+ min_entropy: 1.5 + confidence: medium + visible: false + examples: + - https://tableau.example.com + - https://10ax.online.tableau.com + - server="https://analytics.tableau.example.com" + references: + - https://help.tableau.com/current/api/rest_api/en-us/REST/rest_api_ref_authentication.htm + + - name: Tableau Site Content URL + id: kingfisher.tableau.3 + pattern: | + (?xi) + \b + (?: + tableau[_-]?(?:site|content[_-]?url) + | + tableau + (?:.|[\n\r]){0,48}? + (?:site|content[_-]?url) + ) + (?:.|[\n\r]){0,12}? + [=:"'\s] + ( + [A-Za-z0-9._-]{1,64} + ) + \b + min_entropy: 1.0 + confidence: medium + visible: false + examples: + - tableau_site=companysite + - tableau_content_url="default" + references: + - https://help.tableau.com/current/api/rest_api/en-us/REST/rest_api_ref_authentication.htm diff --git a/crates/kingfisher-rules/data/rules/telnyx.yml b/crates/kingfisher-rules/data/rules/telnyx.yml new file mode 100644 index 0000000..79f5dc9 --- /dev/null +++ b/crates/kingfisher-rules/data/rules/telnyx.yml @@ -0,0 +1,31 @@ +rules: + - name: Telnyx API V2 Key + id: kingfisher.telnyx.1 + pattern: | + (?x) + \b + ( + KEY[0-9A-Za-z_-]{55} + ) + \b + pattern_requirements: + min_digits: 2 + min_entropy: 3.5 + confidence: high + categories: [api, key] + examples: + - 'TELNYX_API_KEY=KEYabcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRS' + validation: + type: Http + content: + request: + method: GET + url: https://api.telnyx.com/v2/balance + headers: + Authorization: Bearer {{ TOKEN }} + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + references: + - https://developers.telnyx.com/development/api-fundamentals/authentication diff --git a/crates/kingfisher-rules/data/rules/thunderstore.yml b/crates/kingfisher-rules/data/rules/thunderstore.yml new file mode 100644 index 0000000..50a8d81 --- /dev/null +++ b/crates/kingfisher-rules/data/rules/thunderstore.yml @@ -0,0 +1,19 @@ +rules: + - name: Thunderstore API Token + id: 
kingfisher.thunderstore.1 + pattern: | + (?x) + \b + ( + tss_[a-zA-Z0-9_-]{20,80} + ) + \b + pattern_requirements: + min_digits: 2 + min_entropy: 3.5 + confidence: medium + categories: [api, key] + examples: + - 'THUNDERSTORE_TOKEN=tss_AbCdEfGhIjKlMnOpQrStUvWxYz123456' + references: + - https://thunderstore.io/api/docs/ diff --git a/crates/kingfisher-rules/data/rules/trello.yml b/crates/kingfisher-rules/data/rules/trello.yml index fbe8821..1678f28 100644 --- a/crates/kingfisher-rules/data/rules/trello.yml +++ b/crates/kingfisher-rules/data/rules/trello.yml @@ -27,5 +27,59 @@ rules: examples: - TRELLO_TOKEN=0a1b2c3d4e5f6g7h8i9j0k1l2m3n4p5q - trello_access_token="Ab12Cd34Ef56Gh78Ij90Kl12Mn34Op56" + validation: + type: Http + content: + request: + method: GET + url: "https://api.trello.com/1/members/me?key={{ TRELLO_KEY | url_encode }}&token={{ TOKEN | url_encode }}" + headers: + Accept: application/json + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: JsonValid + - type: WordMatch + match_all_words: false + words: + - '"id"' + - '"username"' + depends_on_rule: + - rule_id: kingfisher.trello.2 + variable: TRELLO_KEY references: - https://developer.atlassian.com/cloud/trello/guides/rest-api/api-introduction/ + - https://developer.atlassian.com/cloud/trello/guides/rest-api/authorization/ + + - name: Trello API Key + id: kingfisher.trello.2 + visible: false + pattern: | + (?xi) + \b + trello + (?:.|[\n\r]){0,32}? + (?: + api[_-]?key | + app[_-]?key | + key + ) + (?:.|[\n\r]){0,12}? 
+ ( + [A-Za-z0-9]{32} + ) + \b + pattern_requirements: + min_digits: 2 + min_lowercase: 6 + ignore_if_contains: + - yourkey + - placeholder + min_entropy: 3.1 + confidence: medium + examples: + - TRELLO_KEY=0a1b2c3d4e5f6g7h8i9j0k1l2m3n4p5q + - trello_api_key="Ab12Cd34Ef56Gh78Ij90Kl12Mn34Op56" + references: + - https://developer.atlassian.com/cloud/trello/guides/rest-api/authorization/ diff --git a/crates/kingfisher-rules/data/rules/ubidots.yml b/crates/kingfisher-rules/data/rules/ubidots.yml index b0b1928..ff7a694 100644 --- a/crates/kingfisher-rules/data/rules/ubidots.yml +++ b/crates/kingfisher-rules/data/rules/ubidots.yml @@ -16,7 +16,25 @@ rules: examples: - "API_KEY = \"BBUS-kDvT2Vrm6JThnHZvgzNyO2K7DAHdWs12abc\"" - "UBIDOTS_TOKEN=BBUS-AbCdEfGhIjKlMnOpQrStUvWxYz0123456" + validation: + type: Http + content: + request: + method: GET + url: https://industrial.api.ubidots.com/api/v2.0/devices + headers: + Accept: application/json + X-Auth-Token: "{{ TOKEN }}" + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: JsonValid + - type: WordMatch + match_all_words: false + words: + - '"count"' + - '"results"' references: - - https://docs.ubidots.com/v1.6/reference/authentication - # No API validation available: Ubidots API keys generate tokens but cannot - # themselves be validated against a public endpoint. 
+ - https://docs.ubidots.com/reference/authentication + - https://docs.ubidots.com/reference/get-devices diff --git a/crates/kingfisher-rules/data/rules/valtown.yml b/crates/kingfisher-rules/data/rules/valtown.yml new file mode 100644 index 0000000..fdaa9b5 --- /dev/null +++ b/crates/kingfisher-rules/data/rules/valtown.yml @@ -0,0 +1,19 @@ +rules: + - name: Val Town API Token + id: kingfisher.valtown.1 + pattern: | + (?x) + \b + ( + vtwn_[a-zA-Z0-9_-]{20,80} + ) + \b + pattern_requirements: + min_digits: 2 + min_entropy: 3.5 + confidence: high + categories: [api, key] + examples: + - 'VALTOWN_TOKEN=vtwn_AbCdEfGhIjKlMnOpQrStUvWxYz123456' + references: + - https://docs.val.town/api/authentication/ diff --git a/crates/kingfisher-rules/data/rules/volcengine.yml b/crates/kingfisher-rules/data/rules/volcengine.yml new file mode 100644 index 0000000..68e5dfd --- /dev/null +++ b/crates/kingfisher-rules/data/rules/volcengine.yml @@ -0,0 +1,19 @@ +rules: + - name: VolcEngine Access Key ID + id: kingfisher.volcengine.1 + pattern: | + (?x) + \b + ( + AKLT[a-zA-Z0-9_-]{16,60} + ) + \b + pattern_requirements: + min_digits: 2 + min_entropy: 3.0 + confidence: medium + categories: [api, key] + examples: + - 'VOLCENGINE_ACCESS_KEY=AKLTabcdefghijklmnop1234567890' + references: + - https://www.volcengine.com/docs/6291/65568 diff --git a/crates/kingfisher-rules/data/rules/webex.yml b/crates/kingfisher-rules/data/rules/webex.yml index a66d125..3cf82c4 100644 --- a/crates/kingfisher-rules/data/rules/webex.yml +++ b/crates/kingfisher-rules/data/rules/webex.yml @@ -47,6 +47,57 @@ rules: examples: - "webex.secret = 8ab9b3c77035e1121e2d7d64529749682b3ce5b93dc1f1e6677f0800dcf00d1e" - "webex\nclient_secret=1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6d7e8f9a0b1c2d3e4f5a6b7c8d9e0f1a2b" + validation: + type: Http + content: + request: + method: POST + url: https://webexapis.com/v1/access_token + headers: + Accept: application/json + Content-Type: application/x-www-form-urlencoded + body: > + 
grant_type=authorization_code&client_id={{ CLIENT_ID | url_encode }}&client_secret={{ TOKEN | url_encode }}&code=INVALID_AUTH_CODE&redirect_uri={{ REDIRECT_URI | url_encode }} + response_matcher: + - report_response: true + - type: StatusMatch + status: [400] + - type: WordMatch + match_all_words: false + words: + - invalid_grant + - Invalid authorization code + - type: WordMatch + words: + - invalid_client + negative: true + depends_on_rule: + - rule_id: kingfisher.webex.1 + variable: CLIENT_ID + - rule_id: kingfisher.webex.3 + variable: REDIRECT_URI references: - - https://developer.webex.com/docs/platform-introduction + - https://developer.webex.com/create/docs/authentication - https://developer.webex.com/docs/integrations + + - name: Webex Redirect URI + id: kingfisher.webex.3 + pattern: | + (?xi) + \b + webex + (?:.|[\n\r]){0,64}? + (?:redirect[_-]?uri|oauth[_-]?redirect)\b + (?:.|[\n\r]){0,16}? + [=:"'\s] + ( + https?://[^\s"'<>]{6,200} + ) + min_entropy: 1.5 + confidence: medium + visible: false + examples: + - WEBEX_REDIRECT_URI=https://example.com/webex/callback + - 'webex.redirect_uri = "https://localhost:3000/oauth/webex"' + references: + - https://developer.webex.com/create/docs/authentication diff --git a/crates/kingfisher-rules/src/liquid_filters.rs b/crates/kingfisher-rules/src/liquid_filters.rs index 77fba6e..cf6b73e 100644 --- a/crates/kingfisher-rules/src/liquid_filters.rs +++ b/crates/kingfisher-rules/src/liquid_filters.rs @@ -12,7 +12,10 @@ use percent_encoding::{utf8_percent_encode, NON_ALPHANUMERIC}; use rand::{distr::Alphanumeric, RngExt}; use sha1::Sha1; use sha2::{Digest, Sha256, Sha384}; -use time::{format_description::well_known::Iso8601, OffsetDateTime}; +use time::{ + format_description::well_known::{Iso8601, Rfc2822}, + OffsetDateTime, +}; use uuid::Uuid; // ----------------------------------------------------------------------------- @@ -297,6 +300,42 @@ impl Filter for HmacSha384Filter { } } +#[derive(Clone, ParseFilter, 
FilterReflection, Default)] +#[filter( + name = "hmac_sha384_hex", + description = "HMAC-SHA384 - returns lowercase hex.", + parameters(Hmac384Args), + parsed(HmacSha384HexFilter) +)] +pub struct HmacSha384Hex; + +#[derive(Debug, FromFilterParameters, Display_filter)] +#[name = "hmac_sha384_hex"] +struct HmacSha384HexFilter { + #[parameters] + args: Hmac384Args, +} + +impl Filter for HmacSha384HexFilter { + fn evaluate(&self, input: &dyn ValueView, runtime: &dyn Runtime) -> Result { + use std::fmt::Write as _; + + let args = self.args.evaluate(runtime)?; + let key = args.key.to_kstr(); + + let mut mac = Hmac::::new_from_slice(key.as_bytes()).unwrap(); + mac.update(input.to_kstr().as_bytes()); + + let bytes = mac.finalize().into_bytes(); + let mut hex = String::with_capacity(bytes.len() * 2); + for byte in bytes { + let _ = write!(&mut hex, "{byte:02x}"); + } + + Ok(Value::scalar(hex)) + } +} + // ── random_string ──────────────────────────────── #[derive(Debug, FilterParameters)] struct RandomStringArgs { @@ -903,6 +942,15 @@ static_filter!( } ); +// {{ "" | unix_timestamp_ms }} +static_filter!( + /// Current Unix epoch milliseconds. + UnixTimestampMsFilter, "unix_timestamp_ms", + |_input: &dyn ValueView| -> i64 { + (OffsetDateTime::now_utc().unix_timestamp_nanos() / 1_000_000) as i64 + } +); + // {{ "" | iso_timestamp_no_frac }} static_filter!( /// Current ISO-8601 timestamp (UTC) with no fractional seconds. @@ -933,6 +981,21 @@ static_filter!( } ); +// {{ "" | rfc1123_date }} +static_filter!( + /// Current RFC-1123 timestamp in GMT. 
+ Rfc1123DateFilter, "rfc1123_date", + |_input: &dyn ValueView| -> String { + let rendered = OffsetDateTime::now_utc() + .format(&Rfc2822) + .unwrap_or_else(|_| "Thu, 01 Jan 1970 00:00:00 +0000".into()); + rendered + .strip_suffix(" +0000") + .map(|prefix| format!("{prefix} GMT")) + .unwrap_or(rendered) + } +); + // ----------------------------------------------------------------------------- // Request Uniqueness // ----------------------------------------------------------------------------- @@ -953,8 +1016,10 @@ pub fn register_all(builder: liquid::ParserBuilder) -> liquid::ParserBuilder { .filter(UrlEncodeFilter::default()) .filter(JsonEscapeFilter::default()) .filter(UnixTimestampFilter::default()) + .filter(UnixTimestampMsFilter::default()) .filter(IsoTimestampFilter::default()) .filter(IsoTimestampNoFracFilter::default()) + .filter(Rfc1123DateFilter::default()) .filter(UuidFilter::default()) .filter(JwtHeaderFilter::default()) .filter(B64EncFilter::default()) @@ -974,6 +1039,7 @@ pub fn register_all(builder: liquid::ParserBuilder) -> liquid::ParserBuilder { .filter(HmacSha256B64Key::default()) .filter(HmacSha1::default()) .filter(HmacSha384::default()) + .filter(HmacSha384Hex::default()) } #[cfg(test)] @@ -1148,6 +1214,24 @@ mod tests { assert_eq!(render(r#"{{ "payload" | hmac_sha384: "topsecret" }}"#), expect); } + #[test] + fn hmac_sha384_hex_filter() { + use std::fmt::Write as _; + + let key = b"topsecret"; + let data = b"payload"; + let mut mac = Hmac::::new_from_slice(key).unwrap(); + mac.update(data); + + let bytes = mac.finalize().into_bytes(); + let mut expect = String::with_capacity(bytes.len() * 2); + for byte in bytes { + let _ = write!(&mut expect, "{byte:02x}"); + } + + assert_eq!(render(r#"{{ "payload" | hmac_sha384_hex: "topsecret" }}"#), expect); + } + // ------------------------------------------------------------------------- // Random string // ------------------------------------------------------------------------- @@ -1174,6 +1258,13 @@ 
mod tests { assert!((now - tmpl_val).abs() < 5, "timestamp differs by >5 s"); } + #[test] + fn unix_timestamp_ms_filter_is_nowish() { + let tmpl_val: i64 = render(r#"{{ "" | unix_timestamp_ms }}"#).parse().unwrap(); + let now = (OffsetDateTime::now_utc().unix_timestamp_nanos() / 1_000_000) as i64; + assert!((now - tmpl_val).abs() < 5_000, "timestamp differs by >5 s"); + } + #[test] fn iso_timestamp_filter_parses() { let out = render(r#"{{ "" | iso_timestamp }}"#); @@ -1192,6 +1283,14 @@ mod tests { let v = render(r#"{{ "" | uuid }}"#); assert!(uuid_re.is_match(&v)); } + + #[test] + fn rfc1123_date_filter_format() { + let out = render(r#"{{ "" | rfc1123_date }}"#); + assert!(out.ends_with(" GMT"), "unexpected RFC-1123 date: {out}"); + let normalized = out.replace(" GMT", " +0000"); + assert!(OffsetDateTime::parse(&normalized, &Rfc2822).is_ok()); + } // ------------------------------------------------------------------------- // Replace filter // ------------------------------------------------------------------------- diff --git a/crates/kingfisher-rules/src/rules_database.rs b/crates/kingfisher-rules/src/rules_database.rs index 3a28fac..5ba76ea 100644 --- a/crates/kingfisher-rules/src/rules_database.rs +++ b/crates/kingfisher-rules/src/rules_database.rs @@ -60,21 +60,7 @@ impl RulesDatabase { let mut reason_codes: Vec<&'static str> = Vec::new(); - let has_self_identifying_prefix = [ - "ccipat_", - "xoxb-", - "xoxa-", - "xoxp-", - "xapp-", - "ghp_", - "github_pat_", - "sk_live_", - "sk_test_", - "ltai", - "akia", - ] - .iter() - .any(|m| normalized.contains(m)); + let has_self_identifying_prefix = has_self_identifying_shape(&normalized); if has_self_identifying_prefix { reason_codes.push("self_identifying_prefix"); return RuleMatchProfile { @@ -307,6 +293,33 @@ impl RulesDatabase { } } +fn has_self_identifying_shape(normalized_pattern: &str) -> bool { + let literal_markers = [ + "ccipat_", + "xapp-", + "ghp_", + "github_pat_", + "sk_live_", + "sk_test_", + "ltai", + 
"akia", + "aizasy", + "pypi-ageichlwas5vcmc", + "https://hooks\\.slack\\.com/services/", + ]; + + literal_markers.iter().any(|needle| normalized_pattern.contains(needle)) + || normalized_pattern.contains("xox[pbarose]") + || normalized_pattern.contains("xoxe\\.xox[bparose]-") + || normalized_pattern.contains("xoxe-\\d-") + || (normalized_pattern.contains("-----begin\\s") + && normalized_pattern.contains("private\\skey") + && normalized_pattern.contains("-----end\\s")) + || (normalized_pattern.contains("-----begin\\ ") + && normalized_pattern.contains("private\\ key") + && normalized_pattern.contains("-----end\\ ")) +} + fn has_generic_token_class(normalized_pattern: &str) -> bool { [ "[a-za-z0-9]{", @@ -436,6 +449,57 @@ mod test_rule_match_profiles { assert!(profile.reason_codes.contains(&"self_identifying_prefix")); } + #[test] + fn classifies_google_api_key_rule_as_self_identifying() { + let rule = mk_rule("kingfisher.google.7", r"(?xi)\b(AIzaSy[A-Za-z0-9_-]{33})"); + let profile = RulesDatabase::classify_rule_profile(&rule); + assert_eq!(profile.kind, RuleDetectionProfileKind::SelfIdentifying); + } + + #[test] + fn classifies_slack_token_charclass_rule_as_self_identifying() { + let rule = mk_rule( + "kingfisher.slack.2", + r"(?xi)\b(xox[pbarose][-0-9]{0,3}-[0-9a-z]{6,15}-[0-9a-z]{6,15}-[-0-9a-z]{6,66})\b", + ); + let profile = RulesDatabase::classify_rule_profile(&rule); + assert_eq!(profile.kind, RuleDetectionProfileKind::SelfIdentifying); + } + + #[test] + fn classifies_slack_webhook_rule_as_self_identifying() { + let rule = mk_rule( + "kingfisher.slack.4", + r"(?xi)\b(https://hooks\.slack\.com/services/T[a-z0-9_-]{8,12}/B[a-z0-9_-]{8,12}/[a-z0-9_-]{20,30})", + ); + let profile = RulesDatabase::classify_rule_profile(&rule); + assert_eq!(profile.kind, RuleDetectionProfileKind::SelfIdentifying); + } + + #[test] + fn classifies_pypi_token_rule_as_self_identifying() { + let rule = mk_rule("kingfisher.pypi.1", r"(?x)(pypi-AgEIcHlwaS5vcmc[A-Za-z0-9_-]{50,})\b"); + 
let profile = RulesDatabase::classify_rule_profile(&rule); + assert_eq!(profile.kind, RuleDetectionProfileKind::SelfIdentifying); + } + + #[test] + fn classifies_private_key_envelope_rules_as_self_identifying() { + let rule = mk_rule( + "kingfisher.privkey.2", + r"(?xims)(-----BEGIN\s(?:RSA|PGP|DSA|OPENSSH|ENCRYPTED|EC)?\s{0,1}PRIVATE\sKEY-----[a-z0-9 /+=\r\n\\n]{32,}?-----END\s(?:RSA|PGP|DSA|OPENSSH|ENCRYPTED|EC)?\s{0,1}PRIVATE\sKEY-----)", + ); + let profile = RulesDatabase::classify_rule_profile(&rule); + assert_eq!(profile.kind, RuleDetectionProfileKind::SelfIdentifying); + + let pem_rule = mk_rule( + "kingfisher.pem.1", + r#"(?x)-----BEGIN\ .{0,20}\ ?PRIVATE\ KEY\ ?.{0,20}-----\s*((?:[a-zA-Z0-9+/=\s"',]|\\r|\\n){50,})\s*-----END\ .{0,20}\ ?PRIVATE\ KEY\ ?.{0,20}-----"#, + ); + let pem_profile = RulesDatabase::classify_rule_profile(&pem_rule); + assert_eq!(pem_profile.kind, RuleDetectionProfileKind::SelfIdentifying); + } + #[test] fn classifies_context_dependent_generic_rule() { let rule = mk_rule( diff --git a/crates/kingfisher-scanner/Cargo.toml b/crates/kingfisher-scanner/Cargo.toml index 09521ca..743b0d1 100644 --- a/crates/kingfisher-scanner/Cargo.toml +++ b/crates/kingfisher-scanner/Cargo.toml @@ -24,6 +24,22 @@ validation-http = [ "dep:liquid-core", "dep:quick-xml", "dep:sha1", + "dep:time", +] + +# Provider/protocol-specific validation flows that need custom network logic. 
+validation-raw = [ + "validation-http", + "dep:chrono", + "dep:hmac", + "dep:sha2", + "dep:hex", + "dep:url", + "dep:percent-encoding", + "dep:rustls", + "dep:rustls-native-certs", + "dep:tokio-rustls", + "dep:ldap3", ] # AWS credential validation @@ -94,6 +110,7 @@ validation-database = [ # All validation features validation-all = [ "validation", + "validation-raw", "validation-aws", "validation-azure", "validation-coinbase", @@ -153,11 +170,12 @@ tracing.workspace = true reqwest = { version = "0.12", default-features = false, features = [ "json", "gzip", "brotli", "deflate", "stream", "rustls-tls", "rustls-tls-native-roots", "multipart" ], optional = true } -tokio = { version = "1.48", features = ["net", "time", "sync"], optional = true } +tokio = { version = "1.51", features = ["net", "time", "sync", "io-util"], optional = true } liquid = { version = "0.26", optional = true } liquid-core = { version = "0.26", optional = true } quick-xml = { version = "0.39", features = ["serde", "serialize"], optional = true } sha1 = { workspace = true, optional = true } +time = { workspace = true, optional = true } chrono = { version = "0.4.42", optional = true } hmac = { workspace = true, optional = true } sha2 = { workspace = true, optional = true } @@ -165,7 +183,7 @@ pem = { version = "3.0.6", optional = true } percent-encoding = { workspace = true, optional = true } ring = { version = "0.17", optional = true } -jsonwebtoken = { version = "10.2.0", features = ["aws-lc-rs"], optional = true } +jsonwebtoken = { version = "10.3.0", features = ["aws-lc-rs"], optional = true } p256 = { version = "0.13.2", optional = true } ed25519-dalek = { version = "2.2", features = ["pkcs8"], optional = true } hex = { workspace = true, optional = true } @@ -176,7 +194,7 @@ tokio-postgres = { version = "0.7", default-features = false, features = ["runti tokio-postgres-rustls = { version = "0.13.0", optional = true } rustls = { version = "0.23.35", optional = true } rustls-native-certs = { 
version = "0.8.2", optional = true } - +tokio-rustls = { version = "0.26.4", optional = true } # AWS validation aws-config = { version = "1.8.14", default-features = false, features = ["default-https-client", "rt-tokio"], optional = true } aws-credential-types = { version = "1.2.12", optional = true } @@ -190,7 +208,15 @@ base32 = { version = "0.5", optional = true } byteorder = { version = "1.5", optional = true } rand = { version = "0.10", optional = true } +[target.'cfg(all(windows, target_arch = "aarch64"))'.dependencies] +# ldap3's rustls backend still pulls ring 0.16, which fails to build on Windows ARM64. +# Use the platform TLS backend there to keep the raw LDAP validator available. +ldap3 = { version = "0.11.5", default-features = false, features = ["tls-native"], optional = true } + +[target.'cfg(not(all(windows, target_arch = "aarch64")))'.dependencies] +ldap3 = { version = "0.11.5", default-features = false, features = ["tls-rustls"], optional = true } + [dev-dependencies] pretty_assertions = "1.4" tempfile = "3.23" -tokio = { version = "1.48", features = ["macros", "rt"] } +tokio = { version = "1.51", features = ["macros", "rt"] } diff --git a/crates/kingfisher-scanner/src/scanner.rs b/crates/kingfisher-scanner/src/scanner.rs index dc8ef6c..cca0ecb 100644 --- a/crates/kingfisher-scanner/src/scanner.rs +++ b/crates/kingfisher-scanner/src/scanner.rs @@ -14,6 +14,8 @@ use crate::finding::{Finding, FindingLocation}; use crate::primitives; use crate::scanner_pool::ScannerPool; +const RAW_MATCH_LOOKBACK: usize = 64 * 1024; + /// Configuration options for the scanner. #[derive(Debug, Clone)] pub struct ScannerConfig { @@ -26,7 +28,7 @@ pub struct ScannerConfig { /// Override the minimum entropy threshold for all rules. pub min_entropy_override: Option, - /// Language hint for tree-sitter parsing (e.g., "python", "javascript"). + /// Language hint for parser-based context verification (e.g., "python", "javascript"). 
pub language_hint: Option, /// Whether to redact secrets in findings. @@ -167,9 +169,14 @@ impl Scanner { // Process matches through regex let mut findings = Vec::new(); let mut seen_matches: FxHashSet = FxHashSet::default(); - let mut previous_spans: FxHashMap> = FxHashMap::default(); + let mut seen_raw_match_ends: FxHashSet<(usize, usize)> = FxHashSet::default(); + let mut previous_full_spans: FxHashMap> = FxHashMap::default(); for (rule_id, start, end) in raw_matches.into_iter().rev() { + let _ = start; // Block-mode Vectorscan reports `from` as 0 unless SOM is enabled. + if !seen_raw_match_ends.insert((rule_id, end)) { + continue; + } let rule = match self.rules_db.get_rule(rule_id) { Some(r) => r, None => continue, @@ -180,16 +187,18 @@ impl Scanner { Err(_) => continue, }; - let current_span = OffsetSpan::from_range(start..end); - - // Check for overlapping spans - if !primitives::record_match(&mut previous_spans, rule_id, current_span) { - continue; - } - - let haystack = &bytes[start..end]; + let scan_start = end.saturating_sub(RAW_MATCH_LOOKBACK); + let haystack = &bytes[scan_start..end]; for captures in anchored_regex.captures_iter(haystack) { + let full_capture = captures.get(0).unwrap(); + let full_capture_span = OffsetSpan::from_range( + (scan_start + full_capture.start())..(scan_start + full_capture.end()), + ); + if !primitives::record_match(&mut previous_full_spans, rule_id, full_capture_span) { + continue; + } + // Get the primary secret value let secret_capture = primitives::find_secret_capture(&anchored_regex, &captures); let secret_bytes = secret_capture.as_bytes(); @@ -203,20 +212,20 @@ impl Scanner { } // Compute match key for dedup + let offset_start = scan_start + secret_capture.start(); + let offset_end = scan_start + secret_capture.end(); let match_key = primitives::compute_match_key( secret_bytes, rule.id().as_bytes(), - start + secret_capture.start(), - start + secret_capture.end(), + offset_start, + offset_end, ); if 
!seen_matches.insert(match_key) { continue; } // Build the finding - let offset_span = OffsetSpan::from_range( - (start + secret_capture.start())..(start + secret_capture.end()), - ); + let offset_span = OffsetSpan::from_range(offset_start..offset_end); let source_span = loc_mapping.get_source_span(&offset_span); let secret = if self.config.redact_secrets { diff --git a/crates/kingfisher-scanner/src/validation/http_validation.rs b/crates/kingfisher-scanner/src/validation/http_validation.rs index 2dc70de..255edd3 100644 --- a/crates/kingfisher-scanner/src/validation/http_validation.rs +++ b/crates/kingfisher-scanner/src/validation/http_validation.rs @@ -3,6 +3,7 @@ use std::{collections::BTreeMap, future::Future, net::IpAddr, str::FromStr, time use anyhow::{anyhow, Error, Result}; use http::StatusCode; use liquid::Object; +use liquid_core::Value; use quick_xml::de::from_str as xml_from_str; use reqwest::{ header, @@ -11,6 +12,7 @@ use reqwest::{ }; use serde::de::IgnoredAny; use sha1::{Digest, Sha1}; +use time::{format_description::well_known::Rfc2822, OffsetDateTime}; use tokio::{net::lookup_host, time::sleep}; use tracing::debug; @@ -34,12 +36,11 @@ use kingfisher_rules::ResponseMatcher; /// Build a deterministic cache key from the immutable parts of an HTTP request. 
pub fn generate_http_cache_key_parts( method: &str, - url: &Url, + url: &str, headers: &BTreeMap, body: Option<&str>, ) -> String { let method = method.to_uppercase(); - let url = url.as_str(); let mut hasher = Sha1::new(); hasher.update(method.as_bytes()); @@ -68,6 +69,52 @@ pub fn parse_http_method(method_str: &str) -> Result { Method::from_str(method_str).map_err(|_| format!("Invalid HTTP method: {}", method_str)) } +fn format_rfc1123(now: OffsetDateTime) -> String { + let rendered = + now.format(&Rfc2822).unwrap_or_else(|_| "Thu, 01 Jan 1970 00:00:00 +0000".to_string()); + rendered.strip_suffix(" +0000").map(|prefix| format!("{prefix} GMT")).unwrap_or(rendered) +} + +pub fn is_auto_provided_request_var(var: &str) -> bool { + matches!(var, "REQUEST_RFC1123_DATE" | "REQUEST_UNIX_MILLIS") +} + +/// Clone `globals` and add stable request-scoped values for templated request rendering. +/// +/// These values are computed once so the same generated timestamp can be reused across the URL, +/// headers, body, and multipart parts of a single request. +pub fn with_request_template_globals(globals: &Object) -> Object { + let mut out = globals.clone(); + let now = OffsetDateTime::now_utc(); + + if !out.contains_key("REQUEST_RFC1123_DATE") { + out.insert("REQUEST_RFC1123_DATE".into(), Value::scalar(format_rfc1123(now))); + } + if !out.contains_key("REQUEST_UNIX_MILLIS") { + out.insert( + "REQUEST_UNIX_MILLIS".into(), + Value::scalar((now.unix_timestamp_nanos() / 1_000_000).to_string()), + ); + } + + out +} + +/// Clone `globals` and add stable placeholder values for request-scoped template vars that +/// would otherwise make HTTP validation cache keys vary per execution. 
+pub fn with_cache_key_template_globals(globals: &Object) -> Object { + let mut out = globals.clone(); + + if !out.contains_key("REQUEST_RFC1123_DATE") { + out.insert("REQUEST_RFC1123_DATE".into(), Value::scalar("REQUEST_RFC1123_DATE")); + } + if !out.contains_key("REQUEST_UNIX_MILLIS") { + out.insert("REQUEST_UNIX_MILLIS".into(), Value::scalar("REQUEST_UNIX_MILLIS")); + } + + out +} + /// Build a reqwest RequestBuilder using the provided parameters. pub fn build_request_builder( client: &Client, @@ -566,7 +613,57 @@ pub async fn check_url_resolvable_safe(url: &Url) -> Result<(), Box 0); + } + + #[test] + fn request_template_globals_preserve_explicit_overrides() { + let mut globals = Object::new(); + globals.insert("REQUEST_RFC1123_DATE".into(), Value::scalar("custom-date")); + globals.insert("REQUEST_UNIX_MILLIS".into(), Value::scalar("123")); + + let rendered = with_request_template_globals(&globals); + + assert_eq!(rendered.get("REQUEST_RFC1123_DATE").unwrap().to_kstr(), "custom-date"); + assert_eq!(rendered.get("REQUEST_UNIX_MILLIS").unwrap().to_kstr(), "123"); + } + + #[test] + fn cache_key_template_globals_use_stable_placeholders() { + let globals = Object::new(); + let rendered = with_cache_key_template_globals(&globals); + + assert_eq!(rendered.get("REQUEST_RFC1123_DATE").unwrap().to_kstr(), "REQUEST_RFC1123_DATE"); + assert_eq!(rendered.get("REQUEST_UNIX_MILLIS").unwrap().to_kstr(), "REQUEST_UNIX_MILLIS"); + } + + #[test] + fn cache_key_template_globals_preserve_explicit_overrides() { + let mut globals = Object::new(); + globals.insert("REQUEST_RFC1123_DATE".into(), Value::scalar("custom-date")); + globals.insert("REQUEST_UNIX_MILLIS".into(), Value::scalar("123")); + + let rendered = with_cache_key_template_globals(&globals); + + assert_eq!(rendered.get("REQUEST_RFC1123_DATE").unwrap().to_kstr(), "custom-date"); + assert_eq!(rendered.get("REQUEST_UNIX_MILLIS").unwrap().to_kstr(), "123"); + } #[test] fn rejects_ipv4_loopback() { diff --git 
a/crates/kingfisher-scanner/src/validation/mod.rs b/crates/kingfisher-scanner/src/validation/mod.rs index 443f821..b242e90 100644 --- a/crates/kingfisher-scanner/src/validation/mod.rs +++ b/crates/kingfisher-scanner/src/validation/mod.rs @@ -20,6 +20,8 @@ //! - **Azure**: Azure Storage credential validation (requires `validation-azure` feature) //! - **Databases**: MongoDB, MySQL, Postgres, JDBC (requires `validation-database` feature) //! - **JWT**: JWT token validation (requires `validation-jwt` feature) +//! - **Raw**: provider/protocol-specific validators that need custom logic +//! (requires `validation-raw` feature) mod utils; mod validation_body; @@ -54,6 +56,9 @@ pub mod mysql; #[cfg(feature = "validation-database")] pub mod postgres; +#[cfg(feature = "validation-raw")] +pub mod raw; + // Re-exports pub use utils::{find_closest_variable, process_captures}; pub use validation_body::{as_str, clone_as_string, from_string, ValidationResponseBody}; @@ -62,9 +67,12 @@ pub use validation_body::{as_str, clone_as_string, from_string, ValidationRespon pub use http_validation::{ build_request_builder, check_url_resolvable, generate_http_cache_key_parts, is_ssrf_safe_ip, parse_http_method, process_headers, retry_multipart_request, retry_request, validate_response, - SsrfBlockedError, + with_request_template_globals, SsrfBlockedError, }; +#[cfg(feature = "validation-raw")] +pub use raw::{required_vars as raw_required_vars, validate_raw, RawValidationOutcome}; + #[cfg(feature = "validation-http")] #[allow(deprecated)] pub use http_validation::check_url_resolvable_safe; diff --git a/crates/kingfisher-scanner/src/validation/raw.rs b/crates/kingfisher-scanner/src/validation/raw.rs new file mode 100644 index 0000000..3436272 --- /dev/null +++ b/crates/kingfisher-scanner/src/validation/raw.rs @@ -0,0 +1,639 @@ +//! Provider-specific raw validators for secret formats that need custom protocol logic. 
+ +use std::{ + collections::BTreeSet, + sync::{Arc, OnceLock}, + time::{Duration, SystemTime, UNIX_EPOCH}, +}; + +use anyhow::{anyhow, Context, Result}; +use base64::{engine::general_purpose::STANDARD as B64, Engine}; +use hmac::{digest::KeyInit, Hmac, Mac}; +use http::StatusCode; +use ldap3::LdapConnSettings; +use liquid::Object; +use liquid_core::ValueView; +use once_cell::sync::OnceCell; +use percent_encoding::percent_decode_str; +use reqwest::Client; +use rustls::client::danger::{HandshakeSignatureValid, ServerCertVerified, ServerCertVerifier}; +use rustls::crypto::{ring, verify_tls12_signature, verify_tls13_signature, CryptoProvider}; +use rustls::pki_types::{CertificateDer, ServerName, UnixTime}; +use rustls::{ClientConfig, DigitallySignedStruct, RootCertStore, SignatureScheme}; +use sha2::{Digest, Sha256, Sha512}; +use tokio::{ + io::{AsyncBufReadExt, AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt, BufStream}, + net::TcpStream, + time::timeout, +}; +use tokio_rustls::TlsConnector; +use url::Url; + +use crate::validation::http_validation::check_url_resolvable; + +pub struct RawValidationOutcome { + pub valid: bool, + pub status: StatusCode, + pub body: String, +} + +static INIT_PROVIDER: OnceCell<()> = OnceCell::new(); +static LAX_PROVIDER: OnceLock> = OnceLock::new(); + +fn ensure_crypto_provider() { + INIT_PROVIDER.get_or_init(|| { + let _ = CryptoProvider::install_default(ring::default_provider()); + }); +} + +#[derive(Debug)] +struct LaxCertVerifier(Arc); + +impl ServerCertVerifier for LaxCertVerifier { + fn verify_server_cert( + &self, + _end_entity: &CertificateDer<'_>, + _intermediates: &[CertificateDer<'_>], + _server_name: &ServerName<'_>, + _ocsp_response: &[u8], + _now: UnixTime, + ) -> std::result::Result { + Ok(ServerCertVerified::assertion()) + } + + fn verify_tls12_signature( + &self, + message: &[u8], + cert: &CertificateDer<'_>, + dss: &DigitallySignedStruct, + ) -> std::result::Result { + verify_tls12_signature(message, cert, dss, 
&self.0.signature_verification_algorithms) + } + + fn verify_tls13_signature( + &self, + message: &[u8], + cert: &CertificateDer<'_>, + dss: &DigitallySignedStruct, + ) -> std::result::Result { + verify_tls13_signature(message, cert, dss, &self.0.signature_verification_algorithms) + } + + fn supported_verify_schemes(&self) -> Vec { + self.0.signature_verification_algorithms.supported_schemes() + } +} + +pub fn required_vars(kind: &str) -> BTreeSet { + let mut vars = BTreeSet::new(); + vars.insert("TOKEN".to_string()); + + match kind { + "azurebatch" => { + vars.insert("BATCH_URL".to_string()); + } + "kraken" => { + vars.insert("KRAKEN_API_KEY".to_string()); + } + _ => {} + } + + vars +} + +pub async fn validate_raw( + kind: &str, + globals: &Object, + client: &Client, + use_lax_tls: bool, + allow_internal_ips: bool, +) -> Result { + if let Some(url) = raw_validation_target_url(kind, globals)? { + if let Err(e) = check_url_resolvable(&url, allow_internal_ips).await { + return Ok(RawValidationOutcome { + valid: false, + status: StatusCode::PRECONDITION_REQUIRED, + body: format!( + "Validation skipped - raw validation target blocked or not resolvable: {e}" + ), + }); + } + } + + match kind { + "azurebatch" => validate_azure_batch(globals, client).await, + "ftp" => validate_ftp(globals, use_lax_tls).await, + "kraken" => validate_kraken(globals, client).await, + "ldap" => validate_ldap(globals, use_lax_tls).await, + "rabbitmq" => validate_rabbitmq(globals, use_lax_tls).await, + "redis" => validate_redis(globals, use_lax_tls).await, + other => Ok(RawValidationOutcome { + valid: false, + status: StatusCode::NOT_IMPLEMENTED, + body: format!("Raw validator `{other}` is not implemented."), + }), + } +} + +fn raw_validation_target_url(kind: &str, globals: &Object) -> Result> { + match kind { + "azurebatch" => string_var(globals, "BATCH_URL") + .map(|s| Url::parse(&s).context("invalid BATCH_URL")) + .transpose(), + "ftp" | "ldap" | "rabbitmq" | "redis" => string_var(globals, 
"TOKEN") + .map(|s| Url::parse(&s).context("invalid raw validation URI")) + .transpose(), + _ => Ok(None), + } +} + +fn string_var(globals: &Object, name: &str) -> Option { + globals.get(name).map(|v| v.to_kstr().to_string()).filter(|s| !s.is_empty()) +} + +fn decode_userinfo(input: &str) -> String { + percent_decode_str(input).decode_utf8_lossy().to_string() +} + +fn current_unix_millis() -> String { + SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_else(|_| Duration::from_millis(0)) + .as_millis() + .to_string() +} + +fn rfc1123_now() -> String { + chrono::Utc::now().format("%a, %d %b %Y %H:%M:%S GMT").to_string() +} + +fn build_root_store() -> Result { + let mut roots = RootCertStore::empty(); + let native = rustls_native_certs::load_native_certs(); + for cert in native.certs { + roots.add(cert).map_err(|e| anyhow!("failed to add native root cert: {e:?}"))?; + } + Ok(roots) +} + +fn lax_provider() -> Arc { + LAX_PROVIDER.get_or_init(|| Arc::new(ring::default_provider())).clone() +} + +fn tls_connector(use_lax_tls: bool) -> Result { + let cfg = if use_lax_tls { + ensure_crypto_provider(); + ClientConfig::builder() + .dangerous() + .with_custom_certificate_verifier(Arc::new(LaxCertVerifier(lax_provider()))) + .with_no_client_auth() + } else { + ClientConfig::builder().with_root_certificates(build_root_store()?).with_no_client_auth() + }; + Ok(TlsConnector::from(Arc::new(cfg))) +} + +trait AsyncStream: AsyncRead + AsyncWrite + Unpin + Send {} +impl AsyncStream for T where T: AsyncRead + AsyncWrite + Unpin + Send {} +type DynStream = Box; + +async fn connect_plain(host: &str, port: u16) -> Result { + let stream = timeout(Duration::from_secs(10), TcpStream::connect((host, port))) + .await + .context("connection timed out")??; + Ok(Box::new(stream)) +} + +async fn connect_tls(host: &str, port: u16, use_lax_tls: bool) -> Result { + let stream = timeout(Duration::from_secs(10), TcpStream::connect((host, port))) + .await + .context("connection timed out")??; 
+ let server_name = + ServerName::try_from(host.to_string()).map_err(|_| anyhow!("invalid TLS host: {host}"))?; + let tls = + timeout(Duration::from_secs(10), tls_connector(use_lax_tls)?.connect(server_name, stream)) + .await + .context("TLS handshake timed out")??; + Ok(Box::new(tls)) +} + +async fn connect_from_url( + url: &Url, + tls_default_port: u16, + plain_default_port: u16, + use_lax_tls: bool, +) -> Result { + let host = url.host_str().ok_or_else(|| anyhow!("URL is missing host"))?; + let tls = matches!(url.scheme(), "ftps" | "amqps" | "rediss" | "ldaps"); + let port = url.port().unwrap_or(if tls { tls_default_port } else { plain_default_port }); + if tls { + connect_tls(host, port, use_lax_tls).await + } else { + connect_plain(host, port).await + } +} + +async fn validate_azure_batch(globals: &Object, client: &Client) -> Result { + let endpoint = string_var(globals, "BATCH_URL").ok_or_else(|| anyhow!("missing BATCH_URL"))?; + let account_key = string_var(globals, "TOKEN").ok_or_else(|| anyhow!("missing TOKEN"))?; + let parsed = Url::parse(&endpoint).context("invalid BATCH_URL")?; + let host = parsed.host_str().ok_or_else(|| anyhow!("BATCH_URL is missing host"))?; + let account_name = host + .split('.') + .next() + .filter(|s| !s.is_empty()) + .ok_or_else(|| anyhow!("failed to derive Batch account name from host"))?; + + let api_version = "2020-09-01.12.0"; + let url = format!("{endpoint}/applications?api-version={api_version}"); + let date = rfc1123_now(); + let string_to_sign = format!( + "GET\n\n\n\n\napplication/json\n{}\n\n\n\n\n\n{}\napi-version:{}", + date, + format!("/{account_name}/applications").to_lowercase(), + api_version + ); + + let key = B64.decode(account_key.as_bytes()).context("Azure Batch key is not valid base64")?; + let mut mac = as KeyInit>::new_from_slice(&key) + .map_err(|e| anyhow!("invalid HMAC key: {e}"))?; + mac.update(string_to_sign.as_bytes()); + let signature = B64.encode(mac.finalize().into_bytes()); + + let resp = client + 
.get(&url) + .header("Content-Type", "application/json") + .header("Date", &date) + .header("Authorization", format!("SharedKey {account_name}:{signature}")) + .send() + .await + .context("Azure Batch validation request failed")?; + + let status = resp.status(); + let body = resp.text().await.unwrap_or_default(); + let valid = status == StatusCode::OK; + + Ok(RawValidationOutcome { valid, status, body }) +} + +async fn validate_ftp(globals: &Object, use_lax_tls: bool) -> Result { + let token = string_var(globals, "TOKEN").ok_or_else(|| anyhow!("missing TOKEN"))?; + let url = Url::parse(&token).context("invalid FTP URI")?; + let host = url.host_str().ok_or_else(|| anyhow!("FTP URI is missing host"))?; + let username = decode_userinfo(url.username()); + let password = + decode_userinfo(url.password().ok_or_else(|| anyhow!("FTP URI is missing password"))?); + let scheme = url.scheme().to_ascii_lowercase(); + + let mut stream = if scheme == "ftp" { + BufStream::new(connect_plain(host, url.port().unwrap_or(21)).await?) + } else { + let port = url.port().unwrap_or(990); + if url.port().unwrap_or(990) == 990 { + BufStream::new(connect_tls(host, port, use_lax_tls).await?) 
+ } else { + let tcp = timeout(Duration::from_secs(10), TcpStream::connect((host, port))) + .await + .context("connection timed out")??; + let mut plain = BufStream::new(tcp); + let _ = read_ftp_reply(&mut plain).await?; + plain.write_all(b"AUTH TLS\r\n").await?; + plain.flush().await?; + let (code, auth_body) = read_ftp_reply(&mut plain).await?; + if code != 234 { + return Ok(RawValidationOutcome { + valid: false, + status: StatusCode::UNAUTHORIZED, + body: auth_body, + }); + } + let tcp = plain.into_inner(); + let server_name = ServerName::try_from(host.to_string()) + .map_err(|_| anyhow!("invalid TLS host: {host}"))?; + let tls = timeout( + Duration::from_secs(10), + tls_connector(use_lax_tls)?.connect(server_name, tcp), + ) + .await + .context("TLS handshake timed out")??; + BufStream::new(Box::new(tls) as DynStream) + } + }; + + let _ = read_ftp_reply(&mut stream).await?; + stream.write_all(format!("USER {username}\r\n").as_bytes()).await?; + stream.flush().await?; + let (user_code, user_body) = read_ftp_reply(&mut stream).await?; + if user_code == 230 { + return Ok(RawValidationOutcome { valid: true, status: StatusCode::OK, body: user_body }); + } + if user_code != 331 { + return Ok(RawValidationOutcome { + valid: false, + status: StatusCode::UNAUTHORIZED, + body: user_body, + }); + } + + stream.write_all(format!("PASS {password}\r\n").as_bytes()).await?; + stream.flush().await?; + let (pass_code, pass_body) = read_ftp_reply(&mut stream).await?; + let _ = stream.write_all(b"QUIT\r\n").await; + let _ = stream.flush().await; + + Ok(RawValidationOutcome { + valid: pass_code == 230, + status: if pass_code == 230 { StatusCode::OK } else { StatusCode::UNAUTHORIZED }, + body: pass_body, + }) +} + +async fn read_ftp_reply(stream: &mut BufStream) -> Result<(u16, String)> +where + S: AsyncRead + AsyncWrite + Unpin, +{ + let mut body = String::new(); + let mut code_prefix: Option = None; + + loop { + let mut line = String::new(); + let read = 
timeout(Duration::from_secs(10), stream.read_line(&mut line)) + .await + .context("FTP server did not reply in time")??; + if read == 0 { + return Err(anyhow!("FTP server closed the connection")); + } + + body.push_str(&line); + let trimmed = line.trim_end_matches(['\r', '\n']); + if trimmed.len() < 4 { + continue; + } + + let code = &trimmed[0..3]; + if !code.chars().all(|c| c.is_ascii_digit()) { + continue; + } + + match trimmed.as_bytes()[3] { + b' ' => return Ok((code.parse().unwrap_or(0), body)), + b'-' => { + code_prefix = Some(code.to_string()); + } + _ => {} + } + + if let Some(prefix) = &code_prefix { + if trimmed.starts_with(prefix) && trimmed.as_bytes()[3] == b' ' { + return Ok((code.parse().unwrap_or(0), body)); + } + } + } +} + +async fn validate_kraken(globals: &Object, client: &Client) -> Result { + let api_key = + string_var(globals, "KRAKEN_API_KEY").ok_or_else(|| anyhow!("missing KRAKEN_API_KEY"))?; + let api_secret = string_var(globals, "TOKEN").ok_or_else(|| anyhow!("missing TOKEN"))?; + let secret = B64.decode(api_secret.as_bytes()).context("Kraken secret is not valid base64")?; + + let nonce = current_unix_millis(); + let body = format!("nonce={nonce}"); + let mut sha = Sha256::new(); + sha.update(format!("{nonce}{body}").as_bytes()); + let shasum = sha.finalize(); + + let path = "/0/private/Balance"; + let mut mac = as KeyInit>::new_from_slice(&secret) + .map_err(|e| anyhow!("invalid HMAC key: {e}"))?; + let mut payload = Vec::with_capacity(path.len() + shasum.len()); + payload.extend_from_slice(path.as_bytes()); + payload.extend_from_slice(&shasum); + mac.update(&payload); + let signature = B64.encode(mac.finalize().into_bytes()); + + let resp = client + .post(format!("https://api.kraken.com{path}")) + .header("Content-Type", "application/x-www-form-urlencoded") + .header("API-Key", api_key) + .header("API-Sign", signature) + .body(body) + .send() + .await + .context("Kraken validation request failed")?; + + let status = resp.status(); + let 
body = resp.text().await.unwrap_or_default(); + let valid = status == StatusCode::OK && body.contains(r#""error":[]"#); + + Ok(RawValidationOutcome { valid, status, body }) +} + +async fn validate_ldap(globals: &Object, use_lax_tls: bool) -> Result { + let token = string_var(globals, "TOKEN").ok_or_else(|| anyhow!("missing TOKEN"))?; + let url = Url::parse(&token).context("invalid LDAP URI")?; + let scheme = url.scheme().to_ascii_lowercase(); + let host = url.host_str().ok_or_else(|| anyhow!("LDAP URI is missing host"))?; + let port = url.port().unwrap_or(if scheme == "ldaps" { 636 } else { 389 }); + let bind_dn = if let Some(bind_dn) = string_var(globals, "LDAP_BIND_DN") { + bind_dn + } else { + decode_userinfo(url.username()) + }; + let password = if let Some(password) = string_var(globals, "LDAP_PASSWORD") { + password + } else { + decode_userinfo(url.password().ok_or_else(|| anyhow!("LDAP URI is missing password"))?) + }; + + let ldap_url = format!("{scheme}://{host}:{port}"); + let settings = LdapConnSettings::new().set_no_tls_verify(use_lax_tls); + let (conn, mut ldap) = ldap3::LdapConnAsync::with_settings(settings, &ldap_url) + .await + .with_context(|| format!("failed to connect to LDAP server {ldap_url}"))?; + ldap3::drive!(conn); + let bind_result = ldap.simple_bind(&bind_dn, &password).await; + let _ = ldap.unbind().await; + + match bind_result { + Ok(res) => match res.success() { + Ok(_) => Ok(RawValidationOutcome { + valid: true, + status: StatusCode::OK, + body: "LDAP bind succeeded.".to_string(), + }), + Err(err) => Ok(RawValidationOutcome { + valid: false, + status: StatusCode::UNAUTHORIZED, + body: err.to_string(), + }), + }, + Err(err) => Ok(RawValidationOutcome { + valid: false, + status: StatusCode::BAD_GATEWAY, + body: err.to_string(), + }), + } +} + +async fn validate_rabbitmq(globals: &Object, use_lax_tls: bool) -> Result { + let token = string_var(globals, "TOKEN").ok_or_else(|| anyhow!("missing TOKEN"))?; + let url = 
Url::parse(&token).context("invalid AMQP URI")?; + let _host = url.host_str().ok_or_else(|| anyhow!("AMQP URI is missing host"))?; + let username = decode_userinfo(url.username()); + let password = + decode_userinfo(url.password().ok_or_else(|| anyhow!("AMQP URI is missing password"))?); + + let mut stream = connect_from_url(&url, 5671, 5672, use_lax_tls).await?; + timeout(Duration::from_secs(10), stream.write_all(b"AMQP\x00\x00\x09\x01")) + .await + .context("failed to write AMQP protocol header")??; + timeout(Duration::from_secs(10), stream.flush()).await.context("flush timed out")??; + + let (_, _, start_payload) = read_amqp_frame(&mut stream).await?; + let (class_id, method_id) = amqp_method_ids(&start_payload)?; + if class_id != 10 || method_id != 10 { + return Ok(RawValidationOutcome { + valid: false, + status: StatusCode::BAD_GATEWAY, + body: format!("unexpected AMQP frame {class_id}.{method_id}"), + }); + } + + let start_ok = build_amqp_start_ok_frame(&username, &password); + timeout(Duration::from_secs(10), stream.write_all(&start_ok)) + .await + .context("failed to write AMQP start-ok frame")??; + timeout(Duration::from_secs(10), stream.flush()).await.context("flush timed out")??; + + let (_, _, next_payload) = read_amqp_frame(&mut stream).await?; + let (class_id, method_id) = amqp_method_ids(&next_payload)?; + let valid = class_id == 10 && method_id == 30; + Ok(RawValidationOutcome { + valid, + status: if valid { StatusCode::OK } else { StatusCode::UNAUTHORIZED }, + body: format!("received AMQP method frame {class_id}.{method_id}"), + }) +} + +fn build_amqp_start_ok_frame(username: &str, password: &str) -> Vec { + let mut payload = Vec::new(); + payload.extend_from_slice(&10u16.to_be_bytes()); + payload.extend_from_slice(&11u16.to_be_bytes()); + payload.extend_from_slice(&0u32.to_be_bytes()); // empty client properties table + + payload.extend_from_slice(&(5u32).to_be_bytes()); + payload.extend_from_slice(b"PLAIN"); + + let mut response = 
Vec::with_capacity(username.len() + password.len() + 2); + response.push(0); + response.extend_from_slice(username.as_bytes()); + response.push(0); + response.extend_from_slice(password.as_bytes()); + payload.extend_from_slice(&(response.len() as u32).to_be_bytes()); + payload.extend_from_slice(&response); + + payload.extend_from_slice(&(5u32).to_be_bytes()); + payload.extend_from_slice(b"en_US"); + + let mut frame = Vec::with_capacity(payload.len() + 8); + frame.push(1); // method frame + frame.extend_from_slice(&0u16.to_be_bytes()); + frame.extend_from_slice(&(payload.len() as u32).to_be_bytes()); + frame.extend_from_slice(&payload); + frame.push(0xCE); + frame +} + +async fn read_amqp_frame(stream: &mut DynStream) -> Result<(u8, u16, Vec)> { + let mut header = [0u8; 7]; + timeout(Duration::from_secs(10), stream.read_exact(&mut header)) + .await + .context("timed out while reading AMQP frame header")??; + let frame_type = header[0]; + let channel = u16::from_be_bytes([header[1], header[2]]); + let size = u32::from_be_bytes([header[3], header[4], header[5], header[6]]) as usize; + let mut payload = vec![0u8; size]; + timeout(Duration::from_secs(10), stream.read_exact(&mut payload)) + .await + .context("timed out while reading AMQP frame payload")??; + let mut end = [0u8; 1]; + timeout(Duration::from_secs(10), stream.read_exact(&mut end)) + .await + .context("timed out while reading AMQP frame terminator")??; + if end[0] != 0xCE { + return Err(anyhow!("invalid AMQP frame terminator")); + } + Ok((frame_type, channel, payload)) +} + +fn amqp_method_ids(payload: &[u8]) -> Result<(u16, u16)> { + if payload.len() < 4 { + return Err(anyhow!("AMQP payload too short")); + } + Ok((u16::from_be_bytes([payload[0], payload[1]]), u16::from_be_bytes([payload[2], payload[3]]))) +} + +async fn validate_redis(globals: &Object, use_lax_tls: bool) -> Result { + let token = string_var(globals, "TOKEN").ok_or_else(|| anyhow!("missing TOKEN"))?; + let url = 
Url::parse(&token).context("invalid Redis URI")?; + let username = if let Some(username) = string_var(globals, "USERNAME") { + username + } else if !url.username().is_empty() { + decode_userinfo(url.username()) + } else { + String::new() + }; + let password = if let Some(password) = string_var(globals, "PASSWORD") { + password + } else { + decode_userinfo(url.password().ok_or_else(|| anyhow!("Redis URI is missing password"))?) + }; + + let mut stream = BufStream::new(connect_from_url(&url, 6380, 6379, use_lax_tls).await?); + let auth_cmd = if username.is_empty() { + format!("*2\r\n$4\r\nAUTH\r\n${}\r\n{}\r\n", password.len(), password) + } else { + format!( + "*3\r\n$4\r\nAUTH\r\n${}\r\n{}\r\n${}\r\n{}\r\n", + username.len(), + username, + password.len(), + password + ) + }; + stream.write_all(auth_cmd.as_bytes()).await?; + stream.flush().await?; + let auth_reply = read_resp_line(&mut stream).await?; + if !auth_reply.starts_with("+OK") { + return Ok(RawValidationOutcome { + valid: false, + status: StatusCode::UNAUTHORIZED, + body: auth_reply, + }); + } + + stream.write_all(b"*1\r\n$4\r\nPING\r\n").await?; + stream.flush().await?; + let ping_reply = read_resp_line(&mut stream).await?; + Ok(RawValidationOutcome { + valid: ping_reply.starts_with("+PONG"), + status: if ping_reply.starts_with("+PONG") { + StatusCode::OK + } else { + StatusCode::UNAUTHORIZED + }, + body: ping_reply, + }) +} + +async fn read_resp_line(stream: &mut BufStream) -> Result +where + S: AsyncRead + AsyncWrite + Unpin, +{ + let mut line = String::new(); + timeout(Duration::from_secs(10), stream.read_line(&mut line)) + .await + .context("Redis server did not reply in time")??; + Ok(line) +} diff --git a/data/default/rule_cleanup/count_rules.py b/data/default/rule_cleanup/count_rules.py index a390464..f9363ac 100644 --- a/data/default/rule_cleanup/count_rules.py +++ b/data/default/rule_cleanup/count_rules.py @@ -27,8 +27,8 @@ DEFAULT_RULES_DIR = ( def parse_args() -> argparse.Namespace: parser = 
argparse.ArgumentParser( description=( - "Count total rules and detector rules. " - "Detector rules are rules that do not " + "Count total rules and standalone detector rules. " + "Standalone detector rules are rules that do not " "declare depends_on_rule." ) ) @@ -38,6 +38,14 @@ def parse_args() -> argparse.Namespace: default=DEFAULT_RULES_DIR, help="Directory containing rule YAML files (default: %(default)s)", ) + parser.add_argument( + "--list-validators", + action="store_true", + help=( + "Print the IDs of standalone detectors with and " + "without a validator" + ), + ) return parser.parse_args() @@ -59,6 +67,14 @@ def iter_rule_entries(path: Path) -> list[dict]: return entries +def rule_identifier(rule: dict, path: Path, index: int) -> str: + if isinstance(rule.get("id"), str) and rule["id"].strip(): + return rule["id"] + if isinstance(rule.get("name"), str) and rule["name"].strip(): + return rule["name"] + return f"{path.stem}#{index}" + + def main() -> int: args = parse_args() rules_dir = args.rules_dir.resolve() @@ -74,6 +90,8 @@ def main() -> int: total_rules = 0 dependent_rules = 0 + standalone_with_validator: list[str] = [] + standalone_without_validator: list[str] = [] for path in rule_files: try: @@ -86,14 +104,44 @@ def main() -> int: dependent_rules += sum( 1 for rule in rules if rule.get("depends_on_rule") ) + for index, rule in enumerate(rules, start=1): + if rule.get("depends_on_rule"): + continue - detector_rules = total_rules - dependent_rules + identifier = rule_identifier(rule, path, index) + if rule.get("validation"): + standalone_with_validator.append(identifier) + else: + standalone_without_validator.append(identifier) + + standalone_detector_rules = total_rules - dependent_rules print(f"Rules directory: {rules_dir}") - print(f"Rule files: {len(rule_files)}") print(f"Total rules: {total_rules}") print(f"Dependent rules: {dependent_rules}") - print(f"Detectors: {detector_rules}") + print(f"Standalone detectors: {standalone_detector_rules}") 
+ print( + "Standalone detectors with validator: " + f"{len(standalone_with_validator)}" + ) + print( + "Standalone detectors without validator: " + f"{len(standalone_without_validator)}" + ) + + if args.list_validators: + print( + "\nStandalone detectors with validator " + f"({len(standalone_with_validator)}):" + ) + for name in standalone_with_validator: + print(f" {name}") + print( + "\nStandalone detectors without validator " + f"({len(standalone_without_validator)}):" + ) + for name in standalone_without_validator: + print(f" {name}") return 0 diff --git a/docs-site/docs/assets/images/binary-size-comparison.png b/docs-site/docs/assets/images/binary-size-comparison.png new file mode 100644 index 0000000..1353d6d Binary files /dev/null and b/docs-site/docs/assets/images/binary-size-comparison.png differ diff --git a/docs-site/docs/assets/stylesheets/extra.css b/docs-site/docs/assets/stylesheets/extra.css index b3df9bd..c590f93 100644 --- a/docs-site/docs/assets/stylesheets/extra.css +++ b/docs-site/docs/assets/stylesheets/extra.css @@ -99,6 +99,7 @@ max-width: 700px; margin: 0 auto 2rem; color: var(--md-default-fg-color--light); + font-size: 1rem; line-height: 1.6; } @@ -127,11 +128,13 @@ .kf-feature h3 { margin-top: 0; + font-size: 1.3rem; color: var(--md-primary-fg-color); } .kf-feature p { color: var(--md-default-fg-color--light); + font-size: 0.85rem; line-height: 1.6; margin-bottom: 0; } diff --git a/docs-site/docs/changelog.md b/docs-site/docs/changelog.md index 6439801..0e5334f 100644 --- a/docs-site/docs/changelog.md +++ b/docs-site/docs/changelog.md @@ -7,6 +7,12 @@ description: "Kingfisher release history: new features, rules, bug fixes, and im All notable changes to this project will be documented in this file. +## [v1.95.0] +- Added 80+ built-in rules, bringing the bundled ruleset to 820 total. 
New coverage includes Amazon OAuth, Asaas, multiple Azure credential families, Bitrise, Canva, CockroachDB, eBay, Elastic, hCaptcha, Highnote, Lichess, MailerSend, Onfido, Paddle, Pangea, Persona, Pinterest, Proof, Rootly, Runpod, Telnyx, Thunderstore, Valtown, Volcengine, and more. +- Replaced tree-sitter with a lighter parser-based context verifier built from handwritten lexers plus `tl`/`cssparser`, preserving context-dependent matching while cutting about 19 MB from the release binary. +- Added a `validation: type: Raw` exception path for provider-specific checks, with new raw validators for Azure Batch, FTP, Kraken, LDAP, RabbitMQ, and Redis. Also added stable request-scoped template values plus new Liquid filters for HMAC-SHA384 hex output and timestamp generation. +- Expanded live validation coverage for several built-in rules, including Agora, Bitfinex, DocuSign, Dwolla, GitLab, KuCoin, RingCentral, Snowflake, Tableau, Trello, and Webex. Also tightened newly added helper regex to avoid high-match scan regressions, and made preflight-blocked raw validations report as skipped/not attempted instead of failed. + ## [v1.94.0] - Updated vendored `vectorscan-rs` from v0.0.5 (Vectorscan 5.4.11) to v0.0.6 (Vectorscan 5.4.12). The upstream crate now ships pre-extracted sources instead of a tarball+patch, and fixes the `cpu_native` feature flag. Local Windows and musl build patches have been re-applied. - Added more built-in rules diff --git a/docs-site/docs/features/parsing.md b/docs-site/docs/features/parsing.md index fc90e8d..dc0c04c 100644 --- a/docs-site/docs/features/parsing.md +++ b/docs-site/docs/features/parsing.md @@ -5,43 +5,50 @@ description: "Language-aware secret detection using tree-sitter parsing for 13+ # Kingfisher Source Code Parsing -Kingfisher leverages tree-sitter as an extra layer of analysis when scanning source files written in supported programming languages. 
In practice, after its initial regex-based scan (powered by Vectorscan/Hyperscan), Kingfisher can run a targeted verification pass for context-dependent rules. +Kingfisher uses a parser-based context verifier as a second pass on supported source files. After its initial regex scan (powered by Vectorscan/Hyperscan), it extracts assignment-style snippets from code and configuration files to confirm that generic keyword+token matches appear in plausible contexts. -If so, it creates a Checker (see below) that uses tree‐sitter to parse the file and run language‐specific queries. This additional pass refines the detection by capturing more structured patterns—such as secret-like tokens—that might be obscured or spread over code constructs. +The implementation favors lightweight extractors over full AST parsing: + +- **Handwritten lexers** for common programming and config languages — comment-aware stripping followed by regex-based `key = value` extraction +- **`tl`** for HTML — attribute values, element text, and embedded ` + + + + "#; + let mut texts = Vec::new(); + stream_context_candidates(source, &Language::Html, |text| { + texts.push(text.to_string()); + true + }) + .unwrap(); - println!("tree-sitter capture delta report (current vs baseline):"); - for (key, actual) in ¤t { - let expected = baseline.get(key).copied().unwrap_or(0); - let delta = (*actual as isize) - (expected as isize); - println!(" {key}: current={actual}, baseline={expected}, delta={delta:+}"); - } + assert!( + texts.iter().any(|text| text.contains("auth0_client_secret = secret-value")), + "expected uppercase script tag to be handled like lowercase script" + ); + assert!( + texts.iter().any(|text| text.contains("content =")), + "expected uppercase style tag to emit CSS declaration candidates" + ); + } + + #[test] + fn html_comment_only_script_context_is_ignored() { + let source = br#" + + + +
visible text
+ + + "#; + let mut texts = Vec::new(); + stream_context_candidates(source, &Language::Html, |text| { + texts.push(text.to_string()); + true + }) + .unwrap(); + + assert!( + !texts.iter().any(|text| text.contains("AIzaSyBUPHAjZl3n8Eza66ka6B78iVyPteC5MgM")), + "expected commented-out script secrets to stay ignored" + ); + assert!( + texts.iter().any(|text| text.contains("div = visible text")), + "expected visible non-script HTML text to remain available for verification" + ); + } + + #[test] + fn comment_only_python_context_is_ignored() { + let root = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + let source = fs::read(root.join("testdata/parsers/comment_only_context.py")).unwrap(); + let mut texts = Vec::new(); + stream_context_candidates(&source, &Language::Python, |text| { + texts.push(text.to_string()); + true + }) + .unwrap(); + assert!(texts.is_empty()); } } diff --git a/src/parser/css.rs b/src/parser/css.rs new file mode 100644 index 0000000..34245d5 --- /dev/null +++ b/src/parser/css.rs @@ -0,0 +1,187 @@ +use std::cell::Cell; + +use anyhow::Result; +use cssparser::{ + parse_important, AtRuleParser, CowRcStr, DeclarationParser, ParseError, Parser, ParserInput, + ParserState, RuleBodyItemParser, RuleBodyParser, StyleSheetParser, ToCss, Token, +}; + +pub(super) fn stream_context_candidates(source: &[u8], sink: &mut F) -> Result<()> +where + F: FnMut(&str) -> bool, +{ + let css = String::from_utf8_lossy(source); + if css.trim().is_empty() { + return Ok(()); + } + + let mut input = ParserInput::new(&css); + let mut parser = Parser::new(&mut input); + let stopped = Cell::new(false); + let mut collector = Collector { sink, stopped: &stopped }; + let mut stylesheet = StyleSheetParser::new(&mut parser, &mut collector); + while !stopped.get() { + if stylesheet.next().is_none() { + break; + } + } + Ok(()) +} + +struct Collector<'a, F> { + sink: &'a mut F, + stopped: &'a Cell, +} + +impl<'a, F> Collector<'a, F> +where + F: FnMut(&str) -> bool, +{ + fn emit(&mut self, 
name: &str, value: &str) { + if self.stopped.get() { + return; + } + let candidate = format!("{name} = {value}"); + self.stopped.set(!(self.sink)(&candidate)); + } +} + +impl<'i, F> DeclarationParser<'i> for Collector<'_, F> +where + F: FnMut(&str) -> bool, +{ + type Declaration = (); + type Error = (); + + fn parse_value<'t>( + &mut self, + name: CowRcStr<'i>, + input: &mut Parser<'i, 't>, + _declaration_start: &ParserState, + ) -> Result<(), ParseError<'i, ()>> { + let mut values = Vec::new(); + let mut important = false; + loop { + let start = input.state(); + let token = match input.next_including_whitespace().cloned() { + Ok(token) => token, + Err(_) => break, + }; + + if token == Token::Delim('!') { + input.reset(&start); + if parse_important(input).is_ok() && input.is_exhausted() { + important = true; + break; + } + input.reset(&start); + } + + collect_token_values(token, input, &mut values); + } + + if values.is_empty() && !important { + return Ok(()); + } + + if values.is_empty() && important { + values.push("important".to_string()); + } + + for value in values { + self.emit(&name, &value); + if self.stopped.get() { + break; + } + } + Ok(()) + } +} + +impl<'i, F> AtRuleParser<'i> for Collector<'_, F> +where + F: FnMut(&str) -> bool, +{ + type Prelude = (); + type AtRule = (); + type Error = (); +} + +impl<'i, F> cssparser::QualifiedRuleParser<'i> for Collector<'_, F> +where + F: FnMut(&str) -> bool, +{ + type Prelude = (); + type QualifiedRule = (); + type Error = (); + + fn parse_prelude<'t>(&mut self, input: &mut Parser<'i, 't>) -> Result<(), ParseError<'i, ()>> { + while input.next_including_whitespace().is_ok() {} + Ok(()) + } + + fn parse_block<'t>( + &mut self, + _prelude: (), + _start: &ParserState, + input: &mut Parser<'i, 't>, + ) -> Result<(), ParseError<'i, ()>> { + let stopped = self.stopped; + let mut rule_body = RuleBodyParser::new(input, self); + while !stopped.get() { + if rule_body.next().is_none() { + break; + } + } + Ok(()) + } +} + 
+impl RuleBodyItemParser<'_, (), ()> for Collector<'_, F> +where + F: FnMut(&str) -> bool, +{ + fn parse_qualified(&self) -> bool { + true + } + + fn parse_declarations(&self) -> bool { + true + } +} + +fn collect_token_values<'i, 't>( + token: Token<'i>, + input: &mut Parser<'i, 't>, + values: &mut Vec, +) { + match token { + Token::QuotedString(value) => values.push(value.to_string()), + Token::UnquotedUrl(value) => values.push(value.to_string()), + Token::Ident(value) => values.push(value.to_string()), + Token::Hash(value) | Token::IDHash(value) => values.push(value.to_string()), + Token::Number { .. } + | Token::Percentage { .. } + | Token::Dimension { .. } + | Token::Function(_) => { + values.push(token.to_css_string()); + if matches!(token, Token::Function(_)) { + let _ = input.parse_nested_block(|nested| { + while let Ok(next) = nested.next_including_whitespace().cloned() { + collect_token_values(next, nested, values); + } + Ok::<(), ParseError<'i, ()>>(()) + }); + } + } + Token::ParenthesisBlock | Token::SquareBracketBlock | Token::CurlyBracketBlock => { + let _ = input.parse_nested_block(|nested| { + while let Ok(next) = nested.next_including_whitespace().cloned() { + collect_token_values(next, nested, values); + } + Ok::<(), ParseError<'i, ()>>(()) + }); + } + _ => {} + } +} diff --git a/src/parser/html.rs b/src/parser/html.rs new file mode 100644 index 0000000..f0ad094 --- /dev/null +++ b/src/parser/html.rs @@ -0,0 +1,95 @@ +use anyhow::Result; +use tl::{HTMLTag, Node, Parser, ParserOptions}; + +use super::{css, lexer, Language}; + +pub(super) fn stream_context_candidates(source: &[u8], sink: &mut F) -> Result<()> +where + F: FnMut(&str) -> bool, +{ + let html = String::from_utf8_lossy(source); + if html.trim().is_empty() { + return Ok(()); + } + + let dom = match tl::parse(&html, ParserOptions::default()) { + Ok(dom) => dom, + Err(_) => return Ok(()), + }; + let parser = dom.parser(); + + for node in dom.nodes() { + let Some(tag) = node.as_tag() else { 
+ continue; + }; + let tag_name = tag.name().as_utf8_str().to_string(); + let normalized_tag_name = tag_name.to_ascii_lowercase(); + + for (key, value) in tag.attributes().iter() { + let Some(value) = value else { + continue; + }; + let candidate = format!("{key} = {value}"); + if !sink(&candidate) { + return Ok(()); + } + } + + match normalized_tag_name.as_str() { + "script" => { + let script_text = tag.inner_text(parser); + let script_text = script_text.trim(); + if !script_text.is_empty() { + lexer::stream_context_candidates( + script_text.as_bytes(), + &Language::JavaScript, + sink, + )?; + } + } + "style" => { + let style_text = tag.inner_text(parser); + let style_text = style_text.trim(); + if !style_text.is_empty() { + css::stream_context_candidates(style_text.as_bytes(), sink)?; + } + } + _ => { + let inner_text = text_without_embedded_code(tag, parser); + if !inner_text.is_empty() && !sink(&format!("{tag_name} = {inner_text}")) { + return Ok(()); + } + } + } + } + + Ok(()) +} + +fn text_without_embedded_code(tag: &HTMLTag<'_>, parser: &Parser<'_>) -> String { + let mut text = String::new(); + collect_visible_text(tag, parser, &mut text); + text.trim().to_string() +} + +fn collect_visible_text(tag: &HTMLTag<'_>, parser: &Parser<'_>, out: &mut String) { + for handle in tag.children().top().iter() { + let Some(node) = handle.get(parser) else { + continue; + }; + + match node { + Node::Raw(raw) => out.push_str(raw.as_utf8_str().as_ref()), + Node::Comment(_) => {} + Node::Tag(child) => { + let child_name = child.name().as_utf8_str(); + if child_name.eq_ignore_ascii_case("script") + || child_name.eq_ignore_ascii_case("style") + { + continue; + } + collect_visible_text(&child, parser, out); + } + } + } +} diff --git a/src/parser/lexer.rs b/src/parser/lexer.rs new file mode 100644 index 0000000..e729d82 --- /dev/null +++ b/src/parser/lexer.rs @@ -0,0 +1,1276 @@ +use anyhow::Result; +use once_cell::sync::Lazy; +use regex::Regex; + +use super::Language; + +static 
ASSIGNMENT_LITERAL_RE: Lazy = Lazy::new(|| { + Regex::new( + r#"(?x) + (?P[A-Za-z_@$][\w$@.:>-]*) + \s* + (?P:=|=>|=|\+=) + \s* + (?P + @"(?s:(?:[^"]|"")*)" + | + "(?:[^"\\]|\\.)*" + | + '(?:[^'\\]|\\.)*' + | + `[^`]*` + | + [+-]?\d+(?:\.\d+)? + ) + "#, + ) + .unwrap() +}); + +static ASSIGNMENT_ANY_RE: Lazy = Lazy::new(|| { + Regex::new( + r#"(?x) + (?P[A-Za-z_@$][\w$@.:>-]*) + \s* + (?P:=|=>|=|\+=) + \s* + (?P.+) + "#, + ) + .unwrap() +}); + +static TYPED_ASSIGNMENT_LITERAL_RE: Lazy = Lazy::new(|| { + Regex::new( + r#"(?x) + (?P[A-Za-z_@$][\w$@.-]*) + \s*:\s*[^=]+? + =\s* + (?P + @"(?s:(?:[^"]|"")*)" + | + "(?:[^"\\]|\\.)*" + | + '(?:[^'\\]|\\.)*' + | + `[^`]*` + | + [+-]?\d+(?:\.\d+)? + ) + "#, + ) + .unwrap() +}); + +static PAIR_LITERAL_RE: Lazy = Lazy::new(|| { + Regex::new( + r#"(?x) + (?: + ^ + | + [\{\[,]\s* + | + ,\s* + ) + (?P"(?:[^"\\]|\\.)*"|'(?:[^'\\]|\\.)*'|[A-Za-z_@$][\w$@.-]*) + \s*:\s* + (?P + "(?:[^"\\]|\\.)*" + | + '(?:[^'\\]|\\.)*' + | + `[^`]*` + | + [+-]?\d+(?:\.\d+)? + ) + "#, + ) + .unwrap() +}); + +static TYPE_LITERAL_RE: Lazy = Lazy::new(|| { + Regex::new( + r#"(?x) + (?P[A-Za-z_@$][\w$@.-]*) + \s*:\s* + (?P + "(?:[^"\\]|\\.)*" + | + '(?:[^'\\]|\\.)*' + | + `[^`]*` + ) + "#, + ) + .unwrap() +}); + +static CALL_RE: Lazy = Lazy::new(|| { + Regex::new( + r#"(?x) + (?: + (?P[A-Za-z_@$][\w$@.:>-]*)\s*(?::=|=)\s* + )? 
+ (?P(?:new\s+)?[A-Za-z_@$][\w$@.:>-]*) + \s* + \((?P[^)]*)\) + "#, + ) + .unwrap() +}); + +static BRACE_LIST_ASSIGN_RE: Lazy = Lazy::new(|| { + Regex::new( + r#"(?x) + (?P[A-Za-z_@$][\w$@.:>-]*) + \s*=\s* + \{(?P[^}]*)\} + "#, + ) + .unwrap() +}); + +pub(super) fn stream_context_candidates( + source: &[u8], + language: &Language, + sink: &mut F, +) -> Result<()> +where + F: FnMut(&str) -> bool, +{ + let text = String::from_utf8_lossy(source); + if text.is_empty() { + return Ok(()); + } + + match language { + Language::Bash => extract_bash(&text, sink), + Language::Python => extract_python(&text, sink), + Language::Ruby => extract_ruby(&text, sink), + Language::Php => extract_php(&text, sink), + Language::Yaml => extract_yaml(&text, sink), + Language::Toml => extract_toml(&text, sink), + Language::JavaScript => extract_javascript_like(&text, false, sink), + Language::TypeScript => extract_javascript_like(&text, true, sink), + Language::Rust => extract_rust(&text, sink), + Language::C | Language::CSharp | Language::Cpp | Language::Go | Language::Java => { + extract_c_style(&text, language, sink) + } + Language::Css | Language::Html => Ok(()), + } +} + +fn extract_bash(text: &str, sink: &mut F) -> Result<()> +where + F: FnMut(&str) -> bool, +{ + let cleaned = strip_comments(text, CommentStyle::shell()); + for line in cleaned.lines() { + if emit_assignment_literals(line, false, sink).is_break() { + return Ok(()); + } + } + Ok(()) +} + +fn extract_python(text: &str, sink: &mut F) -> Result<()> +where + F: FnMut(&str) -> bool, +{ + let cleaned = strip_comments(text, CommentStyle::python()); + for line in cleaned.lines() { + if emit_assignment_literals(line, false, sink).is_break() { + return Ok(()); + } + if emit_pairs(line, true, sink).is_break() { + return Ok(()); + } + if emit_calls(line, false, sink).is_break() { + return Ok(()); + } + } + Ok(()) +} + +fn extract_ruby(text: &str, sink: &mut F) -> Result<()> +where + F: FnMut(&str) -> bool, +{ + let cleaned = 
strip_comments(text, CommentStyle::hash_only()); + for line in cleaned.lines() { + if emit_assignment_literals(line, false, sink).is_break() { + return Ok(()); + } + if emit_assignment_lists(line, false, sink).is_break() { + return Ok(()); + } + if emit_calls(line, false, sink).is_break() { + return Ok(()); + } + } + Ok(()) +} + +fn extract_php(text: &str, sink: &mut F) -> Result<()> +where + F: FnMut(&str) -> bool, +{ + let cleaned = strip_comments(text, CommentStyle::php()); + for line in cleaned.lines() { + if emit_assignment_literals(line, false, sink).is_break() { + return Ok(()); + } + if emit_assignment_lists(line, false, sink).is_break() { + return Ok(()); + } + if emit_calls(line, false, sink).is_break() { + return Ok(()); + } + } + Ok(()) +} + +fn extract_yaml(text: &str, sink: &mut F) -> Result<()> +where + F: FnMut(&str) -> bool, +{ + let cleaned = strip_comments(text, CommentStyle::hash_only()); + for line in cleaned.lines() { + let trimmed = line.trim(); + if trimmed.is_empty() || trimmed.starts_with('-') && !trimmed.contains(':') { + continue; + } + if let Some((key, value)) = split_mapping_pair(trimmed) { + let key = key.trim_start_matches('-').trim(); + if emit_value(key, value, true, true, sink).is_break() { + return Ok(()); + } + } + } + Ok(()) +} + +fn extract_toml(text: &str, sink: &mut F) -> Result<()> +where + F: FnMut(&str) -> bool, +{ + let cleaned = strip_comments(text, CommentStyle::hash_only()); + for line in cleaned.lines() { + let trimmed = line.trim(); + if trimmed.is_empty() || trimmed.starts_with('[') { + continue; + } + if let Some((key, value)) = split_assignment(trimmed, '=') { + if emit_value(key, value, true, false, sink).is_break() { + return Ok(()); + } + } + } + Ok(()) +} + +fn extract_javascript_like(text: &str, include_type_literals: bool, sink: &mut F) -> Result<()> +where + F: FnMut(&str) -> bool, +{ + let cleaned = strip_comments(text, CommentStyle::c_style().with_backticks()); + for line in cleaned.lines() { + if 
emit_assignment_literals(line, false, sink).is_break() { + return Ok(()); + } + if emit_pairs(line, false, sink).is_break() { + return Ok(()); + } + if include_type_literals && emit_type_literals(line, sink).is_break() { + return Ok(()); + } + if emit_assignment_lists(line, false, sink).is_break() { + return Ok(()); + } + if emit_calls(line, false, sink).is_break() { + return Ok(()); + } + } + Ok(()) +} + +fn extract_rust(text: &str, sink: &mut F) -> Result<()> +where + F: FnMut(&str) -> bool, +{ + let cleaned = strip_comments(text, CommentStyle::c_style()); + for line in cleaned.lines() { + if emit_typed_assignment_literals(line, sink).is_break() { + return Ok(()); + } + if emit_assignment_literals(line, false, sink).is_break() { + return Ok(()); + } + if emit_calls(line, false, sink).is_break() { + return Ok(()); + } + } + Ok(()) +} + +fn extract_c_style(text: &str, language: &Language, sink: &mut F) -> Result<()> +where + F: FnMut(&str) -> bool, +{ + let style = match language { + Language::CSharp => CommentStyle::c_style().with_verbatim_strings(), + _ => CommentStyle::c_style(), + }; + let cleaned = strip_comments(text, style); + for line in cleaned.lines() { + if emit_assignment_literals(line, false, sink).is_break() { + return Ok(()); + } + if emit_brace_list_assignments(line, sink).is_break() { + return Ok(()); + } + if matches!(language, Language::Cpp) && looks_like_cpp_ctor_initializer_line(line) { + continue; + } + if emit_calls(line, false, sink).is_break() { + return Ok(()); + } + } + Ok(()) +} + +#[derive(Clone, Copy)] +enum Flow { + Continue, + Break, +} + +impl Flow { + fn is_break(self) -> bool { + matches!(self, Self::Break) + } +} + +fn emit_assignment_literals(line: &str, keep_full_key: bool, sink: &mut F) -> Flow +where + F: FnMut(&str) -> bool, +{ + for caps in ASSIGNMENT_LITERAL_RE.captures_iter(line) { + let Some(key) = caps.name("key").map(|m| m.as_str()) else { + continue; + }; + let Some(value) = caps.name("value").map(|m| m.as_str()) else 
{ + continue; + }; + if emit_value(key, value, keep_full_key, false, sink).is_break() { + return Flow::Break; + } + } + Flow::Continue +} + +fn emit_typed_assignment_literals(line: &str, sink: &mut F) -> Flow +where + F: FnMut(&str) -> bool, +{ + for caps in TYPED_ASSIGNMENT_LITERAL_RE.captures_iter(line) { + let Some(key) = caps.name("key").map(|m| m.as_str()) else { + continue; + }; + let Some(value) = caps.name("value").map(|m| m.as_str()) else { + continue; + }; + if emit_value(key, value, false, false, sink).is_break() { + return Flow::Break; + } + } + Flow::Continue +} + +fn emit_assignment_lists(line: &str, keep_full_key: bool, sink: &mut F) -> Flow +where + F: FnMut(&str) -> bool, +{ + if let Some(caps) = ASSIGNMENT_ANY_RE.captures(line) { + let Some(key) = caps.name("key").map(|m| m.as_str()) else { + return Flow::Continue; + }; + let Some(rhs) = caps.name("rhs").map(|m| m.as_str()) else { + return Flow::Continue; + }; + if rhs.contains(',') || rhs.contains('[') || rhs.contains('{') { + for value in extract_literal_values(rhs, false) { + if emit_value(key, &value, keep_full_key, false, sink).is_break() { + return Flow::Break; + } + } + } + } + Flow::Continue +} + +fn emit_brace_list_assignments(line: &str, sink: &mut F) -> Flow +where + F: FnMut(&str) -> bool, +{ + for caps in BRACE_LIST_ASSIGN_RE.captures_iter(line) { + let Some(key) = caps.name("key").map(|m| m.as_str()) else { + continue; + }; + let Some(body) = caps.name("body").map(|m| m.as_str()) else { + continue; + }; + for value in extract_literal_values(body, false) { + if emit_value(key, &value, false, false, sink).is_break() { + return Flow::Break; + } + } + } + Flow::Continue +} + +fn emit_pairs(line: &str, keep_full_key: bool, sink: &mut F) -> Flow +where + F: FnMut(&str) -> bool, +{ + for caps in PAIR_LITERAL_RE.captures_iter(line) { + let Some(key) = caps.name("key").map(|m| m.as_str()) else { + continue; + }; + let Some(value) = caps.name("value").map(|m| m.as_str()) else { + continue; + 
}; + if emit_value(key, value, keep_full_key, false, sink).is_break() { + return Flow::Break; + } + } + Flow::Continue +} + +fn emit_type_literals(line: &str, sink: &mut F) -> Flow +where + F: FnMut(&str) -> bool, +{ + for caps in TYPE_LITERAL_RE.captures_iter(line) { + let Some(key) = caps.name("key").map(|m| m.as_str()) else { + continue; + }; + let Some(value) = caps.name("value").map(|m| m.as_str()) else { + continue; + }; + if emit_value(key, value, false, false, sink).is_break() { + return Flow::Break; + } + } + Flow::Continue +} + +fn emit_calls(line: &str, keep_full_assign_key: bool, sink: &mut F) -> Flow +where + F: FnMut(&str) -> bool, +{ + for caps in CALL_RE.captures_iter(line) { + let assign_key = caps.name("assign").map(|m| m.as_str()); + let Some(call) = caps.name("call").map(|m| m.as_str()) else { + continue; + }; + let Some(args) = caps.name("args").map(|m| m.as_str()) else { + continue; + }; + + let values = extract_literal_values(args, false); + if values.is_empty() { + continue; + } + + if let Some(key) = assign_key { + for value in &values { + if emit_value(key, value, keep_full_assign_key, false, sink).is_break() { + return Flow::Break; + } + } + } + + let call_name = normalize_call_name(call); + for value in &values { + if emit_value(&call_name, value, true, false, sink).is_break() { + return Flow::Break; + } + } + + if values.len() >= 2 { + let first = values[0].trim_matches('"').trim_matches('\''); + let second = &values[1]; + if looks_like_embedded_key(first) + && emit_value(first, second, true, false, sink).is_break() + { + return Flow::Break; + } + } + } + Flow::Continue +} + +fn emit_value( + key: &str, + value: &str, + keep_full_key: bool, + allow_bare: bool, + sink: &mut F, +) -> Flow +where + F: FnMut(&str) -> bool, +{ + let key = normalize_key(key, keep_full_key); + let value = normalize_value(value, allow_bare); + if key.is_empty() || value.is_empty() { + return Flow::Continue; + } + let candidate = format!("{key} = {value}"); + if 
/// Normalizes a captured key before it is paired with a value.
///
/// Steps, in order:
/// 1. trim surrounding whitespace, then leading `$` and leading `@` sigils;
/// 2. drop one pair of matching surrounding quotes (`"…"` or `'…'`);
/// 3. unless `keep_full_key` is set, keep only the last non-empty segment
///    after splitting on `.`, `:` and `>` (so `a.b`, `a::b` and `a->b` all
///    yield `b`), with leading/trailing `-` removed.
///
/// A key made of a single quote character is returned unchanged instead of
/// being sliced, which previously produced an out-of-range slice panic.
fn normalize_key(key: &str, keep_full_key: bool) -> String {
    let key = key.trim().trim_start_matches('$').trim_start_matches('@');

    // `strip_prefix` + `strip_suffix` only succeeds when both quotes are
    // present as distinct characters, i.e. the key is at least two bytes long.
    let key = if let Some(inner) = key.strip_prefix('"').and_then(|rest| rest.strip_suffix('"')) {
        inner
    } else if let Some(inner) = key.strip_prefix('\'').and_then(|rest| rest.strip_suffix('\'')) {
        inner
    } else {
        key
    };

    if keep_full_key {
        return key.to_string();
    }

    key.rsplit(['.', ':', '>'])
        .find(|segment| !segment.is_empty())
        .unwrap_or(key)
        .trim_matches('-')
        .to_string()
}
/// Scans `input` (typically a call-argument list or the right-hand side of an
/// assignment) and returns every literal it finds, in source order.
///
/// Recognized forms:
/// - C#-style verbatim strings `@"…"` (with `""` as an embedded quote),
/// - strings delimited by `"`, `'` or `` ` `` (backslash escapes are honored
///   for `"` and `'`, not for backticks),
/// - bracketed groups `[...]`, `(...)`, `{...}`, which are searched
///   recursively once their matching closer is found (an unbalanced group
///   contributes nothing),
/// - numeric-looking runs starting with a digit, `+` or `-`,
/// - bare words, only when `allow_bare` is set.
///
/// String literals are returned *with* their delimiters. Unterminated strings
/// run to the end of `input`; this includes a string ending in a dangling
/// backslash, which previously pushed the cursor past the end of the input and
/// panicked when slicing.
fn extract_literal_values(input: &str, allow_bare: bool) -> Vec<String> {
    /// Returns the index just past the quoted literal whose opening delimiter
    /// sits at `open`, clamped to `bytes.len()` when the literal is
    /// unterminated.
    fn quoted_end(bytes: &[u8], open: usize) -> usize {
        let quote = bytes[open];
        let mut idx = open + 1;
        while idx < bytes.len() {
            let byte = bytes[idx];
            if byte == b'\\' && quote != b'`' {
                // Skip the escaped byte; may step past the end on a trailing `\`.
                idx += 2;
            } else if byte == quote {
                return idx + 1;
            } else {
                idx += 1;
            }
        }
        bytes.len()
    }

    let bytes = input.as_bytes();
    let mut values = Vec::new();
    let mut idx = 0;

    while idx < bytes.len() {
        match bytes[idx] {
            b' ' | b'\t' | b'\r' | b'\n' | b',' => idx += 1,
            b'@' if bytes.get(idx + 1) == Some(&b'"') => {
                let start = idx;
                idx += 2;
                while idx < bytes.len() {
                    if bytes[idx] != b'"' {
                        idx += 1;
                        continue;
                    }
                    if bytes.get(idx + 1) == Some(&b'"') {
                        // `""` is an escaped quote inside a verbatim string.
                        idx += 2;
                        continue;
                    }
                    idx += 1;
                    break;
                }
                values.push(input[start..idx].to_string());
            }
            b'"' | b'\'' | b'`' => {
                let end = quoted_end(bytes, idx);
                values.push(input[idx..end].to_string());
                idx = end;
            }
            open @ (b'[' | b'(' | b'{') => {
                let close = match open {
                    b'[' => b']',
                    b'(' => b')',
                    _ => b'}',
                };
                idx += 1;
                let inner_start = idx;
                let mut depth = 1usize;
                while idx < bytes.len() && depth > 0 {
                    match bytes[idx] {
                        // Brackets inside string literals must not affect depth.
                        b'"' | b'\'' | b'`' => idx = quoted_end(bytes, idx),
                        byte if byte == open => {
                            depth += 1;
                            idx += 1;
                        }
                        byte if byte == close => {
                            depth -= 1;
                            if depth == 0 {
                                let inner = &input[inner_start..idx];
                                values.extend(extract_literal_values(inner, allow_bare));
                            }
                            idx += 1;
                        }
                        _ => idx += 1,
                    }
                }
            }
            ch if ch.is_ascii_digit() || ch == b'+' || ch == b'-' => {
                let start = idx;
                idx += 1;
                while idx < bytes.len()
                    && (bytes[idx].is_ascii_digit() || matches!(bytes[idx], b'.' | b'_' | b'x'))
                {
                    idx += 1;
                }
                values.push(input[start..idx].to_string());
            }
            ch if allow_bare
                && (ch.is_ascii_alphanumeric() || matches!(ch, b'_' | b'$' | b'@')) =>
            {
                let start = idx;
                idx += 1;
                while idx < bytes.len()
                    && !matches!(
                        bytes[idx],
                        b' ' | b'\t' | b'\r' | b'\n' | b',' | b')' | b']' | b'}'
                    )
                {
                    idx += 1;
                }
                values.push(input[start..idx].to_string());
            }
            _ => idx += 1,
        }
    }

    values
}
verbatim_strings: false, + triple_quotes: false, + } + } + + const fn hash_only() -> Self { + Self { + line_comment_hash: true, + line_comment_slash: false, + block_comments: false, + backticks: false, + verbatim_strings: false, + triple_quotes: false, + } + } + + const fn php() -> Self { + Self { + line_comment_hash: true, + line_comment_slash: true, + block_comments: true, + backticks: false, + verbatim_strings: false, + triple_quotes: false, + } + } + + const fn python() -> Self { + Self { + line_comment_hash: true, + line_comment_slash: false, + block_comments: false, + backticks: false, + verbatim_strings: false, + triple_quotes: true, + } + } + + const fn with_backticks(mut self) -> Self { + self.backticks = true; + self + } + + const fn with_verbatim_strings(mut self) -> Self { + self.verbatim_strings = true; + self + } +} + +// NOTE: We index `source` byte-by-byte and cast via `bytes[idx] as char`. +// This is correct for comment/string delimiter detection because all +// delimiters we care about (`'`, `"`, `/`, `*`, `#`, `` ` ``, `\n`, `@`) +// are single-byte ASCII. Interior bytes of multi-byte UTF-8 sequences +// have their high bit set (0x80..0xFF) so they can never collide with +// those ASCII delimiters. The cast produces a garbage char for non-ASCII +// bytes, but the output is only consumed by regex patterns that match +// ASCII identifiers and quoted strings, so this is harmless. 
+fn strip_comments(source: &str, style: CommentStyle) -> String { + #[derive(Clone, Copy)] + enum StringState { + Single, + Double, + Backtick, + Verbatim, + TripleSingle, + TripleDouble, + } + + let bytes = source.as_bytes(); + let mut out = String::with_capacity(source.len()); + let mut idx = 0usize; + let mut string_state: Option = None; + let mut in_block_comment = false; + + while idx < bytes.len() { + if in_block_comment { + if idx + 1 < bytes.len() && bytes[idx] == b'*' && bytes[idx + 1] == b'/' { + in_block_comment = false; + idx += 2; + } else { + if bytes[idx] == b'\n' { + out.push('\n'); + } + idx += 1; + } + continue; + } + + if let Some(state) = string_state { + match state { + StringState::Single => { + out.push(bytes[idx] as char); + if bytes[idx] == b'\\' && idx + 1 < bytes.len() { + out.push(bytes[idx + 1] as char); + idx += 2; + continue; + } + if bytes[idx] == b'\'' { + string_state = None; + } + idx += 1; + } + StringState::Double => { + out.push(bytes[idx] as char); + if bytes[idx] == b'\\' && idx + 1 < bytes.len() { + out.push(bytes[idx + 1] as char); + idx += 2; + continue; + } + if bytes[idx] == b'"' { + string_state = None; + } + idx += 1; + } + StringState::Backtick => { + out.push(bytes[idx] as char); + if bytes[idx] == b'`' { + string_state = None; + } + idx += 1; + } + StringState::Verbatim => { + out.push(bytes[idx] as char); + if bytes[idx] == b'"' { + if idx + 1 < bytes.len() && bytes[idx + 1] == b'"' { + out.push('"'); + idx += 2; + continue; + } + string_state = None; + } + idx += 1; + } + StringState::TripleSingle => { + out.push(bytes[idx] as char); + if idx + 2 < bytes.len() + && bytes[idx] == b'\'' + && bytes[idx + 1] == b'\'' + && bytes[idx + 2] == b'\'' + { + out.push('\''); + out.push('\''); + idx += 3; + string_state = None; + continue; + } + idx += 1; + } + StringState::TripleDouble => { + out.push(bytes[idx] as char); + if idx + 2 < bytes.len() + && bytes[idx] == b'"' + && bytes[idx + 1] == b'"' + && bytes[idx + 2] == 
b'"' + { + out.push('"'); + out.push('"'); + idx += 3; + string_state = None; + continue; + } + idx += 1; + } + } + continue; + } + + if style.block_comments + && idx + 1 < bytes.len() + && bytes[idx] == b'/' + && bytes[idx + 1] == b'*' + { + in_block_comment = true; + idx += 2; + continue; + } + + if style.line_comment_slash + && idx + 1 < bytes.len() + && bytes[idx] == b'/' + && bytes[idx + 1] == b'/' + { + while idx < bytes.len() && bytes[idx] != b'\n' { + idx += 1; + } + continue; + } + + if style.line_comment_hash && bytes[idx] == b'#' { + while idx < bytes.len() && bytes[idx] != b'\n' { + idx += 1; + } + continue; + } + + if style.verbatim_strings + && idx + 1 < bytes.len() + && bytes[idx] == b'@' + && bytes[idx + 1] == b'"' + { + out.push('@'); + out.push('"'); + idx += 2; + string_state = Some(StringState::Verbatim); + continue; + } + + if style.triple_quotes && idx + 2 < bytes.len() { + if bytes[idx] == b'\'' && bytes[idx + 1] == b'\'' && bytes[idx + 2] == b'\'' { + out.push('\''); + out.push('\''); + out.push('\''); + idx += 3; + string_state = Some(StringState::TripleSingle); + continue; + } + if bytes[idx] == b'"' && bytes[idx + 1] == b'"' && bytes[idx + 2] == b'"' { + out.push('"'); + out.push('"'); + out.push('"'); + idx += 3; + string_state = Some(StringState::TripleDouble); + continue; + } + } + + match bytes[idx] { + b'\'' => { + out.push('\''); + string_state = Some(StringState::Single); + idx += 1; + } + b'"' => { + out.push('"'); + string_state = Some(StringState::Double); + idx += 1; + } + b'`' if style.backticks => { + out.push('`'); + string_state = Some(StringState::Backtick); + idx += 1; + } + _ => { + out.push(bytes[idx] as char); + idx += 1; + } + } + } + + out +} + +#[cfg(test)] +mod tests { + use super::*; + + // ── extract_literal_values ────────────────────────────────────────── + + #[test] + fn extract_literals_double_quoted() { + let vals = extract_literal_values(r#""hello", "world""#, false); + assert_eq!(vals, vec![r#""hello""#, 
r#""world""#]); + } + + #[test] + fn extract_literals_single_quoted() { + let vals = extract_literal_values("'abc', 'def'", false); + assert_eq!(vals, vec!["'abc'", "'def'"]); + } + + #[test] + fn extract_literals_backtick() { + let vals = extract_literal_values("`template ${var}`", false); + assert_eq!(vals, vec!["`template ${var}`"]); + } + + #[test] + fn extract_literals_escaped_quotes() { + let vals = extract_literal_values(r#""he said \"hi\"""#, false); + assert_eq!(vals, vec![r#""he said \"hi\"""#]); + } + + #[test] + fn extract_literals_numbers() { + let vals = extract_literal_values("42, -3.14, +1", false); + assert_eq!(vals, vec!["42", "-3.14", "+1"]); + } + + #[test] + fn extract_literals_nested_brackets() { + let vals = extract_literal_values(r#"["a", ["b", "c"]]"#, false); + assert_eq!(vals, vec![r#""a""#, r#""b""#, r#""c""#]); + } + + #[test] + fn extract_literals_nested_parens() { + let vals = extract_literal_values(r#"("x", ("y"))"#, false); + assert_eq!(vals, vec![r#""x""#, r#""y""#]); + } + + #[test] + fn extract_literals_nested_braces() { + let vals = extract_literal_values(r#"{"key": "val"}"#, false); + assert_eq!(vals, vec![r#""key""#, r#""val""#]); + } + + #[test] + fn extract_literals_mixed_nesting() { + let vals = extract_literal_values(r#"[{"a": "b"}, ("c")]"#, false); + assert_eq!(vals, vec![r#""a""#, r#""b""#, r#""c""#]); + } + + #[test] + fn extract_literals_empty_input() { + let vals = extract_literal_values("", false); + assert!(vals.is_empty()); + } + + #[test] + fn extract_literals_only_whitespace() { + let vals = extract_literal_values(" \t\n ", false); + assert!(vals.is_empty()); + } + + #[test] + fn extract_literals_unclosed_string() { + // Gracefully handles unclosed quote — takes everything to end + let vals = extract_literal_values(r#""unclosed"#, false); + assert_eq!(vals.len(), 1); + assert!(vals[0].starts_with('"')); + } + + #[test] + fn extract_literals_mismatched_brackets_does_not_panic() { + // Must not panic on mismatched 
brackets — result may be empty because + // the unclosed bracket consumes to EOF and the inner recursion only + // fires once the bracket is closed. + let _ = extract_literal_values(r#"["a", "b""#, false); + } + + #[test] + fn extract_literals_verbatim_string() { + let vals = extract_literal_values(r#"@"line1""line2""#, false); + assert_eq!(vals.len(), 1); + assert_eq!(vals[0], r#"@"line1""line2""#); + } + + #[test] + fn extract_literals_bare_values_when_allowed() { + let vals = extract_literal_values("foo, bar_baz", true); + assert_eq!(vals, vec!["foo", "bar_baz"]); + } + + #[test] + fn extract_literals_bare_values_rejected_when_disallowed() { + let vals = extract_literal_values("foo, bar", false); + assert!(vals.is_empty()); + } + + // ── strip_comments ────────────────────────────────────────────────── + + #[test] + fn strip_c_style_line_comment() { + let result = strip_comments("x = 1; // comment\ny = 2;", CommentStyle::c_style()); + assert_eq!(result, "x = 1; \ny = 2;"); + } + + #[test] + fn strip_c_style_block_comment() { + let result = strip_comments("a /* block */ b", CommentStyle::c_style()); + assert_eq!(result, "a b"); + } + + #[test] + fn strip_c_style_block_comment_multiline() { + let result = strip_comments("a /* line1\nline2 */ b", CommentStyle::c_style()); + assert_eq!(result, "a \n b"); + } + + #[test] + fn strip_hash_comment() { + let result = strip_comments("key = val # comment\nnext", CommentStyle::shell()); + assert_eq!(result, "key = val \nnext"); + } + + #[test] + fn strip_preserves_hash_inside_string() { + let result = strip_comments(r#"x = "has # inside""#, CommentStyle::shell()); + assert_eq!(result, r#"x = "has # inside""#); + } + + #[test] + fn strip_preserves_slash_inside_string() { + let result = strip_comments(r#"x = "has // inside""#, CommentStyle::c_style()); + assert_eq!(result, r#"x = "has // inside""#); + } + + #[test] + fn strip_python_triple_double_quotes() { + let result = strip_comments( + "x = 1\n\"\"\"docstring # not a 
comment\"\"\"\ny = 2", + CommentStyle::python(), + ); + assert!(result.contains("docstring # not a comment")); + assert!(result.contains("y = 2")); + } + + #[test] + fn strip_python_triple_single_quotes() { + let result = strip_comments("'''multi\nline'''# real comment", CommentStyle::python()); + assert!(result.contains("multi\nline")); + assert!(!result.contains("real comment")); + } + + #[test] + fn strip_csharp_verbatim_string() { + let style = CommentStyle::c_style().with_verbatim_strings(); + let result = strip_comments(r#"x = @"path\to\file" // comment"#, style); + assert!(result.contains(r#"@"path\to\file""#)); + assert!(!result.contains("comment")); + } + + #[test] + fn strip_backtick_template_preserves_content() { + let style = CommentStyle::c_style().with_backticks(); + let result = strip_comments("x = `template // not a comment`", style); + assert_eq!(result, "x = `template // not a comment`"); + } + + #[test] + fn strip_php_both_comment_styles() { + let result = strip_comments("a # hash\nb // slash\nc", CommentStyle::php()); + assert_eq!(result, "a \nb \nc"); + } + + #[test] + fn strip_escaped_quote_in_string() { + let result = + strip_comments(r#"x = "escaped \" quote" // comment"#, CommentStyle::c_style()); + assert!(result.contains(r#"escaped \" quote"#)); + assert!(!result.contains("comment")); + } + + #[test] + fn strip_no_comments_passthrough() { + let input = "let x = 42;\nlet y = \"hello\";"; + let result = strip_comments(input, CommentStyle::c_style()); + assert_eq!(result, input); + } + + // ── normalize_key / normalize_value ──────────────────────────────── + + #[test] + fn normalize_key_strips_prefix_symbols() { + assert_eq!(normalize_key("$var", false), "var"); + assert_eq!(normalize_key("@ivar", false), "ivar"); + } + + #[test] + fn normalize_key_extracts_last_segment() { + assert_eq!(normalize_key("self.password", false), "password"); + assert_eq!(normalize_key("obj::field", false), "field"); + } + + #[test] + fn 
normalize_key_keeps_full_when_requested() { + assert_eq!(normalize_key("self.password", true), "self.password"); + } + + #[test] + fn normalize_value_strips_quotes() { + assert_eq!(normalize_value(r#""hello""#, false), "hello"); + assert_eq!(normalize_value("'world'", false), "world"); + assert_eq!(normalize_value("`tmpl`", false), "tmpl"); + } + + #[test] + fn normalize_value_rejects_bare_when_not_allowed() { + assert_eq!(normalize_value("bareword", false), ""); + } + + #[test] + fn normalize_value_accepts_bare_when_allowed() { + assert_eq!(normalize_value("bareword", true), "bareword"); + } + + #[test] + fn normalize_value_accepts_numbers() { + assert_eq!(normalize_value("42", false), "42"); + assert_eq!(normalize_value("-3.14", false), "-3.14"); + } +} diff --git a/src/parser/queries.rs b/src/parser/queries.rs deleted file mode 100644 index 7213015..0000000 --- a/src/parser/queries.rs +++ /dev/null @@ -1,1105 +0,0 @@ -use rustc_hash::FxHashMap; -pub mod regex { - use super::*; - pub fn get_regex_queries() -> FxHashMap { - let mut queries = FxHashMap::default(); - queries.insert("combined_regex_query".to_string(), QUERIES_REGEX.to_string()); - queries - } -} -pub mod bash { - use super::*; - pub fn get_bash_queries() -> FxHashMap { - let mut queries = FxHashMap::default(); - queries.insert("combined_bash_query".to_string(), QUERIES_BASH.to_string()); - queries - } -} -pub mod c { - use super::*; - pub fn get_c_queries() -> FxHashMap { - let mut queries = FxHashMap::default(); - queries.insert("combined_c_query".to_string(), QUERIES_C.to_string()); - queries - } -} -pub mod cpp { - use super::*; - pub fn get_cpp_queries() -> FxHashMap { - let mut queries = FxHashMap::default(); - queries.insert("combined_cpp_query".to_string(), QUERIES_CPP.to_string()); - queries - } -} -pub mod css { - use super::*; - pub fn get_css_queries() -> FxHashMap { - let mut queries = FxHashMap::default(); - queries.insert("combined_css_query".to_string(), QUERIES_CSS.to_string()); - 
queries - } -} -pub mod csharp { - use super::*; - pub fn get_csharp_queries() -> FxHashMap { - let mut queries = FxHashMap::default(); - queries.insert("combined_csharp_query".to_string(), QUERIES_CSHARP.to_string()); - queries - } -} -pub mod ruby { - use super::*; - pub fn get_ruby_queries() -> FxHashMap { - let mut queries = FxHashMap::default(); - queries.insert("combined_ruby_query".to_string(), QUERIES_RUBY.to_string()); - queries - } -} -pub mod rust { - use super::*; - pub fn get_rust_queries() -> FxHashMap { - let mut queries = FxHashMap::default(); - queries.insert("combined_rust_query".to_string(), QUERIES_RUST.to_string()); - queries - } -} -pub mod yaml { - use super::*; - pub fn get_yaml_queries() -> FxHashMap { - let mut queries = FxHashMap::default(); - queries.insert("combined_yaml_query".to_string(), QUERIES_YAML.to_string()); - queries - } -} -pub mod go { - use super::*; - pub fn get_go_queries() -> FxHashMap { - let mut queries = FxHashMap::default(); - queries.insert("combined_go_query".to_string(), QUERIES_GO.to_string()); - queries - } -} -pub mod html { - use super::*; - pub fn get_html_queries() -> FxHashMap { - let mut queries = FxHashMap::default(); - queries.insert("combined_html_query".to_string(), QUERIES_HTML.to_string()); - queries - } -} -pub mod java { - use super::*; - pub fn get_java_queries() -> FxHashMap { - let mut queries = FxHashMap::default(); - queries.insert("combined_java_query".to_string(), QUERIES_JAVA.to_string()); - queries - } -} -pub mod javascript { - use super::*; - pub fn get_javascript_queries() -> FxHashMap { - let mut queries = FxHashMap::default(); - queries.insert("combined_javascript_query".to_string(), QUERIES_JAVASCRIPT.to_string()); - queries - } -} -pub mod php { - use super::*; - pub fn get_php_queries() -> FxHashMap { - let mut queries = FxHashMap::default(); - queries.insert("combined_php_query".to_string(), QUERIES_PHP.to_string()); - queries - } -} -pub mod python { - use super::*; - pub fn 
get_python_queries() -> FxHashMap { - let mut queries = FxHashMap::default(); - queries.insert("combined_python_query".to_string(), QUERIES_PYTHON.to_string()); - queries - } -} -pub mod toml { - use super::*; - pub fn get_toml_queries() -> FxHashMap { - let mut queries = FxHashMap::default(); - queries.insert("combined_toml_query".to_string(), QUERIES_TOML.to_string()); - queries - } -} -pub mod typescript { - use super::*; - pub fn get_typescript_queries() -> FxHashMap { - let mut queries = FxHashMap::default(); - queries.insert("combined_typescript_query".to_string(), QUERIES_TYPESCRIPT.to_string()); - queries - } -} -//////////////////////////////////////////////////////// -/// -pub const QUERIES_REGEX: &str = r#" -( - (non_capturing_group - (pattern - (alternation) - ) - ) - (anonymous_capturing_group - (pattern) @key - ) - (boundary_assertion)? @boundary -) -"#; -pub const QUERIES_BASH: &str = r#" - (variable_assignment - name: (variable_name) @key - value: [(string)(word)] @val - ) -"#; -pub const QUERIES_C: &str = r#" - ; Query 1: Matches variable declarations with string literal initializations - (declaration - declarator: (init_declarator - declarator: (identifier) @key - value: (string_literal) @val - ) - ) - - ; Query 2: Matches pointer variable declarations with string literal initializations - (declaration - declarator: (init_declarator - declarator: (pointer_declarator - declarator: (identifier) @key - ) - value: (string_literal) @val - ) - ) - - ; Query 3: Matches assignments to variables, pointers, or struct fields with string literals - (assignment_expression - left: [(identifier)(pointer_expression)(field_expression)] @key - right: (string_literal) @val - ) - - ; Query 4: Matches function calls with an identifier and string literal as arguments - (call_expression - function: (identifier) - arguments: (argument_list - (identifier) @key - (string_literal) @val - ) - ) - - ; Query 5: Matches struct initializations with field designators and string 
literal values - (initializer_pair - designator: (field_designator - (field_identifier) @key - ) - value: (string_literal) @val - ) - - ; Query 6: Matches struct declarations with pointer fields - (declaration - type: (struct_specifier - body: (field_declaration_list - (field_declaration - declarator: (pointer_declarator - declarator: (field_identifier) @key - ) - ) - ) - ) - ) - - ; Query 7: Matches initializer lists containing string literals - (declaration - declarator: (init_declarator - declarator: [(identifier)(array_declarator)(pointer_declarator)] @key - value: (initializer_list - (string_literal) @val - ) - ) - ) - - ; Query 8: Matches function calls with string literal arguments - (call_expression - function: (identifier) @key - arguments: (argument_list - (string_literal) @val - ) - ) - - ; Query 9: Matches array declarations with string literal initializations - (declaration - declarator: (init_declarator - declarator: (array_declarator - declarator: (identifier) @key - ) - value: (string_literal) @val - ) - ) -"#; -pub const QUERIES_CPP: &str = r#" - ; Query 1: Matches string declarations with literal initializations - ; Example: string s1 = "a string written here"; - (declaration - declarator: (init_declarator - declarator: (identifier) @key - value: (string_literal) @val - ) - ) - - ; Query 2: Matches char pointer declarations with string literal initializations - ; Example: char *line = "a string written here"; - (declaration - declarator: (init_declarator - declarator: (pointer_declarator - declarator: (identifier) @key - ) - value: (string_literal) @val - ) - ) - - ; Query 3: Matches assignments to variables or object fields with string literals - ; Examples: s1 = "a string written here"; - ; myObj.myString = "Some text"; - (assignment_expression - left: [(identifier)(field_expression)] @key - right: (string_literal) @val - ) - - ; Query 4: Matches assignments to dereferenced pointers with string literals - ; Example: *msg = "a string here"; - 
(assignment_expression - left: (pointer_expression - argument: (identifier) @key - ) - right: (string_literal) @val - ) - - ; Query 5: Matches string declarations with character and count initializations - ; Example: string s6 (15,'*'); - (declaration - declarator: (init_declarator - declarator: (identifier) @key - value: (argument_list - (char_literal) @val - ) - ) - ) - - ; Query 6: Matches function calls with identifier and string literal arguments - ; Example: strcpy(str, "this is a test"); - (call_expression - function: (identifier) - arguments: (argument_list - (identifier) @key - (string_literal) @val - ) - ) - - ; Query 7: Matches class field declarations with string literal default values - ; Example: string model = "test string 1"; - (field_declaration - declarator: (field_identifier) @key - default_value: (string_literal) @val - ) - - ; Query 8: Matches class field declarations of char pointers with string literal default values - ; Example: char *ch = "another test string"; - (field_declaration - declarator: (pointer_declarator - declarator: (field_identifier) @key - ) - default_value: (string_literal) @val - ) - - ; Query 9: Matches function calls with string literal arguments - ; Example: SomeFunction("Passing a string"); - (call_expression - function: (identifier) @key - arguments: (argument_list - (string_literal) @val - ) - ) - - ; Query 10: Matches char array declarations with string literal initializations - ; Example: char my_str[] = "Hello"; - (declaration - declarator: (init_declarator - declarator: (array_declarator - declarator: (identifier) @key - ) - value: (string_literal) @val - ) - ) - - ; Query 11: Matches struct initializer pairs with string literal values - ; Example: .password = "@pple123" - (initializer_pair - designator: (field_designator - (field_identifier) @key - ) - value: (string_literal) @val - ) - - ; Query 12: Matches struct declarations with char pointer fields and string literal initializations - ; Example: char* 
password; (in struct definition) - ; employee_default = {0, "29304!@$#201u3242"}; (in struct initialization) - (declaration - type: (struct_specifier - body: (field_declaration_list - (field_declaration - declarator: (pointer_declarator - declarator: (field_identifier) @key - ) - ) - ) - ) - declarator: (init_declarator - value: (initializer_list - (string_literal) @val - ) - ) - ) -"#; -pub const QUERIES_CSS: &str = r#" - ; Query 1: Matches CSS declarations with string values - (declaration - (property_name) @key - (string_value) @val - ) - - ; Query 2: Matches CSS declarations with function calls - (declaration - (property_name) @key - (call_expression - (arguments - (plain_value) @val - ) - ) - ) -"#; -pub const QUERIES_RUST: &str = r#" - ; Query 1: Matches let declarations with function calls containing string literal arguments - ; Example: let my_string = String::from("Hello, world!"); - (let_declaration - pattern: (identifier) @key - value: (_ - arguments: (arguments - (string_literal) @val - ) - ) - ) - - ; Query 2: Matches assignments to struct fields with function calls containing string literal arguments - ; Example: self.name = String::from("John Doe"); - (assignment_expression - left: (_ - field: (field_identifier) @key - ) - right: (call_expression - arguments: (arguments - (string_literal) @val - ) - ) - ) - - ; Query 3: Matches let declarations with direct string literal assignments - ; Example: let greeting = "Hello, Rust!"; - (let_declaration - pattern: (identifier) @key - value: (string_literal) @val - ) - - ; Query 4: Matches let declarations with macro invocations or other complex initializations - ; Example: let formatted = format!("Hello, {}!", name); - (let_declaration - pattern: (identifier) @key - value: (_ - (token_tree) @val - ) - ) -"#; -pub const QUERIES_YAML: &str = r#" - ; Query 1: Matches key-value pairs in YAML block mappings - ; Examples: - ; key: value - ; another_key: "quoted value" - ; third_key: 'single quoted value' - 
(block_mapping_pair - key: (flow_node - [(plain_scalar)(string_scalar)] @key - ) - value: (flow_node - [(plain_scalar)(string_scalar)(single_quote_scalar)(double_quote_scalar)] @val - ) - ) -"#; -pub const QUERIES_CSHARP: &str = r#" - ; Query 1: Matches assignments to object properties - ; Example: obj.PropertyName = "value"; - (assignment_expression - left: (member_access_expression - name: (identifier) @key - ) - right: (string_literal) @val - ) - - ; Query 2: Matches variable declarations with object creation and string argument - ; Example: var obj = new SomeClass("string value"); - (variable_declarator - (identifier) @key - (object_creation_expression - arguments: (argument_list - (argument - (string_literal) @val - ) - ) - - ) - ) - - ; Query 3: Matches variable declarations with string literals, verbatim strings, or interpolated strings - ; Examples: - ; string str = "Hello"; - ; string verbatim = @"C:\path\to\file"; - ; string interpolated = $"Hello, {name}"; - (variable_declaration - (variable_declarator - name: (identifier) @key - [(string_literal)(verbatim_string_literal)(interpolated_string_expression)] @val - - ) - ) - - ; Query 4: Matches variable declarations with string literal in an initializer expression - ; Example: SomeType var = new SomeType { StringProp = "value" }; - (assignment_expression - left: (identifier) @key - right: (string_literal) @val - ) -"#; -pub const QUERIES_RUBY: &str = r#" - ; Assignment with identifier or instance variable - (assignment - left: [(identifier)(instance_variable)] @key - right: [(string)(integer)] @val - ) - - ; Operator assignment - (operator_assignment - left: (identifier) @key - right: [(string)(integer)] @val - ) - - ; Assignment with method call on left side - (assignment - left: (call - method: (identifier) @key - ) - right: [(string)(integer)] @val - ) - - ; Assignment with right_assignment_list - (assignment - left: (identifier) @key - right: (right_assignment_list - [(string)(integer)] @val - ) - ) - - 
; Assignment with method call and string argument - (assignment - left: (identifier) @key - right: (call - arguments: (argument_list - (string - (string_content) @val - ) - ) - ) - ) - - ; Assignment with constant - (assignment - left: (constant) @key - right: [(string)(integer)] @val - ) - - ; Method call with receiver and string argument - (call - receiver: (identifier) @key - method: (identifier) - arguments: (argument_list - (string - (string_content) @val - ) - ) - ) - - ; Assignment with method call and two string arguments - (assignment - left: (identifier) - right: (call - arguments: (argument_list - (string - (string_content) @key - ) - (string - (string_content) @val - ) - ) - ) - ) - - ; Method call with receiver and two string arguments - (call - receiver: (identifier) - method: (identifier) - arguments: (argument_list - (string - (string_content) @key - ) - (string - (string_content) @val - ) - ) - ) - - ; Method call with string argument - (call - method: (identifier) @key - arguments: (argument_list - (string) @val - ) - ) - - ; Assignment with string array - (assignment - left: (identifier) @key - right: (string_array - (bare_string - (string_content) @val - ) - ) - ) -"#; -pub const QUERIES_GO: &str = r#" - ; Query 1: Matches variable declarations with string literal values - (var_spec - name: (identifier) @key - value: (expression_list - [(interpreted_string_literal)(raw_string_literal)] @val - ) - ) - - ; Query 2: Matches short variable declarations with string literal values - (short_var_declaration - left: (expression_list - (identifier) @key - ) - right: (expression_list - [(interpreted_string_literal)(raw_string_literal)] @val - ) - ) - - ; Query 3: Matches assignment statements with string literal values - (assignment_statement - left: (expression_list - (identifier) @key - ) - right: (expression_list - [(interpreted_string_literal)(raw_string_literal)] @val - ) - ) - - ; Query 4: Matches short variable declarations with selector expressions 
and string literal values - (short_var_declaration - left: (expression_list - (selector_expression - field: (field_identifier) @key - ) - ) - right: (expression_list - [(interpreted_string_literal)(raw_string_literal)] @val - ) - ) - - ; Query 5: Matches assignment statements with selector expressions and string literal values - (assignment_statement - left: (expression_list - (selector_expression) @key - ) - right: (expression_list - [(interpreted_string_literal)(raw_string_literal)] @val - ) - ) - - ; Query 6: Matches variable specifications with optional type and string literal values - (var_spec - (identifier) @key - (type_identifier)? - "=" - (expression_list - [(interpreted_string_literal)(raw_string_literal)] @val - )+ - ) -"#; -pub const QUERIES_HTML: &str = r#" - ; Query 1: Matches HTML elements with text content - (element - (start_tag (tag_name) @key) - (text) @val - ) - - ; Query 2: Matches HTML attributes with quoted values - (attribute - (attribute_name) @key - (quoted_attribute_value - (attribute_value) @value - ) - ) - - ; Query 3: Extracts embedded JavaScript from script tags - (script_element - (start_tag) @key - (raw_text) @val - ) -"#; -pub const QUERIES_JAVA: &str = r#" - ; Query 1: Local variable declarations with direct string assignments - (local_variable_declaration - declarator: (variable_declarator - name: (identifier) @key - value: (string_literal) @val - ) - ) - - ; Query 2: Field declarations with direct string assignments - (field_declaration - declarator: (variable_declarator - name: (identifier) @key - value: (string_literal) @val - ) - ) - - ; Query 3: Identifier assignment with direct string literal - (assignment_expression - left: (identifier) @key - right: (string_literal) @val - ) - - ; Query 4: Field assignment with direct string literal - (assignment_expression - left: (field_access - field: (identifier) @key - ) - right: (string_literal) @val - ) - - ; Query 5: Local variable assignment from constructor call containing a 
string - (local_variable_declaration - declarator: (variable_declarator - name: (identifier) @key - value: (object_creation_expression - arguments: (argument_list - (string_literal) @val - ) - ) - ) - ) - - ; Query 6: Local variable assignment from method call containing a string - (local_variable_declaration - declarator: (variable_declarator - name: (identifier) @key - value: (method_invocation - arguments: (argument_list - (string_literal) @val - ) - ) - ) - ) -"#; -pub const QUERIES_JAVASCRIPT: &str = r#" - ; Query 1: Matches assignments to object properties - (assignment_expression - left: (member_expression - property: (property_identifier) @key - ) - right: [ - (string (string_fragment) @val) - (template_string) @val - ] - ) - - ; Query 2: Matches variable declarations with literal values - (variable_declarator - name: (identifier) @key - value: [ - (string (string_fragment) @val) - (template_string) @val - ] - ) - - ; Query 3: Matches variable declarations with object literals - (variable_declarator - name: (identifier) - value: (object - (pair - key: (property_identifier) @key - value: [ - (string (string_fragment) @val) - (template_string) @val - ] - ) - ) - ) - - ; Query 4: Matches function calls with identifier and string arguments - (call_expression - arguments: (arguments - (identifier) @key - [ - (string (string_fragment) @val) - (template_string) @val - ] - ) - ) - - ; Query 5: Matches object literal key-value pairs - (pair - key: (property_identifier) @key - value: [ - (string (string_fragment) @val) - (template_string) @val - ] - ) - - ; Query 6: Matches assignments to array or object elements - (assignment_expression - left: (subscript_expression - index: [(string)(identifier)] @key - ) - right: [ - (string (string_fragment) @val) - (template_string) @val - ] - ) - - ; Query 7: Matches method calls on objects with string arguments - (call_expression - function: (member_expression - object: (identifier) @key - ) - arguments: (arguments - [ - 
(string - (string_fragment) @val - ) - (template_string) @val - ] - ) - ) -"#; -pub const QUERIES_PHP: &str = r#" - ; Query 1: Matches variable assignments - (expression_statement - (assignment_expression - left: (variable_name - (name) @key - ) - right: [(string)(integer)] @val - ) - ) - - ; Query 2: Matches property declarations with initializers - (property_declaration - (property_element - (variable_name - (name) @key - ) - default_value: (string - (string_content) @val - ) - ) - ) - - ; Query 3: Matches assignments to object properties - (expression_statement - (assignment_expression - left: (member_access_expression - (name) @key - ) - right: [(string)(integer)] @val - ) - ) - - - ; Query 4: Matches method calls with string or integer arguments - (expression_statement - (member_call_expression - name: (name) @key - arguments: (arguments - (argument - [(string)(integer)] @val - ) - ) - ) - ) - - ; Query 5 - (expression_statement - (variable_name) @key - (_ - (name) @val - ) - ) - - ; Query 6 - (assignment_expression - left: (variable_name - (name) @key - ) - right: (encapsed_string - (string_content) @val - ) - ) - - ; Query 7 - (assignment_expression - left: (member_access_expression - name: (name) @key - ) - right: (encapsed_string - (string_content) @val - ) - ) -"#; -pub const QUERIES_PYTHON: &str = r#" - ; Query 1: Matches assignments to object attributes - (assignment - left: (attribute - attribute: (identifier) @key - ) - right: [(string)(integer)] @val - ) - - ; Query 2: Matches dictionary key-value pairs - (pair - key: (string) @key - value: [(string)(integer)(attribute)] @val - ) - - ; Query 3: Matches function calls with keyword arguments - (call - arguments: (argument_list - (keyword_argument - name: (identifier) @key - value: (string) @val - ) - ) - ) - - ; Query 4: Matches function calls with keyword arguments containing tuples, lists, or attributes - (call - arguments: (argument_list - (keyword_argument - name: (identifier) @key - value: [ - 
(tuple(attribute) @val) - (list(string) @val) - ] - ) - ) - ) - - ; Query 5: Matches assignments with function calls containing string or integer arguments - (expression_statement - (assignment - left: (_) @key - right: (expression_list - (call - arguments: (argument_list - [(string)(integer)] @val - ) - ) - ) - ) - ) - - ; Query 6: Matches assignments with dictionary values - (expression_statement - (assignment - left: (_) @key - right: (expression_list - (dictionary - (pair) @val - ) - ) - ) - ) - - ; Query 7: Matches function calls with keyword arguments containing lists of strings - (call - arguments: (argument_list - (keyword_argument - name: (identifier) @key - value: (list - (string) @val - ) - ) - ) - ) - - ; Query 8: Matches function calls with keyword arguments containing dictionaries - (call - arguments: (argument_list - (keyword_argument - name: (identifier) @key - value: (dictionary - (pair) @val - ) - ) - ) - ) - - ; Query 9: Matches function calls with keyword arguments containing tuples - (call - arguments: (argument_list - (keyword_argument - name: (identifier) @key - value: (tuple - (_) @val - ) - ) - ) - ) - - ; Query 10: Matches simple variable assignments - (assignment - left: (identifier) @key - right: [(string)(integer)] @val - ) - - ; Query 11: Matches assignments with function calls containing string arguments - (assignment - left: (identifier) @key - right: (call - arguments: (argument_list - (string) @val - ) - ) - ) - - ; Query 12: Matches assignments with method calls containing string arguments - (assignment - right: (call - function: (attribute) @key - arguments: (argument_list - (string) @val - ) - ) - ) - - ; Query 13: Matches string arguments in function calls - (call - function: (attribute) @key - arguments: (argument_list - (string - (string_content) @val - ) - ) - ) -"#; -pub const QUERIES_TOML: &str = r#" - ; Query 1: Matches key-value pairs - (pair - [(bare_key)(string)] @key - [(bare_key)(string)] @val - ) - - ; Query 2: 
Matches key-value pairs with array values containing strings - (pair - [(bare_key)(string)] @key - (array - (string) @val - ) - ) - - ; Query 3: Matches key-value pairs with nested array values containing strings - (pair - [(bare_key)(string)] @key - (array - (array - (string) @val - ) - ) - ) -"#; -pub const QUERIES_TYPESCRIPT: &str = r#" - ; Query 1: Matches variable declarations with string or number values - (variable_declarator - name: (identifier) @key - value: [(string)(template_string)(number)] @val - ) - - ; Query 2: Matches assignments to variables or object properties - (assignment_expression - left: [(member_expression)(identifier)] @key - right: [(string)(template_string)(number)] @val - ) - - ; Query 3: Matches variable declarations with string literal type annotations - (variable_declarator - name: (identifier) @key - type: (type_annotation - (literal_type - (string) @val - ) - ) - ) - - ; Query 4: Matches object property definitions with array values containing strings - (pair - key: (property_identifier) @key - value: ( - (array - [(string)(template_string)] @val - ) - ) - ) - - ; Query 5: Matches object property definitions with string or number values - (pair - key: (property_identifier) @key - value: [(string)(template_string)(number)] @val - ) - - ; Query 6: Matches property signatures with literal types - (property_signature - name: (property_identifier) @key - (_ - (literal_type) @val - ) - ) - - ; Query 7: Matches property signatures with union types - (property_signature - name: (property_identifier) @key - type: (type_annotation - (union_type) @val - ) - ) - - ; Query 8: Matches method calls with string arguments - (call_expression - function: (_ - property: (property_identifier) @key - ) - arguments: (arguments - [(string)(template_string)] @val - ) - ) -"#; diff --git a/src/reporter.rs b/src/reporter.rs index b01c6df..fde8aa4 100644 --- a/src/reporter.rs +++ b/src/reporter.rs @@ -12,6 +12,8 @@ use schemars::JsonSchema; use 
serde::Serialize; use url::Url; +use kingfisher_scanner::validation::http_validation::is_auto_provided_request_var; + use crate::{ access_map::{AccessSummary, AccessTokenDetails, ProviderMetadata, ResourceExposure}, blob::BlobMetadata, @@ -23,6 +25,7 @@ use crate::{ origin::{Origin, OriginSet}, rules::rule::Confidence, rules::Revocation, + template_vars::extract_template_vars, validation_body::{self, ValidationResponseBody}, }; mod bson_format; @@ -47,45 +50,6 @@ fn escape_for_shell(s: &str) -> String { format!("'{}'", s.replace('\'', "'\\''")) } -static TEMPLATE_BLOCK_RE: once_cell::sync::Lazy = once_cell::sync::Lazy::new(|| { - regex::Regex::new(r"\{\{\s*([^}]*)\}\}").expect("template block regex should compile") -}); - -static TEMPLATE_IDENT_RE: once_cell::sync::Lazy = once_cell::sync::Lazy::new(|| { - regex::Regex::new(r"[A-Za-z_][A-Za-z0-9_]*").expect("template identifier regex should compile") -}); - -const TEMPLATE_FILTER_NAMES: &[&str] = &[ - "append", - "b64enc", - "base62", - "crc32", - "crc32_hex", - "default", - "downcase", - "json_escape", - "prefix", - "replace", - "url_encode", -]; - -fn extract_template_vars(text: &str) -> BTreeSet { - let mut vars = BTreeSet::new(); - - for block_cap in TEMPLATE_BLOCK_RE.captures_iter(text) { - let inner = block_cap.get(1).map(|m| m.as_str()).unwrap_or_default(); - for ident_cap in TEMPLATE_IDENT_RE.captures_iter(inner) { - let ident = ident_cap.get(0).map(|m| m.as_str()).unwrap_or_default(); - if TEMPLATE_FILTER_NAMES.iter().any(|f| f.eq_ignore_ascii_case(ident)) { - continue; - } - vars.insert(ident.to_uppercase()); - } - } - - vars -} - fn required_vars_for_validation(validation: &crate::rules::Validation) -> BTreeSet { use crate::rules::Validation; let mut vars = BTreeSet::new(); @@ -133,11 +97,13 @@ fn required_vars_for_validation(validation: &crate::rules::Validation) -> BTreeS vars.insert("TOKEN".to_string()); vars.insert("CRED_NAME".to_string()); } - Validation::Raw(_) => { - 
vars.insert("TOKEN".to_string()); + Validation::Raw(raw) => { + vars.extend(kingfisher_scanner::validation::raw::required_vars(raw)); } } + vars.retain(|var| !is_auto_provided_request_var(var)); + vars } @@ -936,7 +902,11 @@ impl DetailsReporter { let validation_status = if rm.validation_success { "Active Credential".to_string() - } else if rm.validation_response_status == StatusCode::CONTINUE.as_u16() { + } else if matches!( + rm.validation_response_status, + status if status == StatusCode::CONTINUE.as_u16() + || status == StatusCode::PRECONDITION_REQUIRED.as_u16() + ) { "Not Attempted".to_string() } else { "Inactive Credential".to_string() @@ -1969,7 +1939,7 @@ mod tests { let (report_match, _) = sample_report_match( "(skip list entry) AWS validation not attempted for account 111122223333.", - StatusCode::CONTINUE.as_u16(), + StatusCode::PRECONDITION_REQUIRED.as_u16(), false, ); let scan_args = sample_scan_args(); diff --git a/src/scanner/processing.rs b/src/scanner/processing.rs index 42e2327..7b572e7 100644 --- a/src/scanner/processing.rs +++ b/src/scanner/processing.rs @@ -6,7 +6,7 @@ use crate::{ blob::{Blob, BlobMetadata}, content_type::ContentInspector, location::LocationMapping, - matcher::{should_attempt_tree_sitter, Match, Matcher, OwnedBlobMatch, ScanResult}, + matcher::{should_attempt_context_verification, Match, Matcher, OwnedBlobMatch, ScanResult}, origin::{Origin, OriginSet}, scanner::repos::DatastoreMessage, Path, @@ -32,7 +32,7 @@ impl<'a> BlobProcessor<'a> { ) -> Result> { let _span = debug_span!("matcher", temp_id = blob.temp_id()).entered(); let t1 = Instant::now(); - let language_hint = if fast_mode || !should_attempt_tree_sitter(blob.len()) { + let language_hint = if fast_mode || !should_attempt_context_verification(blob.len()) { None } else { origin diff --git a/src/scanner/validation.rs b/src/scanner/validation.rs index cba8d33..088a556 100644 --- a/src/scanner/validation.rs +++ b/src/scanner/validation.rs @@ -766,9 +766,9 @@ async fn 
validate_single( om.validation_success = cached.is_valid; om.validation_response_body = cached.body.clone(); om.validation_response_status = cached.status; - if om.validation_success { + if om.validation_success && is_counted_validation_status(om.validation_response_status) { success_count.fetch_add(1, Ordering::Relaxed); - } else if om.validation_response_status != http::StatusCode::CONTINUE { + } else if is_counted_validation_status(om.validation_response_status) { fail_count.fetch_add(1, Ordering::Relaxed); } maybe_record_access_map(om, access_map); @@ -787,9 +787,10 @@ async fn validate_single( om.validation_success = cached.is_valid; om.validation_response_body = cached.body.clone(); om.validation_response_status = cached.status; - if om.validation_success { + if om.validation_success && is_counted_validation_status(om.validation_response_status) + { success_count.fetch_add(1, Ordering::Relaxed); - } else if om.validation_response_status != http::StatusCode::CONTINUE { + } else if is_counted_validation_status(om.validation_response_status) { fail_count.fetch_add(1, Ordering::Relaxed); } maybe_record_access_map(om, access_map); @@ -818,9 +819,10 @@ async fn validate_single( // Store result in cache match outcome { Ok(_) => { - if om.validation_success { + if om.validation_success && is_counted_validation_status(om.validation_response_status) + { success_count.fetch_add(1, Ordering::Relaxed); - } else if om.validation_response_status != http::StatusCode::CONTINUE { + } else if is_counted_validation_status(om.validation_response_status) { fail_count.fetch_add(1, Ordering::Relaxed); } cache.insert( @@ -849,6 +851,10 @@ async fn validate_single( } } +fn is_counted_validation_status(status: StatusCode) -> bool { + !matches!(status, StatusCode::CONTINUE | StatusCode::PRECONDITION_REQUIRED) +} + // Helper to compute the cache key for an OwnedBlobMatch fn build_cache_key( om: &OwnedBlobMatch, @@ -1358,3 +1364,16 @@ fn extract_azure_devops_org_from_body( let text = 
validation_body::clone_as_string(body); ORG_RE.captures(&text).and_then(|caps| caps.get(1).map(|m| m.as_str().to_string())) } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn counted_validation_status_excludes_skipped_statuses() { + assert!(!is_counted_validation_status(StatusCode::CONTINUE)); + assert!(!is_counted_validation_status(StatusCode::PRECONDITION_REQUIRED)); + assert!(is_counted_validation_status(StatusCode::OK)); + assert!(is_counted_validation_status(StatusCode::UNAUTHORIZED)); + } +} diff --git a/src/template_vars.rs b/src/template_vars.rs new file mode 100644 index 0000000..dd7a277 --- /dev/null +++ b/src/template_vars.rs @@ -0,0 +1,151 @@ +use std::collections::BTreeSet; + +static TEMPLATE_BLOCK_RE: once_cell::sync::Lazy = once_cell::sync::Lazy::new(|| { + regex::Regex::new(r"\{\{\s*([^}]*)\}\}").expect("template block regex should compile") +}); + +const LIQUID_LITERAL_NAMES: &[&str] = &["blank", "empty", "false", "nil", "null", "true"]; + +pub(crate) fn extract_template_vars(text: &str) -> BTreeSet { + let mut vars = BTreeSet::new(); + + for block_cap in TEMPLATE_BLOCK_RE.captures_iter(text) { + let inner = block_cap.get(1).map(|m| m.as_str()).unwrap_or_default(); + for (segment_index, segment) in split_filter_segments(inner).into_iter().enumerate() { + collect_segment_vars(segment, segment_index != 0, &mut vars); + } + } + + vars +} + +fn split_filter_segments(inner: &str) -> Vec<&str> { + let mut segments = Vec::new(); + let mut start = 0; + let mut in_single = false; + let mut in_double = false; + let mut escaped = false; + + for (idx, ch) in inner.char_indices() { + if escaped { + escaped = false; + continue; + } + + match ch { + '\\' if in_single || in_double => escaped = true, + '\'' if !in_double => in_single = !in_single, + '"' if !in_single => in_double = !in_double, + '|' if !in_single && !in_double => { + segments.push(&inner[start..idx]); + start = idx + ch.len_utf8(); + } + _ => {} + } + } + + 
segments.push(&inner[start..]); + segments +} + +fn collect_segment_vars(segment: &str, skip_first_ident: bool, vars: &mut BTreeSet) { + let mut chars = segment.char_indices().peekable(); + let mut in_single = false; + let mut in_double = false; + let mut escaped = false; + let mut skipped_filter_name = !skip_first_ident; + + while let Some((idx, ch)) = chars.next() { + if escaped { + escaped = false; + continue; + } + + match ch { + '\\' if in_single || in_double => { + escaped = true; + continue; + } + '\'' if !in_double => { + in_single = !in_single; + continue; + } + '"' if !in_single => { + in_double = !in_double; + continue; + } + _ => {} + } + + if in_single || in_double || !is_ident_start(ch) { + continue; + } + + let mut end = idx + ch.len_utf8(); + while let Some(&(next_idx, next_ch)) = chars.peek() { + if !is_ident_continue(next_ch) { + break; + } + chars.next(); + end = next_idx + next_ch.len_utf8(); + } + + let ident = &segment[idx..end]; + if !skipped_filter_name { + skipped_filter_name = true; + continue; + } + + if LIQUID_LITERAL_NAMES.iter().any(|name| name.eq_ignore_ascii_case(ident)) { + continue; + } + + vars.insert(ident.to_ascii_uppercase()); + } +} + +fn is_ident_start(ch: char) -> bool { + ch.is_ascii_alphabetic() || ch == '_' +} + +fn is_ident_continue(ch: char) -> bool { + ch.is_ascii_alphanumeric() || ch == '_' +} + +#[cfg(test)] +mod tests { + use super::extract_template_vars; + use std::collections::BTreeSet; + + #[test] + fn ignores_filter_names_but_keeps_filter_argument_vars() { + let vars = extract_template_vars( + "{{ NEXT_PUBLIC_VERCEL_APP_CLIENT_ID | default: VERCEL_APP_CLIENT_ID | append: ':' | append: VERCEL_APP_CLIENT_SECRET | b64enc }}", + ); + + assert_eq!( + vars, + BTreeSet::from([ + "NEXT_PUBLIC_VERCEL_APP_CLIENT_ID".to_string(), + "VERCEL_APP_CLIENT_ID".to_string(), + "VERCEL_APP_CLIENT_SECRET".to_string(), + ]) + ); + } + + #[test] + fn ignores_literal_strings_and_new_filter_names() { + let vars = extract_template_vars( 
+ r#"{{ "" | unix_timestamp_ms }} {{ "" | rfc1123_date }} {{ TOKEN | hmac_sha384_hex: SECRET }} {{ "https://example.com/oauth/callback" | url_encode }}"#, + ); + + assert_eq!(vars, BTreeSet::from(["SECRET".to_string(), "TOKEN".to_string()])); + } + + #[test] + fn ignores_liquid_literal_arguments() { + let vars = extract_template_vars(r#"{{ TOKEN | default: blank | append: FALLBACK }}"#); + + assert_eq!(vars, BTreeSet::from(["FALLBACK".to_string(), "TOKEN".to_string()])); + } +} diff --git a/src/validation.rs b/src/validation.rs index fd33662..804f7f2 100644 --- a/src/validation.rs +++ b/src/validation.rs @@ -614,10 +614,12 @@ async fn timed_validate_single_match<'a>( let request_timeout = validation_timeout; let multipart_timeout = validation_timeout; let max_retries: u32 = validation_retries; + let request_globals = httpvalidation::with_request_template_globals(&globals); + let cache_globals = httpvalidation::with_cache_key_template_globals(&globals); // render URL let url = match render_and_parse_url( parser, - &globals, + &request_globals, &rule_syntax.name, &http_validation.request.url, clients.allow_internal_ips, @@ -643,7 +645,7 @@ async fn timed_validate_single_match<'a>( &http_validation.request.body, request_timeout, parser, - &globals, + &request_globals, ) { Ok(rb) => rb, Err(e) => { @@ -660,10 +662,19 @@ async fn timed_validate_single_match<'a>( // old per-request cache (optional) if !is_multipart { + let cache_url = render_template( + parser, + &cache_globals, + &rule_syntax.name, + &http_validation.request.url, + ) + .await + .unwrap_or_else(|_| http_validation.request.url.clone()); + let rendered_headers = httpvalidation::process_headers( &http_validation.request.headers, parser, - &globals, + &cache_globals, &url, ) .unwrap_or_default(); @@ -681,12 +692,12 @@ async fn timed_validate_single_match<'a>( parser .parse(body_template) .ok() - .and_then(|template| template.render(&globals).ok()) + .and_then(|template| template.render(&cache_globals).ok()) 
}); cache_key = httpvalidation::generate_http_cache_key_parts( http_validation.request.method.as_str(), - &url, + &cache_url, &header_map, rendered_body.as_deref(), ); @@ -726,7 +737,7 @@ async fn timed_validate_single_match<'a>( if let Ok(mut headers) = httpvalidation::process_headers( &http_validation.request.headers, parser, - &globals, + &request_globals, &url, ) { // add realistic UA & accept headers @@ -752,7 +763,7 @@ async fn timed_validate_single_match<'a>( "file" => { let path = render_template( parser, - &globals, + &request_globals, &rule_syntax.name, &part.content, ) @@ -771,7 +782,7 @@ async fn timed_validate_single_match<'a>( "text" => { let txt = render_template( parser, - &globals, + &request_globals, &rule_syntax.name, &part.content, ) @@ -872,11 +883,12 @@ async fn timed_validate_single_match<'a>( // ---------------------------------------------------- gRPC validator Some(Validation::Grpc(grpc_validation_cfg)) => { let request_timeout = validation_timeout; + let request_globals = httpvalidation::with_request_template_globals(&globals); // Render URL let url = match render_and_parse_url( parser, - &globals, + &request_globals, &rule_syntax.name, &grpc_validation_cfg.request.url, clients.allow_internal_ips, @@ -899,7 +911,7 @@ async fn timed_validate_single_match<'a>( &grpc_validation_cfg.request.headers, &grpc_validation_cfg.request.body, parser, - &globals, + &request_globals, request_timeout, ) .await @@ -1309,7 +1321,7 @@ async fn timed_validate_single_match<'a>( "(skip list entry) AWS validation not attempted for account {}.", account_id )); - m.validation_response_status = StatusCode::CONTINUE; + m.validation_response_status = StatusCode::PRECONDITION_REQUIRED; cache.insert( cache_key, CachedResponse { @@ -1481,11 +1493,28 @@ async fn timed_validate_single_match<'a>( } // --------------------------------------------------------- Raw / none Some(Validation::Raw(raw)) => { - debug!("Raw validation not implemented: {}", raw); - 
m.validation_success = false; - m.validation_response_body = - validation_body::from_string("Validator not implemented".to_string()); - m.validation_response_status = StatusCode::NOT_IMPLEMENTED; + match kingfisher_scanner::validation::raw::validate_raw( + raw, + &globals, + client, + clients.should_use_lax(rule_syntax.tls_mode), + clients.allow_internal_ips, + ) + .await + { + Ok(result) => { + m.validation_success = result.valid; + m.validation_response_body = validation_body::from_string(result.body); + m.validation_response_status = result.status; + } + Err(e) => { + debug!("Raw validation error for {}: {}", raw, e); + m.validation_success = false; + m.validation_response_body = + validation_body::from_string(format!("Raw validation error: {}", e)); + m.validation_response_status = StatusCode::BAD_GATEWAY; + } + } } None => { /* no validation specified */ } } diff --git a/src/validation_rate_limit.rs b/src/validation_rate_limit.rs index 8b1ac1c..e7413cb 100644 --- a/src/validation_rate_limit.rs +++ b/src/validation_rate_limit.rs @@ -117,8 +117,8 @@ fn selector_matches(rule_id: &str, selector: &str) -> bool { || rule_id.strip_prefix(selector).is_some_and(|suffix| suffix.starts_with('.')) } -pub fn should_rate_limit_validation(validation: &Validation) -> bool { - !matches!(validation, Validation::Raw(_)) +pub fn should_rate_limit_validation(_validation: &Validation) -> bool { + true } #[cfg(test)] @@ -175,7 +175,7 @@ mod tests { } #[test] - fn should_skip_rate_limit_for_raw_validation() { - assert!(!should_rate_limit_validation(&Validation::Raw("custom".to_string()))); + fn should_rate_limit_raw_validation() { + assert!(should_rate_limit_validation(&Validation::Raw("azurebatch".to_string()))); } } diff --git a/testdata/css_vulnerable.css b/testdata/css_vulnerable.css new file mode 100644 index 0000000..2a7040b --- /dev/null +++ b/testdata/css_vulnerable.css @@ -0,0 +1,8 @@ +.banner { + password: "blink182"; + background-image: url("all-along-the-watchtower"); +} 
+ +.secret-key { + content: "abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234"; +} diff --git a/testdata/html_embedded_vulnerable.html b/testdata/html_embedded_vulnerable.html new file mode 100644 index 0000000..ecf3d0a --- /dev/null +++ b/testdata/html_embedded_vulnerable.html @@ -0,0 +1,16 @@ + + + + + + + + + diff --git a/testdata/html_vulnerable.html b/testdata/html_vulnerable.html new file mode 100644 index 0000000..0b1cbef --- /dev/null +++ b/testdata/html_vulnerable.html @@ -0,0 +1,7 @@ + + + +
hunter2
+ + + diff --git a/testdata/parsers/comment_only_context.py b/testdata/parsers/comment_only_context.py new file mode 100644 index 0000000..89344d6 --- /dev/null +++ b/testdata/parsers/comment_only_context.py @@ -0,0 +1,2 @@ +# auth0 token abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234 +# password = "superSecret123" diff --git a/testdata/parsers/context_verifier_golden.json b/testdata/parsers/context_verifier_golden.json new file mode 100644 index 0000000..7ce0f53 --- /dev/null +++ b/testdata/parsers/context_verifier_golden.json @@ -0,0 +1,455 @@ +{ + "bash:testdata/shell_vulnerable.sh": [ + "IPADDRESS = 8.8.8.8", + "PASSWORD = s3cr3tp@ssw0rd", + "PWD = a9lah209la81la3", + "PASSPHRASE = all along the watchtower", + "KEY = qpsbnoewdmdsoeg", + "SECRET_KEY = 402750613792034973", + "PRIVATE_KEY = ja4wALsaho20af21dS", + "another_password = blink182", + "backup_password = letmein123", + "API_KEY = 932" + ], + "c:testdata/c_vulnerable.c": [ + "id = 0", + "secret_key = my voice is my passport", + "employee_default = 0", + "employee_default = 8934#@hafRhzj13!d<2$F5q", + "age = 30", + "secret_key = John", + "strdup = John", + "password = Doe", + "strdup = Doe", + "msg = sunshine19", + "s1 = blink182", + "printf = values: %s; Age: %u\\n", + "age = 25", + "secret_key = 449a@QL#cha0213aKL:HF#@9;+_345Awd", + "strdup = 449a@QL#cha0213aKL:HF#@9;+_345Awd", + "printf = values: %s; Age: %u\\n", + "firstName = Marty", + "password = McFly", + "key_id = AKIA6ODU5DHT7VPXGCE4", + "aws_secret = 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", + "printf = values: %s; Age: %u\\n" + ], + "c_sharp:testdata/csharp_vulnerable.cs": [ + "user = John", + "user = Doe", + "user = john@email.com", + "User = John", + "User = Doe", + "User = john@email.com", + "John = Doe", + "FirsName = Bob", + "ipAddress = 8.8.8.8", + "String = 8.8.8.8", + "password = s3cr3tp@ssw0rd", + "String = s3cr3tp@ssw0rd", + "passwd = 9043hfdlasf023", + "String = 9043hfdlasf023", + "pwd = a9lah209la81la3", + "String 
= a9lah209la81la3", + "password = all along the watchtower", + "String = all along the watchtower", + "key = qpsbnoewdmdsoeg", + "String = qpsbnoewdmdsoeg", + "secretKey = 402750613792034973", + "String = 402750613792034973", + "privateKey = ja4wALsaho20af21dS", + "String = ja4wALsaho20af21dS", + "ip = 8.8.8.8", + "pass = s3cr3tp@ssw0rd 2", + "password = 9043hfdlasf023", + "secret = a9lah209la81la3", + "phrase = all along the watchtower", + "myKey = qpsbnoewdmdsoeg", + "secretKey = 402750613792034973", + "privateKey = ja4wALsaho20af21dS", + "key_id = AKIA6ODU5DHT7VPXGCE4", + "aws_secret = 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", + "hidden_passphrase = blink182", + "escaped = Hello \\\"World\\\"", + "name = John", + "firstName = John ", + "lastName = Doe", + "score = The score is {0}", + "score = 42", + "Format = The score is {0}", + "Format = 42" + ], + "cpp:testdata/cpp_vulnerable.cpp": [ + "my_api_key = foo", + "setMyNum = 15", + "setMyString = p@ssw0rd123", + "setSecretKey = 23847601237597123230895", + "secret_pass = my voice is my passport", + "temp_password = short line for testing", + "s5 = 6", + "s5 = 4", + "6 = 4", + "szHackerProof = 15", + "szHackerProof = *", + "15 = *", + "strForFunc = Passing a string" + ], + "css:testdata/css_vulnerable.css": [ + "password = blink182", + "background-image = url(", + "background-image = all-along-the-watchtower", + "content = abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234" + ], + "go:testdata/go_vulnerable.go": [ + "Println = hello world", + "ipAddress = 8.8.8.8", + "password = s3cr3tp@ssw0rd", + "passwd = 9043hfdlasf023", + "pwd = a9lah209la81la3", + "passphrase = all along the watchtower", + "key = qpsbnoewdmdsoeg", + "secret_key = 402750613792034973", + "private_key = ja4wALsaho20af21dS", + "ipAddress = 8.8.8.8", + "password = s3cr3tp@ssw0rd 2", + "passwd = 9043hfdlasf023", + "pwd = a9lah209la81la3", + "passphrase = all along the watchtower", + "key = qpsbnoewdmdsoeg", + "secret_key = 
402750613792034973", + "private_key = ja4wALsaho20af21dS", + "ipAddress = 1a2w3eqwerty", + "password = space2001", + "passwd = space1958", + "pwd = qwertyuiop123", + "passphrase = trustno1", + "key_id = AKIA6ODU5DHT7VPXGCE4", + "aws_secret = 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", + "hidden_passphrase = blink182", + "badPassword = sunshine123", + "goodPassword = kingpin987", + "bestPassword = kingpin987", + "Printf = %s %s %s %s %s %s %s %s", + "AccessKey = 924JSR1PGW2D4MNRZX45", + "SecretKey = wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY", + "Println = >>done<<" + ], + "html:testdata/html_embedded_vulnerable.html": [ + "content = abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234", + "auth0_client_secret = abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234", + "password = superSecret123" + ], + "html:testdata/html_vulnerable.html": [ + "html = hunter2", + "data-api-key = html-key-123", + "secret_key = all along the watchtower", + "body = hunter2", + "password = blink182", + "div = hunter2", + "name = auth0_client_secret", + "content = abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234abcd1234" + ], + "java:testdata/java_vulnerable.java": [ + "ipAddress = 8.8.8.8", + "String = 8.8.8.8", + "password = s3cr3tp@ssw0rd", + "String = s3cr3tp@ssw0rd", + "passwd = 9043hfdlasf023", + "String = 9043hfdlasf023", + "pwd = a9lah209la81la3", + "String = a9lah209la81la3", + "passphrase = all along the watchtower", + "String = all along the watchtower", + "key = qpsbnoewdmdsoeg", + "String = qpsbnoewdmdsoeg", + "secret_key = 402750613792034973", + "String = 402750613792034973", + "private_key = ja4wALsaho20af21dS", + "String = ja4wALsaho20af21dS", + "ipAddress = 8.8.8.8", + "password = s3cr3tp@ssw0rd 2", + "passwd = 9043hfdlasf023", + "pwd = a9lah209la81la3", + "passphrase = all along the watchtower", + "key = qpsbnoewdmdsoeg", + "secret_key = 402750613792034973", + "private_key = ja4wALsaho20af21dS", + "ipAddress = 1a2w3eqwerty", + "password = 
grape1999", + "passwd = grape2020", + "pwd = qwertyuiop123", + "passphrase = trustno1", + "key_id = AKIA6ODU5DHT7VPXGCE4", + "aws_secret = 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", + "hidden_passphrase = blink182", + "println = Hello, World", + "strPassword = sunshine123", + "foobarPassword = kingpin987", + "horsePassword = kingpin987", + "ipAddress = 8.8.8.8", + "String = 8.8.8.8", + "password = s3cr3tp@ssw0rd", + "String = s3cr3tp@ssw0rd", + "passwd = 9043hfdlasf023", + "String = 9043hfdlasf023", + "pwd = a9lah209la81la3", + "String = a9lah209la81la3", + "passphrase = all along the watchtower", + "String = all along the watchtower", + "key = qpsbnoewdmdsoeg", + "String = qpsbnoewdmdsoeg", + "secret_key = 402750613792034973", + "String = 402750613792034973", + "private_key = ja4wALsaho20af21dS", + "String = ja4wALsaho20af21dS", + "ipAddress = 8.8.8.8", + "password = s3cr3tp@ssw0rd 2", + "passwd = 9043hfdlasf023", + "pwd = a9lah209la81la3", + "passphrase = all along the watchtower", + "key = qpsbnoewdmdsoeg", + "secret_key = 402750613792034973", + "private_key = ja4wALsaho20af21dS", + "ipAddress = 1a2w3eqwerty", + "password = grape87", + "passwd = grape2020", + "pwd = qwertyuiop123", + "passphrase = trustno1", + "println = Hello, World", + "put = 412389uSwYkRm1Tg!", + "put = fakefakefake@contoso.com", + "println = InitialDirContext" + ], + "javascript:testdata/javascript_vulnerable.js": [ + "name = chris", + "password = hunter2", + "password = foo123", + "person = Bob Doe", + "carName = Buick", + "price = 300", + "person = Bob Doe", + "person = Buick", + "person = 300", + "password = qwerty123", + "secret_key = this is a secret key", + "person = John Doe", + "person = John Doe", + "carName = Volvo", + "carName = Volvo", + "price = 200", + "this_password = correct horse battery staple", + "foobaz = 75", + "number = 42", + "newpassword = sunshine123", + "key_id = AKIA6ODU5DHT7VPXGCE4", + "aws_secret = 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", + "hidden_passphrase = 
blink182" + ], + "php:testdata/php_vulnerable.php": [ + "id = 4", + "lang = grape123", + "password = this_is_my_passport", + "v = Berne", + "v = Berne", + "v = Zurich1", + "api_key = 9823yrdfijo239jd3wsad30dj2d", + "v = trustno1", + "v = Genf", + "v = Geneva", + "v = Genève", + "property1 = Value 1", + "property2 = Value 2", + "property1 = property2", + "password = kingpin987", + "set_password = hunter2", + "set_color = Red", + "location = Essex", + "key_id = AKIA6ODU5DHT7VPXGCE4", + "aws_secret = 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", + "hidden_passphrase = blink182", + "sql = SELECT name, email FROM users WHERE id=$id", + "sql = SELECT name, email FROM users WHERE id=$id", + "color = beige", + "color = blue", + "comp = BMW", + "comp = Mercedes Benz" + ], + "python:testdata/parsers/comment_only_context.py": [], + "python:testdata/python_vulnerable.py": [ + "staticGroupID = 0", + "customClassUser = this_is_a_user_id", + "customClassPassword = rJl8QgApOjNfEiMWQUR", + "Accept = application/json", + "password = thisisabadpassword", + "print = Welcome to this demo program", + "default_password = qwerty123", + "AppPassword = b12c789b123bn12389", + "NotAnything = 12i7128931238912739712893", + "PleaseNoFalsePostive = joe123", + "another_password = blink182", + "another_password_again = blink182", + "backup_password = letmein123", + "name = Peter", + "age = 23", + "print = %s is %d years old", + "print = {} is {} years old", + "print = {name} is {age} years old", + "pypi_value_01 = pypi-AgEIcHlwaS5vcmcCAWEAAAYgNh9pJUqVF-EtMCwGaZYcStFR07RbE8hyb9h2vYxifO8", + "pypi_value_02 = pypi-AgEIcHlwaS5vcmcCAWIAAAYgxbyLvb9egSCECeOdB3qW3h4oXEoNC6kJI0NtaFOQlUY", + "pypi_value_03 = pypi-AgEIcHlwaS5vcmcCAWIAAAYgf_d_XvJfqkOhrkqbEBo-eW9UID46ABNJIdGfaO3n3_k", + "pypi_value_04 = pypi-AgEIcHlwaS5vcmcCAWIAAiV7InZlcnNpb24iOiAxLCAicGVybWlzc2lvbnMiOiAidXNlciJ9AAAGIBeIJGhXk8kPPref7vLuwlKbnSWusZKZivIh92GRUUX4", + "pypi_value_05 = 
pypi-AgEIcHlwaS5vcmcCAWIAAi97InZlcnNpb24iOiAxLCAicGVybWlzc2lvbnMiOiB7InByb2plY3RzIjogW119fQAABiBWHBa1jsbY-iN-Swf3JCrxy8Q8eRCxMrc_1KkkDuB6KQ", + "pypi_value_06 = pypi-AgENdGVzdC5weXBpLm9yZwIBYgACL3sidmVyc2lvbiI6IDEsICJwZXJtaXNzaW9ucyI6IHsicHJvamVjdHMiOiBbXX19AAAGIFYcFrWOxtj6I35LB_ckKvHLxDx5ELEytz_UqSQO4Hop" + ], + "ruby:testdata/ruby_vulnerable.rb": [ + "my_name = Roger Rabbit", + "my_number = 27", + "foo = My name is #{my_name} and my favorite number is #{my_number}.", + "foo = My name is #{my_name} and my favorite number is #{my_number}.", + "password = My voice is my passport:", + "password = Verify me ", + "password = MongoDB123", + "concat = Mongo", + "concat = DB", + "this_number = 23", + "this_word = rolling stone", + "aUser = Bicylops", + "aUser = Fleck", + "aUser = 260", + "aUser = Bicylops", + "aUser = Fleck", + "aUser = 260", + "new = Bicylops", + "new = Fleck", + "new = 260", + "Bicylops = Fleck", + "password = , ", + "password = , ", + "password = 123", + "send = password=", + "send = secret123", + "password= = secret123", + "my_api_key = 1", + "my_api_key = 1", + "my_api_key = SGwJgqnZYzH945UBWnauBuKXKLEhq5Le", + "my_api_key = 3", + "bVal = 88df97769ab3185f2c0b2a73fdae1b27d89409ca", + "bVal = 88df97769ab3185f2c0b2a73fdae1b27d89409ca", + "bVal = 3", + "bVal = car", + "GITHUB_KEY = 17df97169af3785f2c0b2a73dhba1c46f33928de", + "GITHUB_CLIENT_ID = Iv1.3e3354ce147fd412", + "GITHUB_APP_SECRET = 895b1da4051440395f90e1411c4a1150e423c922", + "key_id = AKIA6ODU5DHT7VPXGCE4", + "aws_secret = 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", + "hidden_passphrase = blink182" + ], + "rust:testdata/rust_vulnerable.rs": [ + "user = John", + "user = Doe", + "user = john@email.com", + "new = John", + "new = Doe", + "new = john@email.com", + "John = Doe", + "first_name = Bob", + "from = Bob", + "ip = 8.8.8.8", + "str = 8.8.8.8", + "pass = s3cr3tp@ssw0rd 2", + "str = s3cr3tp@ssw0rd 2", + "api_key = Hello \\\"World\\\"", + "str = Hello \\\"World\\\"", + "multiline = This is a 
\\nmultiline string literal", + "str = This is a \\nmultiline string literal", + "key_id = AKIA6ODU5DHT7VPXGCE4", + "str = AKIA6ODU5DHT7VPXGCE4", + "aws_secret = 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", + "str = eD4++rSUVbOmDrRI7EDLmskuwpAAddEA0WNwu+fI", + "hidden_passphrase = blink182", + "str = blink182", + "name = John", + "str = John", + "first_name = John ", + "str = John ", + "last_name = Doe", + "str = Doe" + ], + "toml:testdata/toml_vulnerable.toml": [ + "name = vvp.auth.oidc.registration.clientSecret", + "valueFrom.secretKeyRef.name = mysecrets", + "valueFrom.secretKeyRef.key = oidc", + "name = spring.datasource.password", + "valueFrom.secretKeyRef.name = mysecrets", + "valueFrom.secretKeyRef.key = jdbc", + "name = vvp.auth.bootstrapToken.token", + "valueFrom.secretKeyRef.name = mysecrets", + "valueFrom.secretKeyRef.key = blink182", + "private_key = all along the watchtower", + "my_private_key = ja4wALsaho20af21dS", + "kind = Opaque", + "password = dG9wLVNlY3JldA==", + "jdbc = dG9wLVNlY3JldA==", + "my_unique_authorization_key = dG9wLVNlY3JldA==", + "aws_key_id = AKIA6ODU5DHT7VPXGCE4", + "aws_secret = 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" + ], + "typescript:testdata/typescript_vulnerable.ts": [ + "say = a bird in hand > two in the bush", + "html =
I would just like to say : ${say}
", + "bob_password = allthesecretsarehere", + "sally_password = superSecret123", + "i = 0", + "i = 0", + "i = +", + "i = +", + "for = 0", + "for = +", + "for = +", + "0 = +", + "replace = &", + "replace = /g, ", + "replace = 39", + "replace = <", + "replace = >", + "result = -", + "result = 1", + "password = chicken", + "person = Bob Doe", + "carName = Buick", + "price = 300", + "person = Bob Doe", + "person = Buick", + "person = 300", + "password = qwerty123", + "secret_key = this is a secret key", + "person = John Doe", + "person = John Doe", + "carName = Volvo", + "carName = Volvo", + "price = 200", + "this_password = correct horse battery staple", + "newpassword = sunshine123" + ], + "yaml:testdata/yaml_vulnerable.yaml": [ + "name = vvp.auth.oidc.registration.clientSecret", + "name = mysecrets", + "key = oidc", + "name = spring.datasource.password", + "name = mysecrets", + "key = jdbc", + "name = vvp.auth.bootstrapToken.token", + "name = mysecrets", + "key = blink182", + "apiVersion = v1", + "kind = Secret", + "private_key = all along the watchtower", + "my_private_key = ja4wALsaho20af21dS", + "type = Opaque", + "password = dG9wLVNlY3JldA==", + "jdbc = dG9wLVNlY3JldA==", + "my_unique_authorization_key = dG9wLVNlY3JldA==", + "aws_key_id = AKIA6ODU5DHT7VPXGCE4", + "aws_secret = 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" + ] +} diff --git a/testdata/parsers/scan_findings_baseline.json b/testdata/parsers/scan_findings_baseline.json new file mode 100644 index 0000000..f10b56f --- /dev/null +++ b/testdata/parsers/scan_findings_baseline.json @@ -0,0 +1,150 @@ +[ + { + "rule_id": "kingfisher.aws.2", + "snippet": "eD4++rSUVbOmDrRI7EDLmskuwpAAddEA0WNwu+fI" + }, + { + "rule_id": "kingfisher.credentials.1", + "snippet": "gitlab-ci-token" + }, + { + "rule_id": "kingfisher.github.8", + "snippet": "17df97169af3785f2c0b2a73dhba1c46f33928de" + }, + { + "rule_id": "kingfisher.google.7", + "snippet": "AIzaSyBUPHAjZl3n8Eza66ka6B78iVyPteC5MgM" + }, + { + "rule_id": 
"kingfisher.pem.1", + "snippet": "MIICWQIBAAKBgHsSuRPLMDrxcwMB9P6ubGFGmlSvHvSXq2kfwycrcEKf/TCctShz A2HYo2IWed8n1rqazlESHnhNmCWlFWIMMFWagZyDBy9yy71MhWISvoTuQVyCx/z3 q1v171fy+Ds5smKwZ8wK3bgwBTR7BTKfYNmearDZvPJgwK0jsYEJDZ/DAgElAoGA MeT+7FlK53akP31VfAFG4j83pcp0VVI+kmbSk1bMpWN0e33M5uKE1KPvNZpowkCV UpHJQ3YMWkj4ffbRUUM2L/jQmKkICf7vynIdq5cj+lF6lNXSzwq6pVR6/octdeKS /70DuGcVG+LiRTu2mRb6mPY9bIJIvcgenXajnVanx9UCQQDRwf6oyU/EH4x+kw/X QZi/RebtDPD1yIQuhVG8B1xkPxBsAywTwVDL7DSZ1BsbWJcl5HcXt/q0n/3NZ62X Rr1VAkEAljSLsMOk5H7XCctEk3mCu1WgDtUvb/RRCBiBT+cic14OpVtytJMAeLeq cAhIj54ef4hQPGKbAsQZ3E/X4EsotwJAa7alXZfPA9jZcW4c5Ciai7wcoz3/Mhrc F+OYrKnVf5YBg5LtHua6yZT4aqswg6oIbWd7bQty5yG5rqrcmcphOQJAHGrOUd/T FnjckyZ0wfRk11VjeG2Fg+IdKwuOFgkiMYB/T7da4+R1tfk7666KRK82M82uUJ0I kdISuvpZRhwOnwJBAI34lnrN4bNcUVB5kAXT9huyH8tJomNdsJOufS3vDk6tKaqK Ic3jMIwtyuXsn4NhJNUFlgfPL70CPtb3x/eePqw= " + }, + { + "rule_id": "kingfisher.pem.1", + "snippet": "MIICWQIBAAKBgHsSuRPLMDrxcwMB9P6ubGFGmlSvHvSXq2kfwycrcEKf/TCctShzA2HYo2IWed8n1rqazlESHnhNmCWlFWIMMFWagZyDBy9yy71MhWISvoTuQVyCx/z3q1v171fy+Ds5smKwZ8wK3bgwBTR7BTKfYNmearDZvPJgwK0jsYEJDZ/DAgElAoGAMeT+7FlK53akP31VfAFG4j83pcp0VVI+kmbSk1bMpWN0e33M5uKE1KPvNZpowkCVUpHJQ3YMWkj4ffbRUUM2L/jQmKkICf7vynIdq5cj+lF6lNXSzwq6pVR6/octdeKS/70DuGcVG+LiRTu2mRb6mPY9bIJIvcgenXajnVanx9UCQQDRwf6oyU/EH4x+kw/XQZi/RebtDPD1yIQuhVG8B1xkPxBsAywTwVDL7DSZ1BsbWJcl5HcXt/q0n/3NZ62XRr1VAkEAljSLsMOk5H7XCctEk3mCu1WgCsUvb/RRCBiBT+cic14OpVtytJMAeLeqcAhIj54ef4hQPGKbAsQZ3E/X4EsotwJAa7alXZfPA9jZcW4c5Ciai7wcoz3/MhrcF+OYrKnVf5YBg5LtHua6yZT4aqswg6oIbWd7bQty5yG5rqrcmcphOQJAHGrOUd/TFnjckyZ0wfRk11VjeG2Fg+IdKwuOFgkiMYB/T7da4+R1tfk7666KRK82M82uUJ0IkdISuvpZRhwOnwJBAI34lnrN4bNcUVB5kAXT9huyH8tJomNdsJOufS3vCi5tKaqKIc3jMIwtyuXsn4NhJNUFlgfPL70CPtb3x/eePqw=" + }, + { + "rule_id": "kingfisher.pem.1", + "snippet": 
"\\nMIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQChoGF4j4AUnAfj\\nbVGP/tSJqAyeYiZfOf4UCwd9+B/2oej3rsiuZmx506kuWVN4Jhg8UocLn5l/OfqU\\n2MyV3Mq5VjtGQjYWF7a/Y04yEMRWf+spiJp1iYGS1vTOVjuyYyMa9h+8sbDiBFAD\\nBcZejB4FQHxstFtmlnehf7cieMLTa3Wezv8LX8pH0q+pEynuvusQkhe8uPmjUsuo\\nWG5W5CgVchQVzQf9eB5xtyt85t6VozMvAEI4h+WwZRdn+EWrQi+z8A8vXF7iUDmu\\n2lpypLExcZBrZINMh8ecs8B34JNIYzO4Hod7RB4IwXN8PG/5RHlb7qQbzXSxir2B\\n17gPPf8JAgMBAAECggEAHbkdG7sGIqQkJjypInpKc0tKkMj7hgkn8t8pYE7kb+qM\\nKZqE0N/IpKnaY8ntGfwlelhx+d7+r0FGFh/9lbTOOkHDslLEWBFB3BYC4B2pwb+S\\nC2gSAboJMGwkBpsgrNhi8RcgtIaYASSqYzfpaGNLtQsMJsCPS4Ex3GscjnQXXiJK\\n5MExF8VYZVvT8Hq2lvECUpFMTWwM2o/QndwjLrEq/vRI3n7PmweXZGKgLuyOjpWk\\ny80qa/IUlB6xO4XHvjnaEGxRq1LSF8hgEGU2Nmd8GDRT5ZLkSk+TMtqPrEbHEi6n\\n4pZGndX0XmttWkKcUX/NwB/WZC5ROEsUl8Fyw+T5RQKBgQDMfgFB6Xx+Na2iB33w\\nkhzNxo4HPCJzxeAB0zCRpfDpM1GtqK6JsIxvrci5lDAKaP8TQTr/gQxXpbJjE1Dl\\n3VWGzFbW4czSw+AqBFl1he20RZhGjATcDCCzSOyEiRhqoJwTPTvqcXRK8NbKGfJR\\nV6b4Auw+McNhnEUyfrZzguV93QKBgQDKVlLPhb4O84mINKFK73QFf2xlns0IHI0m\\nWqNvY7HxJP9WUH5FgX4r/cO6aIafg+u5j0gNPDd2JD67htnY85EH/n5KNhb9ytsN\\n+hkDeidFvdOrD+h9YFHkNoNy3XHwrQ0mtYRj2FBWhhpBsVlHVO2KcLe0TvivinN2\\nfIac2uZhHQKBgAYE23KeNbzdRZwUTl+rXU+tPXb3DSiNNXe4SKCw2rNygD/1TBXf\\nbXLIEbVsqDFWP9PIQr1Mhhl6VhLWebYaWq8aCqBOiyHVBB8Ye62a4JFCzyWcb3Qu\\nozPDvLp18pMI4S8ryTywVDT0e839D4XXZ6G7LEr0WgTgfaTr1+D0hF69AoGBAKIQ\\nxKGeAV6eaOGlLjAEXgztRFic+qLto409+jyFQQji1nY/YPSxROtdhkGv6WypUM0/\\nW7nmKpJBc9HmsGUaqmcZy/QLIR1FN3IZiaGEXSJ6aqlQw6pw1QcTNvRxNQtOwQLp\\nT1Jd9/Nl1HAb6mO9PcqugCY3Pu/z2InmMjg/CVptAoGAMpwMsoen4xEHv4uGZVt8\\n8wlvQ2fYnso4wgRSYAkjh8cOHjB85eazlSAsaJvmQ9D1rV086Re5zKxKjrjQWdaT\\nRMyIZJMJYZr6c8RKmabOfO1oc5urDdETQjGi3qXJuiu86wp7IoBINdmBEPRl6+m3\\nGqJA6hgV5niKAq4sJtv9EW4=\\n" + }, + { + "rule_id": "kingfisher.privkey.2", + "snippet": "-----BEGIN PRIVATE 
KEY-----\\nMIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQChoGF4j4AUnAfj\\nbVGP/tSJqAyeYiZfOf4UCwd9+B/2oej3rsiuZmx506kuWVN4Jhg8UocLn5l/OfqU\\n2MyV3Mq5VjtGQjYWF7a/Y04yEMRWf+spiJp1iYGS1vTOVjuyYyMa9h+8sbDiBFAD\\nBcZejB4FQHxstFtmlnehf7cieMLTa3Wezv8LX8pH0q+pEynuvusQkhe8uPmjUsuo\\nWG5W5CgVchQVzQf9eB5xtyt85t6VozMvAEI4h+WwZRdn+EWrQi+z8A8vXF7iUDmu\\n2lpypLExcZBrZINMh8ecs8B34JNIYzO4Hod7RB4IwXN8PG/5RHlb7qQbzXSxir2B\\n17gPPf8JAgMBAAECggEAHbkdG7sGIqQkJjypInpKc0tKkMj7hgkn8t8pYE7kb+qM\\nKZqE0N/IpKnaY8ntGfwlelhx+d7+r0FGFh/9lbTOOkHDslLEWBFB3BYC4B2pwb+S\\nC2gSAboJMGwkBpsgrNhi8RcgtIaYASSqYzfpaGNLtQsMJsCPS4Ex3GscjnQXXiJK\\n5MExF8VYZVvT8Hq2lvECUpFMTWwM2o/QndwjLrEq/vRI3n7PmweXZGKgLuyOjpWk\\ny80qa/IUlB6xO4XHvjnaEGxRq1LSF8hgEGU2Nmd8GDRT5ZLkSk+TMtqPrEbHEi6n\\n4pZGndX0XmttWkKcUX/NwB/WZC5ROEsUl8Fyw+T5RQKBgQDMfgFB6Xx+Na2iB33w\\nkhzNxo4HPCJzxeAB0zCRpfDpM1GtqK6JsIxvrci5lDAKaP8TQTr/gQxXpbJjE1Dl\\n3VWGzFbW4czSw+AqBFl1he20RZhGjATcDCCzSOyEiRhqoJwTPTvqcXRK8NbKGfJR\\nV6b4Auw+McNhnEUyfrZzguV93QKBgQDKVlLPhb4O84mINKFK73QFf2xlns0IHI0m\\nWqNvY7HxJP9WUH5FgX4r/cO6aIafg+u5j0gNPDd2JD67htnY85EH/n5KNhb9ytsN\\n+hkDeidFvdOrD+h9YFHkNoNy3XHwrQ0mtYRj2FBWhhpBsVlHVO2KcLe0TvivinN2\\nfIac2uZhHQKBgAYE23KeNbzdRZwUTl+rXU+tPXb3DSiNNXe4SKCw2rNygD/1TBXf\\nbXLIEbVsqDFWP9PIQr1Mhhl6VhLWebYaWq8aCqBOiyHVBB8Ye62a4JFCzyWcb3Qu\\nozPDvLp18pMI4S8ryTywVDT0e839D4XXZ6G7LEr0WgTgfaTr1+D0hF69AoGBAKIQ\\nxKGeAV6eaOGlLjAEXgztRFic+qLto409+jyFQQji1nY/YPSxROtdhkGv6WypUM0/\\nW7nmKpJBc9HmsGUaqmcZy/QLIR1FN3IZiaGEXSJ6aqlQw6pw1QcTNvRxNQtOwQLp\\nT1Jd9/Nl1HAb6mO9PcqugCY3Pu/z2InmMjg/CVptAoGAMpwMsoen4xEHv4uGZVt8\\n8wlvQ2fYnso4wgRSYAkjh8cOHjB85eazlSAsaJvmQ9D1rV086Re5zKxKjrjQWdaT\\nRMyIZJMJYZr6c8RKmabOfO1oc5urDdETQjGi3qXJuiu86wp7IoBINdmBEPRl6+m3\\nGqJA6hgV5niKAq4sJtv9EW4=\\n-----END PRIVATE KEY-----" + }, + { + "rule_id": "kingfisher.privkey.2", + "snippet": "-----BEGIN RSA PRIVATE 
KEY-----MIICWQIBAAKBgHsSuRPLMDrxcwMB9P6ubGFGmlSvHvSXq2kfwycrcEKf/TCctShzA2HYo2IWed8n1rqazlESHnhNmCWlFWIMMFWagZyDBy9yy71MhWISvoTuQVyCx/z3q1v171fy+Ds5smKwZ8wK3bgwBTR7BTKfYNmearDZvPJgwK0jsYEJDZ/DAgElAoGAMeT+7FlK53akP31VfAFG4j83pcp0VVI+kmbSk1bMpWN0e33M5uKE1KPvNZpowkCVUpHJQ3YMWkj4ffbRUUM2L/jQmKkICf7vynIdq5cj+lF6lNXSzwq6pVR6/octdeKS/70DuGcVG+LiRTu2mRb6mPY9bIJIvcgenXajnVanx9UCQQDRwf6oyU/EH4x+kw/XQZi/RebtDPD1yIQuhVG8B1xkPxBsAywTwVDL7DSZ1BsbWJcl5HcXt/q0n/3NZ62XRr1VAkEAljSLsMOk5H7XCctEk3mCu1WgCsUvb/RRCBiBT+cic14OpVtytJMAeLeqcAhIj54ef4hQPGKbAsQZ3E/X4EsotwJAa7alXZfPA9jZcW4c5Ciai7wcoz3/MhrcF+OYrKnVf5YBg5LtHua6yZT4aqswg6oIbWd7bQty5yG5rqrcmcphOQJAHGrOUd/TFnjckyZ0wfRk11VjeG2Fg+IdKwuOFgkiMYB/T7da4+R1tfk7666KRK82M82uUJ0IkdISuvpZRhwOnwJBAI34lnrN4bNcUVB5kAXT9huyH8tJomNdsJOufS3vCi5tKaqKIc3jMIwtyuXsn4NhJNUFlgfPL70CPtb3x/eePqw=-----END RSA PRIVATE KEY-----" + }, + { + "rule_id": "kingfisher.pypi.1", + "snippet": "pypi-AgEIcHlwaS5vcmcCAWEAAAYgNh9pJUqVF-EtMCwGaZYcStFR07RbE8hyb9h2vYxifO8" + }, + { + "rule_id": "kingfisher.pypi.1", + "snippet": "pypi-AgEIcHlwaS5vcmcCAWIAAAYgf_d_XvJfqkOhrkqbEBo-eW9UID46ABNJIdGfaO3n3_k" + }, + { + "rule_id": "kingfisher.pypi.1", + "snippet": "pypi-AgEIcHlwaS5vcmcCAWIAAAYgxbyLvb9egSCECeOdB3qW3h4oXEoNC6kJI0NtaFOQlUY" + }, + { + "rule_id": "kingfisher.pypi.1", + "snippet": "pypi-AgEIcHlwaS5vcmcCAWIAAi97InZlcnNpb24iOiAxLCAicGVybWlzc2lvbnMiOiB7InByb2plY3RzIjogW119fQAABiBWHBa1jsbY-iN-Swf3JCrxy8Q8eRCxMrc_1KkkDuB6KQ" + }, + { + "rule_id": "kingfisher.pypi.1", + "snippet": "pypi-AgEIcHlwaS5vcmcCAWIAAiV7InZlcnNpb24iOiAxLCAicGVybWlzc2lvbnMiOiAidXNlciJ9AAAGIBeIJGhXk8kPPref7vLuwlKbnSWusZKZivIh92GRUUX4" + }, + { + "rule_id": "kingfisher.slack.1", + "snippet": "xapp-1-A01C259PH2A-1440755929120-7d5241948a2cc1b464add85df8a8e75f9040ae2869f6599926ed0b9dcafdb32b" + }, + { + "rule_id": "kingfisher.slack.1", + "snippet": "xapp-1-A01SURJVBLJ-1936696714400-FAKE1f53b593f2951c547e39dd5e1d39aae8d142daff1e94a64af304334fe04f" + }, + { + "rule_id": "kingfisher.slack.1", + "snippet": 
"xapp-1-A0219JRGYSF-2049594540292-FAKE4796aa92658d4e0ae36cae694ffeb7bf1c87d80347b4ef74169433b55345" + }, + { + "rule_id": "kingfisher.slack.1", + "snippet": "xapp-1-B42342KL2RLY-2936428313672-FAKE8a4e42c6dc16000cb84fcFAKE3ba456b65b3560729178b2126d9153498037" + }, + { + "rule_id": "kingfisher.slack.2", + "snippet": "xoxa-2-B6342RL2UNF-2936428303672-FAKE8a4e42c6dc16000cb84fcFAKE3ba456b65b3560729178b2126d9153498037" + }, + { + "rule_id": "kingfisher.slack.2", + "snippet": "xoxa-2-B7342RL2UNF-2936428303672-FAKE8a4e42c6dc16000cb84fcFAKE3ba456b65b3560729178b2126d9153498037" + }, + { + "rule_id": "kingfisher.slack.2", + "snippet": "xoxb-034302345987-336503610493-FAKEvWppeEYXx5TsvScfAAwl" + }, + { + "rule_id": "kingfisher.slack.2", + "snippet": "xoxb-138060324327-1855530675702-FAKEZxYAIfI7Jrv8hxODBm5k" + }, + { + "rule_id": "kingfisher.slack.2", + "snippet": "xoxb-229090314224-691247287811-FAKE5lrlR3O9eYVKf4eKpras" + }, + { + "rule_id": "kingfisher.slack.2", + "snippet": "xoxb-235060315121-1909810446613-FAKE1NuEz5KXRsCBwEUzjiRt" + }, + { + "rule_id": "kingfisher.slack.2", + "snippet": "xoxb-494126390276-1259618305827-FAKE53z2wripYKAm4xPAsPRK" + }, + { + "rule_id": "kingfisher.slack.2", + "snippet": "xoxb-689144892354-720001127957-FAKE4lK3kSc08oebIvZdPWG4" + }, + { + "rule_id": "kingfisher.slack.2", + "snippet": "xoxb-730191371696-1413868247813-IG7Z6nYevC2hdviE3aJhb5kY" + }, + { + "rule_id": "kingfisher.slack.2", + "snippet": "xoxo-523423-234243-234233-e039d02840a0b9379c" + }, + { + "rule_id": "kingfisher.slack.2", + "snippet": "xoxp-523423-234243-234233-e039d02840a0b9379c" + }, + { + "rule_id": "kingfisher.slack.2", + "snippet": "xoxp-677471389651-618638257620-FAKE17772739-5da7b6942285" + }, + { + "rule_id": "kingfisher.slack.2", + "snippet": "xoxr-523423-234243-234233-e039d02840a0b9379c" + }, + { + "rule_id": "kingfisher.slack.2", + "snippet": "xoxr-B2342KL8RJT-2931428303672-FAKE8a4e42c6dc16000cb84fcFAKE3ba456b65b3560729178b2126d9153498037" + }, + { + "rule_id": 
"kingfisher.slack.2", + "snippet": "xoxs-523423-234243-234233-e039d02840a0b9379c" + }, + { + "rule_id": "kingfisher.slack.4", + "snippet": "https://hooks.slack.com/services/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" + }, + { + "rule_id": "kingfisher.stripe.1", + "snippet": "pk_live_bu9JFVJtII3FINL1rOKcNpveXD4hSMtSDx7opOWDEFGHIJKLMNOPQRSTUVWX" + }, + { + "rule_id": "kingfisher.stripe.2", + "snippet": "rk_live_z59MoCJoFc114PpJlP1OnB1O" + }, + { + "rule_id": "kingfisher.stripe.2", + "snippet": "sk_live_bu9JFVJtII3FINL1rOKcNpveXD4hSMtSDx7opOWDEFGHIJKLMNOPQRST" + } +] diff --git a/tests/dependent_rule_dedup.rs b/tests/dependent_rule_dedup.rs index 273468e..003e55b 100644 --- a/tests/dependent_rule_dedup.rs +++ b/tests/dependent_rule_dedup.rs @@ -145,3 +145,60 @@ fn dedup_still_merges_non_dependency_rules_across_blobs() -> Result<()> { Ok(()) } + +#[test] +fn dedup_uses_a_stable_canonical_representative() -> Result<()> { + let rule = make_rule("RULE.SIMPLE", vec![]); + + let make_store = |rule: &Arc| { + let mut store = FindingsStore::new(PathBuf::from("/tmp")); + store.record_rules(&[rule.clone()]); + store + }; + + let origin_a = Arc::new(OriginSet::single(Origin::from_file(PathBuf::from("a.txt")))); + let origin_z = Arc::new(OriginSet::single(Origin::from_file(PathBuf::from("z.txt")))); + let blob_a = Arc::new(BlobMetadata { + id: BlobId::new(b"blob-a"), + num_bytes: 10, + mime_essence: None, + language: None, + }); + let blob_z = Arc::new(BlobMetadata { + id: BlobId::new(b"blob-z"), + num_bytes: 10, + mime_essence: None, + language: None, + }); + + let forward = vec![ + record_match(&origin_z, &blob_z, make_match(rule.clone(), blob_z.id, "shared_token")), + record_match(&origin_a, &blob_a, make_match(rule.clone(), blob_a.id, "shared_token")), + ]; + let reverse = vec![ + record_match(&origin_a, &blob_a, make_match(rule.clone(), blob_a.id, "shared_token")), + record_match(&origin_z, &blob_z, make_match(rule.clone(), blob_z.id, "shared_token")), + ]; + + let mut 
forward_store = make_store(&rule); + forward_store.record(forward, true); + + let mut reverse_store = make_store(&rule); + reverse_store.record(reverse, true); + + for store in [&forward_store, &reverse_store] { + assert_eq!(store.get_matches().len(), 1); + + let (origin, blob, matched) = &*store.get_matches()[0]; + assert_eq!(origin.len(), 2, "duplicate findings should merge origins"); + assert_eq!( + origin.first().full_path().as_deref(), + Some(PathBuf::from("a.txt").as_path()), + "the lexicographically smallest path should be the representative", + ); + assert_eq!(blob.id, blob_a.id); + assert_eq!(matched.blob_id, blob_a.id); + } + + Ok(()) +} diff --git a/tests/int_base64.rs b/tests/int_base64.rs index d0fc980..bb1661a 100644 --- a/tests/int_base64.rs +++ b/tests/int_base64.rs @@ -61,7 +61,7 @@ fn skips_base64_when_disabled() -> anyhow::Result<()> { Ok(()) } -// Ensure disabling Base64 decoding does not trigger tree-sitter errors on empty files +// Ensure disabling Base64 decoding does not trigger context verifier errors on empty files #[test] fn no_base64_skips_empty_files() -> anyhow::Result<()> { let dir = tempdir()?; @@ -87,9 +87,9 @@ fn no_base64_skips_empty_files() -> anyhow::Result<()> { Ok(()) } -// Ensure tree-sitter based decoding works even when the standalone base64 scanner is disabled +// Ensure parser-based context extraction still surfaces base64-looking code assignments #[test] -fn detects_base64_in_code_with_tree_sitter() -> anyhow::Result<()> { +fn detects_base64_in_code_with_context_verifier() -> anyhow::Result<()> { let dir = tempdir()?; let file_path = dir.path().join("secret.py"); // Base64 for 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 diff --git a/tests/int_context_verification.rs b/tests/int_context_verification.rs new file mode 100644 index 0000000..b1d8c3e --- /dev/null +++ b/tests/int_context_verification.rs @@ -0,0 +1,121 @@ +use std::{ffi::OsString, fs, path::Path, process::Command}; + +use anyhow::{Context, Result}; +use 
serde_json::{Deserializer, Value}; + +fn scan_inputs_without_parser_fixtures() -> Result> { + let mut inputs = fs::read_dir("testdata") + .context("read testdata directory")? + .map(|entry| { + let entry = entry.context("read testdata entry")?; + let path = entry.path(); + Ok((entry.file_name(), path)) + }) + .collect::>>()?; + + inputs.sort_by(|left, right| left.0.cmp(&right.0)); + + Ok(inputs + .into_iter() + .filter_map(|(name, path)| { + (name != OsString::from("parsers")).then_some(path.into_os_string()) + }) + .collect()) +} + +#[test] +fn scan_findings_match_pre_removal_baseline() -> Result<()> { + let mut args = vec![OsString::from("scan")]; + args.extend(scan_inputs_without_parser_fixtures()?); + args.extend([ + OsString::from("--format"), + OsString::from("json"), + OsString::from("--no-validate"), + OsString::from("--no-update-check"), + OsString::from("--no-dedup"), + ]); + + let output = Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) + .args(&args) + .output() + .context("run kingfisher scan against testdata inputs without parser fixtures")?; + + let code = output.status.code().unwrap_or_default(); + assert!( + matches!(code, 0 | 200), + "expected exit code 0 or 200, got {code}. stderr:\n{}", + String::from_utf8_lossy(&output.stderr) + ); + + let stdout = String::from_utf8(output.stdout).context("scan stdout is not valid utf-8")?; + let mut stream = Deserializer::from_str(&stdout).into_iter::(); + let value = stream + .next() + .transpose() + .context("parse scan json output")? + .context("scan output did not contain a json object")?; + + let findings = value + .get("findings") + .and_then(Value::as_array) + .context("scan output missing findings array")?; + + // This baseline is meant to verify the secret corpus, not store-level dedup behavior or the + // parser fixture artifacts kept under `testdata/parsers/`. Scan only the real corpus inputs + // and compare a stable unique rule+snippet set. 
+ let mut actual = findings + .iter() + .map(|finding| { + let rule = finding.get("rule").and_then(Value::as_object).cloned().unwrap_or_default(); + serde_json::json!({ + "rule_id": rule.get("id").and_then(Value::as_str), + "snippet": finding + .get("finding") + .and_then(Value::as_object) + .and_then(|data| data.get("snippet")) + .and_then(Value::as_str), + }) + }) + .collect::>(); + actual.sort_by(|left, right| left.to_string().cmp(&right.to_string())); + actual.dedup(); + + let mut expected = serde_json::from_str::>( + &fs::read_to_string("testdata/parsers/scan_findings_baseline.json") + .context("read scan findings baseline")?, + ) + .context("parse scan findings baseline json")? + .into_iter() + .filter(|finding| finding.get("snippet").and_then(Value::as_str).is_some()) + .map(|finding| { + serde_json::json!({ + "rule_id": finding.get("rule_id").and_then(Value::as_str), + "snippet": finding.get("snippet").and_then(Value::as_str), + }) + }) + .filter(|finding| { + finding + .get("snippet") + .and_then(Value::as_str) + .map(|snippet| !snippet.is_empty()) + .unwrap_or(true) + }) + .collect::>(); + expected.sort_by(|left, right| left.to_string().cmp(&right.to_string())); + expected.dedup(); + + assert_eq!(actual, expected); + Ok(()) +} + +#[test] +fn scan_inputs_exclude_parser_fixture_directory() -> Result<()> { + let inputs = scan_inputs_without_parser_fixtures()?; + + assert!(inputs.iter().all(|path| Path::new(path) != Path::new("testdata/parsers"))); + assert!(inputs + .iter() + .any(|path| Path::new(path) == Path::new("testdata/python_vulnerable.py"))); + + Ok(()) +}