diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 90bf6ac..01f0baa 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -132,7 +132,7 @@ jobs: toolchain: ${{ env.RUST_TOOLCHAIN }} - name: Set up MSYS2 - uses: msys2/setup-msys2@cafece8e6baf9247cf9b1bf95097b0b983cc558d # v2.31.0 + uses: msys2/setup-msys2@e9898307ac31d1a803454791be09ab9973336e1c # v2.31.1 with: msystem: ${{ matrix.msystem }} update: true diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 136ebe9..c097164 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -274,7 +274,7 @@ jobs: toolchain: ${{ env.RUST_TOOLCHAIN }} - name: Set up MSYS2 - uses: msys2/setup-msys2@cafece8e6baf9247cf9b1bf95097b0b983cc558d # v2.31.0 + uses: msys2/setup-msys2@e9898307ac31d1a803454791be09ab9973336e1c # v2.31.1 with: msystem: ${{ matrix.msystem }} update: true diff --git a/.gitignore b/.gitignore index 420511a..1ccd1e3 100644 --- a/.gitignore +++ b/.gitignore @@ -23,6 +23,7 @@ logs/* !testdata/html_embedded_vulnerable.html !docs/viewer/index.html !docs-site/overrides/*.html +private-notes/ *.dot fuzz/* !fuzz/Cargo.toml diff --git a/CHANGELOG.md b/CHANGELOG.md index c3dbe05..4d81983 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,14 @@ All notable changes to this project will be documented in this file. +## [v1.100.0] +- Archive scanning now reaches inside Android/iOS app packages: added `apk`, `aab`, and `ipa` to the recognized ZIP-based archive formats so secrets embedded in APK/AAB/IPA contents (e.g. `classes*.dex`, `res/values/strings.xml`) are extracted and matched. +- Git repository scans now extract archive blobs encountered in the object database, not just on the filesystem. Previously a `.zip`/`.jar`/`.apk`/`.tar.gz` committed to a repo was scanned as raw compressed bytes, so secrets inside it were invisible. The git enumerator fans each archive entry out as a synthetic `!` blob with the original commit metadata. 
Honors `--no-extract-archives` for opt-out. +- Fixed tar-wrapped archive extraction for `.tgz` and `.tar.*` files, and made dependent credential validation deduplication preserve per-occurrence context so repeated secrets validate with the correct nearby companion value. +- Performance: ZIP-based git blobs ≤ 64 MB extract entirely in memory (no temp-file round trip), beating the v1.99.0 baseline by ~15% on an 80 GiB monorepo despite scanning ~300K additional archive-content blobs. Larger archives auto-fall-back to a disk-streaming extractor. +- Memory safety: hard caps on archive extraction — 64 MB compressed pre-flight, 256 MB aggregate decompressed per archive (in-memory and disk paths), 512 MB per entry, plus a `PK\x03\x04` magic-byte gate. Worst-case footprint is bounded at ~`num_jobs * 320 MB`. +- Release binary trimmed from 34 MB to 26 MB (~24% smaller). Switched `jsonwebtoken` to its `rust_crypto` backend (eliminates our scanner's pull on `aws-lc-rs`), bumped workspace `hmac` 0.12→0.13, `sha1` 0.10→0.11, `sha2` 0.10→0.11 to deduplicate our internal crypto code with the AWS sigv4 side, and migrated affected call sites in `kingfisher-core`, `kingfisher-rules`, and `kingfisher-scanner` to the digest-0.11 API (`hex::encode` for hex digests, explicit `KeyInit` import for HMAC). + ## [v1.99.0] - Fixed [#371](https://github.com/mongodb/kingfisher/issues/371): `pip install kingfisher-bin` on glibc Linux distros (Ubuntu, Debian, RHEL, Fedora, …) installed a macOS Mach-O binary and failed with `OSError: [Errno 8] Exec format error`. Linux wheels are now tagged `manylinux_2_17_.musllinux_1_2_` (instead of `musllinux_1_2_` only), so pip accepts them on both glibc-2.17+ and musl distros. The `pypi/hatch_build.py` hook now hard-fails when `KINGFISHER_PYPI_WHEEL_TAG` is unset, and the publish workflow refuses to upload any `py3-none-any.whl`, so the v1.92.0-era pure-Python wheel cannot recur. 
- `--self-update` (alias `--update`) on a scan or other command now **re-execs into the freshly installed binary** so the current invocation completes with the new code and the latest detection rules. Previously the on-disk binary was replaced but the running process kept using the old in-memory version, requiring a second invocation to pick up the changes. On Unix this is a true `exec()` (same PID); on Windows the new binary is spawned and the parent exits with its status code. The explicit `kingfisher self-update` subcommand still updates and exits without re-execing. Self-update now also covers Windows arm64 (the asset was already published; the runtime cfg map gained the missing arm). See `docs/ADVANCED.md` → *Update Checks*. diff --git a/Cargo.lock b/Cargo.lock index 8275480..85f94fc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -115,7 +115,7 @@ version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" dependencies = [ - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -126,7 +126,7 @@ checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" dependencies = [ "anstyle", "once_cell_polyfill", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -193,9 +193,9 @@ dependencies = [ [[package]] name = "asn1-rs" -version = "0.7.1" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56624a96882bb8c26d61312ae18cb45868e5a9992ea73c58e45c3101e56a1e60" +checksum = "b7f43a50ac4fdca5df8e885c21b835997f0a1cdee65494a6847694a98652d9d8" dependencies = [ "asn1-rs-derive", "asn1-rs-impl", @@ -242,9 +242,9 @@ dependencies = [ [[package]] name = "assert_cmd" -version = "2.2.1" +version = "2.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39bae1d3fa576f7c6519514180a72559268dd7d1fe104070956cb687bc6673bd" +checksum = 
"2aa3a22042e45de04255c7bf3626e239f450200fd0493c1e382263544b20aea6" dependencies = [ "anstyle", "bstr", @@ -283,17 +283,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "async-recursion" -version = "1.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.117", -] - [[package]] name = "async-stream" version = "0.3.6" @@ -383,9 +372,9 @@ dependencies = [ [[package]] name = "aws-lc-rs" -version = "1.16.3" +version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ec6fb3fe69024a75fa7e1bfb48aa6cf59706a101658ea01bfd33b2b248a038f" +checksum = "5ec2f1fc3ec205783a5da9a7e6c1509cc69dedf09a1949e412c1e18469326d00" dependencies = [ "aws-lc-sys", "untrusted 0.7.1", @@ -394,9 +383,9 @@ dependencies = [ [[package]] name = "aws-lc-sys" -version = "0.40.0" +version = "0.41.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f50037ee5e1e41e7b8f9d161680a725bd1626cb6f8c7e901f91f942850852fe7" +checksum = "1a2f9779ce85b93ab6170dd940ad0169b5766ff848247aff13bb788b832fe3f4" dependencies = [ "cc", "cmake", @@ -458,9 +447,9 @@ dependencies = [ [[package]] name = "aws-sdk-ec2" -version = "1.223.0" +version = "1.226.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c051cf4af033cea16c4eeb73b9c3c07f61fe747ae0d4119aabd45fa0288c19b" +checksum = "27ef215366676d2392accd5a5f964e891f7a97d210b34ccabe775510ccfd0302" dependencies = [ "aws-credential-types", "aws-runtime", @@ -483,9 +472,9 @@ dependencies = [ [[package]] name = "aws-sdk-ecr" -version = "1.115.0" +version = "1.115.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b97059a7e76b122096f333cf06cf109bfe8e763ee04ced2315f68ee2a6e0ed0" +checksum = "5e30c797e7df731ba0c9d051b2f82a95526fd1fb3905185908d1b77d6e0a1386" dependencies = [ "aws-credential-types", 
"aws-runtime", @@ -507,9 +496,9 @@ dependencies = [ [[package]] name = "aws-sdk-iam" -version = "1.108.0" +version = "1.108.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b20095024963a201cc42b561195b8ea0d2ce8a11b47a6f228609635994e3f531" +checksum = "0e105d3f0200de0c9672abbbf299d89ca4fccd8f8886fd91b0f1950fb933336d" dependencies = [ "aws-credential-types", "aws-runtime", @@ -608,9 +597,9 @@ dependencies = [ [[package]] name = "aws-sdk-s3" -version = "1.131.0" +version = "1.132.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe1b8c5282bf859170836045296b3cd710b7573aceb909498366bb508a41058e" +checksum = "5575840a3a6b11f6011463ebe359320dfe5b67babb5e9b06fed6ddf809a9ab40" dependencies = [ "aws-credential-types", "aws-runtime", @@ -1127,12 +1116,6 @@ version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "022dfe9eb35f19ebbcb51e0b40a5ab759f46ad60cadf7297e0bd085afb50e076" -[[package]] -name = "base64" -version = "0.21.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" - [[package]] name = "base64" version = "0.22.1" @@ -1170,12 +1153,6 @@ version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" -[[package]] -name = "bitflags" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" - [[package]] name = "bitflags" version = "2.11.1" @@ -1246,8 +1223,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ee04c4c84f1f811b017f2fbb7dd8815c976e7ca98593de9c1e2afad0f636bff4" dependencies = [ "async-stream", - "base64 0.22.1", - "bitflags 2.11.1", + "base64", + "bitflags", "bollard-buildkit-proto", "bollard-stubs", "bytes", @@ -1303,7 +1280,7 @@ 
version = "1.52.1-rc.29.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0f0a8ca8799131c1837d1282c3f81f31e76ceb0ce426e04a7fe1ccee3287c066" dependencies = [ - "base64 0.22.1", + "base64", "bollard-buildkit-proto", "bytes", "prost", @@ -1334,14 +1311,22 @@ dependencies = [ "alloc-stdlib", ] +[[package]] +name = "bs58" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf88ba1141d185c399bee5288d850d63b8369520c1eafc32a0430b5b6c287bf4" +dependencies = [ + "tinyvec", +] + [[package]] name = "bson" version = "2.15.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7969a9ba84b0ff843813e7249eed1678d9b6607ce5a3b8f0a47af3fcf7978e6e" +source = "git+https://github.com/mongodb/bson-rust?branch=2.15.x#f6f163095b5159ce175424b0e02f9bd7acfaddf2" dependencies = [ "ahash", - "base64 0.22.1", + "base64", "bitvec", "getrandom 0.2.17", "getrandom 0.3.4", @@ -1406,9 +1391,6 @@ name = "bytes" version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" -dependencies = [ - "serde", -] [[package]] name = "bytes-utils" @@ -1469,9 +1451,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.61" +version = "1.2.62" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d16d90359e986641506914ba71350897565610e87ce0ad9e6f28569db3dd5c6d" +checksum = "a1dce859f0832a7d088c4f1119888ab94ef4b5d6795d1ce05afb7fe159d79f98" dependencies = [ "find-msvc-tools", "jobserver", @@ -1676,7 +1658,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "117725a109d387c937a1533ce01b450cbde6b88abceea8473c4d7a85853cda3c" dependencies = [ "lazy_static", - "windows-sys 0.59.0", + "windows-sys 0.48.0", ] [[package]] @@ -1825,6 +1807,16 @@ dependencies = [ "url", ] +[[package]] +name = "core-foundation" +version = "0.9.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "core-foundation" version = "0.10.1" @@ -1883,7 +1875,7 @@ dependencies = [ "crc", "digest 0.10.7", "rustversion", - "spin", + "spin 0.10.0", ] [[package]] @@ -2144,16 +2136,16 @@ dependencies = [ [[package]] name = "dashmap" -version = "6.1.0" +version = "6.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf" +checksum = "e6361d5c062261c78a176addb82d4c821ae42bed6089de0e12603cd25de2059c" dependencies = [ "cfg-if", "crossbeam-utils", "hashbrown 0.14.5", "lock_api", "once_cell", - "parking_lot_core 0.9.12", + "parking_lot_core", "serde", ] @@ -2385,9 +2377,9 @@ dependencies = [ [[package]] name = "digest" -version = "0.11.2" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4850db49bf08e663084f7fb5c87d202ef91a3907271aff24a94eb97ff039153c" +checksum = "f1dd6dbb5841937940781866fa1281a1ff7bd3bf827091440879f9994983d5c2" dependencies = [ "block-buffer 0.12.0", "const-oid 0.10.2", @@ -2408,11 +2400,11 @@ dependencies = [ [[package]] name = "docker_credential" -version = "1.3.2" +version = "1.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d89dfcba45b4afad7450a99b39e751590463e45c04728cf555d36bb66940de8" +checksum = "a4564c274ebf369f501de192b02a0b81a5c4bda375abfe526aa70fc702fa6fa0" dependencies = [ - "base64 0.21.7", + "base64", "serde", "serde_json", ] @@ -2510,6 +2502,7 @@ dependencies = [ "ff", "generic-array", "group", + "hkdf", "pem-rfc7468", "pkcs8", "rand_core 0.6.4", @@ -2542,18 +2535,6 @@ dependencies = [ "encoding_rs", ] -[[package]] -name = "enum-as-inner" -version = "0.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"a1e6a265c649f3f5979b601d26f1d05ada116434c87741c9493cb56218f76cbc" -dependencies = [ - "heck 0.5.0", - "proc-macro2", - "quote", - "syn 2.0.117", -] - [[package]] name = "env_filter" version = "1.0.1" @@ -2590,7 +2571,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -2696,13 +2677,12 @@ checksum = "28dea519a9695b9977216879a3ebfddf92f1c08c05d984f8996aecd6ecdc811d" [[package]] name = "filetime" -version = "0.2.27" +version = "0.2.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f98844151eee8917efc50bd9e8318cb963ae8b297431495d3f758616ea5c57db" +checksum = "5c287a33c7f0a620c38e641e7f60827713987b3c0f26e8ddc9462cc69cf75759" dependencies = [ "cfg-if", "libc", - "libredox", ] [[package]] @@ -2919,10 +2899,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b43924e3df02cb3b846ca66a7ee58e8c13eb2556d0308c71f6154083f6980365" dependencies = [ "async-trait", - "base64 0.22.1", + "base64", "gcloud-metadata", "home", - "jsonwebtoken 10.3.0", + "jsonwebtoken", "reqwest 0.13.3", "serde", "serde_json", @@ -2952,7 +2932,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e17f9662a6966402de91daf0edb5accaae05c87f1a85479e57b95d2af7284b9f" dependencies = [ "anyhow", - "base64 0.22.1", + "base64", "bytes", "futures-util", "gcloud-auth", @@ -2963,7 +2943,7 @@ dependencies = [ "pkcs8", "regex", "reqwest 0.13.3", - "reqwest-middleware 0.5.1", + "reqwest-middleware", "ring", "serde", "serde_json", @@ -3052,7 +3032,7 @@ version = "0.20.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7b88256088d75a56f8ecfa070513a775dd9107f6530ef14919dac831af9cfe2b" dependencies = [ - "bitflags 2.11.1", + "bitflags", "libc", "libgit2-sys", "log", @@ -3116,7 +3096,7 @@ dependencies = [ 
"gix-worktree-state", "gix-worktree-stream", "nonempty", - "parking_lot 0.12.5", + "parking_lot", "regex", "serde", "signal-hook", @@ -3257,7 +3237,7 @@ version = "0.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13b39ed39ee4c10a3b157f9fb94bac8098d9f8e56201f0cf7dee6c187416c4b2" dependencies = [ - "bitflags 2.11.1", + "bitflags", "bstr", "gix-path", "libc", @@ -3380,7 +3360,7 @@ dependencies = [ "gix-utils", "libc", "once_cell", - "parking_lot 0.12.5", + "parking_lot", "prodash", "thiserror 2.0.18", "walkdir", @@ -3410,9 +3390,9 @@ dependencies = [ [[package]] name = "gix-fs" -version = "0.21.0" +version = "0.21.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b5d9f7e55a0f9a936a877fa4f9758692a308550a39a45684286941a20a8e5c0" +checksum = "1e1967daac9848757c47c2aef0c57bcadc1a897347f559778249bf286a536c86" dependencies = [ "bstr", "fastrand", @@ -3428,7 +3408,7 @@ version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08bf29249a069bf2507f5964f80997f37b134d320ea348d66527726b9be2c38c" dependencies = [ - "bitflags 2.11.1", + "bitflags", "bstr", "gix-features", "gix-path", @@ -3456,7 +3436,7 @@ checksum = "d33b455e07b3c16d3b2eeebc7b38d2dafcbf8a653de1138ef55d4c2a1fd0b08b" dependencies = [ "gix-hash", "hashbrown 0.16.1", - "parking_lot 0.12.5", + "parking_lot", ] [[package]] @@ -3489,7 +3469,7 @@ version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "54c3ef97ad08121e4327a6226bd63fed6b9e3c6b976d48bddd4356d9d41191db" dependencies = [ - "bitflags 2.11.1", + "bitflags", "bstr", "filetime", "fnv", @@ -3568,7 +3548,7 @@ version = "0.31.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "103d42bfade1b8a96ca5005933127bdad461ce588d92422b2c2daa3ff20d780c" dependencies = [ - "bitflags 2.11.1", + "bitflags", "gix-commitgraph", "gix-date", "gix-hash", @@ -3612,7 +3592,7 @@ dependencies = [ "gix-path", "gix-quote", 
"memmap2", - "parking_lot 0.12.5", + "parking_lot", "serde", "tempfile", "thiserror 2.0.18", @@ -3634,7 +3614,7 @@ dependencies = [ "gix-path", "gix-tempfile", "memmap2", - "parking_lot 0.12.5", + "parking_lot", "serde", "smallvec", "thiserror 2.0.18", @@ -3671,7 +3651,7 @@ version = "0.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2a84a4f083dd70fb49f4377e13afa6d90df2daaa1c705c49d6ff1331fc7e8855" dependencies = [ - "bitflags 2.11.1", + "bitflags", "bstr", "gix-attributes", "gix-config-value", @@ -3688,7 +3668,7 @@ checksum = "e041a626c64cb69e4117fcdf80da8d0e454fba3b1f420412792d191f52251aee" dependencies = [ "gix-command", "gix-config-value", - "parking_lot 0.12.5", + "parking_lot", "rustix", "thiserror 2.0.18", ] @@ -3774,7 +3754,7 @@ version = "0.45.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fb5288fac706d3ea3e4e2ba9ec38b78743b8c02f422e18cb342299cfd6ab7e8" dependencies = [ - "bitflags 2.11.1", + "bitflags", "bstr", "gix-commitgraph", "gix-date", @@ -3810,7 +3790,7 @@ version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f5a3a2d3e504a238136751e646a6c028252286a0ea64ea9974bf0498633407c6" dependencies = [ - "bitflags 2.11.1", + "bitflags", "gix-path", "libc", "serde", @@ -3878,7 +3858,7 @@ dependencies = [ "dashmap", "gix-fs", "libc", - "parking_lot 0.12.5", + "parking_lot", "signal-hook", "signal-hook-registry", "tempfile", @@ -3913,7 +3893,7 @@ version = "0.57.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a14b7052c0786676c03e71fcfde7d7f0f8e8316e642b5cec6bb3998719b2ce5c" dependencies = [ - "bitflags 2.11.1", + "bitflags", "gix-commitgraph", "gix-date", "gix-hash", @@ -4009,7 +3989,7 @@ dependencies = [ "gix-object", "gix-path", "gix-traverse", - "parking_lot 0.12.5", + "parking_lot", ] [[package]] @@ -4037,7 +4017,7 @@ version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"0bf760ebf69878d9fd8f110c89703d90ce35095324d1f1edcb595c63945ee757" dependencies = [ - "bitflags 2.11.1", + "bitflags", "ignore", "walkdir", ] @@ -4098,9 +4078,9 @@ dependencies = [ [[package]] name = "h2" -version = "0.4.13" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f44da3a8150a6703ed5d34e164b875fd14c2cdab9af1252a9a1020bde2bdc54" +checksum = "171fefbc92fe4a4de27e0698d6a5b392d6a0e333506bc49133760b3bcf948733" dependencies = [ "atomic-waker", "bytes", @@ -4158,9 +4138,9 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.17.0" +version = "0.17.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f467dd6dccf739c208452f8014c75c18bb8301b050ad1cfb27153803edb0f51" +checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a" [[package]] name = "hashlink" @@ -4206,23 +4186,22 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" [[package]] -name = "hickory-proto" -version = "0.25.2" +name = "hickory-net" +version = "0.26.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8a6fe56c0038198998a6f217ca4e7ef3a5e51f46163bd6dd60b5c71ca6c6502" +checksum = "e2295ed2f9c31e471e1428a8f88a3f0e1f4b27c15049592138d1eebe9c35b183" dependencies = [ "async-trait", "cfg-if", "data-encoding", - "enum-as-inner", "futures-channel", "futures-io", "futures-util", + "hickory-proto", "idna", "ipnet", - "once_cell", - "rand 0.9.4", - "ring", + "jni", + "rand 0.10.1", "thiserror 2.0.18", "tinyvec", "tokio", @@ -4231,26 +4210,60 @@ dependencies = [ ] [[package]] -name = "hickory-resolver" -version = "0.25.2" +name = "hickory-proto" +version = "0.26.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc62a9a99b0bfb44d2ab95a7208ac952d31060efc16241c87eaf36406fecf87a" +checksum = "0bab31817bfb44672a252e97fe81cd0c18d1b2cf892108922f6818820df8c643" 
+dependencies = [ + "data-encoding", + "idna", + "ipnet", + "jni", + "once_cell", + "prefix-trie", + "rand 0.10.1", + "ring", + "thiserror 2.0.18", + "tinyvec", + "tracing", + "url", +] + +[[package]] +name = "hickory-resolver" +version = "0.26.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0d58d28879ceecde6607729660c2667a081ccdc082e082675042793960f178c" dependencies = [ "cfg-if", "futures-util", + "hickory-net", "hickory-proto", "ipconfig", + "ipnet", + "jni", "moka", + "ndk-context", "once_cell", - "parking_lot 0.12.5", - "rand 0.9.4", + "parking_lot", + "rand 0.10.1", "resolv-conf", "smallvec", + "system-configuration", "thiserror 2.0.18", "tokio", "tracing", ] +[[package]] +name = "hkdf" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b5f8eb2ad728638ea2c7d47a21db23b7b58a72ed6a38256b8a1849f15fbbdf7" +dependencies = [ + "hmac 0.12.1", +] + [[package]] name = "hmac" version = "0.12.1" @@ -4266,7 +4279,7 @@ version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6303bc9732ae41b04cb554b844a762b4115a61bfaa81e3e83050991eeb56863f" dependencies = [ - "digest 0.11.2", + "digest 0.11.3", ] [[package]] @@ -4387,9 +4400,9 @@ dependencies = [ [[package]] name = "hybrid-array" -version = "0.4.11" +version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08d46837a0ed51fe95bd3b05de33cd64a1ee88fc797477ca48446872504507c5" +checksum = "9155a582abd142abc056962c29e3ce5ff2ad5469f4246b537ed42c5deba857da" dependencies = [ "typenum", ] @@ -4467,7 +4480,7 @@ version = "0.1.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "96547c2556ec9d12fb1578c4eaf448b04993e7fb79cbaad930a656880a6bdfa0" dependencies = [ - "base64 0.22.1", + "base64", "bytes", "futures-channel", "futures-util", @@ -4478,7 +4491,7 @@ dependencies = [ "libc", "percent-encoding", "pin-project-lite", - "socket2 0.5.10", + "socket2 
0.6.3", "tokio", "tower-service", "tracing", @@ -4698,7 +4711,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9" dependencies = [ "equivalent", - "hashbrown 0.17.0", + "hashbrown 0.17.1", "serde", "serde_core", ] @@ -4717,18 +4730,6 @@ dependencies = [ "web-time", ] -[[package]] -name = "instant" -version = "0.1.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222" -dependencies = [ - "cfg-if", - "js-sys", - "wasm-bindgen", - "web-sys", -] - [[package]] name = "io-close" version = "0.3.7" @@ -4757,14 +4758,7 @@ name = "ipnet" version = "2.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d98f6fed1fde3f8c21bc40a1abb88dd75e67924f9cffc3ef95607bad8017f8e2" - -[[package]] -name = "iri-string" -version = "0.7.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25e659a4bb38e810ebc252e53b5814ff908a8c58c2a9ce2fae1bbec24cbf4e20" dependencies = [ - "memchr", "serde", ] @@ -4819,7 +4813,7 @@ dependencies = [ "portable-atomic", "portable-atomic-util", "serde_core", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -4909,9 +4903,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.97" +version = "0.3.98" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1840c94c045fbcf8ba2812c95db44499f7c64910a912551aaaa541decebcacf" +checksum = "67df7112613f8bfd9150013a0314e196f4800d3201ae742489d999db2f979f08" dependencies = [ "cfg-if", "futures-util", @@ -4932,34 +4926,27 @@ dependencies = [ [[package]] name = "jsonwebtoken" -version = "9.3.1" +version = "10.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a87cc7a48537badeae96744432de36f4be2b4a34a05a5ef32e9dd8a1c169dde" -dependencies = [ - "base64 0.22.1", - "js-sys", - "pem", - "ring", - "serde", - 
"serde_json", - "simple_asn1", -] - -[[package]] -name = "jsonwebtoken" -version = "10.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0529410abe238729a60b108898784df8984c87f6054c9c4fcacc47e4803c1ce1" +checksum = "eba32bfb4ffdeaca3e34431072faf01745c9b26d25504aa7a6cf5684334fc4fc" dependencies = [ "aws-lc-rs", - "base64 0.22.1", + "base64", + "ed25519-dalek", "getrandom 0.2.17", + "hmac 0.12.1", "js-sys", + "p256", + "p384", "pem", + "rand 0.8.6", + "rsa", "serde", "serde_json", + "sha2 0.10.9", "signature", "simple_asn1", + "zeroize", ] [[package]] @@ -4973,7 +4960,7 @@ dependencies = [ [[package]] name = "kingfisher" -version = "1.99.0" +version = "1.100.0" dependencies = [ "anyhow", "asar", @@ -4998,7 +4985,7 @@ dependencies = [ "aws-smithy-types", "aws-types", "axum", - "base64 0.22.1", + "base64", "blake3", "bloomfilter", "bstr", @@ -5045,21 +5032,19 @@ dependencies = [ "mongodb", "mysql_async", "oci-client", - "octorust", - "parking_lot 0.12.5", + "parking_lot", "path-dedot", "percent-encoding", "petgraph", "predicates", "pretty_assertions", "proptest", - "quick-xml 0.39.2", + "quick-xml 0.39.4", "rand 0.10.1", "rand_chacha 0.10.0", "rayon", "regex", "reqwest 0.12.28", - "reqwest-middleware 0.4.2", "roaring", "rusqlite", "rustc-hash", @@ -5072,7 +5057,7 @@ dependencies = [ "serde-sarif", "serde_json", "serde_yaml", - "sha1 0.10.6", + "sha1 0.11.0", "sha2 0.11.0", "smallvec", "strum 0.28.0", @@ -5118,13 +5103,13 @@ dependencies = [ "hex", "memchr", "memmap2", - "parking_lot 0.12.5", + "parking_lot", "pretty_assertions", "rustc-hash", "schemars 0.8.22", "serde", "serde_json", - "sha1 0.10.6", + "sha1 0.11.0", "smallvec", "thiserror 2.0.18", "tokei", @@ -5136,9 +5121,10 @@ version = "0.1.0" dependencies = [ "anyhow", "base32", - "base64 0.22.1", + "base64", "crc32fast", - "hmac 0.12.1", + "hex", + "hmac 0.13.0", "ignore", "include_dir", "kingfisher-core", @@ -5153,8 +5139,8 @@ dependencies = [ "serde", "serde_json", 
"serde_yaml", - "sha1 0.10.6", - "sha2 0.10.9", + "sha1 0.11.0", + "sha2 0.11.0", "thiserror 2.0.18", "time", "tracing", @@ -5178,16 +5164,16 @@ dependencies = [ "aws-smithy-types", "aws-types", "base32", - "base64 0.22.1", + "base64", "bstr", "byteorder", "chrono", "crossbeam-skiplist", "ed25519-dalek", "hex", - "hmac 0.12.1", + "hmac 0.13.0", "http 1.4.0", - "jsonwebtoken 10.3.0", + "jsonwebtoken", "kingfisher-core", "kingfisher-rules", "ldap3", @@ -5196,11 +5182,11 @@ dependencies = [ "mongodb", "mysql_async", "p256", - "parking_lot 0.12.5", + "parking_lot", "pem", "percent-encoding", "pretty_assertions", - "quick-xml 0.39.2", + "quick-xml 0.39.4", "rand 0.10.1", "regex", "reqwest 0.12.28", @@ -5211,8 +5197,8 @@ dependencies = [ "schemars 0.8.22", "serde", "serde_json", - "sha1 0.10.6", - "sha2 0.10.9", + "sha1 0.11.0", + "sha2 0.11.0", "smallvec", "tempfile", "thiserror 2.0.18", @@ -5258,6 +5244,9 @@ name = "lazy_static" version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" +dependencies = [ + "spin 0.9.8", +] [[package]] name = "lber" @@ -5310,9 +5299,9 @@ checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66" [[package]] name = "libgit2-sys" -version = "0.18.3+1.9.2" +version = "0.18.4+1.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c9b3acc4b91781bb0b3386669d325163746af5f6e4f73e6d2d630e09a35f3487" +checksum = "9b26f66f35e1871b22efcf7191564123d2a446ca0538cde63c23adfefa9b15b7" dependencies = [ "cc", "libc", @@ -5341,10 +5330,7 @@ version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e02f3bb43d335493c96bf3fd3a321600bf6bd07ed34bc64118e9293bdffea46c" dependencies = [ - "bitflags 2.11.1", "libc", - "plain", - "redox_syscall 0.7.4", ] [[package]] @@ -5456,9 +5442,6 @@ name = "log" version = "0.4.29" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" -dependencies = [ - "serde_core", -] [[package]] name = "lru" @@ -5550,9 +5533,9 @@ checksum = "47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3" [[package]] name = "maybe-async" -version = "0.2.10" +version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5cf92c10c7e361d6b99666ec1c6f9805b0bea2c3bd8c78dc6fe98ac5bd78db11" +checksum = "746873a384ad60adc5db74471dfaba74bd278afbdcfd81db93fafcdfc8b5ca0c" dependencies = [ "proc-macro2", "quote", @@ -5576,7 +5559,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69b6441f590336821bb897fb28fc622898ccceb1d6cea3fde5ea86b090c4de98" dependencies = [ "cfg-if", - "digest 0.11.2", + "digest 0.11.3", ] [[package]] @@ -5656,7 +5639,7 @@ dependencies = [ "crossbeam-epoch", "crossbeam-utils", "equivalent", - "parking_lot 0.12.5", + "parking_lot", "portable-atomic", "smallvec", "tagptr", @@ -5666,8 +5649,7 @@ dependencies = [ [[package]] name = "mongocrypt" version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8da0cd419a51a5fb44819e290fbdb0665a54f21dead8923446a799c7f4d26ad9" +source = "git+https://github.com/mongodb/libmongocrypt-rust.git?branch=main#6fc0084825b4eb98229d13d621cab42b03498653" dependencies = [ "bson", "mongocrypt-sys", @@ -5678,20 +5660,18 @@ dependencies = [ [[package]] name = "mongocrypt-sys" version = "0.1.5+1.15.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "224484c5d09285a7b8cb0a0c117e847ebd14cb6e4470ecf68cdb89c503b0edb9" +source = "git+https://github.com/mongodb/libmongocrypt-rust.git?branch=main#6fc0084825b4eb98229d13d621cab42b03498653" [[package]] name = "mongodb" version = "3.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"1ef2c933617431ad0246fb5b43c425ebdae18c7f7259c87de0726d93b0e7e91b" +source = "git+https://github.com/mongodb/mongo-rust-driver?rev=bdddefc50c4794d51d10b944320d42c6eb216b04#bdddefc50c4794d51d10b944320d42c6eb216b04" dependencies = [ "aws-config", "aws-credential-types", "aws-sigv4", - "base64 0.22.1", - "bitflags 2.11.1", + "base64", + "bitflags", "bson", "chrono", "derive-where", @@ -5700,6 +5680,7 @@ dependencies = [ "futures-io", "futures-util", "hex", + "hickory-net", "hickory-proto", "hickory-resolver", "hmac 0.12.1", @@ -5713,7 +5694,6 @@ dependencies = [ "rand 0.9.4", "rustc_version_runtime", "rustls", - "rustversion", "serde", "serde_bytes", "serde_with", @@ -5735,8 +5715,7 @@ dependencies = [ [[package]] name = "mongodb-internal-macros" version = "3.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e5758dc828eb2d02ec30563cba365609d56ddd833190b192beaee2b475a7bb3" +source = "git+https://github.com/mongodb/mongo-rust-driver?rev=bdddefc50c4794d51d10b944320d42c6eb216b04#bdddefc50c4794d51d10b944320d42c6eb216b04" dependencies = [ "macro_magic", "proc-macro2", @@ -5800,8 +5779,8 @@ version = "0.35.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fbb9f371618ce723f095c61fbcdc36e8936956d2b62832f9c7648689b338e052" dependencies = [ - "base64 0.22.1", - "bitflags 2.11.1", + "base64", + "bitflags", "btoi", "byteorder", "bytes", @@ -5893,7 +5872,7 @@ version = "0.50.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -5920,6 +5899,22 @@ dependencies = [ "num-traits", ] +[[package]] +name = "num-bigint-dig" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e661dda6640fad38e827a6d4a310ff4763082116fe217f279885c97f511bb0b7" +dependencies = [ + "lazy_static", + "libm", + "num-integer", + 
"num-iter", + "num-traits", + "rand 0.8.6", + "smallvec", + "zeroize", +] + [[package]] name = "num-complex" version = "0.4.6" @@ -5983,6 +5978,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ "autocfg", + "libm", ] [[package]] @@ -6010,7 +6006,7 @@ version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2a180dd8642fa45cdb7dd721cd4c11b1cadd4929ce112ebd8b9f5803cc79d536" dependencies = [ - "bitflags 2.11.1", + "bitflags", ] [[package]] @@ -6025,7 +6021,7 @@ version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3e0adef53c21f888deb4fa59fc59f7eb17404926ee8a6f59f5df0fd7f9f3272" dependencies = [ - "bitflags 2.11.1", + "bitflags", "objc2", ] @@ -6068,7 +6064,7 @@ dependencies = [ "futures-util", "http 1.4.0", "http-auth", - "jsonwebtoken 10.3.0", + "jsonwebtoken", "lazy_static", "oci-spec", "olpc-cjson", @@ -6100,39 +6096,6 @@ dependencies = [ "thiserror 2.0.18", ] -[[package]] -name = "octorust" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c488b641cf652f023d371f6d472191bf3b3fd8075a11f274e95d4fe6e5e3878" -dependencies = [ - "async-recursion", - "async-trait", - "bytes", - "chrono", - "http 1.4.0", - "jsonwebtoken 9.3.1", - "log", - "mime", - "parse_link_header", - "pem", - "percent-encoding", - "reqwest 0.12.28", - "reqwest-conditional-middleware", - "reqwest-middleware 0.4.2", - "reqwest-retry", - "reqwest-tracing", - "ring", - "schemars 0.8.22", - "serde", - "serde_json", - "serde_urlencoded", - "thiserror 1.0.69", - "tokio", - "url", - "uuid", -] - [[package]] name = "oid-registry" version = "0.8.1" @@ -6171,15 +6134,14 @@ checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" [[package]] name = "openssl" -version = "0.10.78" +version = "0.10.80" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "f38c4372413cdaaf3cc79dd92d29d7d9f5ab09b51b10dded508fb90bb70b9222" +checksum = "a45fa2aa886c42762255da344f0a0d313e254066c46aad76f300c3d3da62d967" dependencies = [ - "bitflags 2.11.1", + "bitflags", "cfg-if", "foreign-types 0.3.2", "libc", - "once_cell", "openssl-macros", "openssl-sys", ] @@ -6203,9 +6165,9 @@ checksum = "7c87def4c32ab89d880effc9e097653c8da5d6ef28e6b539d313baaacfbafcbe" [[package]] name = "openssl-sys" -version = "0.9.114" +version = "0.9.116" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13ce1245cd07fcc4cfdb438f7507b0c7e4f3849a69fd84d52374c66d83741bb6" +checksum = "f28a22dc7140cda5f096e5e7724a6962ca81a7f8bfd2979f9b18c11af56318c4" dependencies = [ "cc", "libc", @@ -6238,14 +6200,15 @@ dependencies = [ ] [[package]] -name = "parking_lot" -version = "0.11.2" +name = "p384" +version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99" +checksum = "fe42f1670a52a47d448f14b6a5c61dd78fce51856e68edaa38f7ae3a46b8d6b6" dependencies = [ - "instant", - "lock_api", - "parking_lot_core 0.8.6", + "ecdsa", + "elliptic-curve", + "primeorder", + "sha2 0.10.9", ] [[package]] @@ -6255,21 +6218,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" dependencies = [ "lock_api", - "parking_lot_core 0.9.12", -] - -[[package]] -name = "parking_lot_core" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60a2cfe6f0ad2bfc16aefa463b497d5c7a5ecd44a23efa72aa342d90177356dc" -dependencies = [ - "cfg-if", - "instant", - "libc", - "redox_syscall 0.2.16", - "smallvec", - "winapi", + "parking_lot_core", ] [[package]] @@ -6280,7 +6229,7 @@ checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" dependencies = [ "cfg-if", "libc", 
- "redox_syscall 0.5.18", + "redox_syscall", "smallvec", "windows-link", ] @@ -6319,17 +6268,6 @@ dependencies = [ "regex", ] -[[package]] -name = "parse_link_header" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3687fe9debbbf2a019f381a8bc6b42049b22647449b39af54b3013985c0cf6de" -dependencies = [ - "http 0.2.12", - "lazy_static", - "regex", -] - [[package]] name = "path-dedot" version = "3.1.1" @@ -6354,7 +6292,7 @@ version = "3.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d30c53c26bc5b31a98cd02d20f25a7c8567146caf63ed593a9d87b2775291be" dependencies = [ - "base64 0.22.1", + "base64", "serde_core", ] @@ -6487,18 +6425,18 @@ dependencies = [ [[package]] name = "pin-project" -version = "1.1.11" +version = "1.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1749c7ed4bcaf4c3d0a3efc28538844fb29bcdd7d2b67b2be7e20ba861ff517" +checksum = "2466b2336ed02bcdca6b294417127b90ec92038d1d5c4fbeac971a922e0e0924" dependencies = [ "pin-project-internal", ] [[package]] name = "pin-project-internal" -version = "1.1.11" +version = "1.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9b20ed30f105399776b9c883e68e536ef602a16ae6f596d2c473591d6ad64c6" +checksum = "c96395f0a926bc13b1c17622aaddda1ecb55d49c8f1bf9777e4d877800a43f8b" dependencies = [ "proc-macro2", "quote", @@ -6517,6 +6455,17 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +[[package]] +name = "pkcs1" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8ffb9f10fa047879315e6625af03c164b16962a5368d724ed16323b68ace47f" +dependencies = [ + "der", + "pkcs8", + "spki", +] + [[package]] name = "pkcs8" version = "0.10.2" @@ -6533,12 +6482,6 @@ version = "0.3.33" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "19f132c84eca552bf34cab8ec81f1c1dcc229b811638f9d283dceabe58c5569e" -[[package]] -name = "plain" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4596b6d070b27117e987119b4dac604f3c58cfb0b191112e24771b2faeac1a6" - [[package]] name = "pori" version = "0.0.0" @@ -6569,7 +6512,7 @@ version = "0.6.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56201207dac53e2f38e848e31b4b91616a6bb6e0c7205b77718994a7f49e70fc" dependencies = [ - "base64 0.22.1", + "base64", "byteorder", "bytes", "fallible-iterator 0.2.0", @@ -6646,6 +6589,17 @@ dependencies = [ "termtree", ] +[[package]] +name = "prefix-trie" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cf6e3177f0684016a5c209b00882e15f8bdd3f3bb48f0491df10cd102d0c6e7" +dependencies = [ + "either", + "ipnet", + "num-traits", +] + [[package]] name = "pretty_assertions" version = "1.4.1" @@ -6723,7 +6677,7 @@ checksum = "962200e2d7d551451297d9fdce85138374019ada198e30ea9ede38034e27604c" dependencies = [ "bytesize", "human_format", - "parking_lot 0.12.5", + "parking_lot", ] [[package]] @@ -6734,7 +6688,7 @@ checksum = "4b45fcc2344c680f5025fe57779faef368840d0bd1f42f216291f0dc4ace4744" dependencies = [ "bit-set", "bit-vec", - "bitflags 2.11.1", + "bitflags", "num-traits", "rand 0.9.4", "rand_chacha 0.9.0", @@ -6783,7 +6737,7 @@ version = "0.9.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57206b407293d2bcd3af849ce869d52068623f19e1b5ff8e8778e3309439682b" dependencies = [ - "bitflags 2.11.1", + "bitflags", "memchr", "unicase", ] @@ -6805,9 +6759,9 @@ dependencies = [ [[package]] name = "quick-xml" -version = "0.39.2" +version = "0.39.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "958f21e8e7ceb5a1aa7fa87fab28e7c75976e0bfe7e23ff069e0a260f894067d" +checksum = 
"cdcc8dd4e2f670d309a5f0e83fe36dfdc05af317008fea29144da1a2ac858e5e" dependencies = [ "memchr", "serde", @@ -6826,7 +6780,7 @@ dependencies = [ "quinn-udp", "rustc-hash", "rustls", - "socket2 0.5.10", + "socket2 0.6.3", "thiserror 2.0.18", "tokio", "tracing", @@ -6864,7 +6818,7 @@ dependencies = [ "cfg_aliases", "libc", "once_cell", - "socket2 0.5.10", + "socket2 0.6.3", "tracing", "windows-sys 0.60.2", ] @@ -7011,31 +6965,13 @@ dependencies = [ "crossbeam-utils", ] -[[package]] -name = "redox_syscall" -version = "0.2.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" -dependencies = [ - "bitflags 1.3.2", -] - [[package]] name = "redox_syscall" version = "0.5.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" dependencies = [ - "bitflags 2.11.1", -] - -[[package]] -name = "redox_syscall" -version = "0.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f450ad9c3b1da563fb6948a8e0fb0fb9269711c9c73d9ea1de5058c79c8d643a" -dependencies = [ - "bitflags 2.11.1", + "bitflags", ] [[package]] @@ -7099,7 +7035,7 @@ version = "0.12.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eddd3ca559203180a307f12d114c268abf583f59b03cb906fd0b3ff8646c1147" dependencies = [ - "base64 0.22.1", + "base64", "bytes", "futures-channel", "futures-core", @@ -7143,7 +7079,7 @@ version = "0.13.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "62e0021ea2c22aed41653bc7e1419abb2c97e038ff2c33d0e1309e49a97deec0" dependencies = [ - "base64 0.22.1", + "base64", "bytes", "encoding_rs", "futures-channel", @@ -7183,33 +7119,6 @@ dependencies = [ "web-sys", ] -[[package]] -name = "reqwest-conditional-middleware" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"f67ad7fdf5c0a015763fcd164bee294b13fb7b6f89f1b55961d40f00c3e32d6b" -dependencies = [ - "async-trait", - "http 1.4.0", - "reqwest 0.12.28", - "reqwest-middleware 0.4.2", -] - -[[package]] -name = "reqwest-middleware" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57f17d28a6e6acfe1733fe24bcd30774d13bffa4b8a22535b4c8c98423088d4e" -dependencies = [ - "anyhow", - "async-trait", - "http 1.4.0", - "reqwest 0.12.28", - "serde", - "thiserror 1.0.69", - "tower-service", -] - [[package]] name = "reqwest-middleware" version = "0.5.1" @@ -7225,59 +7134,12 @@ dependencies = [ "tower-service", ] -[[package]] -name = "reqwest-retry" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29c73e4195a6bfbcb174b790d9b3407ab90646976c55de58a6515da25d851178" -dependencies = [ - "anyhow", - "async-trait", - "futures", - "getrandom 0.2.17", - "http 1.4.0", - "hyper", - "parking_lot 0.11.2", - "reqwest 0.12.28", - "reqwest-middleware 0.4.2", - "retry-policies", - "thiserror 1.0.69", - "tokio", - "tracing", - "wasm-timer", -] - -[[package]] -name = "reqwest-tracing" -version = "0.5.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d70ea85f131b2ee9874f0b160ac5976f8af75f3c9badfe0d955880257d10bd83" -dependencies = [ - "anyhow", - "async-trait", - "getrandom 0.2.17", - "http 1.4.0", - "matchit", - "reqwest 0.12.28", - "reqwest-middleware 0.4.2", - "tracing", -] - [[package]] name = "resolv-conf" version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e061d1b48cb8d38042de4ae0a7a6401009d6143dc80d2e2d6f31f0bdd6470c7" -[[package]] -name = "retry-policies" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5875471e6cab2871bc150ecb8c727db5113c9338cc3354dc5ee3425b6aa40a1c" -dependencies = [ - "rand 0.8.6", -] - [[package]] name = "rfc6979" version = "0.4.0" @@ -7312,6 +7174,26 @@ dependencies 
= [ "byteorder", ] +[[package]] +name = "rsa" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8573f03f5883dcaebdfcf4725caa1ecb9c15b2ef50c43a07b816e06799bb12d" +dependencies = [ + "const-oid 0.9.6", + "digest 0.10.7", + "num-bigint-dig", + "num-integer", + "num-traits", + "pkcs1", + "pkcs8", + "rand_core 0.6.4", + "signature", + "spki", + "subtle", + "zeroize", +] + [[package]] name = "rsqlite-vfs" version = "0.1.0" @@ -7328,7 +7210,7 @@ version = "0.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a0d2b0146dd9661bf67bb107c0bb2a55064d556eeb3fc314151b957f313bcd4e" dependencies = [ - "bitflags 2.11.1", + "bitflags", "fallible-iterator 0.3.0", "fallible-streaming-iterator", "hashlink", @@ -7383,11 +7265,11 @@ version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190" dependencies = [ - "bitflags 2.11.1", + "bitflags", "errno", "libc", "linux-raw-sys", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -7443,7 +7325,7 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "26d1e2536ce4f35f4846aa13bff16bd0ff40157cdb14cc056c7b14ba41233ba0" dependencies = [ - "core-foundation", + "core-foundation 0.10.1", "core-foundation-sys", "jni", "log", @@ -7455,7 +7337,7 @@ dependencies = [ "security-framework", "security-framework-sys", "webpki-root-certs", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -7557,14 +7439,10 @@ version = "0.8.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fbf2ae1b8bc8e02df939598064d22402220cd5bbcca1c76f7d6a310974d5615" dependencies = [ - "bytes", - "chrono", "dyn-clone", "schemars_derive", "serde", "serde_json", - "url", - "uuid", ] [[package]] @@ -7629,8 +7507,8 @@ version = "3.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"b7f4bc775c73d9a02cde8bf7b2ec4c9d12743edf609006c7facc23998404cd1d" dependencies = [ - "bitflags 2.11.1", - "core-foundation", + "bitflags", + "core-foundation 0.10.1", "core-foundation-sys", "libc", "security-framework-sys", @@ -7812,11 +7690,12 @@ dependencies = [ [[package]] name = "serde_with" -version = "3.18.0" +version = "3.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd5414fad8e6907dbdd5bc441a50ae8d6e26151a03b1de04d89a5576de61d01f" +checksum = "e72c1c2cb7b223fafb600a619537a871c2818583d619401b785e7c0b746ccde2" dependencies = [ - "base64 0.22.1", + "base64", + "bs58", "chrono", "hex", "indexmap 1.9.3", @@ -7831,9 +7710,9 @@ dependencies = [ [[package]] name = "serde_with_macros" -version = "3.18.0" +version = "3.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3db8978e608f1fe7357e211969fd9abdcae80bac1ba7a3369bb7eb6b404eb65" +checksum = "b90c488738ecb4fb0262f41f43bc40efc5868d9fb744319ddf5f5317f417bfac" dependencies = [ "darling 0.23.0", "proc-macro2", @@ -7873,7 +7752,7 @@ checksum = "aacc4cc499359472b4abe1bf11d0b12e688af9a805fa5e3016f9a386dc2d0214" dependencies = [ "cfg-if", "cpufeatures 0.3.0", - "digest 0.11.2", + "digest 0.11.3", ] [[package]] @@ -7905,7 +7784,7 @@ checksum = "446ba717509524cb3f22f17ecc096f10f4822d76ab5c0b9822c5f9c284e825f4" dependencies = [ "cfg-if", "cpufeatures 0.3.0", - "digest 0.11.2", + "digest 0.11.3", ] [[package]] @@ -7995,9 +7874,9 @@ dependencies = [ [[package]] name = "siphasher" -version = "1.0.2" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e" +checksum = "8ee5873ec9cce0195efcb7a4e9507a04cd49aec9c83d0389df45b1ef7ba2e649" [[package]] name = "skeptic" @@ -8056,7 +7935,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" dependencies = [ "libc", - 
"windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -8070,6 +7949,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "spin" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" + [[package]] name = "spin" version = "0.10.0" @@ -8276,6 +8161,27 @@ dependencies = [ "windows", ] +[[package]] +name = "system-configuration" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a13f3d0daba03132c0aa9767f98351b3488edc2c100cda2d2ec2b04f3d8d3c8b" +dependencies = [ + "bitflags", + "core-foundation 0.9.4", + "system-configuration-sys", +] + +[[package]] +name = "system-configuration-sys" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e1d1b10ced5ca923a1fcb8d03e96b8d3268065d724548c0211415ff6ac6bac4" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "table_formatter" version = "0.6.1" @@ -8322,7 +8228,7 @@ version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "96374855068f47402c3121c6eed88d29cb1de8f3ab27090e273e420bdabcf050" dependencies = [ - "parking_lot 0.12.5", + "parking_lot", ] [[package]] @@ -8335,7 +8241,7 @@ dependencies = [ "getrandom 0.4.2", "once_cell", "rustix", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -8386,7 +8292,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "230a1b821ccbd75b185820a1f1ff7b14d21da1e442e22c0863ea5f08771a8874" dependencies = [ "rustix", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -8616,7 +8522,7 @@ dependencies = [ "log", "num-format", "once_cell", - "parking_lot 0.12.5", + "parking_lot", "rayon", "regex", "serde", @@ -8638,14 +8544,14 @@ dependencies = [ [[package]] name = "tokio" -version = "1.52.1" +version = "1.52.3" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "b67dee974fe86fd92cc45b7a95fdd2f99a36a6d7b0d431a231178d3d670bbcc6" +checksum = "8fc7f01b389ac15039e4dc9531aa973a135d7a4135281b12d7c1bc79fd57fffe" dependencies = [ "bytes", "libc", "mio", - "parking_lot 0.12.5", + "parking_lot", "pin-project-lite", "signal-hook-registry", "socket2 0.6.3", @@ -8687,7 +8593,7 @@ dependencies = [ "futures-channel", "futures-util", "log", - "parking_lot 0.12.5", + "parking_lot", "percent-encoding", "phf 0.13.1", "pin-project-lite", @@ -8746,6 +8652,7 @@ dependencies = [ "futures-core", "futures-io", "futures-sink", + "futures-util", "pin-project-lite", "tokio", ] @@ -8803,7 +8710,7 @@ dependencies = [ "indexmap 2.14.0", "toml_datetime 1.1.1+spec-1.1.0", "toml_parser", - "winnow 1.0.2", + "winnow 1.0.3", ] [[package]] @@ -8812,7 +8719,7 @@ version = "1.1.2+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2abe9b86193656635d2411dc43050282ca48aa31c2451210f4202550afb7526" dependencies = [ - "winnow 1.0.2", + "winnow 1.0.3", ] [[package]] @@ -8823,13 +8730,13 @@ checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801" [[package]] name = "tonic" -version = "0.14.5" +version = "0.14.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fec7c61a0695dc1887c1b53952990f3ad2e3a31453e1f49f10e75424943a93ec" +checksum = "ac2a5518c70fa84342385732db33fb3f44bc4cc748936eb5833d2df34d6445ef" dependencies = [ "async-trait", "axum", - "base64 0.22.1", + "base64", "bytes", "h2", "http 1.4.0", @@ -8852,9 +8759,9 @@ dependencies = [ [[package]] name = "tonic-prost" -version = "0.14.5" +version = "0.14.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a55376a0bbaa4975a3f10d009ad763d8f4108f067c7c2e74f3001fb49778d309" +checksum = "50849f68853be452acf590cde0b146665b8d507b3b8af17261df47e02c209ea0" dependencies = [ "bytes", "prost", @@ -8894,25 +8801,25 @@ dependencies = [ [[package]] name = "tower-http" -version = "0.6.8" +version = 
"0.6.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8" +checksum = "4cfcf7e2740e6fc6d4d688b4ef00650406bb94adf4731e43c096c3a19fe40840" dependencies = [ "async-compression", - "bitflags 2.11.1", + "bitflags", "bytes", "futures-core", "futures-util", "http 1.4.0", "http-body 1.0.1", "http-body-util", - "iri-string", "pin-project-lite", "tokio", "tokio-util", "tower", "tower-layer", "tower-service", + "url", ] [[package]] @@ -9162,7 +9069,7 @@ version = "3.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dea7109cdcd5864d4eeb1b58a1648dc9bf520360d7af16ec26d0a9354bafcfc0" dependencies = [ - "base64 0.22.1", + "base64", "cookie_store", "encoding_rs", "flate2", @@ -9184,7 +9091,7 @@ version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e994ba84b0bd1b1b0cf92878b7ef898a5c1760108fe7b6010327e274917a808c" dependencies = [ - "base64 0.22.1", + "base64", "http 1.4.0", "httparse", "log", @@ -9265,7 +9172,7 @@ checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" name = "vectorscan-rs" version = "0.0.6" dependencies = [ - "bitflags 2.11.1", + "bitflags", "foreign-types 0.5.0", "libc", "thiserror 1.0.69", @@ -9363,9 +9270,9 @@ dependencies = [ [[package]] name = "wasm-bindgen" -version = "0.2.120" +version = "0.2.121" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df52b6d9b87e0c74c9edfa1eb2d9bf85e5d63515474513aa50fa181b3c4f5db1" +checksum = "49ace1d07c165b0864824eee619580c4689389afa9dc9ed3a4c75040d82e6790" dependencies = [ "cfg-if", "once_cell", @@ -9376,9 +9283,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.70" +version = "0.4.71" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af934872acec734c2d80e6617bbb5ff4f12b052dd8e6332b0817bce889516084" +checksum = 
"96492d0d3ffba25305a7dc88720d250b1401d7edca02cc3bcd50633b424673b8" dependencies = [ "js-sys", "wasm-bindgen", @@ -9386,9 +9293,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.120" +version = "0.2.121" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78b1041f495fb322e64aca85f5756b2172e35cd459376e67f2a6c9dffcedb103" +checksum = "8e68e6f4afd367a562002c05637acb8578ff2dea1943df76afb9e83d177c8578" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -9396,9 +9303,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.120" +version = "0.2.121" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9dcd0ff20416988a18ac686d4d4d0f6aae9ebf08a389ff5d29012b05af2a1b41" +checksum = "d95a9ec35c64b2a7cb35d3fead40c4238d0940c86d107136999567a4703259f2" dependencies = [ "bumpalo", "proc-macro2", @@ -9409,9 +9316,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.120" +version = "0.2.121" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49757b3c82ebf16c57d69365a142940b384176c24df52a087fb748e2085359ea" +checksum = "c4e0100b01e9f0d03189a92b96772a1fb998639d981193d7dbab487302513441" dependencies = [ "unicode-ident", ] @@ -9464,28 +9371,13 @@ dependencies = [ "web-sys", ] -[[package]] -name = "wasm-timer" -version = "0.2.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be0ecb0db480561e9a7642b5d3e4187c128914e58aa84330b9493e3eb68c5e7f" -dependencies = [ - "futures", - "js-sys", - "parking_lot 0.11.2", - "pin-utils", - "wasm-bindgen", - "wasm-bindgen-futures", - "web-sys", -] - [[package]] name = "wasmparser" version = "0.244.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" dependencies = [ - "bitflags 2.11.1", + "bitflags", "hashbrown 0.15.5", "indexmap 2.14.0", "semver", @@ -9508,9 +9400,9 @@ 
dependencies = [ [[package]] name = "web-sys" -version = "0.3.97" +version = "0.3.98" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2eadbac71025cd7b0834f20d1fe8472e8495821b4e9801eb0a60bd1f19827602" +checksum = "4b572dff8bcf38bad0fa19729c89bb5748b2b9b1d8be70cf90df697e3a8f32aa" dependencies = [ "js-sys", "wasm-bindgen", @@ -9532,7 +9424,7 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fc95580916af1e68ff6a7be07446fc5db73ebf71cf092de939bbf5f7e189f72" dependencies = [ - "core-foundation", + "core-foundation 0.10.1", "jni", "log", "ndk-context", @@ -9749,15 +9641,6 @@ dependencies = [ "windows-targets 0.52.6", ] -[[package]] -name = "windows-sys" -version = "0.59.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" -dependencies = [ - "windows-targets 0.52.6", -] - [[package]] name = "windows-sys" version = "0.60.2" @@ -9982,9 +9865,9 @@ dependencies = [ [[package]] name = "winnow" -version = "1.0.2" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ee1708bef14716a11bae175f579062d4554d95be2c6829f518df847b7b3fdd0" +checksum = "0592e1c9d151f854e6fd382574c3a0855250e1d9b2f99d9281c6e6391af352f1" dependencies = [ "memchr", ] @@ -9996,7 +9879,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08db1edfb05d9b3c1542e521aea074442088292f00b5f28e435c714a98f85031" dependencies = [ "assert-json-diff", - "base64 0.22.1", + "base64", "deadpool", "futures", "http 1.4.0", @@ -10076,7 +9959,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" dependencies = [ "anyhow", - "bitflags 2.11.1", + "bitflags", "indexmap 2.14.0", "log", "serde", @@ -10223,9 +10106,9 @@ dependencies = [ [[package]] name = "zerofrom" -version = "0.1.7" +version = "0.1.8" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69faa1f2a1ea75661980b013019ed6687ed0e83d069bc1114e2cc74c6c04c4df" +checksum = "0ec05a11813ea801ff6d75110ad09cd0824ddba17dfe17128ea0d5f68e6c5272" dependencies = [ "zerofrom-derive", ] @@ -10330,7 +10213,7 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dba6063ff82cdbd9a765add16d369abe81e520f836054e997c2db217ceca40c0" dependencies = [ - "base64 0.22.1", + "base64", "ed25519-dalek", "thiserror 2.0.18", ] diff --git a/Cargo.toml b/Cargo.toml index 71e199d..e6876a9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -32,9 +32,9 @@ xxhash-rust = { version = "0.8", features = ["xxh3"] } ignore = "0.4" walkdir = "2.5" include_dir = "0.7" -sha1 = "0.10" -sha2 = "0.10" -hmac = "0.12" +sha1 = "0.11" +sha2 = "0.11" +hmac = "0.13" base32 = "0.5.1" base64 = "0.22" percent-encoding = "2.3" @@ -48,7 +48,7 @@ http = "1.4" [package] name = "kingfisher" -version = "1.99.0" +version = "1.100.0" description = "MongoDB's blazingly fast and accurate secret scanning and validation tool" edition.workspace = true rust-version.workspace = true @@ -148,15 +148,16 @@ flate2 = "1.1" thousands = "0.2.0" crossbeam-skiplist = "0.1.3" tokio-postgres = { version = "0.7", default-features = false, features = ["runtime"] } -mongodb = { version = "3.4", default-features = false, features = ["rustls-tls", "aws-auth", "compat-3-0-0", "dns-resolver"] } +# Temporary Git pin: keeps MongoDB SRV validation enabled while using the upstream +# Hickory 0.26 DNS-resolver fix before it is available in a crates.io release. +# TODO: switch back to a crates.io mongodb release once it includes that fix. 
+mongodb = { git = "https://github.com/mongodb/mongo-rust-driver", rev = "bdddefc50c4794d51d10b944320d42c6eb216b04", default-features = false, features = ["rustls-tls", "aws-auth", "compat-3-0-0", "dns-resolver"] } mysql_async = { version = "0.36.2", default-features = false, features = ["default-rustls"] } aws-config = { version = "1.8.14", default-features = false, features = ["default-https-client", "rt-tokio", "credentials-process", "sso"] } aws-credential-types = "1.2.12" aws-sdk-sts = { version = "1.98.0", default-features = false, features = ["default-https-client", "rt-tokio"] } aws-types = "1.3.12" parking_lot = "0.12.5" -octorust = "0.10.0" -reqwest-middleware-octorust = { package = "reqwest-middleware", version = "0.4.2" } tracing-subscriber = {version = "0.3.22", features = ["env-filter"] } tracing-core = "0.1.35" aws-smithy-http-client = "1.1.10" @@ -181,9 +182,9 @@ futures = "0.3.31" dashmap = "6.1.0" xxhash-rust = { version = "0.8.15", features = ["xxh3", "const_xxh3"] } serde_yaml = "0.9.34" -hmac = "0.13.0" +hmac = { workspace = true } sha1 = { workspace = true } -sha2 = "0.11.0" +sha2 = { workspace = true } humantime = "2.3.0" path-dedot = "3.1.1" quick-xml = { version = "0.39.2", features = ["serde", "serialize"] } @@ -251,11 +252,11 @@ proptest = "1.9.0" [profile.release] debug = false -strip = true #"debuginfo" -opt-level = 3 # Maximum optimization for performance +strip = true +opt-level = "s" # Optimize for smaller binary size over speed lto = true # Enable Link Time Optimization codegen-units = 1 # Optimize for size but slower compilation -panic = "abort" # Remove unwind tables for panics +panic = "abort" # Remove unwind tables for panics rpath = false # Don't embed path dependencies incremental = false diff --git a/crates/kingfisher-core/src/blob.rs b/crates/kingfisher-core/src/blob.rs index 66e462b..5cdd12c 100644 --- a/crates/kingfisher-core/src/blob.rs +++ b/crates/kingfisher-core/src/blob.rs @@ -11,7 +11,7 @@ use std::{ 
convert::TryInto, fs::File, - io::{Read, Write}, + io::Read, path::Path, sync::{ Arc, OnceLock, @@ -235,7 +235,7 @@ impl BlobId { pub fn new(input: &[u8]) -> Self { const CHUNK: usize = 64 * 1024; // 64KB from start and end let mut hasher = Sha1::new(); - write!(&mut hasher, "blob {}\0", input.len()).unwrap(); + update_git_blob_header(&mut hasher, input.len()); if input.len() <= CHUNK * 2 { hasher.update(input); } else { @@ -249,7 +249,7 @@ impl BlobId { /// Computes a `BlobId` from the complete bytes (no truncation). pub fn compute_from_bytes(bytes: &[u8]) -> Self { let mut hasher = Sha1::new(); - write!(&mut hasher, "blob {}\0", bytes.len()).unwrap(); + update_git_blob_header(&mut hasher, bytes.len()); hasher.update(bytes); let digest: [u8; 20] = hasher.finalize().into(); BlobId(digest) @@ -277,6 +277,27 @@ impl BlobId { } } +fn update_git_blob_header(hasher: &mut Sha1, len: usize) { + let mut digits = [0u8; 20]; + let mut n = len; + let mut i = digits.len(); + + if n == 0 { + i -= 1; + digits[i] = b'0'; + } else { + while n > 0 { + i -= 1; + digits[i] = b'0' + (n % 10) as u8; + n /= 10; + } + } + + hasher.update(b"blob "); + hasher.update(&digits[i..]); + hasher.update(b"\0"); +} + impl<'de> Deserialize<'de> for BlobId { fn deserialize>(d: D) -> std::result::Result { struct Vis; diff --git a/crates/kingfisher-rules/Cargo.toml b/crates/kingfisher-rules/Cargo.toml index c0b26ce..ef50a03 100644 --- a/crates/kingfisher-rules/Cargo.toml +++ b/crates/kingfisher-rules/Cargo.toml @@ -41,6 +41,7 @@ crc32fast = "1.5" hmac.workspace = true sha1.workspace = true sha2.workspace = true +hex.workspace = true percent-encoding.workspace = true time.workspace = true uuid = { workspace = true, features = ["v4"] } diff --git a/crates/kingfisher-rules/data/rules/aws.yml b/crates/kingfisher-rules/data/rules/aws.yml index 1bc7f95..b73620d 100644 --- a/crates/kingfisher-rules/data/rules/aws.yml +++ b/crates/kingfisher-rules/data/rules/aws.yml @@ -5,16 +5,15 @@ rules: (?x) \b ( - 
(?:A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA) + (?:A3T[A-Z0-9]|AKIA|ASIA) [A-Z0-9]{16} ) \b pattern_requirements: - min_digits: 1 ignore_if_contains: - "EXAMPLE" - "TEST" - min_entropy: 3.2 + min_entropy: 3.0 visible: false confidence: medium examples: @@ -25,14 +24,14 @@ rules: pattern: | (?xi) (?: - \b - (?:AWS|AMAZON|AMZN|A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA) + (?:\b|_) + (?:AWS|AMAZON|AMZN|A3T[A-Z0-9]|AKIA|ASIA) (?:.|[\n\r]){0,64}? [^A-Za-z0-9_+!@\#$%^&*()\]./] ([A-Za-z0-9/+]{40}) [^A-Za-z0-9_+!@\#$%^&*()\]./] | - \b(?:AWS|AMAZON|AMZN|A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA) + (?:\b|_)(?:AWS|AMAZON|AMZN|A3T[A-Z0-9]|AKIA|ASIA) (?:.|[\n\r]){0,96}? (?:SECRET|PRIVATE|ACCESS) (?:.|[\n\r]){0,16}? diff --git a/crates/kingfisher-rules/data/rules/voyageai.yml b/crates/kingfisher-rules/data/rules/voyageai.yml index ce2a0a8..9f187c1 100644 --- a/crates/kingfisher-rules/data/rules/voyageai.yml +++ b/crates/kingfisher-rules/data/rules/voyageai.yml @@ -5,12 +5,13 @@ rules: # Matches keys starting with 'pa-' followed by 43 URL-safe base64 characters pattern: | (?x) + \b ( pa-[a-zA-Z0-9\-_]{43} ) \b min_entropy: 4.0 - confidence: high + confidence: medium examples: - pa-r4yuCYCuPhNO-10Lu9aO7dR4jxUWlLmlUjm_NOVVdSs validation: @@ -22,7 +23,56 @@ rules: headers: Authorization: "Bearer {{ TOKEN }}" response_matcher: + # 200 = key has /v1/files permission, 403 = valid key without that permission + # (e.g. an inference-only key). 401 with "Provided API key is invalid." is the + # only response Voyage AI returns for a bad key, so any non-401 status is live. 
- type: StatusMatch - status: [200] + status: [401] + negative: true + - type: WordMatch + words: + - "Provided API key is invalid" + negative: true references: - - https://docs.voyageai.com/reference \ No newline at end of file + - https://docs.voyageai.com/reference + - https://docs.voyageai.com/docs/api-key-and-installation + + - name: Voyage AI API Key + id: kingfisher.voyageai.api_key.2 + description: Detects Voyage AI API keys (al- prefix variant) used for embedding and retrieval models. + # Matches keys starting with 'al-' followed by 43 URL-safe base64 characters + pattern: | + (?x) + \b + ( + al-[a-zA-Z0-9\-_]{43} + ) + \b + min_entropy: 4.0 + confidence: medium + examples: + - al-Qf7M2bZ8xnLpvE4hRcDsJtAo1KyU93WgIBmXrNVoYTu + validation: + type: Http + content: + request: + method: GET + url: https://api.voyageai.com/v1/files + headers: + Authorization: "Bearer {{ TOKEN }}" + response_matcher: + # 200 = key has /v1/files permission, 403 = valid key without that permission. + # 401 with "Provided API key is invalid." is the only invalid-key response. + - type: StatusMatch + status: [401] + negative: true + - type: WordMatch + words: + - "Provided API key is invalid" + negative: true + references: + - https://docs.voyageai.com/reference + - https://docs.voyageai.com/docs/api-key-and-installation +# NOTE: Revocation is not implemented because Voyage AI does not document a public REST +# endpoint for programmatic API key revocation. All probed admin/key-management paths +# under api.voyageai.com return 404. Keys must be revoked via the Voyage AI dashboard. 
diff --git a/crates/kingfisher-rules/src/liquid_filters.rs b/crates/kingfisher-rules/src/liquid_filters.rs index ec1becd..c042061 100644 --- a/crates/kingfisher-rules/src/liquid_filters.rs +++ b/crates/kingfisher-rules/src/liquid_filters.rs @@ -2,7 +2,7 @@ use base64::{Engine, engine::general_purpose}; use crc32fast::Hasher; -use hmac::{Hmac, Mac}; +use hmac::{Hmac, KeyInit, Mac}; use liquid_core::{ Display_filter, Error as LiquidError, Expression, Filter, FilterParameters, FilterReflection, FromFilterParameters, ParseFilter, Result, Runtime, Value, ValueView, @@ -536,7 +536,7 @@ static_filter!( |input: &dyn ValueView| -> String { let mut h = Sha256::new(); h.update(input.to_kstr().as_bytes()); - format!("{:x}", h.finalize()) + hex::encode(h.finalize()) } ); @@ -1128,7 +1128,7 @@ pub fn register_all(builder: liquid::ParserBuilder) -> liquid::ParserBuilder { #[cfg(test)] mod tests { use base64::{Engine as _, engine::general_purpose}; - use hmac::{Hmac, Mac}; + use hmac::{Hmac, KeyInit, Mac}; use liquid::{ParserBuilder, object}; use percent_encoding::{NON_ALPHANUMERIC, utf8_percent_encode}; use regex::Regex; @@ -1162,7 +1162,7 @@ mod tests { #[test] fn sha256_filter() { - let expect = format!("{:x}", Sha256::digest(b"hello")); + let expect = hex::encode(Sha256::digest(b"hello")); assert_eq!(render(r#"{{ "hello" | sha256 }}"#), expect); } diff --git a/crates/kingfisher-scanner/Cargo.toml b/crates/kingfisher-scanner/Cargo.toml index b9133c6..745952c 100644 --- a/crates/kingfisher-scanner/Cargo.toml +++ b/crates/kingfisher-scanner/Cargo.toml @@ -24,6 +24,7 @@ validation-http = [ "dep:liquid-core", "dep:quick-xml", "dep:sha1", + "dep:hex", "dep:time", ] @@ -182,12 +183,15 @@ pem = { version = "3.0.6", optional = true } percent-encoding = { workspace = true, optional = true } ring = { version = "0.17", optional = true } -jsonwebtoken = { version = "10.3.0", features = ["aws-lc-rs"], optional = true } +jsonwebtoken = { version = "10.3.0", default-features = false, features 
= ["rust_crypto"], optional = true } p256 = { version = "0.13.2", optional = true } ed25519-dalek = { version = "2.2", features = ["pkcs8"], optional = true } hex = { workspace = true, optional = true } url = { version = "2.5.7", optional = true } -mongodb = { version = "3.4", default-features = false, features = ["rustls-tls", "aws-auth", "compat-3-0-0", "dns-resolver"], optional = true } +# Temporary Git pin: keeps MongoDB SRV validation enabled while using the upstream +# Hickory 0.26 DNS-resolver fix before it is available in a crates.io release. +# TODO: switch back to a crates.io mongodb release once it includes that fix. +mongodb = { git = "https://github.com/mongodb/mongo-rust-driver", rev = "bdddefc50c4794d51d10b944320d42c6eb216b04", default-features = false, features = ["rustls-tls", "aws-auth", "compat-3-0-0", "dns-resolver"], optional = true } mysql_async = { version = "0.36.2", default-features = false, features = ["default-rustls"], optional = true } tokio-postgres = { version = "0.7", default-features = false, features = ["runtime"], optional = true } tokio-postgres-rustls = { version = "0.13.0", optional = true } diff --git a/crates/kingfisher-scanner/src/validation/aws.rs b/crates/kingfisher-scanner/src/validation/aws.rs index f313a92..d211551 100644 --- a/crates/kingfisher-scanner/src/validation/aws.rs +++ b/crates/kingfisher-scanner/src/validation/aws.rs @@ -187,12 +187,12 @@ pub fn generate_aws_cache_key(aws_access_key_id: &str, aws_secret_access_key: &s hasher.update(aws_access_key_id.as_bytes()); hasher.update(b"\0"); hasher.update(aws_secret_access_key.as_bytes()); - format!("AWS:{:x}", hasher.finalize()) + format!("AWS:{}", hex::encode(hasher.finalize())) } /// Validate AWS credentials format before attempting validation. 
pub fn validate_aws_credentials_input(access_key_id: &str, secret_key: &str) -> Result<(), String> { - // Validate access key ID format (20 chars, known AWS prefixes including STS) + // Validate access key ID format (20 chars, usable AWS access-key prefixes including STS) if access_key_id.len() != 20 { return Err("Invalid AWS access key ID format".to_string()); } @@ -200,9 +200,9 @@ pub fn validate_aws_credentials_input(access_key_id: &str, secret_key: &str) -> return Err("AWS access key ID contains invalid characters".to_string()); } let prefix = &access_key_id[..4]; - let valid_prefix = - matches!(prefix, "AKIA" | "AGPA" | "AIDA" | "AROA" | "AIPA" | "ANPA" | "ANVA" | "ASIA") - || prefix.starts_with("A3T"); + // IAM principal IDs (for example AIDA/AROA) are deliberately rejected here: + // they are not usable access-key IDs for STS credential validation. + let valid_prefix = matches!(prefix, "AKIA" | "ASIA") || prefix.starts_with("A3T"); if !valid_prefix { return Err("Invalid AWS access key ID format".to_string()); } diff --git a/crates/kingfisher-scanner/src/validation/azure.rs b/crates/kingfisher-scanner/src/validation/azure.rs index 8b34270..1f0875f 100644 --- a/crates/kingfisher-scanner/src/validation/azure.rs +++ b/crates/kingfisher-scanner/src/validation/azure.rs @@ -3,7 +3,7 @@ use std::time::Duration; use anyhow::{Result, anyhow}; use base64::{Engine as _, engine::general_purpose::STANDARD as b64}; use chrono::Utc; -use hmac::{Hmac, Mac}; +use hmac::{Hmac, KeyInit, Mac}; use http::StatusCode; use quick_xml::{Reader, events::Event}; use reqwest::{Client, header::HeaderValue}; @@ -18,7 +18,7 @@ pub fn generate_azure_cache_key(azure_json: &str) -> String { use sha1::{Digest, Sha1}; let mut h = Sha1::new(); h.update(azure_json.as_bytes()); - format!("AZURE:{:x}", h.finalize()) + format!("AZURE:{}", hex::encode(h.finalize())) } /// Validate Azure Storage credentials without Azure SDK crates. 
diff --git a/crates/kingfisher-scanner/src/validation/coinbase.rs b/crates/kingfisher-scanner/src/validation/coinbase.rs index 6b6a945..3d6b7b1 100644 --- a/crates/kingfisher-scanner/src/validation/coinbase.rs +++ b/crates/kingfisher-scanner/src/validation/coinbase.rs @@ -23,7 +23,7 @@ pub fn generate_coinbase_cache_key(cred_name: &str, private_key: &str) -> String h.update(cred_name.as_bytes()); h.update(b"\0"); h.update(private_key.as_bytes()); - format!("COINBASE:{:x}", h.finalize()) + format!("COINBASE:{}", hex::encode(h.finalize())) } pub async fn validate_cdp_api_key( diff --git a/crates/kingfisher-scanner/src/validation/gcp.rs b/crates/kingfisher-scanner/src/validation/gcp.rs index 079fa50..8b67b21 100644 --- a/crates/kingfisher-scanner/src/validation/gcp.rs +++ b/crates/kingfisher-scanner/src/validation/gcp.rs @@ -142,7 +142,7 @@ pub fn generate_gcp_cache_key(gcp_json: &str) -> String { use sha1::{Digest, Sha1}; let mut hasher = Sha1::new(); hasher.update(gcp_json.as_bytes()); - format!("GCP:{:x}", hasher.finalize()) + format!("GCP:{}", hex::encode(hasher.finalize())) } impl GcpValidator { diff --git a/crates/kingfisher-scanner/src/validation/http_validation.rs b/crates/kingfisher-scanner/src/validation/http_validation.rs index 35a863e..fbf31c8 100644 --- a/crates/kingfisher-scanner/src/validation/http_validation.rs +++ b/crates/kingfisher-scanner/src/validation/http_validation.rs @@ -60,7 +60,7 @@ pub fn generate_http_cache_key_parts( hasher.update(b"\0"); } - format!("HTTP:{:x}", hasher.finalize()) + format!("HTTP:{}", hex::encode(hasher.finalize())) } /// Parse an HTTP method from a string. 
diff --git a/crates/kingfisher-scanner/src/validation/mongodb.rs b/crates/kingfisher-scanner/src/validation/mongodb.rs index 63bfe0a..46ab0bf 100644 --- a/crates/kingfisher-scanner/src/validation/mongodb.rs +++ b/crates/kingfisher-scanner/src/validation/mongodb.rs @@ -147,5 +147,5 @@ pub fn generate_mongodb_cache_key(mongodb_uri: &str) -> String { use sha1::{Digest, Sha1}; let mut hasher = Sha1::new(); hasher.update(mongodb_uri.as_bytes()); - format!("MongoDB:{:x}", hasher.finalize()) + format!("MongoDB:{}", hex::encode(hasher.finalize())) } diff --git a/crates/kingfisher-scanner/src/validation/mysql.rs b/crates/kingfisher-scanner/src/validation/mysql.rs index 7be1e1c..23c799c 100644 --- a/crates/kingfisher-scanner/src/validation/mysql.rs +++ b/crates/kingfisher-scanner/src/validation/mysql.rs @@ -56,7 +56,7 @@ pub fn generate_mysql_cache_key(mysql_url: &str) -> String { let mut hasher = Sha1::new(); hasher.update(mysql_url.as_bytes()); - format!("MySQL:{:x}", hasher.finalize()) + format!("MySQL:{}", hex::encode(hasher.finalize())) } fn is_local_host(host: &str) -> bool { diff --git a/crates/kingfisher-scanner/src/validation/postgres.rs b/crates/kingfisher-scanner/src/validation/postgres.rs index 2d204e5..acceb3e 100644 --- a/crates/kingfisher-scanner/src/validation/postgres.rs +++ b/crates/kingfisher-scanner/src/validation/postgres.rs @@ -70,7 +70,7 @@ impl ServerCertVerifier for LaxCertVerifier { pub fn generate_postgres_cache_key(postgres_url: &str) -> String { let mut hasher = Sha1::new(); hasher.update(postgres_url.as_bytes()); - format!("Postgres:{:x}", hasher.finalize()) + format!("Postgres:{}", hex::encode(hasher.finalize())) } pub fn parse_postgres_url(postgres_url: &str) -> Result { diff --git a/crates/kingfisher-scanner/src/validation/utils.rs b/crates/kingfisher-scanner/src/validation/utils.rs index 7756047..cbeae46 100644 --- a/crates/kingfisher-scanner/src/validation/utils.rs +++ b/crates/kingfisher-scanner/src/validation/utils.rs @@ -32,7 +32,7 @@ 
pub fn process_captures(captures: &SerializableCaptures) -> Vec<(String, String, /// that should be paired with a secret key. pub fn find_closest_variable( captures: &[(String, String, usize, usize)], - target_value: &String, + target_value: &str, target_variable_name: &str, search_variable_name: &str, ) -> Option { @@ -40,7 +40,7 @@ pub fn find_closest_variable( // compare relative offsets with candidate variables. let mut target_positions = Vec::new(); for (name, value, start, end) in captures { - if name == target_variable_name && value == target_value { + if name == target_variable_name && value.as_str() == target_value { target_positions.push((*start, *end)); } } @@ -138,8 +138,7 @@ mod tests { ("AKID".to_string(), "following".to_string(), 180usize, 200usize), ]; - let result = - find_closest_variable(&captures, &"secret".to_string(), "TOKEN", "AKID").unwrap(); + let result = find_closest_variable(&captures, "secret", "TOKEN", "AKID").unwrap(); assert_eq!(result, "preceding".to_string()); } @@ -151,8 +150,7 @@ mod tests { ("AKID".to_string(), "after".to_string(), 60usize, 80usize), ]; - let result = - find_closest_variable(&captures, &"secret".to_string(), "TOKEN", "AKID").unwrap(); + let result = find_closest_variable(&captures, "secret", "TOKEN", "AKID").unwrap(); assert_eq!(result, "after".to_string()); } diff --git a/docs-site/docs/changelog.md b/docs-site/docs/changelog.md index 05ab499..42d7283 100644 --- a/docs-site/docs/changelog.md +++ b/docs-site/docs/changelog.md @@ -7,6 +7,14 @@ description: "Kingfisher release history: new features, rules, bug fixes, and im All notable changes to this project will be documented in this file. +## [v1.100.0] +- Archive scanning now reaches inside Android/iOS app packages: added `apk`, `aab`, and `ipa` to the recognized ZIP-based archive formats so secrets embedded in APK/AAB/IPA contents (e.g. `classes*.dex`, `res/values/strings.xml`) are extracted and matched. 
+- Git repository scans now extract archive blobs encountered in the object database, not just on the filesystem. Previously a `.zip`/`.jar`/`.apk`/`.tar.gz` committed to a repo was scanned as raw compressed bytes, so secrets inside it were invisible. The git enumerator fans each archive entry out as a synthetic `!` blob with the original commit metadata. Honors `--no-extract-archives` for opt-out. +- Fixed tar-wrapped archive extraction for `.tgz` and `.tar.*` files, and made dependent credential validation deduplication preserve per-occurrence context so repeated secrets validate with the correct nearby companion value. +- Performance: ZIP-based git blobs ≤ 64 MB extract entirely in memory (no temp-file round trip), beating the v1.99.0 baseline by ~15% on a 80 GiB monorepo despite scanning ~300K additional archive-content blobs. Larger archives auto-fall-back to a disk-streaming extractor. +- Memory safety: hard caps on archive extraction — 64 MB compressed pre-flight, 256 MB aggregate decompressed per archive (in-memory and disk paths), 512 MB per entry, plus a `PK\x03\x04` magic-byte gate. Worst-case footprint is bounded at ~`num_jobs * 320 MB`. +- Release binary trimmed from 34 MB to 26 MB (~24% smaller). Switched `jsonwebtoken` to its `rust_crypto` backend (eliminates our scanner's pull on `aws-lc-rs`), bumped workspace `hmac` 0.12→0.13, `sha1` 0.10→0.11, `sha2` 0.10→0.11 to deduplicate our internal crypto code with the AWS sigv4 side, and migrated affected call sites in `kingfisher-core`, `kingfisher-rules`, and `kingfisher-scanner` to the digest-0.11 API (`hex::encode` for hex digests, explicit `KeyInit` import for HMAC). + ## [v1.99.0] - Fixed [#371](https://github.com/mongodb/kingfisher/issues/371): `pip install kingfisher-bin` on glibc Linux distros (Ubuntu, Debian, RHEL, Fedora, …) installed a macOS Mach-O binary and failed with `OSError: [Errno 8] Exec format error`. 
Linux wheels are now tagged `manylinux_2_17_.musllinux_1_2_` (instead of `musllinux_1_2_` only), so pip accepts them on both glibc-2.17+ and musl distros. The `pypi/hatch_build.py` hook now hard-fails when `KINGFISHER_PYPI_WHEEL_TAG` is unset, and the publish workflow refuses to upload any `py3-none-any.whl`, so the v1.92.0-era pure-Python wheel cannot recur. - `--self-update` (alias `--update`) on a scan or other command now **re-execs into the freshly installed binary** so the current invocation completes with the new code and the latest detection rules. Previously the on-disk binary was replaced but the running process kept using the old in-memory version, requiring a second invocation to pick up the changes. On Unix this is a true `exec()` (same PID); on Windows the new binary is spawned and the parent exits with its status code. The explicit `kingfisher self-update` subcommand still updates and exits without re-execing. Self-update now also covers Windows arm64 (the asset was already published; the runtime cfg map gained the missing arm). See `docs/ADVANCED.md` → *Update Checks*. 
diff --git a/fuzz/Cargo.lock b/fuzz/Cargo.lock index ba952d1..dd1b7b8 100644 --- a/fuzz/Cargo.lock +++ b/fuzz/Cargo.lock @@ -123,9 +123,9 @@ dependencies = [ [[package]] name = "arc-swap" -version = "1.8.2" +version = "1.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9f3647c145568cec02c42054e07bdf9a5a698e15b466fb2341bfc393cd24aa5" +checksum = "6a3a1fd6f75306b68087b831f025c712524bcb19aad54e557b1129cfa0a2b207" dependencies = [ "rustversion", ] @@ -172,6 +172,15 @@ dependencies = [ "generic-array", ] +[[package]] +name = "block-buffer" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdd35008169921d80bc60d3d0ab416eecb028c4cd653352907921d95084790be" +dependencies = [ + "hybrid-array", +] + [[package]] name = "bstr" version = "1.12.1" @@ -332,6 +341,12 @@ dependencies = [ "cc", ] +[[package]] +name = "cmov" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f88a43d011fc4a6876cb7344703e297c71dda42494fee094d5f7c76bf13f746" + [[package]] name = "colorchoice" version = "1.0.5" @@ -360,6 +375,12 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "const-oid" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6ef517f0926dd24a1582492c791b6a4818a4d94e789a334894aa15b0d12f55c" + [[package]] name = "core-foundation-sys" version = "0.8.7" @@ -447,6 +468,24 @@ dependencies = [ "typenum", ] +[[package]] +name = "crypto-common" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77727bb15fa921304124b128af125e7e3b968275d1b108b379190264f4423710" +dependencies = [ + "hybrid-array", +] + +[[package]] +name = "ctutils" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d5515a3834141de9eafb9717ad39eea8247b5674e6066c404e8c4b365d2a29e" +dependencies = [ + "cmov", +] + [[package]] name = "dashmap" version = 
"6.1.0" @@ -494,9 +533,20 @@ version = "0.10.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" dependencies = [ - "block-buffer", - "crypto-common", - "subtle", + "block-buffer 0.10.4", + "crypto-common 0.1.7", +] + +[[package]] +name = "digest" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1dd6dbb5841937940781866fa1281a1ff7bd3bf827091440879f9994983d5c2" +dependencies = [ + "block-buffer 0.12.0", + "const-oid", + "crypto-common 0.2.1", + "ctutils", ] [[package]] @@ -718,9 +768,9 @@ dependencies = [ [[package]] name = "gix" -version = "0.81.0" +version = "0.83.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0473c64d9ccbcfb9953a133b47c8b9a335b87ac6c52b983ee4b03d49000b0f3f" +checksum = "6ce52001b946a6249d5d0d3011df0a042ac3f8a4d013460db6476577b0b9c567" dependencies = [ "gix-actor", "gix-archive", @@ -777,22 +827,21 @@ dependencies = [ [[package]] name = "gix-actor" -version = "0.40.0" +version = "0.41.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e5e5b518339d5e6718af108fd064d4e9ba33caf728cf487352873d76411df35" +checksum = "272916673b83714734b15d4ef3c8b5f1ccddb15fea8ff548430b97c1ab7b7ed8" dependencies = [ "bstr", "gix-date", "gix-error", "serde", - "winnow", ] [[package]] name = "gix-archive" -version = "0.30.0" +version = "0.32.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "651c99be11aac9b303483193ae50b45eb6e094da4f5ed797019b03948f51aad6" +checksum = "9a20ec244b733338d4cb60e5e05eac700dab7fcc689647b1d1daa9396b119342" dependencies = [ "bstr", "gix-date", @@ -803,9 +852,9 @@ dependencies = [ [[package]] name = "gix-attributes" -version = "0.31.0" +version = "0.33.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c233d6eaa098c0ca5ce03236fd7a96e27f1abe72fad74b46003fbd11fe49563c" +checksum = 
"fe17c5a1c0b6f2ef1476aa1d3222ea50cdff67608016613a58bfc3e078046000" dependencies = [ "bstr", "gix-glob", @@ -821,18 +870,18 @@ dependencies = [ [[package]] name = "gix-bitmap" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7add20f40d060db8c9b1314d499bac6ed7480f33eb113ce3e1cf5d6ff85d989" +checksum = "1ecbfc77ec6852294e341ecc305a490b59f2813e6ca42d79efda5099dcab1894" dependencies = [ "gix-error", ] [[package]] name = "gix-blame" -version = "0.11.0" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c77aaf9f7348f4da3ebfbfbbc35fa0d07155d98377856198dde6f695fd648705" +checksum = "14dab9a942ab54a9661ded7397c3bf927274e7afa94494db0d75cfcbde02ca0a" dependencies = [ "gix-commitgraph", "gix-date", @@ -850,18 +899,18 @@ dependencies = [ [[package]] name = "gix-chunk" -version = "0.7.0" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1096b6608fbe5d27fb4984e20f992b4e76fb8c613f6acb87d07c5831b53a6959" +checksum = "edf288be9b60fe7231de03771faa292be1493d84786f68727e33ad1f91764320" dependencies = [ "gix-error", ] [[package]] name = "gix-command" -version = "0.8.0" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b849c65a609f50d02f8a2774fe371650b3384a743c79c2a070ce0da49b7fb7da" +checksum = "86335306511abe43d75c866d4b1f3d90932fe202edcd43e1314036333e7384d8" dependencies = [ "bstr", "gix-path", @@ -872,9 +921,9 @@ dependencies = [ [[package]] name = "gix-commitgraph" -version = "0.35.0" +version = "0.37.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3196655fd1443f3c58a48c114aa480be3e4e87b393d7292daaa0d543862eb445" +checksum = "fe3b5aa0f24e19028c261d229aeeedafcaaa52ebd71021cc15184620fc9d32eb" dependencies = [ "bstr", "gix-chunk", @@ -887,9 +936,9 @@ dependencies = [ [[package]] name = "gix-config" -version = "0.54.0" +version = "0.56.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "08939b4c4ed7a663d0e64be9e1e9bdf23a1fb4fcee1febdf449f12229542e50d" +checksum = "8c01848aebd21c67f6ba41f1de8efd46ae96df21f001954a3c9e1517e514d410" dependencies = [ "bstr", "gix-config-value", @@ -898,18 +947,16 @@ dependencies = [ "gix-path", "gix-ref", "gix-sec", - "memchr", "smallvec", "thiserror 2.0.18", "unicode-bom", - "winnow", ] [[package]] name = "gix-config-value" -version = "0.17.1" +version = "0.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "441a300bc3645a1f45cba495b9175f90f47256ce43f2ee161da0031e3ac77c92" +checksum = "13b39ed39ee4c10a3b157f9fb94bac8098d9f8e56201f0cf7dee6c187416c4b2" dependencies = [ "bitflags", "bstr", @@ -920,9 +967,9 @@ dependencies = [ [[package]] name = "gix-credentials" -version = "0.37.1" +version = "0.38.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38b2a34b8715e3bbd514f3d1705f5d51c4b250e5bfe506b9fb60b133c85c93d9" +checksum = "65ca11598b70811d7b16ff90945a6e57dfe521e85b744e51636965fe39cc8f60" dependencies = [ "bstr", "gix-command", @@ -939,9 +986,9 @@ dependencies = [ [[package]] name = "gix-date" -version = "0.15.1" +version = "0.15.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39acf819aa9fee65e4838a2eec5cb2506e47ebb89e02a5ab9918196e491571ea" +checksum = "b94cdae4eb4b0f4136e3d9b3aa2d2cd03cfb5bb9b636b31263aea2df86d41543" dependencies = [ "bstr", "gix-error", @@ -953,31 +1000,30 @@ dependencies = [ [[package]] name = "gix-diff" -version = "0.61.0" +version = "0.63.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88f3b3475e5d3877d7c30c40827cc2441936ce890efc226e5ba4afe3a7ae33f0" +checksum = "dc08e0fa1a91ff5f24affeab052f198056645e1de004910bde7b82b50ea5982a" dependencies = [ "bstr", "gix-command", "gix-filter", "gix-fs", "gix-hash", + "gix-imara-diff", "gix-object", "gix-path", "gix-tempfile", "gix-trace", "gix-traverse", 
"gix-worktree", - "imara-diff 0.1.8", - "imara-diff 0.2.0", "thiserror 2.0.18", ] [[package]] name = "gix-dir" -version = "0.23.0" +version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5da4604a360988f0ba8efe6f90093ca5a844f4a7f8e1a3dcda501ec44e600ea9" +checksum = "32a0fc06e9e1e430cbf0a313666976d90f822f461a6525320427aa9b8af5236c" dependencies = [ "bstr", "gix-discover", @@ -995,9 +1041,9 @@ dependencies = [ [[package]] name = "gix-discover" -version = "0.49.0" +version = "0.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c65bd3330fe0cb9d40d875bf862fd5e8ad6fa4164ddbc4842fbeb889c3f0b2c6" +checksum = "17852e6a501e688a1702b24ebe5b3761d4719455bc869fd29f38b0b859bcad34" dependencies = [ "bstr", "dunce", @@ -1010,18 +1056,18 @@ dependencies = [ [[package]] name = "gix-error" -version = "0.2.1" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e86d01da904d4a9265def43bd42a18c5e6dc7000a73af512946ba14579c9fbd" +checksum = "e207b971746ab724fccdfced2e4e19e854744611904a0195d3aa8fda8a110613" dependencies = [ "bstr", ] [[package]] name = "gix-features" -version = "0.46.2" +version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "752493cd4b1d5eaaa0138a7493f65c96863fefa990fc021e0e519579e389ab20" +checksum = "af375693ad5333d0a2c66b4c5b2cbe9ccc38e34f8e8bf24e4ae42c12307fdc4f" dependencies = [ "bytes", "crc32fast", @@ -1038,9 +1084,9 @@ dependencies = [ [[package]] name = "gix-filter" -version = "0.28.0" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d37598282a6566da6fb52667570c7fe0aedcb122ac886724a9e62a2180523e35" +checksum = "dac917dbe9653c9b615d248db91907a365bd779750c9e1b457a9d9fdeece3a08" dependencies = [ "bstr", "encoding_rs", @@ -1059,9 +1105,9 @@ dependencies = [ [[package]] name = "gix-fs" -version = "0.19.2" +version = "0.21.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "a964b4aec683eb0bacb87533defa80805bb4768056371a47ab38b00a2d377b72" +checksum = "1e1967daac9848757c47c2aef0c57bcadc1a897347f559778249bf286a536c86" dependencies = [ "bstr", "fastrand", @@ -1073,9 +1119,9 @@ dependencies = [ [[package]] name = "gix-glob" -version = "0.24.0" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b03e6cd88cc0dc1eafa1fddac0fb719e4e74b6ea58dd016e71125fde4a326bee" +checksum = "08bf29249a069bf2507f5964f80997f37b134d320ea348d66527726b9be2c38c" dependencies = [ "bitflags", "bstr", @@ -1086,9 +1132,9 @@ dependencies = [ [[package]] name = "gix-hash" -version = "0.23.0" +version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fb896a02d9ab96fa518475a5f30ad3952010f801a8de5840f633f4a6b985dfb" +checksum = "bcf70d1e252337eed16360f8b8ebb71865ece58eab7954b39ce38b420de703d2" dependencies = [ "faster-hex", "gix-features", @@ -1099,9 +1145,9 @@ dependencies = [ [[package]] name = "gix-hashtable" -version = "0.13.0" +version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2664216fc5e89b51e756a4a3ac676315602ce2dac07acf1da959a22038d69b33" +checksum = "d33b455e07b3c16d3b2eeebc7b38d2dafcbf8a653de1138ef55d4c2a1fd0b08b" dependencies = [ "gix-hash", "hashbrown 0.16.1", @@ -1110,9 +1156,9 @@ dependencies = [ [[package]] name = "gix-ignore" -version = "0.19.1" +version = "0.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09f915dcf6911e3027537166d34e13f0fe101ed12225178d2ae29cd1272cff26" +checksum = "6bb13fbbeeafee943e52b61fcc88dfddf6a452fcaf0c4d0cdc8f218fa25bbec5" dependencies = [ "bstr", "gix-glob", @@ -1123,10 +1169,20 @@ dependencies = [ ] [[package]] -name = "gix-index" -version = "0.49.0" +name = "gix-imara-diff" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"1bae54ab14e4e74d5dda60b82ea7afad7c8eb3be68283d6d5f29bd2e6d47fff7" +checksum = "39eb0623e15e4cb83c02ce6a959e48fadd1ae3b715b36b5acc01816e01388c82" +dependencies = [ + "bstr", + "hashbrown 0.16.1", +] + +[[package]] +name = "gix-index" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54c3ef97ad08121e4327a6226bd63fed6b9e3c6b976d48bddd4356d9d41191db" dependencies = [ "bitflags", "bstr", @@ -1153,9 +1209,9 @@ dependencies = [ [[package]] name = "gix-lock" -version = "21.0.1" +version = "23.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cbe09cf05ba7c679bba189acc29eeea137f643e7fff1b5dff879dfd45248be31" +checksum = "09b3bc074e5723027b482dcd9ab99d95804a53742f6de812d0172fbba4a186c1" dependencies = [ "gix-tempfile", "gix-utils", @@ -1164,9 +1220,9 @@ dependencies = [ [[package]] name = "gix-mailmap" -version = "0.32.0" +version = "0.33.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7b4818da522786ec7e32a00884ee8fc40fa4c215c3997c0b15f7b62684d1199" +checksum = "023d3a6561cbebe45b89e0764d48928ad970667076f16fa5889e6f86d8432086" dependencies = [ "bstr", "gix-actor", @@ -1177,9 +1233,9 @@ dependencies = [ [[package]] name = "gix-merge" -version = "0.14.0" +version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4606747466512d22c2dffc019142e1941238f543987ea51353c938cca80c500" +checksum = "74bbcdcc52b70a32f0a151b024dff9d0fcf56ee48f00d9503e735af9d99ea881" dependencies = [ "bstr", "gix-command", @@ -1187,6 +1243,7 @@ dependencies = [ "gix-filter", "gix-fs", "gix-hash", + "gix-imara-diff", "gix-index", "gix-object", "gix-path", @@ -1196,16 +1253,15 @@ dependencies = [ "gix-tempfile", "gix-trace", "gix-worktree", - "imara-diff 0.1.8", "nonempty", "thiserror 2.0.18", ] [[package]] name = "gix-negotiate" -version = "0.29.0" +version = "0.31.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"6ea064c7595eea08fdd01c70748af747d9acc40f727b61f4c8a2145a5c5fc28c" +checksum = "103d42bfade1b8a96ca5005933127bdad461ce588d92422b2c2daa3ff20d780c" dependencies = [ "bitflags", "gix-commitgraph", @@ -1217,9 +1273,9 @@ dependencies = [ [[package]] name = "gix-object" -version = "0.58.0" +version = "0.60.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cafb802bb688a7c1e69ef965612ff5ff859f046bfb616377e4a0ba4c01e43d47" +checksum = "a38075a95d7cc5df8afd38e72c617026c1456952207a4120a7f55a3fbf93b4d7" dependencies = [ "bstr", "gix-actor", @@ -1227,21 +1283,19 @@ dependencies = [ "gix-features", "gix-hash", "gix-hashtable", - "gix-path", "gix-utils", "gix-validate", "itoa", "serde", "smallvec", "thiserror 2.0.18", - "winnow", ] [[package]] name = "gix-odb" -version = "0.78.0" +version = "0.80.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24833ae9323b4f7079575fb9f961cf9c414b0afbec428a536ab8e7dd93bc002b" +checksum = "aeeda12a9663120418735ecdc1250d06eeab0be75700e47b3402a981331716ba" dependencies = [ "arc-swap", "gix-features", @@ -1252,6 +1306,7 @@ dependencies = [ "gix-pack", "gix-path", "gix-quote", + "memmap2", "parking_lot", "serde", "tempfile", @@ -1260,9 +1315,9 @@ dependencies = [ [[package]] name = "gix-pack" -version = "0.68.0" +version = "0.70.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3484119cd19859d7d7639413c27e192478fa354d3f4ff5f7e3c041e8040f0f4" +checksum = "daf02e6f5c8f07a069c9ea5245f40d9b14856ada4086091dc99941b49002b4fa" dependencies = [ "clru", "gix-chunk", @@ -1280,9 +1335,9 @@ dependencies = [ [[package]] name = "gix-packetline" -version = "0.21.2" +version = "0.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be19313dcdb7dff75a3ce2f99be00878458295bcc3b6c7f0005591597573345c" +checksum = "362246df440ee691699f0664cbf7006a6ece477db6734222be95e4198e5656e6" dependencies = [ "bstr", "faster-hex", @@ -1292,9 +1347,9 @@ 
dependencies = [ [[package]] name = "gix-path" -version = "0.11.2" +version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09c31d4373bda7fab9eb01822927b55185a378d6e1bf737e0a54c743ad806658" +checksum = "671a6059e8a4c1b7f406e24716499cefa3926e060876fb1959ef225efeee346e" dependencies = [ "bstr", "gix-trace", @@ -1304,9 +1359,9 @@ dependencies = [ [[package]] name = "gix-pathspec" -version = "0.16.1" +version = "0.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f89611f13544ca5ebeb68a502673814ef57200df60c24a61c2ce7b96f612f08b" +checksum = "2a84a4f083dd70fb49f4377e13afa6d90df2daaa1c705c49d6ff1331fc7e8855" dependencies = [ "bitflags", "bstr", @@ -1319,9 +1374,9 @@ dependencies = [ [[package]] name = "gix-prompt" -version = "0.14.1" +version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f61f6264e1f6c5a951531fe127722c7522bc02ebda80c4528286bda4642055f" +checksum = "e041a626c64cb69e4117fcdf80da8d0e454fba3b1f420412792d191f52251aee" dependencies = [ "gix-command", "gix-config-value", @@ -1332,9 +1387,9 @@ dependencies = [ [[package]] name = "gix-protocol" -version = "0.59.0" +version = "0.61.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f38666350736b5877c79f57ddae02bde07a4ce186d889adc391e831cddcbe76" +checksum = "aa4bee82db63ec635996b96efae71cf467c155fa3f34a556184373224a26c4fd" dependencies = [ "bstr", "gix-date", @@ -1348,14 +1403,13 @@ dependencies = [ "nonempty", "serde", "thiserror 2.0.18", - "winnow", ] [[package]] name = "gix-quote" -version = "0.7.0" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68533db71259c8776dd4e770d2b7b98696213ecdc1f5c9e3507119e274e0c578" +checksum = "6e97b73791a64bc0fa7dd2c5b3e551136115f97750b876ed1c952c7a7dbaf8be" dependencies = [ "bstr", "gix-error", @@ -1364,9 +1418,9 @@ dependencies = [ [[package]] name = "gix-ref" -version = "0.61.0" 
+version = "0.63.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2159978abb99b7027c8579d15211e262ef0ef2594d5cecb3334fbcbdfe2997c" +checksum = "d8ba9cc15f558b274c99349b83130f5ec83459660828fde9718bbbb43a726167" dependencies = [ "gix-actor", "gix-features", @@ -1381,14 +1435,13 @@ dependencies = [ "memmap2", "serde", "thiserror 2.0.18", - "winnow", ] [[package]] name = "gix-refspec" -version = "0.39.0" +version = "0.41.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc806ee13f437428f8a1ba4c72ecfaa3f20e14f5f0d4c2bc17d0b33e794aa6ac" +checksum = "61755b27d57edc8940a1b1593c8c61548ca8e4c02da1ed8d5bfeda9eb2a6b761" dependencies = [ "bstr", "gix-error", @@ -1402,9 +1455,9 @@ dependencies = [ [[package]] name = "gix-revision" -version = "0.43.0" +version = "0.45.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c08f1ec5d1e6a524f8ba291c41f0ccaef64e48ed0e8cf790b3461cae45f6d3d" +checksum = "1fb5288fac706d3ea3e4e2ba9ec38b78743b8c02f422e18cb342299cfd6ab7e8" dependencies = [ "bitflags", "bstr", @@ -1421,9 +1474,9 @@ dependencies = [ [[package]] name = "gix-revwalk" -version = "0.29.0" +version = "0.31.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e4b2b87772b21ca449249e86d32febadba5cba32b0fcce804ab9cefc6f2111c" +checksum = "313813706b073a12ff7f9b2896bf3e6504cdac7cfbc97b1920114724705069f0" dependencies = [ "gix-commitgraph", "gix-date", @@ -1437,9 +1490,9 @@ dependencies = [ [[package]] name = "gix-sec" -version = "0.13.2" +version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf82ae037de9c62850ce67beaa92ec8e3e17785ea307cdde7618edc215603b4f" +checksum = "f5a3a2d3e504a238136751e646a6c028252286a0ea64ea9974bf0498633407c6" dependencies = [ "bitflags", "gix-path", @@ -1450,9 +1503,9 @@ dependencies = [ [[package]] name = "gix-shallow" -version = "0.10.0" +version = "0.12.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "cbf60711c9083b2364b3fac8a352444af76b17201f3682fdebe74fa66d89a772" +checksum = "29187305521bfacf4aefd284ab28dbfa9fb74abd39a5e63dd313b1baa5808c27" dependencies = [ "bstr", "gix-hash", @@ -1464,9 +1517,9 @@ dependencies = [ [[package]] name = "gix-status" -version = "0.28.0" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23d6c598e3fdbc352fba1c5ba7e709e69402fafbc44d9295edad2e3c4738996b" +checksum = "68c6d2a8c521ffa205fe7e268c82e6d1378ba37cd826ca10ab6129fdc29a4b65" dependencies = [ "bstr", "filetime", @@ -1487,9 +1540,9 @@ dependencies = [ [[package]] name = "gix-submodule" -version = "0.28.0" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ce5c3929c5e6821f651d35e8420f72fea3cfafe9fc1e928a61e718b462c72a5" +checksum = "9fd5fc8692890bd71a596e540fd4c364f8460eaa82c4eaaedebde6e1e3eb4d91" dependencies = [ "bstr", "gix-config", @@ -1502,9 +1555,9 @@ dependencies = [ [[package]] name = "gix-tempfile" -version = "21.0.1" +version = "23.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d9ab2c89fe4bfd4f1d8700aa4516534c170d8a21ae2c554167374607c2eaf16" +checksum = "691ea1e31435c7e7d4d04705ec9d1c0d9482c46b2acf512bc723939d8f0af7fb" dependencies = [ "dashmap", "gix-fs", @@ -1515,15 +1568,15 @@ dependencies = [ [[package]] name = "gix-trace" -version = "0.1.18" +version = "0.1.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f69a13643b8437d4ca6845e08143e847a36ca82903eed13303475d0ae8b162e0" +checksum = "6f23569e55f2ffaf958617353b9734a7d52a7c19c439eeaa5e3efc217fd2270e" [[package]] name = "gix-transport" -version = "0.55.1" +version = "0.57.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a521e39c6235ce63ed6c001e2dd79818c830b82c3b7b59247ee7b229c39ec9bb" +checksum = 
"ffd6a5c676b92d4ead5f5a2b2935024415dec69edc997b6090ca9cac010a3018" dependencies = [ "bstr", "gix-command", @@ -1538,9 +1591,9 @@ dependencies = [ [[package]] name = "gix-traverse" -version = "0.55.0" +version = "0.57.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "963dc2afcdb611092aa587c3f9365e749ac0a0892ff27662dbc75f26c953fbec" +checksum = "a14b7052c0786676c03e71fcfde7d7f0f8e8316e642b5cec6bb3998719b2ce5c" dependencies = [ "bitflags", "gix-commitgraph", @@ -1555,9 +1608,9 @@ dependencies = [ [[package]] name = "gix-url" -version = "0.35.2" +version = "0.36.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d28e8af3d42581190da884f013caf254d2fd4d6ab102408f08d21bfa11de6c8d" +checksum = "35842d099e813f6f6bba529e88d4670572149c3df79b7a412952259887721ece" dependencies = [ "bstr", "gix-path", @@ -1568,9 +1621,9 @@ dependencies = [ [[package]] name = "gix-utils" -version = "0.3.1" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "befcdbdfb1238d2854591f760a48711bed85e72d80a10e8f2f93f656746ef7c5" +checksum = "4e477b4f07a6e8da4ba791c53c858102959703c60d70f199932010d5b94adb2c" dependencies = [ "bstr", "fastrand", @@ -1579,18 +1632,18 @@ dependencies = [ [[package]] name = "gix-validate" -version = "0.11.0" +version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ec1eff98d91941f47766367cba1be746bab662bad761d9891ae6f7882f7840b" +checksum = "e26ac2602b43eadfdca0560b81d3341944162a3c9f64ccdeef8fc501ad80dad5" dependencies = [ "bstr", ] [[package]] name = "gix-worktree" -version = "0.50.0" +version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6bd5830cbc43c9c00918b826467d2afad685b195cb82329cde2b2d116d2c578" +checksum = "d69955eb5e2910832f88d041964b809eee01dadd579237e0b55efec58fd406fd" dependencies = [ "bstr", "gix-attributes", @@ -1607,9 +1660,9 @@ dependencies = [ [[package]] name = 
"gix-worktree-state" -version = "0.28.0" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "644a1681f96e1be43c2a8384337d9d220e7624f50db54beda70997052aebf707" +checksum = "8a96dccbcf9e8fe0291c55f06e08da93ebb2e691c1311276f541eefcc6d70800" dependencies = [ "bstr", "gix-features", @@ -1625,9 +1678,9 @@ dependencies = [ [[package]] name = "gix-worktree-stream" -version = "0.30.0" +version = "0.32.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24e3fb70a1f650a5cec7d5b8d10d6d6fe86daf3cf15bde08ba0c70988a2932c3" +checksum = "9a8444b8ed4662e1a0c97f3eceda29630001a1bbb2632201e50312623e594213" dependencies = [ "gix-attributes", "gix-error", @@ -1748,11 +1801,11 @@ checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" [[package]] name = "hmac" -version = "0.12.1" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" +checksum = "6303bc9732ae41b04cb554b844a762b4115a61bfaa81e3e83050991eeb56863f" dependencies = [ - "digest", + "digest 0.11.3", ] [[package]] @@ -1783,6 +1836,15 @@ dependencies = [ "libm", ] +[[package]] +name = "hybrid-array" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9155a582abd142abc056962c29e3ce5ff2ad5469f4246b537ed42c5deba857da" +dependencies = [ + "typenum", +] + [[package]] name = "iana-time-zone" version = "0.1.65" @@ -1829,25 +1891,6 @@ dependencies = [ "winapi-util", ] -[[package]] -name = "imara-diff" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17d34b7d42178945f775e84bc4c36dde7c1c6cdfea656d3354d009056f2bb3d2" -dependencies = [ - "hashbrown 0.15.5", -] - -[[package]] -name = "imara-diff" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"2f01d462f766df78ab820dd06f5eb700233c51f0f4c2e846520eaf4ba6aa5c5c" -dependencies = [ - "hashbrown 0.15.5", - "memchr", -] - [[package]] name = "include_dir" version = "0.7.4" @@ -2008,7 +2051,7 @@ dependencies = [ "schemars", "serde", "serde_json", - "sha1", + "sha1 0.11.0", "smallvec", "thiserror 2.0.18", "tokei", @@ -2032,6 +2075,7 @@ dependencies = [ "base32", "base64", "crc32fast", + "hex", "hmac", "ignore", "include_dir", @@ -2045,8 +2089,8 @@ dependencies = [ "serde", "serde_json", "serde_yaml", - "sha1", - "sha2", + "sha1 0.11.0", + "sha2 0.11.0", "thiserror 2.0.18", "time", "tracing", @@ -2362,7 +2406,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "89815c69d36021a140146f26659a81d6c2afa33d216d736dd4be5381a7362220" dependencies = [ "pest", - "sha2", + "sha2 0.10.9", ] [[package]] @@ -2768,7 +2812,18 @@ checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" dependencies = [ "cfg-if", "cpufeatures 0.2.17", - "digest", + "digest 0.10.7", +] + +[[package]] +name = "sha1" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aacc4cc499359472b4abe1bf11d0b12e688af9a805fa5e3016f9a386dc2d0214" +dependencies = [ + "cfg-if", + "cpufeatures 0.3.0", + "digest 0.11.3", ] [[package]] @@ -2777,8 +2832,8 @@ version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "89f599ac0c323ebb1c6082821a54962b839832b03984598375bff3975b804423" dependencies = [ - "digest", - "sha1", + "digest 0.10.7", + "sha1 0.10.6", ] [[package]] @@ -2789,7 +2844,18 @@ checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" dependencies = [ "cfg-if", "cpufeatures 0.2.17", - "digest", + "digest 0.10.7", +] + +[[package]] +name = "sha2" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "446ba717509524cb3f22f17ecc096f10f4822d76ab5c0b9822c5f9c284e825f4" +dependencies = [ + "cfg-if", + "cpufeatures 
0.3.0", + "digest 0.11.3", ] [[package]] @@ -2847,12 +2913,6 @@ version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" -[[package]] -name = "subtle" -version = "2.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" - [[package]] name = "syn" version = "2.0.117" @@ -3132,9 +3192,9 @@ dependencies = [ [[package]] name = "typenum" -version = "1.19.0" +version = "1.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" +checksum = "40ce102ab67701b8526c123c1bab5cbe42d7040ccfd0f64af1a385808d2f43de" [[package]] name = "ucd-trie" diff --git a/src/decompress.rs b/src/decompress.rs index e0e9bea..d49e793 100644 --- a/src/decompress.rs +++ b/src/decompress.rs @@ -18,23 +18,23 @@ use zip::ZipArchive; /// Formats that are basically a ZIP container. pub const ZIP_BASED_FORMATS: &[&str] = &[ - "zip", "zipx", "jar", "war", "ear", "aar", "jmod", "jhm", "jnlp", "nupkg", "vsix", "xap", - "docx", "xlsx", "pptx", "odt", "ods", "odp", "odg", "odf", "epub", "gadget", "kmz", "widget", - "xpi", "sketch", "pages", "key", "numbers", "hwpx", + "zip", "zipx", "jar", "war", "ear", "aar", "apk", "aab", "ipa", "jmod", "jhm", "jnlp", "nupkg", + "vsix", "xap", "docx", "xlsx", "pptx", "odt", "ods", "odp", "odg", "odf", "epub", "gadget", + "kmz", "widget", "xpi", "sketch", "pages", "key", "numbers", "hwpx", ]; -/// Break `..` into `(Some(outer), Some(inner))`. -/// For `foo.tar.gz` this returns `("tar", "gz")`. 
-fn split_extensions(path: &Path) -> (Option, Option) { - let ext_inner = path.extension().and_then(|e| e.to_str()).map(|s| s.to_ascii_lowercase()); +fn is_tar_wrapped_compression(path: &Path) -> bool { + let filename = match path.file_name().and_then(|s| s.to_str()) { + Some(name) => name.to_ascii_lowercase(), + None => return false, + }; - let ext_outer = path - .file_stem() - .and_then(|s| Path::new(s).extension()) - .and_then(|e| e.to_str()) - .map(|s| s.to_ascii_lowercase()); - - (ext_outer, ext_inner) + filename.ends_with(".tgz") + || filename.ends_with(".tar.gz") + || filename.ends_with(".tar.gzip") + || filename.ends_with(".tar.bz2") + || filename.ends_with(".tar.bzip2") + || filename.ends_with(".tar.xz") } #[derive(Debug)] @@ -50,17 +50,17 @@ pub enum CompressedContent { } pub fn is_safe_extract_path(path: &Path) -> bool { - for (idx, comp) in path.components().enumerate() { + if path.is_absolute() { + return false; + } + + for comp in path.components() { match comp { // Never allow parent-directory escapes Component::ParentDir => return false, - // Leading "C:\" (Windows) or "/" (Unix) is fine; - // a prefix later in the path would be suspicious. - Component::Prefix(_) | Component::RootDir if idx == 0 => continue, - - // A prefix *inside* the path (e.g. "foo/C:\evil") is unsafe - Component::Prefix(_) => return false, + // Archive entry names must always be relative to the extraction root. 
+ Component::Prefix(_) | Component::RootDir => return false, _ => {} } @@ -68,6 +68,17 @@ pub fn is_safe_extract_path(path: &Path) -> bool { true } +fn has_parent_or_embedded_prefix(path: &Path) -> bool { + for (idx, comp) in path.components().enumerate() { + match comp { + Component::ParentDir => return true, + Component::Prefix(_) if idx > 0 => return true, + _ => {} + } + } + false +} + fn is_zip_format(ext: &str) -> bool { ZIP_BASED_FORMATS.iter().any(|z| z == &ext) } @@ -87,6 +98,10 @@ fn handle_tar_archive_streaming( let mut entry = entry?; if entry.header().entry_type().is_file() { let path_in_tar = entry.path()?.to_string_lossy().to_string(); + if !is_safe_extract_path(Path::new(&path_in_tar)) { + tracing::warn!("unsafe tar path: {path_in_tar}"); + continue; + } let logical_path = format!("{}!{}", archive_path.display(), path_in_tar); let out_path = base_dir.join(&path_in_tar); @@ -96,10 +111,6 @@ fn handle_tar_archive_streaming( continue; } } - if !is_safe_extract_path(&out_path) { - tracing::warn!("unsafe tar path: {}", out_path.display()); - continue; - } match fs::File::create(&out_path) { Ok(mut out_file) => { if let Err(e) = std::io::copy(&mut entry, &mut out_file) { @@ -118,6 +129,119 @@ fn handle_tar_archive_streaming( Ok(CompressedContent::ArchiveFiles(entries_on_disk)) } +/// Extract every file entry in a ZIP-based archive directly from a byte +/// slice, without touching the filesystem. Intended for the git-blob +/// scan path where blobs already sit in memory and writing them out to a +/// temp file just to read them back imposes substantial overhead in +/// monorepos with many committed `.jar`/`.zip`/`.apk` artifacts. +/// +/// `archive_label` is used to construct logical entry paths of the form +/// `!`, matching the convention used by the +/// streaming-to-disk path. +/// +/// The same per-entry decompressed-size cap as the streaming-to-disk +/// extractor is enforced so that ZIP bombs cannot allocate unbounded +/// memory. 
+/// Maximum compressed archive size that the in-memory ZIP extractor will +/// accept. Larger archives fall back to the disk-streaming path so that we +/// never hold both the archive bytes AND every decompressed entry in RAM +/// simultaneously. The threshold is intentionally generous — most committed +/// `.jar`/`.zip`/`.apk` artifacts in real repos are well under 64 MB. +pub const MAX_INMEM_ZIP_ARCHIVE_BYTES: usize = 64 * 1024 * 1024; + +/// Aggregate cap on total decompressed bytes the in-memory ZIP extractor +/// will accumulate per archive. Bounds the worst-case footprint of one +/// rayon worker processing one archive: with `num_jobs` workers running +/// in parallel, peak resident memory is bounded by `num_jobs * this`. +/// Independent of the per-entry cap, so a single bomb-style entry can't +/// drain it all but neither can N medium-sized entries. +pub const MAX_INMEM_ZIP_DECOMPRESSED_BYTES: u64 = 256 * 1024 * 1024; + +pub fn extract_zip_archive_in_memory( + data: &[u8], + archive_label: &str, +) -> Result)>> { + if data.len() > MAX_INMEM_ZIP_ARCHIVE_BYTES { + anyhow::bail!( + "zip archive {archive_label} is {} bytes, exceeding {} byte in-memory cap", + data.len(), + MAX_INMEM_ZIP_ARCHIVE_BYTES + ); + } + + // Per-entry cap on decompressed bytes: bounds memory cost of zip bombs. + // Mirrors the disk-streaming variant's cap. + // nosemgrep: this is the defensive cap — do not flag for missing-limit rules. 
+ const MAX_ZIP_ENTRY_DECOMPRESSED_BYTES: u64 = 512 * 1024 * 1024; + + let cursor = std::io::Cursor::new(data); + let mut zip = ZipArchive::new(cursor)?; + let mut entries = Vec::with_capacity(zip.len()); + let mut total_decompressed: u64 = 0; + + for i in 0..zip.len() { + if total_decompressed >= MAX_INMEM_ZIP_DECOMPRESSED_BYTES { + tracing::warn!( + "in-memory zip {archive_label} exceeded {MAX_INMEM_ZIP_DECOMPRESSED_BYTES} byte aggregate cap at entry {i}/{}; truncating", + zip.len() + ); + break; + } + + let mut zipped_file = match zip.by_index(i) { + Ok(f) => f, + Err(e) => { + tracing::debug!("zip entry {i} read failed: {e}"); + continue; + } + }; + if !zipped_file.is_file() { + continue; + } + let name_in_zip = zipped_file.name().to_string(); + // Defense in depth: refuse traversal-style names. The in-memory + // path never writes to disk, but downstream code may construct + // file URLs from these strings. + if !is_safe_extract_path(Path::new(&name_in_zip)) { + tracing::warn!("unsafe zip entry name in {archive_label}: {name_in_zip}"); + continue; + } + + // The remaining-budget cap on this read serves two purposes: + // (1) honor the aggregate budget exactly even if one entry would + // individually push us over it, and (2) keep the existing + // per-entry zip-bomb cap of 512 MB as a hard upper bound. 
+ let remaining = MAX_INMEM_ZIP_DECOMPRESSED_BYTES.saturating_sub(total_decompressed); + let entry_cap = remaining.min(MAX_ZIP_ENTRY_DECOMPRESSED_BYTES); + + let mut buf = Vec::new(); + let mut limited = (&mut zipped_file).take(entry_cap); + if let Err(e) = limited.read_to_end(&mut buf) { + tracing::debug!( + "failed to decompress zip entry {name_in_zip} from {archive_label}: {e}" + ); + continue; + } + if buf.len() as u64 == entry_cap && entry_cap == MAX_ZIP_ENTRY_DECOMPRESSED_BYTES { + tracing::warn!( + "zip entry {name_in_zip} in {archive_label} exceeded {MAX_ZIP_ENTRY_DECOMPRESSED_BYTES} byte cap; truncating" + ); + } + total_decompressed += buf.len() as u64; + entries.push((format!("{archive_label}!{name_in_zip}"), buf)); + } + Ok(entries) +} + +/// Return true if `data` begins with a standard ZIP signature — used to +/// short-circuit extraction attempts on blobs whose extension matches a +/// ZIP-based format but whose contents are not actually a real ZIP. +pub fn looks_like_zip(data: &[u8]) -> bool { + data.starts_with(b"PK\x03\x04") + || data.starts_with(b"PK\x05\x06") + || data.starts_with(b"PK\x07\x08") +} + fn handle_zip_archive_streaming( file: &mut fs::File, archive_path: &Path, @@ -130,11 +254,26 @@ fn handle_zip_archive_streaming( let mut zip = ZipArchive::new(file)?; let mut entries_on_disk = Vec::new(); + let mut total_decompressed: u64 = 0; for i in 0..zip.len() { + if total_decompressed >= MAX_INMEM_ZIP_DECOMPRESSED_BYTES { + tracing::warn!( + "zip archive {} exceeded {} byte aggregate cap at entry {i}/{}; truncating", + archive_path.display(), + MAX_INMEM_ZIP_DECOMPRESSED_BYTES, + zip.len() + ); + break; + } + let mut zipped_file = zip.by_index(i)?; if zipped_file.is_file() { let name_in_zip = zipped_file.name().to_string(); + if !is_safe_extract_path(Path::new(&name_in_zip)) { + tracing::warn!("unsafe zip path: {name_in_zip}"); + continue; + } let logical_path = format!("{}!{}", archive_path.display(), name_in_zip); let out_path = 
base_dir.join(&name_in_zip); @@ -144,13 +283,12 @@ fn handle_zip_archive_streaming( continue; } } - if !is_safe_extract_path(&out_path) { - tracing::warn!("unsafe zip path: {}", out_path.display()); - continue; - } match fs::File::create(&out_path) { Ok(mut out_file) => { - let mut limited = (&mut zipped_file).take(MAX_ZIP_ENTRY_DECOMPRESSED_BYTES); + let remaining = + MAX_INMEM_ZIP_DECOMPRESSED_BYTES.saturating_sub(total_decompressed); + let entry_cap = remaining.min(MAX_ZIP_ENTRY_DECOMPRESSED_BYTES); + let mut limited = (&mut zipped_file).take(entry_cap); let copied = match std::io::copy(&mut limited, &mut out_file) { Ok(n) => n, Err(e) => { @@ -158,7 +296,8 @@ fn handle_zip_archive_streaming( continue; } }; - if copied == MAX_ZIP_ENTRY_DECOMPRESSED_BYTES { + total_decompressed += copied; + if copied == entry_cap && entry_cap == MAX_ZIP_ENTRY_DECOMPRESSED_BYTES { tracing::warn!( "zip entry {} exceeded {} byte cap; truncating", out_path.display(), @@ -166,6 +305,14 @@ fn handle_zip_archive_streaming( ); } entries_on_disk.push((logical_path, out_path)); + if total_decompressed >= MAX_INMEM_ZIP_DECOMPRESSED_BYTES { + tracing::warn!( + "zip archive {} reached {} byte aggregate cap; truncating remaining entries", + archive_path.display(), + MAX_INMEM_ZIP_DECOMPRESSED_BYTES + ); + break; + } } Err(e) => { tracing::debug!("failed to create file {}: {}", out_path.display(), e); @@ -277,7 +424,7 @@ fn handle_asar_archive_in_memory(buffer: &[u8], archive_path: &Path) -> Result Result { - if !is_safe_extract_path(path) { + if has_parent_or_embedded_prefix(path) { anyhow::bail!("unsafe input path during decompression: {}", path.display()); } Ok(fs::File::open(path)?) @@ -285,7 +432,7 @@ fn safe_open_for_read(path: &Path) -> Result { /// Validate and create a file for writing, checking for path traversal attacks. 
fn safe_create_for_write(path: &Path) -> Result { - if !is_safe_extract_path(path) { + if has_parent_or_embedded_prefix(path) { anyhow::bail!("unsafe output path during decompression: {}", path.display()); } Ok(fs::File::create(path)?) @@ -346,7 +493,7 @@ fn decompress_once(path: &Path, base_dir: Option<&Path>) -> Result { + "gz" | "gzip" | "tgz" => { let out_path = make_output_path(path, base_dir, "decomp.tar"); let decoder = GzDecoder::new(BufReader::new(safe_open_for_read(path)?)); return stream_to_file(decoder, &out_path); @@ -383,12 +530,13 @@ pub fn decompress_file(path: &Path, base_dir: Option<&Path>) -> Result; loop { + let should_extract_tar = is_tar_wrapped_compression(current_path); let content = decompress_once(current_path, base_dir)?; // If the step produced a single on-disk file that is itself a .tar, // recurse on that file. if let CompressedContent::RawFile(ref p) = content { - if split_extensions(p).0.as_deref() == Some("tar") { + if should_extract_tar { owned_buf = Some(p.clone()); // own the path current_path = owned_buf.as_ref().unwrap(); continue; @@ -466,7 +614,7 @@ mod tests { use tempfile::tempdir; use zip::{CompressionMethod, ZipWriter, write::SimpleFileOptions}; - use super::{CompressedContent, decompress_once}; + use super::{CompressedContent, decompress_file_to_temp, decompress_once}; /// 1) Fully unpack: /// - 1st decompress `.gz` -- get a `.tar` file @@ -523,6 +671,45 @@ mod tests { Ok(()) } + #[test] + fn smoke_decompress_tgz_archive() -> anyhow::Result<()> { + let dir = tempdir()?; + let tgz = dir.path().join("payload.tgz"); + let github_pat = "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"; // this is not a real secret + + { + let f = File::create(&tgz)?; + let gz = GzEncoder::new(f, Compression::default()); + let mut tar = Builder::new(gz); + + let data = format!("token={github_pat}\n"); + let mut hdr = tar::Header::new_gnu(); + hdr.set_size(data.len() as u64); + hdr.set_mode(0o644); + hdr.set_cksum(); + tar.append_data(&mut hdr, 
"secret.txt", data.as_bytes())?; + + tar.into_inner()?.finish()?; + } + + let (content, _tmp) = decompress_file_to_temp(&tgz)?; + if let CompressedContent::ArchiveFiles(files) = content { + let mut found = false; + for (logical, path) in files { + if logical.ends_with("payload.tgz!secret.txt") { + let txt = std::fs::read_to_string(&path)?; + assert!(txt.contains(github_pat)); + found = true; + } + } + assert!(found, "did not find secret.txt in tgz ArchiveFiles"); + } else { + panic!("expected ArchiveFiles for tgz archive, got {:?}", content); + } + + Ok(()) + } + /// 2) No-extract flag: just peel the `.gz` layer (no base_dir -- use NamedTempFile), and verify /// you get back a RawFile, whose contents are the tar archive itself. #[test] @@ -696,6 +883,50 @@ mod tests { Ok(()) } + #[test] + fn smoke_decompress_apk_archive() -> anyhow::Result<()> { + // APKs are ZIP containers. We expect Kingfisher to recognize the .apk + // extension and extract its entries so embedded secrets get scanned. 
+ let dir = tempdir()?; + let apk_path = dir.path().join("aws_leak.apk"); + let aws_key = "AKIAIOSFODNN7EXAMPLE"; // canonical AWS sample, not real + + { + let file = File::create(&apk_path)?; + let mut zip = ZipWriter::new(file); + let options = SimpleFileOptions::default() + .compression_method(CompressionMethod::Deflated) + .unix_permissions(0o644); + + zip.start_file("res/values/strings.xml", options)?; + zip.write_all( + format!( + "{aws_key}" + ) + .as_bytes(), + )?; + zip.finish()?; + } + + let tmp = tempdir()?; + let content = decompress_once(&apk_path, Some(tmp.path()))?; + if let CompressedContent::ArchiveFiles(files) = content { + let mut found = false; + for (logical, path) in files { + if logical.ends_with("!res/values/strings.xml") { + let txt = std::fs::read_to_string(&path)?; + assert!(txt.contains(aws_key)); + found = true; + } + } + assert!(found, "did not find res/values/strings.xml in apk ArchiveFiles"); + } else { + panic!("expected ArchiveFiles for apk archive, got {:?}", content); + } + + Ok(()) + } + #[test] fn smoke_decompress_hwpx_archive() -> anyhow::Result<()> { let dir = tempdir()?; diff --git a/src/github.rs b/src/github.rs index 29c6f0c..cd9f921 100644 --- a/src/github.rs +++ b/src/github.rs @@ -8,12 +8,8 @@ use std::{ use anyhow::{Context, Result}; use indicatif::{ProgressBar, ProgressStyle}; -use octorust::{ - Client, - auth::Credentials, - types::{Order, ReposListOrgSort, ReposListOrgType, ReposListUserType}, -}; use reqwest::StatusCode; +use reqwest::header::HeaderMap; use serde::Deserialize; use serde_json::Value; use tracing::{info, warn}; @@ -33,6 +29,11 @@ struct GitHubRepo { fork: bool, } +#[derive(Deserialize)] +struct GitHubOrg { + login: String, +} + #[derive(Debug)] pub struct RepoSpecifiers { pub user: Vec, @@ -52,21 +53,20 @@ pub enum RepoType { Source, Fork, } -impl From for ReposListUserType { - fn from(repo_type: RepoType) -> Self { - match repo_type { - RepoType::All => ReposListUserType::All, - RepoType::Source => 
ReposListUserType::Owner, - RepoType::Fork => ReposListUserType::Member, +impl RepoType { + fn user_query_value(&self) -> &'static str { + match self { + RepoType::All => "all", + RepoType::Source => "owner", + RepoType::Fork => "member", } } -} -impl From for ReposListOrgType { - fn from(repo_type: RepoType) -> Self { - match repo_type { - RepoType::All => ReposListOrgType::All, - RepoType::Source => ReposListOrgType::Sources, - RepoType::Fork => ReposListOrgType::Forks, + + fn org_query_value(&self) -> &'static str { + match self { + RepoType::All => "all", + RepoType::Source => "sources", + RepoType::Fork => "forks", } } } @@ -128,34 +128,13 @@ fn build_exclude_matcher(exclude_repos: &[String]) -> git_host::ExcludeMatcher { fn should_exclude_repo(clone_url: &str, excludes: &git_host::ExcludeMatcher) -> bool { git_host::should_exclude_repo(clone_url, excludes, parse_repo_name_from_url) } -fn create_github_client(github_url: &url::Url, ignore_certs: bool) -> Result> { - // Try personal access token - let credentials = if let Ok(token) = env::var("KF_GITHUB_TOKEN") { - Credentials::Token(token) - } else { - Credentials::Token("".to_string()) // Anonymous access - }; - +fn create_github_client(ignore_certs: bool) -> Result> { let mut client_builder = reqwest::Client::builder(); if ignore_certs { client_builder = client_builder.danger_accept_invalid_certs(ignore_certs); } - let reqwest_client = client_builder.build().context("Failed to build HTTP client")?; - - let http_client = reqwest_middleware_octorust::ClientBuilder::new(reqwest_client).build(); - - let mut client = Client::custom( - concat!(env!("CARGO_PKG_NAME"), "/", env!("CARGO_PKG_VERSION")), - credentials, - http_client, - ); - - // Override host if not using api.github.com - if github_url.host_str() != Some("api.github.com") { - client.with_host_override(github_url.as_str()); - } - Ok(Arc::new(client)) + Ok(Arc::new(client_builder.build().context("Failed to build HTTP client")?)) } fn 
normalize_api_base(api_url: &Url) -> Url { @@ -167,6 +146,108 @@ fn normalize_api_base(api_url: &Url) -> Url { base } +fn github_token() -> Option { + env::var("KF_GITHUB_TOKEN").ok().filter(|t| !t.is_empty()) +} + +fn github_get(client: &reqwest::Client, url: Url, token: Option<&str>) -> reqwest::RequestBuilder { + let req = client.get(url).header("User-Agent", GLOBAL_USER_AGENT.as_str()); + if let Some(token) = token { req.bearer_auth(token) } else { req } +} + +async fn ensure_github_success(resp: reqwest::Response, action: &str) -> Result { + if resp.status().is_success() { + return Ok(resp); + } + + let status = resp.status(); + let url = resp.url().clone(); + warn_on_rate_limit("GitHub", status, action); + + let mut body = resp.text().await.unwrap_or_default(); + if body.len() > 512 { + body.truncate(512); + } + anyhow::bail!("GitHub API request failed while {action}: HTTP {status} ({url}): {body}"); +} + +fn is_github_soft_limit_status(status: StatusCode) -> bool { + matches!(status, StatusCode::FORBIDDEN | StatusCode::TOO_MANY_REQUESTS) +} + +fn github_next_link(headers: &HeaderMap) -> Option { + let raw = headers.get(reqwest::header::LINK)?.to_str().ok()?; + raw.split(',').find_map(|part| { + let (url_part, params) = part.trim().split_once(';')?; + if !params.split(';').any(|param| param.trim() == "rel=\"next\"") { + return None; + } + let url = url_part.trim().strip_prefix('<')?.strip_suffix('>')?; + Url::parse(url).ok() + }) +} + +async fn fetch_github_orgs( + client: &reqwest::Client, + api_base: &Url, + token: Option<&str>, +) -> Result> { + let mut orgs = Vec::new(); + let mut next_url = { + let mut url = api_base.join("organizations").context("Failed to build GitHub orgs URL")?; + url.query_pairs_mut().append_pair("per_page", "100"); + Some(url) + }; + + while let Some(url) = next_url { + let resp = ensure_github_success( + github_get(client, url, token).send().await?, + "listing organizations", + ) + .await?; + next_url = 
github_next_link(resp.headers()); + let page_orgs: Vec = resp.json().await?; + if page_orgs.is_empty() { + break; + } + orgs.extend(page_orgs.into_iter().map(|org| org.login)); + } + + Ok(orgs) +} + +async fn fetch_github_repos( + client: &reqwest::Client, + api_base: &Url, + path: &str, + repo_type: &str, + token: Option<&str>, + action: &str, +) -> Result> { + let mut repos = Vec::new(); + let mut page = 1; + + loop { + let mut url = api_base.join(path).context("Failed to build GitHub repositories URL")?; + url.query_pairs_mut() + .append_pair("per_page", "100") + .append_pair("page", &page.to_string()) + .append_pair("type", repo_type) + .append_pair("sort", "created") + .append_pair("direction", "desc"); + let resp = + ensure_github_success(github_get(client, url, token).send().await?, action).await?; + let page_repos: Vec = resp.json().await?; + if page_repos.is_empty() { + break; + } + repos.extend(page_repos); + page += 1; + } + + Ok(repos) +} + pub async fn enumerate_contributor_repo_urls( repo_url: &GitUrl, github_api_url: &Url, @@ -179,7 +260,7 @@ pub async fn enumerate_contributor_repo_urls( let (_, owner, repo) = parse_repo(repo_url).context("invalid GitHub repo URL")?; let exclude_set = build_exclude_matcher(exclude_repos); let client = reqwest::Client::builder().danger_accept_invalid_certs(ignore_certs).build()?; - let token = env::var("KF_GITHUB_TOKEN").ok().filter(|t| !t.is_empty()); + let token = github_token(); let api_base = normalize_api_base(github_api_url); let mut contributor_logins = Vec::new(); @@ -190,15 +271,12 @@ pub async fn enumerate_contributor_repo_urls( .join(&format!("repos/{owner}/{repo}/contributors")) .context("Failed to build GitHub contributors URL")?; url.query_pairs_mut().append_pair("per_page", "100").append_pair("page", &page.to_string()); - let mut req = client.get(url).header("User-Agent", GLOBAL_USER_AGENT.as_str()); - if let Some(token) = token.as_ref() { - req = req.bearer_auth(token); - } - let resp = 
req.send().await?; - if !resp.status().is_success() { + let resp = github_get(&client, url, token.as_deref()).send().await?; + if is_github_soft_limit_status(resp.status()) { warn_on_rate_limit("GitHub", resp.status(), "listing contributors"); break; } + let resp = ensure_github_success(resp, "listing contributors").await?; let contributors: Vec = resp.json().await?; if contributors.is_empty() { break; @@ -251,15 +329,12 @@ pub async fn enumerate_contributor_repo_urls( .append_pair("type", "all") .append_pair("sort", "updated") .append_pair("direction", "desc"); - let mut req = client.get(url).header("User-Agent", GLOBAL_USER_AGENT.as_str()); - if let Some(token) = token.as_ref() { - req = req.bearer_auth(token); - } - let resp = req.send().await?; - if !resp.status().is_success() { + let resp = github_get(&client, url, token.as_deref()).send().await?; + if is_github_soft_limit_status(resp.status()) { warn_on_rate_limit("GitHub", resp.status(), "listing user repositories"); break; } + let resp = ensure_github_success(resp, "listing user repositories").await?; let repos: Vec = resp.json().await?; if repos.is_empty() { break; @@ -351,22 +426,22 @@ pub async fn enumerate_repo_urls( ignore_certs: bool, mut progress: Option<&mut ProgressBar>, ) -> Result> { - let client = create_github_client(&github_url, ignore_certs)?; + let client = create_github_client(ignore_certs)?; let mut repo_urls = Vec::new(); let exclude_set = build_exclude_matcher(&repo_specifiers.exclude_repos); - let user_repo_type: ReposListUserType = repo_specifiers.repo_filter.clone().into(); - let org_repo_type: ReposListOrgType = repo_specifiers.repo_filter.clone().into(); + let api_base = normalize_api_base(&github_url); + let token = github_token(); for username in &repo_specifiers.user { - let repos = client - .repos() - .list_all_for_user( - username, - user_repo_type.clone(), - ReposListOrgSort::Created, - Order::Desc, - ) - .await?; - repo_urls.extend(repos.body.into_iter().filter_map(|repo| { + 
let repos = fetch_github_repos( + &client, + &api_base, + &format!("users/{username}/repos"), + repo_specifiers.repo_filter.user_query_value(), + token.as_deref(), + "listing user repositories", + ) + .await?; + repo_urls.extend(repos.into_iter().filter_map(|repo| { let clone_url = repo.clone_url; if should_exclude_repo(&clone_url, &exclude_set) { None } else { Some(clone_url) } })); @@ -375,24 +450,21 @@ pub async fn enumerate_repo_urls( } } let orgs = if repo_specifiers.all_organizations { - let mut all_orgs = Vec::new(); - let org_list = client.orgs().list_all(100).await?; - all_orgs.extend(org_list.body.into_iter().map(|org| org.login)); - all_orgs + fetch_github_orgs(&client, &api_base, token.as_deref()).await? } else { repo_specifiers.organization.clone() }; for org_name in orgs { - let repos = client - .repos() - .list_all_for_org( - &org_name, - org_repo_type.clone(), - ReposListOrgSort::Created, - Order::Desc, - ) - .await?; - repo_urls.extend(repos.body.into_iter().filter_map(|repo| { + let repos = fetch_github_repos( + &client, + &api_base, + &format!("orgs/{org_name}/repos"), + repo_specifiers.repo_filter.org_query_value(), + token.as_deref(), + "listing organization repositories", + ) + .await?; + repo_urls.extend(repos.into_iter().filter_map(|repo| { let clone_url = repo.clone_url; if should_exclude_repo(&clone_url, &exclude_set) { None } else { Some(clone_url) } })); @@ -675,4 +747,29 @@ mod tests { assert!(should_exclude_repo("https://github.com/owner/project-archive.git", &excludes)); assert!(!should_exclude_repo("https://github.com/owner/project.git", &excludes)); } + + #[test] + fn github_next_link_parses_next_relation() { + let mut headers = HeaderMap::new(); + headers.insert( + reqwest::header::LINK, + r#"; rel="next", ; rel="first""# + .parse() + .unwrap(), + ); + + let next = github_next_link(&headers).unwrap(); + assert_eq!(next.as_str(), "https://api.github.com/organizations?since=42"); + } + + #[test] + fn 
github_next_link_returns_none_without_next_relation() { + let mut headers = HeaderMap::new(); + headers.insert( + reqwest::header::LINK, + r#"; rel="first""#.parse().unwrap(), + ); + + assert!(github_next_link(&headers).is_none()); + } } diff --git a/src/lib.rs b/src/lib.rs index f4b5314..d9b5c32 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -94,6 +94,9 @@ struct EnumeratorConfig { repo_scan_timeout: Duration, exclude_globset: Option>, git_diff: Option, + /// Whether archive blobs encountered during git scanning should be + /// transparently extracted before pattern matching. + extract_archives: bool, } pub enum FoundInput { diff --git a/src/main.rs b/src/main.rs index 7752885..4cda1d7 100644 --- a/src/main.rs +++ b/src/main.rs @@ -979,11 +979,7 @@ fn build_config_yaml( // round-trip. Pull the raw CLI/env string from `ArgMatches` instead so // the emitted YAML matches what the user actually passed. fn raw_arg_string(matches: &clap::ArgMatches, id: &str) -> Option { - matches - .get_raw(id) - .and_then(|mut v| v.next()) - .and_then(|s| s.to_str()) - .map(str::to_owned) + matches.get_raw(id).and_then(|mut v| v.next()).and_then(|s| s.to_str()).map(str::to_owned) } if user_set(sub_matches, "github_api_url") { git.github_api_url = raw_arg_string(sub_matches, "github_api_url"); @@ -2326,6 +2322,7 @@ alerts: assert_eq!( cfg.git.github_api_url.as_deref(), Some("https://ghe.corp.example.com/api/v3/"), + "github_api_url must preserve a user-supplied trailing slash", ); } diff --git a/src/scanner/enumerate.rs b/src/scanner/enumerate.rs index c9aafc8..77dae44 100644 --- a/src/scanner/enumerate.rs +++ b/src/scanner/enumerate.rs @@ -1,4 +1,5 @@ use std::{ + io::Read, marker::PhantomData, path::Path, process::Command, @@ -30,7 +31,10 @@ use crate::{ binary::is_binary, blob::{Blob, BlobAppearance, BlobId, BlobIdMap}, cli::commands::{github::GitHistoryMode, scan}, - decompress::{CompressedContent, decompress_file_to_temp}, + decompress::{ + CompressedContent, 
MAX_INMEM_ZIP_ARCHIVE_BYTES, ZIP_BASED_FORMATS, decompress_file_to_temp, + extract_zip_archive_in_memory, looks_like_zip, + }, findings_store, git_commit_metadata::CommitMetadata, git_repo_enumerator::{GitBlobMetadata, GitBlobSource, MIN_SCANNABLE_BLOB_SIZE}, @@ -156,6 +160,7 @@ pub fn enumerate_filesystem_inputs( repo_scan_timeout, exclude_globset: exclude_globset.clone(), git_diff: diff_config.clone(), + extract_archives: !args.content_filtering_args.no_extract_archives, }; let (send_ds, recv_ds) = create_datastore_channel(args.num_jobs); let datastore_writer_thread = @@ -208,12 +213,10 @@ pub fn enumerate_filesystem_inputs( } Ok(entry) => entry, }; - // Check if this is an archive file - let is_archive = if let Origin::File(file_origin) = &origin.first() { - is_compressed_file(&file_origin.path) - } else { - false - }; + // Check if this is an archive file. `blob_path()` covers both filesystem and git + // origins, so archive/binary filtering stays consistent across input modes. + let is_archive = + origin.first().blob_path().map(is_compressed_file).unwrap_or(false); let is_binary = is_binary(&blob.bytes()); let should_skip = if is_archive { // For archives: skip only if --no_extract_archives is true @@ -517,10 +520,248 @@ impl FileResult { } } +/// Extract an archive blob loaded from a git ODB. +/// +/// `blob_path` is the in-tree path the blob was first seen at (used both to +/// pick an extension and to label the resulting per-entry origins so reports +/// look like `aws_leak.apk!classes4.dex`). `data` is the raw blob bytes. +/// +/// Returns `Ok(None)` when the path is not a recognized archive format — +/// the caller should fall back to scanning the blob's raw bytes. Returns +/// `Ok(Some(entries))` with one element per extracted entry on success. +/// Returns `Err` only on infrastructure failures (failed to write temp +/// file, etc.); decompression errors return `Ok(None)` so the caller can +/// still scan the raw blob. 
+fn try_extract_git_blob_archive( + blob_path: &str, + data: &[u8], +) -> Result)>>> { + let pb = PathBuf::from(blob_path); + if !is_compressed_file(&pb) { + return Ok(None); + } + + // Use the repo-relative path in reports while staging the blob under its basename so the + // decompressor still dispatches on the original extension. + let archive_label = blob_path.to_string(); + let staged_name = pb.file_name().and_then(|s| s.to_str()).unwrap_or("blob").to_string(); + + // ── fast path: ZIP-based archives extract entirely in memory ── + // + // For monorepos with many committed `.jar`/`.zip`/`.apk`/`.aar` + // artifacts, the disk-staging path below imposes substantial + // overhead per blob (mkdir + stage write + per-entry tempfile + + // re-read into memory). Since the blob bytes are already in memory + // here, we skip the round-trip entirely for ZIP-based formats — + // this is the dominant archive type committed to git in practice. + // + // Memory bound: archives larger than `MAX_INMEM_ZIP_ARCHIVE_BYTES` + // (64 MB) fall through to the disk-streaming path so a single + // worker never holds the archive bytes AND every decompressed + // entry resident at once. The fast path additionally caps total + // decompressed bytes per archive (see + // `MAX_INMEM_ZIP_DECOMPRESSED_BYTES` in `decompress.rs`). + let zip_based_ext = pb + .extension() + .and_then(|s| s.to_str()) + .map(|s| s.to_ascii_lowercase()) + .filter(|ext| ZIP_BASED_FORMATS.iter().any(|z| z == ext)); + + if let Some(_ext) = zip_based_ext.as_ref() { + // Cheap magic-byte check first: if a `.zip`-named blob is not + // actually a ZIP (truncated download, stub file, accidental + // rename), skip extraction so the caller scans the raw bytes. 
+ if !looks_like_zip(data) { + return Ok(None); + } + if data.len() <= MAX_INMEM_ZIP_ARCHIVE_BYTES { + return match extract_zip_archive_in_memory(data, &archive_label) { + Ok(entries) => Ok(Some(entries)), + Err(e) => { + debug!( + "in-memory zip extract failed for {archive_label}: {e:#}; falling back to raw scan" + ); + Ok(None) + } + }; + } + debug!( + "{archive_label} is {} bytes (> {} MB cap); falling back to disk streaming extractor", + data.len(), + MAX_INMEM_ZIP_ARCHIVE_BYTES / (1024 * 1024) + ); + // fall through to the disk-streaming path below + } + + // ── slow path: tar/gz/bz2/xz/zlib/asar/hwp/egg etc. via tempfile, + // and large ZIP-based archives that exceeded the + // in-memory cap above. ── + let staging = tempfile::tempdir().context("Failed to create staging tempdir for git blob")?; + let staged_path = staging.path().join(&staged_name); + std::fs::write(&staged_path, data) + .with_context(|| format!("Failed to stage blob to {}", staged_path.display()))?; + + let (content, _td) = match decompress_file_to_temp(&staged_path) { + Ok(c) => c, + Err(e) => { + debug!("decompress_file_to_temp({}) failed: {e:#}", staged_path.display()); + return Ok(None); + } + }; + + use crate::decompress::CompressedContent; + let strip_logical_prefix = |logical: String| -> String { + // decompress_file_to_temp builds logicals as + // `<staged-path>!<entry>`. Replace the staged-path prefix with the + // real repo-relative archive path so report paths look like + // `dir/aws_leak.apk!res/values/strings.xml`. + match logical.split_once('!') { + Some((_, entry)) => format!("{}!{}", archive_label, entry), + None => format!("{}!{}", archive_label, logical), + } + }; + + // Aggregate cap on bytes accumulated by this wrapper. The on-disk + // entries themselves were already bounded during decompression by + // per-entry caps; this cap bounds the size of the final + // `Vec<(String, Vec<u8>)>` we hand back. 
Without it, a JAR with N + // medium-sized entries could push num_jobs * N * entry_size bytes + // resident across the rayon pool. + const MAX_DISK_PATH_AGGREGATE_BYTES: u64 = 256 * 1024 * 1024; + + let entries = match content { + CompressedContent::Archive(files) => { + let mut out = Vec::with_capacity(files.len()); + let mut total: u64 = 0; + for (logical, bytes) in files { + if total >= MAX_DISK_PATH_AGGREGATE_BYTES { + debug!( + "{archive_label} disk-archive aggregate cap of {MAX_DISK_PATH_AGGREGATE_BYTES} bytes reached; truncating remaining entries" + ); + break; + } + let remaining = MAX_DISK_PATH_AGGREGATE_BYTES - total; + if bytes.len() as u64 > remaining { + debug!( + "{archive_label} disk-archive aggregate cap reached while reading {}; truncating entry", + logical + ); + let take = remaining as usize; + out.push((strip_logical_prefix(logical), bytes[..take].to_vec())); + break; + } + total += bytes.len() as u64; + out.push((strip_logical_prefix(logical), bytes)); + } + out + } + + CompressedContent::ArchiveFiles(disk_entries) => { + let mut out = Vec::with_capacity(disk_entries.len()); + let mut total: u64 = 0; + for (logical, disk_path) in disk_entries { + if total >= MAX_DISK_PATH_AGGREGATE_BYTES { + debug!( + "{archive_label} disk-archive aggregate cap of {MAX_DISK_PATH_AGGREGATE_BYTES} bytes reached; truncating remaining entries" + ); + break; + } + let remaining = MAX_DISK_PATH_AGGREGATE_BYTES - total; + let entry_len = match std::fs::metadata(&disk_path) { + Ok(md) => md.len(), + Err(e) => { + debug!("Failed to stat extracted entry {}: {e}", disk_path.display()); + continue; + } + }; + let file = match std::fs::File::open(&disk_path) { + Ok(file) => file, + Err(e) => { + debug!("Failed to open extracted entry {}: {e}", disk_path.display()); + continue; + } + }; + let to_read = entry_len.min(remaining); + let mut bytes = Vec::with_capacity(to_read as usize); + match file.take(to_read).read_to_end(&mut bytes) { + Ok(_) => { + total += bytes.len() as 
u64; + out.push((strip_logical_prefix(logical), bytes)); + if entry_len > remaining { + debug!( + "{archive_label} disk-archive aggregate cap reached while reading {}; truncating entry", + disk_path.display() + ); + break; + } + } + Err(e) => { + debug!("Failed to read extracted entry {}: {e}", disk_path.display()); + } + } + } + out + } + + // Single-stream decompression (gz/bz2/xz/zlib) gives one logical + // payload; cap it just like aggregate archive-entry reads. + CompressedContent::Raw(mut bytes) => { + if bytes.len() as u64 > MAX_DISK_PATH_AGGREGATE_BYTES { + debug!( + "{archive_label} single-stream payload exceeded {MAX_DISK_PATH_AGGREGATE_BYTES} byte cap; truncating" + ); + bytes.truncate(MAX_DISK_PATH_AGGREGATE_BYTES as usize); + } + vec![(format!("{}!content", archive_label), bytes)] + } + CompressedContent::RawFile(path) => { + let payload_len = match std::fs::metadata(&path) { + Ok(md) => md.len(), + Err(e) => { + debug!("Failed to stat decompressed payload {}: {e}", path.display()); + return Ok(None); + } + }; + let file = match std::fs::File::open(&path) { + Ok(file) => file, + Err(e) => { + debug!("Failed to open decompressed payload {}: {e}", path.display()); + return Ok(None); + } + }; + let to_read = payload_len.min(MAX_DISK_PATH_AGGREGATE_BYTES); + let mut bytes = Vec::with_capacity(to_read as usize); + if let Err(e) = file.take(to_read).read_to_end(&mut bytes) { + debug!("Failed to read decompressed payload {}: {e}", path.display()); + return Ok(None); + } + if payload_len > MAX_DISK_PATH_AGGREGATE_BYTES { + debug!( + "{archive_label} single-stream payload exceeded {MAX_DISK_PATH_AGGREGATE_BYTES} byte cap; truncating" + ); + } + vec![(format!("{}!content", archive_label), bytes)] + } + }; + + if entries.is_empty() { Ok(None) } else { Ok(Some(entries)) } +} + +fn archive_entry_suffix<'a>(entry_logical: &'a str, archive_path: &str) -> Option<&'a str> { + entry_logical.strip_prefix(archive_path).filter(|suffix| suffix.starts_with('!')).or_else( + 
|| entry_logical.split_once('!').map(|(archive, _)| &entry_logical[archive.len()..]), + ) +} + // A marker so the struct itself carries the lifetime. struct GitRepoResultIter<'a> { inner: GitRepoResult, deadline: std::time::Instant, + /// When true, blobs whose in-tree path matches a known archive format + /// (zip/jar/apk/tar/gz/...) are extracted before scanning, so secrets + /// inside the archive can be matched. When false, archive blobs are + /// scanned as raw compressed bytes (legacy behavior). + extract_archives: bool, _marker: std::marker::PhantomData<&'a ()>, } @@ -534,6 +775,8 @@ impl ParallelBlobIterator for GitRepoResult { Ok(Some(GitRepoResultIter { inner: self, deadline: Instant::now() + PLACEHOLDER, + // Default to enabled; the dispatch site overrides from CLI args. + extract_archives: true, _marker: std::marker::PhantomData, })) } @@ -551,12 +794,18 @@ impl<'a> rayon::iter::ParallelIterator for GitRepoResultIter<'a> { let repo_path = Arc::new(self.inner.path.clone()); let deadline = self.deadline; let flag = Arc::new(AtomicBool::new(false)); // first-timeout gate + let extract_archives = self.extract_archives; + // Loads one git blob and returns one *or more* `(OriginSet, Blob)` + // tuples: a single tuple for normal blobs, multiple tuples for + // archive blobs (zip/jar/apk/...) whose entries get unpacked into + // synthetic per-entry blobs so pattern matchers can see the + // contents. See `try_extract_git_blob_archive` above. 
let load_blob = { let repo_path = Arc::clone(&repo_path); let flag = Arc::clone(&flag); - move |repo: &mut GixRepo, md: GitBlobMetadata| -> Result<(OriginSet, Blob)> { + move |repo: &mut GixRepo, md: GitBlobMetadata| -> Result)>> { if StdInstant::now() > deadline { if flag.swap(true, Ordering::Relaxed) { bail!("__timeout_silenced__"); @@ -566,7 +815,61 @@ impl<'a> rayon::iter::ParallelIterator for GitRepoResultIter<'a> { let blob_id = md.blob_oid; let mut raw = repo.find_object(blob_id)?.try_into_blob()?; - let blob = Blob::new(BlobId::from(&blob_id), std::mem::take(&mut raw.data)); + let data = std::mem::take(&mut raw.data); + + // Try archive extraction if any first-seen path looks like + // a known archive format. We don't need to keep the raw + // archive bytes around — its compressed contents won't + // produce useful matches anyway. + if extract_archives { + let archive_path: Option = md + .first_seen + .iter() + .map(|e| String::from_utf8_lossy(&e.path).to_string()) + .find(|p| is_compressed_file(Path::new(p))); + + if let Some(archive_path) = archive_path { + match try_extract_git_blob_archive(&archive_path, &data) { + Ok(Some(entries)) => { + let mut out = Vec::with_capacity(entries.len()); + for (entry_logical, entry_bytes) in entries { + let entry_suffix = + archive_entry_suffix(&entry_logical, &archive_path); + let origin = + OriginSet::try_from_iter(md.first_seen.iter().map(|e| { + let repo_relative_path = + String::from_utf8_lossy(&e.path).to_string(); + let per_appearance_logical = entry_suffix + .map(|suffix| { + format!("{repo_relative_path}{suffix}") + }) + .unwrap_or_else(|| entry_logical.clone()); + Origin::from_git_repo_with_first_commit( + Arc::clone(&repo_path), + Arc::clone(&e.commit_metadata), + per_appearance_logical, + ) + })) + .unwrap_or_else( + || Origin::from_git_repo(Arc::clone(&repo_path)).into(), + ); + out.push((origin, Blob::from_bytes(entry_bytes))); + } + return Ok(out); + } + Ok(None) => { /* not an archive we can crack — fall 
through */ } + Err(e) => { + debug!( + "Failed to extract git archive blob {} ({}): {e:#}", + blob_id, archive_path + ); + // fall through and scan raw bytes + } + } + } + } + + let blob = Blob::new(BlobId::from(&blob_id), data); let origin = OriginSet::try_from_iter(md.first_seen.iter().map(|e| { Origin::from_git_repo_with_first_commit( @@ -577,14 +880,32 @@ impl<'a> rayon::iter::ParallelIterator for GitRepoResultIter<'a> { })) .unwrap_or_else(|| Origin::from_git_repo(Arc::clone(&repo_path)).into()); - Ok((origin, blob)) + Ok(vec![(origin, blob)]) } }; - let timeout_filter = |res: &Result<(OriginSet, Blob)>| -> bool { + // After flat-mapping, errors and successes both flow as + // `Result<(OriginSet, Blob<'a>)>`. Filter out the silenced timeout + // marker before handing items to the scan consumer. + let timeout_filter = |res: &Result<(OriginSet, Blob<'a>)>| -> bool { !matches!(res, Err(e) if e.to_string() == "__timeout_silenced__") }; + // Convert `Result<Vec<T>>` into a sequential iterator of `Result<T>`, + // suitable for rayon's `flat_map_iter`. A failed load yields a single + // `Err`; a successful load fans out into one item per extracted blob. + // A closure is used (rather than a free function) so the produced + // `Blob<'static>` items can coerce into the iterator's + // `Blob<'a>` Item type — Blob is covariant in its lifetime, but a + // free fn would lose that link. 
+ let fan_out = |res: Result)>>| + -> Box)>> + Send + 'a> { + match res { + Ok(v) => Box::new(v.into_iter().map(Ok)), + Err(e) => Box::new(std::iter::once(Err(e))), + } + }; + match self.inner.blobs { GitBlobSource::Precomputed(blobs) => { let rs = Arc::clone(&repo_sync); @@ -592,6 +913,7 @@ impl<'a> rayon::iter::ParallelIterator for GitRepoResultIter<'a> { .into_par_iter() .with_min_len(1024) .map_init(move || rs.to_thread_local(), load_blob) + .flat_map_iter(fan_out) .filter(timeout_filter) .drive_unindexed(consumer) } @@ -640,6 +962,7 @@ impl<'a> rayon::iter::ParallelIterator for GitRepoResultIter<'a> { .into_iter() .par_bridge() .map_init(move || rs.to_thread_local(), load_blob) + .flat_map_iter(fan_out) .filter(timeout_filter) .drive_unindexed(consumer) } @@ -829,12 +1152,14 @@ impl<'cfg> ParallelBlobIterator for (&'cfg EnumeratorConfig, FoundInput) { t_start.elapsed().as_secs_f64() ); - // Convert to a blob iterator, then patch the deadline + // Convert to a blob iterator, then patch deadline + extraction. 
+ let extract_archives = cfg.extract_archives; repo_result .into_blob_iter() // Option .map(|iter| { iter.map(|mut gri| { gri.deadline = Instant::now() + timeout; + gri.extract_archives = extract_archives; FoundInputIter::GitRepo(gri) }) }) @@ -1165,12 +1490,12 @@ fn reference_candidates(reference: &str) -> Vec { #[cfg(test)] mod tests { - use std::fs; use std::path::Path; + use std::{fs, io::Write}; use super::{ FileResult, GitBlobSource, GitDiffConfig, ParallelBlobIterator, enumerate_git_diff_repo, - reference_candidates, + reference_candidates, try_extract_git_blob_archive, }; use anyhow::Result; use bstr::ByteSlice; @@ -1179,6 +1504,7 @@ mod tests { use rayon::iter::ParallelIterator; use rusqlite::Connection; use tempfile::tempdir; + use zip::{CompressionMethod, ZipWriter, write::SimpleFileOptions}; #[test] fn reference_candidates_for_plain_branch() { @@ -1278,6 +1604,40 @@ mod tests { Ok(()) } + #[test] + fn archive_entry_suffix_preserves_entry_component() { + assert_eq!( + super::archive_entry_suffix("dir/archive.zip!nested/secret.txt", "dir/archive.zip"), + Some("!nested/secret.txt") + ); + assert_eq!( + super::archive_entry_suffix("archive.zip!nested/secret.txt", "other/archive.zip"), + Some("!nested/secret.txt") + ); + } + + #[test] + fn git_blob_archive_extraction_preserves_repo_relative_paths() -> Result<()> { + let mut cursor = std::io::Cursor::new(Vec::new()); + { + let mut zip = ZipWriter::new(&mut cursor); + let options = SimpleFileOptions::default() + .compression_method(CompressionMethod::Deflated) + .unix_permissions(0o644); + zip.start_file("nested/secret.txt", options)?; + zip.write_all(b"token=not-a-real-secret")?; + zip.finish()?; + } + + let entries = try_extract_git_blob_archive("dir/payload.zip", &cursor.into_inner())? 
+ .expect("zip blob should extract"); + + assert_eq!(entries.len(), 1); + assert_eq!(entries[0].0, "dir/payload.zip!nested/secret.txt"); + assert_eq!(entries[0].1, b"token=not-a-real-secret"); + Ok(()) + } + fn collect_file_bytes(file: FileResult) -> Result)>> { let iter = file.into_blob_iter()?.expect("file result should yield a blob"); iter.collect::>() diff --git a/src/scanner/util.rs b/src/scanner/util.rs index ac33c9c..f36bb41 100644 --- a/src/scanner/util.rs +++ b/src/scanner/util.rs @@ -10,7 +10,9 @@ pub fn is_compressed_file(path: &Path) -> bool { }; // Check for compound extensions first if filename.ends_with(".tar.gz") + || filename.ends_with(".tar.gzip") || filename.ends_with(".tar.bz2") + || filename.ends_with(".tar.bzip2") || filename.ends_with(".tar.xz") { return true; @@ -19,8 +21,10 @@ pub fn is_compressed_file(path: &Path) -> bool { if let Some(ext) = path.extension().and_then(|s| s.to_str()) { let ext_lower = ext.to_lowercase(); ext_lower == "gz" + || ext_lower == "gzip" || ext_lower == "tgz" || ext_lower == "bz2" + || ext_lower == "bzip2" || ext_lower == "xz" || ext_lower == "tar" || ext_lower == "zlib" @@ -63,3 +67,22 @@ pub fn is_sqlite_file(path: &Path) -> bool { pub fn has_sqlite_magic(data: &[u8]) -> bool { data.len() >= SQLITE_MAGIC.len() && data[..SQLITE_MAGIC.len()] == *SQLITE_MAGIC } + +#[cfg(test)] +mod tests { + use std::path::Path; + + use super::is_compressed_file; + + #[test] + fn recognizes_tar_wrapped_long_compression_extensions() { + assert!(is_compressed_file(Path::new("archive.tar.gzip"))); + assert!(is_compressed_file(Path::new("archive.tar.bzip2"))); + } + + #[test] + fn recognizes_long_single_compression_extensions() { + assert!(is_compressed_file(Path::new("payload.gzip"))); + assert!(is_compressed_file(Path::new("payload.bzip2"))); + } +} diff --git a/src/scanner/validation.rs b/src/scanner/validation.rs index 1695ae4..fbdb7d2 100644 --- a/src/scanner/validation.rs +++ b/src/scanner/validation.rs @@ -725,7 +725,7 @@ pub 
async fn run_secret_validation( let mut by_key: FxHashMap> = FxHashMap::default(); for om in owned { - by_key.entry(build_cache_key(&om, &dep_vars)).or_default().push(om); + by_key.entry(build_cache_key(&om)).or_default().push(om); } let reps: Vec<_> = by_key.into_iter().map(|(_k, mut v)| (v.remove(0), v)).collect(); @@ -859,17 +859,7 @@ async fn validate_single( validation_retries: u32, max_body_len: usize, ) { - // Build key - let dep_vars_str = dep_vars - .get(om.rule.id()) - .map(|hm| { - let mut sorted: Vec<_> = hm.iter().collect(); - sorted.sort_by(|(k, _), (k2, _)| k.cmp(k2)); - sorted.into_iter().map(|(k, v)| format!("{}={}", k, v)).collect::>().join("|") - }) - .unwrap_or_default(); - let capture0 = om.captures.captures.get(0).map_or(String::new(), |c| c.raw_value().to_string()); - let cache_key = format!("{}|{}|{}", om.rule.name(), capture0, dep_vars_str); + let cache_key = build_cache_key(om); // Check cache first if let Some(cached) = cache.get(&cache_key) { om.validation_success = cached.is_valid; @@ -966,24 +956,29 @@ fn is_counted_validation_status(status: StatusCode) -> bool { !matches!(status, StatusCode::CONTINUE | StatusCode::PRECONDITION_REQUIRED) } -// Helper to compute the cache key for an OwnedBlobMatch -fn build_cache_key( - om: &OwnedBlobMatch, - dep_vars: &FxHashMap>, -) -> String { - // Build key - let dep_vars_str = dep_vars - .get(om.rule.id()) - .map(|hm| { - let mut sorted: Vec<_> = hm.iter().collect(); - sorted.sort_by(|(k, _), (k2, _)| k.cmp(k2)); - sorted.into_iter().map(|(k, v)| format!("{}={}", k, v)).collect::>().join("|") - }) - .unwrap_or_default(); - // For demonstration, we’ll do a simplistic approach - // You can adapt from your existing logic +// Helper to compute the cache key for an OwnedBlobMatch. 
+fn build_cache_key(om: &OwnedBlobMatch) -> String { let capture0 = om.captures.captures.get(0).map_or(String::new(), |c| c.raw_value().to_string()); - format!("{}|{}|{}", om.rule.name(), capture0, dep_vars_str) + + let has_context_dependency = om + .rule + .syntax() + .depends_on_rule + .iter() + .flatten() + .any(|dep| !dep.variable.eq_ignore_ascii_case("TOKEN")); + if has_context_dependency { + return format!( + "{}|{}|{}|{}|{}", + om.rule.name(), + capture0, + om.blob_id, + om.matching_input_offset_span.start, + om.matching_input_offset_span.end + ); + } + + format!("{}|{}", om.rule.name(), capture0) } fn maybe_record_access_map(om: &OwnedBlobMatch, collector: Option<&AccessMapCollector>) { @@ -1006,8 +1001,9 @@ fn maybe_record_access_map(om: &OwnedBlobMatch, collector: Option<&AccessMapColl .map(|(_, value, ..)| value.clone()) .unwrap_or_default(); - let mut akid = utils::find_closest_variable(&captures, &secret, "TOKEN", "AKID") - .unwrap_or_default(); + let mut akid = + utils::find_closest_variable(&captures, secret.as_str(), "TOKEN", "AKID") + .unwrap_or_default(); if akid.is_empty() { akid = extract_akid_from_body(&om.validation_response_body).unwrap_or_default(); @@ -1031,7 +1027,7 @@ fn maybe_record_access_map(om: &OwnedBlobMatch, collector: Option<&AccessMapColl .map(|(_, value, ..)| value.clone()) .unwrap_or_default(); let storage_account = - utils::find_closest_variable(&captures, &storage_key, "TOKEN", "AZURENAME") + utils::find_closest_variable(&captures, storage_key.as_str(), "TOKEN", "AZURENAME") .unwrap_or_default(); let mut storage_account = storage_account; @@ -1086,9 +1082,13 @@ fn maybe_record_access_map(om: &OwnedBlobMatch, collector: Option<&AccessMapColl .find(|(name, ..)| name == "TOKEN") .map(|(_, value, ..)| value.clone()) .unwrap_or_default(); - let mut organization = - utils::find_closest_variable(&captures, &token, "TOKEN", "AZURE_DEVOPS_ORG") - .unwrap_or_default(); + let mut organization = utils::find_closest_variable( + &captures, 
+ token.as_str(), + "TOKEN", + "AZURE_DEVOPS_ORG", + ) + .unwrap_or_default(); if organization.is_empty() { organization = extract_azure_devops_org_from_body(&om.validation_response_body) .unwrap_or_default(); @@ -1105,7 +1105,7 @@ fn maybe_record_access_map(om: &OwnedBlobMatch, collector: Option<&AccessMapColl .map(|(_, value, ..)| value.clone()) .unwrap_or_default(); let access_key = - utils::find_closest_variable(&captures, &secret_key, "TOKEN", "AKID") + utils::find_closest_variable(&captures, secret_key.as_str(), "TOKEN", "AKID") .or_else(|| om.dependent_captures.get("AKID").cloned()) .unwrap_or_default(); @@ -1119,14 +1119,22 @@ fn maybe_record_access_map(om: &OwnedBlobMatch, collector: Option<&AccessMapColl .find(|(name, ..)| name == "TOKEN") .map(|(_, value, ..)| value.clone()) .unwrap_or_default(); - let access_key = - utils::find_closest_variable(&captures, &secret_key, "TOKEN", "STS_AKID") - .or_else(|| om.dependent_captures.get("STS_AKID").cloned()) - .unwrap_or_default(); - let session_token = - utils::find_closest_variable(&captures, &secret_key, "TOKEN", "SECURITY_TOKEN") - .or_else(|| om.dependent_captures.get("SECURITY_TOKEN").cloned()) - .unwrap_or_default(); + let access_key = utils::find_closest_variable( + &captures, + secret_key.as_str(), + "TOKEN", + "STS_AKID", + ) + .or_else(|| om.dependent_captures.get("STS_AKID").cloned()) + .unwrap_or_default(); + let session_token = utils::find_closest_variable( + &captures, + secret_key.as_str(), + "TOKEN", + "SECURITY_TOKEN", + ) + .or_else(|| om.dependent_captures.get("SECURITY_TOKEN").cloned()) + .unwrap_or_default(); if !access_key.is_empty() && !secret_key.is_empty() && !session_token.is_empty() { collector.record_alibaba( diff --git a/src/validation.rs b/src/validation.rs index be12ecd..d390f8a 100644 --- a/src/validation.rs +++ b/src/validation.rs @@ -15,7 +15,7 @@ use http::StatusCode; use liquid::Object; use liquid_core::{Value, ValueView}; use reqwest::{Client, Url, header, 
header::HeaderValue, multipart}; -use rustc_hash::FxHashMap; +use rustc_hash::{FxHashMap, FxHashSet}; use tokio::{sync::Notify, time}; use tracing::{debug, trace}; @@ -260,7 +260,9 @@ type Cache = kingfisher_scanner::validation::Cache; /// Returns an opaque 64-bit key for internal validation deduplication. /// /// This is an INTERNAL key used only for validation deduplication within a single scan. -/// It uses `captures.get(0)` to get the primary secret value. +/// It uses `captures.get(0)` to get the primary secret value. Rules with dependent +/// variables also include blob location because validation can depend on nearby context +/// such as an AWS access-key ID paired with a secret access key. /// /// **Important**: This is distinct from the EXTERNAL `finding_fingerprint` used for: /// - Baseline comparisons across scans @@ -279,6 +281,13 @@ fn validation_dedup_key(m: &OwnedBlobMatch) -> u64 { if let Some(val) = capture_value { val.hash(&mut hasher); } + + if !m.rule.syntax().depends_on_rule.is_empty() { + m.blob_id.hash(&mut hasher); + m.matching_input_offset_span.start.hash(&mut hasher); + m.matching_input_offset_span.end.hash(&mut hasher); + } + let key = hasher.finish(); trace!( @@ -693,7 +702,7 @@ async fn timed_validate_single_match<'a>( validate_jwt_rule(m, &captured_values, use_lax_tls, clients.allow_internal_ips).await; } Some(Validation::AWS) => { - validate_aws_rule(m, &captured_values, cache).await; + validate_aws_rule(m, &captured_values, dependent_variables, cache).await; } Some(Validation::GCP) => { validate_gcp_rule(m, &globals, cache).await; @@ -1186,7 +1195,7 @@ async fn validate_azure_storage( .map(|(_, v, ..)| v.clone()) .unwrap_or_default(); let storage_account = - utils::find_closest_variable(captured_values, &storage_key, "TOKEN", "AZURENAME") + utils::find_closest_variable(captured_values, storage_key.as_str(), "TOKEN", "AZURENAME") .unwrap_or_default(); if storage_account.is_empty() || storage_key.is_empty() { @@ -1391,6 +1400,7 @@ 
async fn validate_jwt_rule( async fn validate_aws_rule( m: &mut OwnedBlobMatch, captured_values: &[(String, String, usize, usize)], + dependent_variables: &FxHashMap>, cache: &Cache, ) { let secret = captured_values @@ -1398,10 +1408,8 @@ async fn validate_aws_rule( .find(|(n, ..)| n == "TOKEN") .map(|(_, v, ..)| v.clone()) .unwrap_or_default(); - let akid = - utils::find_closest_variable(captured_values, &secret, "TOKEN", "AKID").unwrap_or_default(); - if akid.is_empty() || secret.is_empty() { + if secret.is_empty() { m.validation_success = false; m.validation_response_body = validation_body::from_string("Missing AWS access-key ID or secret.".to_string()); @@ -1409,77 +1417,169 @@ async fn validate_aws_rule( return; } - let cache_key = aws::generate_aws_cache_key(&akid, &secret); - if let Some(cached) = cache.get(&cache_key) { - let c = cached.value(); - if c.timestamp.elapsed() < Duration::from_secs(VALIDATION_CACHE_SECONDS) { - m.validation_success = c.is_valid; - m.validation_response_body = c.body.clone(); - m.validation_response_status = c.status; - return; - } - } + let akid_candidates = aws_akid_candidates( + captured_values, + dependent_variables.get("AKID"), + m.matching_input_offset_span, + &secret, + ); - if let Some(account_id) = aws::should_skip_aws_validation(&akid) { - m.validation_success = false; - m.validation_response_body = validation_body::from_string(format!( - "(skip list entry) AWS validation not attempted for account {}.", - account_id - )); - m.validation_response_status = StatusCode::PRECONDITION_REQUIRED; - cache.insert( - cache_key, - CachedResponse { - body: m.validation_response_body.clone(), - status: m.validation_response_status, - is_valid: m.validation_success, - timestamp: Instant::now(), - }, - ); - return; - } - - if let Err(e) = aws::validate_aws_credentials_input(&akid, &secret) { + if akid_candidates.is_empty() { m.validation_success = false; m.validation_response_body = - validation_body::from_string(format!("Invalid AWS 
credentials ({}): {}", akid, e)); + validation_body::from_string("Missing AWS access-key ID or secret.".to_string()); m.validation_response_status = StatusCode::BAD_REQUEST; return; } - match aws::validate_aws_credentials(&akid, &secret).await { - Ok((ok, msg)) => { - m.validation_success = ok; - if ok { - let mut body = format!("{} --- ARN: {}", akid, msg); - if let Ok(acct) = aws::aws_key_to_account_number(&akid) { - body.push_str(&format!(" --- AWS Account Number: {:012}", acct)); + let mut last_body = None; + let mut last_status = StatusCode::UNAUTHORIZED; + + for akid in akid_candidates { + let cache_key = aws::generate_aws_cache_key(&akid, &secret); + if let Some(cached) = cache.get(&cache_key) { + let c = cached.value(); + if c.timestamp.elapsed() < Duration::from_secs(VALIDATION_CACHE_SECONDS) { + if c.is_valid { + m.validation_success = c.is_valid; + m.validation_response_body = c.body.clone(); + m.validation_response_status = c.status; + return; } - m.validation_response_body = validation_body::from_string(body); - m.validation_response_status = StatusCode::OK; - } else { - m.validation_response_body = validation_body::from_string(format!( - "AWS validation error ({}): {}", - akid, msg - )); - m.validation_response_status = StatusCode::UNAUTHORIZED; + last_body = Some(c.body.clone()); + last_status = c.status; + continue; } + } + + if let Some(account_id) = aws::should_skip_aws_validation(&akid) { + let body = validation_body::from_string(format!( + "(skip list entry) AWS validation not attempted for account {}.", + account_id + )); cache.insert( cache_key, CachedResponse { - body: m.validation_response_body.clone(), - status: m.validation_response_status, - is_valid: m.validation_success, + body: body.clone(), + status: StatusCode::PRECONDITION_REQUIRED, + is_valid: false, timestamp: Instant::now(), }, ); + last_body = Some(body); + last_status = StatusCode::PRECONDITION_REQUIRED; + continue; } - Err(e) => { - m.validation_success = false; - 
m.validation_response_body = - validation_body::from_string(format!("AWS validation error ({}): {}", akid, e)); - m.validation_response_status = StatusCode::BAD_GATEWAY; + + if let Err(e) = aws::validate_aws_credentials_input(&akid, &secret) { + let body = + validation_body::from_string(format!("Invalid AWS credentials ({}): {}", akid, e)); + cache.insert( + cache_key, + CachedResponse { + body: body.clone(), + status: StatusCode::BAD_REQUEST, + is_valid: false, + timestamp: Instant::now(), + }, + ); + last_body = Some(body); + last_status = StatusCode::BAD_REQUEST; + continue; } + + match aws::validate_aws_credentials(&akid, &secret).await { + Ok((ok, msg)) => { + if ok { + m.validation_success = true; + let mut body = format!("{} --- ARN: {}", akid, msg); + if let Ok(acct) = aws::aws_key_to_account_number(&akid) { + body.push_str(&format!(" --- AWS Account Number: {:012}", acct)); + } + m.validation_response_body = validation_body::from_string(body); + m.validation_response_status = StatusCode::OK; + cache.insert( + cache_key, + CachedResponse { + body: m.validation_response_body.clone(), + status: m.validation_response_status, + is_valid: true, + timestamp: Instant::now(), + }, + ); + return; + } + + let body = validation_body::from_string(format!( + "AWS validation error ({}): {}", + akid, msg + )); + cache.insert( + cache_key, + CachedResponse { + body: body.clone(), + status: StatusCode::UNAUTHORIZED, + is_valid: false, + timestamp: Instant::now(), + }, + ); + last_body = Some(body); + last_status = StatusCode::UNAUTHORIZED; + } + Err(e) => { + last_body = Some(validation_body::from_string(format!( + "AWS validation error ({}): {}", + akid, e + ))); + last_status = StatusCode::BAD_GATEWAY; + } + } + } + + m.validation_success = false; + m.validation_response_body = last_body.unwrap_or_else(|| { + validation_body::from_string("AWS validation failed for all nearby access-key IDs.") + }); + m.validation_response_status = last_status; +} + +fn 
aws_akid_candidates( + captured_values: &[(String, String, usize, usize)], + dependent_akids: Option<&Vec<(String, OffsetSpan)>>, + target_span: OffsetSpan, + secret: &str, +) -> Vec { + let mut candidates = Vec::new(); + + if let Some(closest) = utils::find_closest_variable(captured_values, secret, "TOKEN", "AKID") { + candidates.push((0usize, closest)); + } + + if let Some(values) = dependent_akids { + candidates.extend( + values + .iter() + .map(|(value, span)| (dependency_distance(*span, target_span), value.clone())), + ); + } + + candidates.sort_by_key(|(distance, _)| *distance); + + let mut seen = FxHashSet::default(); + candidates + .into_iter() + .filter_map(|(_, value)| if seen.insert(value.clone()) { Some(value) } else { None }) + .take(64) + .collect() +} + +fn dependency_distance(span: OffsetSpan, target_span: OffsetSpan) -> usize { + if span.end <= target_span.start { + target_span.start - span.end + } else if span.start >= target_span.end { + span.start - target_span.end + } else { + 0 } } @@ -1746,6 +1846,47 @@ mod tests { assert_eq!(selected.1, OffsetSpan::from_range(70..80)); } + #[test] + fn aws_akid_candidates_orders_by_proximity_and_deduplicates() { + let captured_values = vec![ + ("TOKEN".to_string(), "secret".to_string(), 100usize, 140usize), + ("AKID".to_string(), "closest_capture".to_string(), 80usize, 90usize), + ]; + let dependent_akids = vec![ + ("far_before".to_string(), OffsetSpan::from_range(10..20)), + ("near_after".to_string(), OffsetSpan::from_range(150..160)), + ("overlap".to_string(), OffsetSpan::from_range(110..120)), + ("closest_capture".to_string(), OffsetSpan::from_range(80..90)), + ]; + + let candidates = aws_akid_candidates( + &captured_values, + Some(&dependent_akids), + OffsetSpan::from_range(100..140), + "secret", + ); + + assert_eq!(candidates, vec!["closest_capture", "overlap", "near_after", "far_before"]); + } + + #[test] + fn aws_akid_candidates_caps_unique_candidates() { + let dependent_akids = (0..70) + .map(|i| 
(format!("akid{i}"), OffsetSpan::from_range((i * 2)..(i * 2 + 1)))) + .collect::>(); + + let candidates = aws_akid_candidates( + &[], + Some(&dependent_akids), + OffsetSpan::from_range(1_000..1_010), + "secret", + ); + + assert_eq!(candidates.len(), 64); + assert_eq!(candidates.first().map(String::as_str), Some("akid69")); + assert_eq!(candidates.last().map(String::as_str), Some("akid6")); + } + #[test] fn truncate_to_char_boundary_handles_multibyte_characters() { let max_len = 2048; diff --git a/src/validation/utils.rs b/src/validation/utils.rs index 10f6871..cb03d5a 100644 --- a/src/validation/utils.rs +++ b/src/validation/utils.rs @@ -153,7 +153,7 @@ pub fn process_captures(captures: &SerializableCaptures) -> Vec<(String, String, pub fn find_closest_variable( captures: &[(String, String, usize, usize)], - target_value: &String, + target_value: &str, target_variable_name: &str, search_variable_name: &str, ) -> Option { @@ -161,7 +161,7 @@ pub fn find_closest_variable( // compare relative offsets with candidate variables. 
let mut target_positions = Vec::new(); for (name, value, start, end) in captures { - if name == target_variable_name && value == target_value { + if name == target_variable_name && value.as_str() == target_value { target_positions.push((*start, *end)); } } @@ -346,8 +346,7 @@ mod tests { ("AKID".to_string(), "following".to_string(), 180usize, 200usize), ]; - let result = - find_closest_variable(&captures, &"secret".to_string(), "TOKEN", "AKID").unwrap(); + let result = find_closest_variable(&captures, "secret", "TOKEN", "AKID").unwrap(); assert_eq!(result, "preceding".to_string()); } @@ -359,8 +358,7 @@ mod tests { ("AKID".to_string(), "after".to_string(), 60usize, 80usize), ]; - let result = - find_closest_variable(&captures, &"secret".to_string(), "TOKEN", "AKID").unwrap(); + let result = find_closest_variable(&captures, "secret", "TOKEN", "AKID").unwrap(); assert_eq!(result, "after".to_string()); } diff --git a/tests/smoke_archive.rs b/tests/smoke_archive.rs index 4d4f8c6..bb10e01 100644 --- a/tests/smoke_archive.rs +++ b/tests/smoke_archive.rs @@ -21,7 +21,11 @@ fn smoke_scan_tar_gz_archive() -> anyhow::Result<()> { let mut t = Builder::new(gz); let data = format!("token={github_pat}\n"); - t.append_data(&mut tar::Header::new_gnu(), "secret.txt", data.as_bytes())?; + let mut header = tar::Header::new_gnu(); + header.set_size(data.len() as u64); + header.set_mode(0o644); + header.set_cksum(); + t.append_data(&mut header, "secret.txt", data.as_bytes())?; t.into_inner()?.finish()?; }