From 31663b03b53895e70394275a945697b048c8bb73 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Thu, 7 May 2026 13:46:17 -0700 Subject: [PATCH] =?UTF-8?q?Release=20binary=20trimmed=20from=2034=20MB=20t?= =?UTF-8?q?o=2026=20MB=20(~24%=20smaller).=20Switched=20jsonwebtoken=20to?= =?UTF-8?q?=20its=20rust=5Fcrypto=20backend=20(eliminates=20our=20scanner'?= =?UTF-8?q?s=20pull=20on=20aws-lc-rs),=20bumped=20workspace=20hmac=200.12?= =?UTF-8?q?=E2=86=920.13,=20sha1=200.10=E2=86=920.11,=20sha2=200.10?= =?UTF-8?q?=E2=86=920.11=20to=20deduplicate=20our=20internal=20crypto=20co?= =?UTF-8?q?de=20with=20the=20AWS=20sigv4=20side,=20and=20migrated=20affect?= =?UTF-8?q?ed=20call=20sites=20in=20kingfisher-core,=20kingfisher-rules,?= =?UTF-8?q?=20and=20kingfisher-scanner=20to=20the=20digest-0.11=20API=20(h?= =?UTF-8?q?ex::encode=20for=20hex=20digests,=20explicit=20KeyInit=20import?= =?UTF-8?q?=20for=20HMAC).?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 1 + Cargo.lock | 106 ++++++++++++++++-- Cargo.toml | 12 +- crates/kingfisher-core/src/blob.rs | 6 +- crates/kingfisher-rules/Cargo.toml | 1 + crates/kingfisher-rules/src/liquid_filters.rs | 8 +- crates/kingfisher-scanner/Cargo.toml | 2 +- .../kingfisher-scanner/src/validation/aws.rs | 2 +- .../src/validation/azure.rs | 4 +- .../src/validation/coinbase.rs | 2 +- .../kingfisher-scanner/src/validation/gcp.rs | 2 +- .../src/validation/http_validation.rs | 2 +- .../src/validation/mongodb.rs | 2 +- .../src/validation/mysql.rs | 2 +- .../src/validation/postgres.rs | 2 +- docs-site/docs/changelog.md | 7 ++ src/decompress.rs | 6 +- src/main.rs | 11 +- src/scanner/enumerate.rs | 28 ++--- 19 files changed, 143 insertions(+), 63 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1a0708f..bb649ae 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ All notable changes to this project will be documented in this file. 
- Git repository scans now extract archive blobs encountered in the object database, not just on the filesystem. Previously a `.zip`/`.jar`/`.apk`/`.tar.gz` committed to a repo was scanned as raw compressed bytes, so secrets inside it were invisible. The git enumerator fans each archive entry out as a synthetic `!` blob with the original commit metadata. Honors `--no-extract-archives` for opt-out. - Performance: ZIP-based git blobs ≤ 64 MB extract entirely in memory (no temp-file round trip), beating the v1.99.0 baseline by ~15% on a 80 GiB monorepo despite scanning ~300K additional archive-content blobs. Larger archives auto-fall-back to a disk-streaming extractor. - Memory safety: hard caps on archive extraction — 64 MB compressed pre-flight, 256 MB aggregate decompressed per archive (in-memory and disk paths), 512 MB per entry, plus a `PK\x03\x04` magic-byte gate. Worst-case footprint is bounded at ~`num_jobs * 320 MB`. +- Release binary trimmed from 34 MB to 26 MB (~24% smaller). Switched `jsonwebtoken` to its `rust_crypto` backend (eliminates our scanner's pull on `aws-lc-rs`), bumped workspace `hmac` 0.12→0.13, `sha1` 0.10→0.11, `sha2` 0.10→0.11 to deduplicate our internal crypto code with the AWS sigv4 side, and migrated affected call sites in `kingfisher-core`, `kingfisher-rules`, and `kingfisher-scanner` to the digest-0.11 API (`hex::encode` for hex digests, explicit `KeyInit` import for HMAC). ## [v1.99.0] - Fixed [#371](https://github.com/mongodb/kingfisher/issues/371): `pip install kingfisher-bin` on glibc Linux distros (Ubuntu, Debian, RHEL, Fedora, …) installed a macOS Mach-O binary and failed with `OSError: [Errno 8] Exec format error`. Linux wheels are now tagged `manylinux_2_17_.musllinux_1_2_` (instead of `musllinux_1_2_` only), so pip accepts them on both glibc-2.17+ and musl distros. 
The `pypi/hatch_build.py` hook now hard-fails when `KINGFISHER_PYPI_WHEEL_TAG` is unset, and the publish workflow refuses to upload any `py3-none-any.whl`, so the v1.92.0-era pure-Python wheel cannot recur. diff --git a/Cargo.lock b/Cargo.lock index 0848ed8..a6cef77 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1883,7 +1883,7 @@ dependencies = [ "crc", "digest 0.10.7", "rustversion", - "spin", + "spin 0.10.0", ] [[package]] @@ -2510,6 +2510,7 @@ dependencies = [ "ff", "generic-array", "group", + "hkdf", "pem-rfc7468", "pkcs8", "rand_core 0.6.4", @@ -4251,6 +4252,15 @@ dependencies = [ "tracing", ] +[[package]] +name = "hkdf" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b5f8eb2ad728638ea2c7d47a21db23b7b58a72ed6a38256b8a1849f15fbbdf7" +dependencies = [ + "hmac 0.12.1", +] + [[package]] name = "hmac" version = "0.12.1" @@ -4953,11 +4963,18 @@ checksum = "0529410abe238729a60b108898784df8984c87f6054c9c4fcacc47e4803c1ce1" dependencies = [ "aws-lc-rs", "base64 0.22.1", + "ed25519-dalek", "getrandom 0.2.17", + "hmac 0.12.1", "js-sys", + "p256", + "p384", "pem", + "rand 0.8.6", + "rsa", "serde", "serde_json", + "sha2 0.10.9", "signature", "simple_asn1", ] @@ -5072,7 +5089,7 @@ dependencies = [ "serde-sarif", "serde_json", "serde_yaml", - "sha1 0.10.6", + "sha1 0.11.0", "sha2 0.11.0", "smallvec", "strum 0.28.0", @@ -5124,7 +5141,7 @@ dependencies = [ "schemars 0.8.22", "serde", "serde_json", - "sha1 0.10.6", + "sha1 0.11.0", "smallvec", "thiserror 2.0.18", "tokei", @@ -5138,7 +5155,8 @@ dependencies = [ "base32", "base64 0.22.1", "crc32fast", - "hmac 0.12.1", + "hex", + "hmac 0.13.0", "ignore", "include_dir", "kingfisher-core", @@ -5153,8 +5171,8 @@ dependencies = [ "serde", "serde_json", "serde_yaml", - "sha1 0.10.6", - "sha2 0.10.9", + "sha1 0.11.0", + "sha2 0.11.0", "thiserror 2.0.18", "time", "tracing", @@ -5185,7 +5203,7 @@ dependencies = [ "crossbeam-skiplist", "ed25519-dalek", "hex", - "hmac 0.12.1", + "hmac 
0.13.0", "http 1.4.0", "jsonwebtoken 10.3.0", "kingfisher-core", @@ -5211,8 +5229,8 @@ dependencies = [ "schemars 0.8.22", "serde", "serde_json", - "sha1 0.10.6", - "sha2 0.10.9", + "sha1 0.11.0", + "sha2 0.11.0", "smallvec", "tempfile", "thiserror 2.0.18", @@ -5258,6 +5276,9 @@ name = "lazy_static" version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" +dependencies = [ + "spin 0.9.8", +] [[package]] name = "lber" @@ -5920,6 +5941,23 @@ dependencies = [ "num-traits", ] +[[package]] +name = "num-bigint-dig" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc84195820f291c7697304f3cbdadd1cb7199c0efc917ff5eafd71225c136151" +dependencies = [ + "byteorder", + "lazy_static", + "libm", + "num-integer", + "num-iter", + "num-traits", + "rand 0.8.6", + "smallvec", + "zeroize", +] + [[package]] name = "num-complex" version = "0.4.6" @@ -5983,6 +6021,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ "autocfg", + "libm", ] [[package]] @@ -6237,6 +6276,18 @@ dependencies = [ "sha2 0.10.9", ] +[[package]] +name = "p384" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe42f1670a52a47d448f14b6a5c61dd78fce51856e68edaa38f7ae3a46b8d6b6" +dependencies = [ + "ecdsa", + "elliptic-curve", + "primeorder", + "sha2 0.10.9", +] + [[package]] name = "parking_lot" version = "0.11.2" @@ -6517,6 +6568,17 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +[[package]] +name = "pkcs1" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8ffb9f10fa047879315e6625af03c164b16962a5368d724ed16323b68ace47f" +dependencies = [ + "der", 
+ "pkcs8", + "spki", +] + [[package]] name = "pkcs8" version = "0.10.2" @@ -7312,6 +7374,26 @@ dependencies = [ "byteorder", ] +[[package]] +name = "rsa" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78928ac1ed176a5ca1d17e578a1825f3d81ca54cf41053a592584b020cfd691b" +dependencies = [ + "const-oid 0.9.6", + "digest 0.10.7", + "num-bigint-dig", + "num-integer", + "num-traits", + "pkcs1", + "pkcs8", + "rand_core 0.6.4", + "signature", + "spki", + "subtle", + "zeroize", +] + [[package]] name = "rsqlite-vfs" version = "0.1.0" @@ -8070,6 +8152,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "spin" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" + [[package]] name = "spin" version = "0.10.0" diff --git a/Cargo.toml b/Cargo.toml index ff48213..c783e6e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -32,9 +32,9 @@ xxhash-rust = { version = "0.8", features = ["xxh3"] } ignore = "0.4" walkdir = "2.5" include_dir = "0.7" -sha1 = "0.10" -sha2 = "0.10" -hmac = "0.12" +sha1 = "0.11" +sha2 = "0.11" +hmac = "0.13" base32 = "0.5.1" base64 = "0.22" percent-encoding = "2.3" @@ -181,9 +181,9 @@ futures = "0.3.31" dashmap = "6.1.0" xxhash-rust = { version = "0.8.15", features = ["xxh3", "const_xxh3"] } serde_yaml = "0.9.34" -hmac = "0.13.0" +hmac = { workspace = true } sha1 = { workspace = true } -sha2 = "0.11.0" +sha2 = { workspace = true } humantime = "2.3.0" path-dedot = "3.1.1" quick-xml = { version = "0.39.2", features = ["serde", "serialize"] } @@ -252,7 +252,7 @@ proptest = "1.9.0" [profile.release] debug = false strip = true -opt-level = 3 # Maximum optimization for performance +opt-level = "s" # Optimize for binary size (previously 3, maximum speed) lto = true # Enable Link Time Optimization codegen-units = 1 # Optimize for size but slower compilation panic = "abort" # Remove unwind tables for panics diff --git 
a/crates/kingfisher-core/src/blob.rs b/crates/kingfisher-core/src/blob.rs index 66e462b..fbc458f 100644 --- a/crates/kingfisher-core/src/blob.rs +++ b/crates/kingfisher-core/src/blob.rs @@ -11,7 +11,7 @@ use std::{ convert::TryInto, fs::File, - io::{Read, Write}, + io::Read, path::Path, sync::{ Arc, OnceLock, @@ -235,7 +235,7 @@ impl BlobId { pub fn new(input: &[u8]) -> Self { const CHUNK: usize = 64 * 1024; // 64KB from start and end let mut hasher = Sha1::new(); - write!(&mut hasher, "blob {}\0", input.len()).unwrap(); + hasher.update(format!("blob {}\0", input.len()).as_bytes()); if input.len() <= CHUNK * 2 { hasher.update(input); } else { @@ -249,7 +249,7 @@ impl BlobId { /// Computes a `BlobId` from the complete bytes (no truncation). pub fn compute_from_bytes(bytes: &[u8]) -> Self { let mut hasher = Sha1::new(); - write!(&mut hasher, "blob {}\0", bytes.len()).unwrap(); + hasher.update(format!("blob {}\0", bytes.len()).as_bytes()); hasher.update(bytes); let digest: [u8; 20] = hasher.finalize().into(); BlobId(digest) diff --git a/crates/kingfisher-rules/Cargo.toml b/crates/kingfisher-rules/Cargo.toml index c0b26ce..ef50a03 100644 --- a/crates/kingfisher-rules/Cargo.toml +++ b/crates/kingfisher-rules/Cargo.toml @@ -41,6 +41,7 @@ crc32fast = "1.5" hmac.workspace = true sha1.workspace = true sha2.workspace = true +hex.workspace = true percent-encoding.workspace = true time.workspace = true uuid = { workspace = true, features = ["v4"] } diff --git a/crates/kingfisher-rules/src/liquid_filters.rs b/crates/kingfisher-rules/src/liquid_filters.rs index ec1becd..c042061 100644 --- a/crates/kingfisher-rules/src/liquid_filters.rs +++ b/crates/kingfisher-rules/src/liquid_filters.rs @@ -2,7 +2,7 @@ use base64::{Engine, engine::general_purpose}; use crc32fast::Hasher; -use hmac::{Hmac, Mac}; +use hmac::{Hmac, KeyInit, Mac}; use liquid_core::{ Display_filter, Error as LiquidError, Expression, Filter, FilterParameters, FilterReflection, FromFilterParameters, ParseFilter, 
Result, Runtime, Value, ValueView, @@ -536,7 +536,7 @@ static_filter!( |input: &dyn ValueView| -> String { let mut h = Sha256::new(); h.update(input.to_kstr().as_bytes()); - format!("{:x}", h.finalize()) + hex::encode(h.finalize()) } ); @@ -1128,7 +1128,7 @@ pub fn register_all(builder: liquid::ParserBuilder) -> liquid::ParserBuilder { #[cfg(test)] mod tests { use base64::{Engine as _, engine::general_purpose}; - use hmac::{Hmac, Mac}; + use hmac::{Hmac, KeyInit, Mac}; use liquid::{ParserBuilder, object}; use percent_encoding::{NON_ALPHANUMERIC, utf8_percent_encode}; use regex::Regex; @@ -1162,7 +1162,7 @@ mod tests { #[test] fn sha256_filter() { - let expect = format!("{:x}", Sha256::digest(b"hello")); + let expect = hex::encode(Sha256::digest(b"hello")); assert_eq!(render(r#"{{ "hello" | sha256 }}"#), expect); } diff --git a/crates/kingfisher-scanner/Cargo.toml b/crates/kingfisher-scanner/Cargo.toml index b9133c6..12b6bc0 100644 --- a/crates/kingfisher-scanner/Cargo.toml +++ b/crates/kingfisher-scanner/Cargo.toml @@ -182,7 +182,7 @@ pem = { version = "3.0.6", optional = true } percent-encoding = { workspace = true, optional = true } ring = { version = "0.17", optional = true } -jsonwebtoken = { version = "10.3.0", features = ["aws-lc-rs"], optional = true } +jsonwebtoken = { version = "10.3.0", default-features = false, features = ["rust_crypto"], optional = true } p256 = { version = "0.13.2", optional = true } ed25519-dalek = { version = "2.2", features = ["pkcs8"], optional = true } hex = { workspace = true, optional = true } diff --git a/crates/kingfisher-scanner/src/validation/aws.rs b/crates/kingfisher-scanner/src/validation/aws.rs index f313a92..264ca32 100644 --- a/crates/kingfisher-scanner/src/validation/aws.rs +++ b/crates/kingfisher-scanner/src/validation/aws.rs @@ -187,7 +187,7 @@ pub fn generate_aws_cache_key(aws_access_key_id: &str, aws_secret_access_key: &s hasher.update(aws_access_key_id.as_bytes()); hasher.update(b"\0"); 
hasher.update(aws_secret_access_key.as_bytes()); - format!("AWS:{:x}", hasher.finalize()) + format!("AWS:{}", hex::encode(hasher.finalize())) } /// Validate AWS credentials format before attempting validation. diff --git a/crates/kingfisher-scanner/src/validation/azure.rs b/crates/kingfisher-scanner/src/validation/azure.rs index 8b34270..1f0875f 100644 --- a/crates/kingfisher-scanner/src/validation/azure.rs +++ b/crates/kingfisher-scanner/src/validation/azure.rs @@ -3,7 +3,7 @@ use std::time::Duration; use anyhow::{Result, anyhow}; use base64::{Engine as _, engine::general_purpose::STANDARD as b64}; use chrono::Utc; -use hmac::{Hmac, Mac}; +use hmac::{Hmac, KeyInit, Mac}; use http::StatusCode; use quick_xml::{Reader, events::Event}; use reqwest::{Client, header::HeaderValue}; @@ -18,7 +18,7 @@ pub fn generate_azure_cache_key(azure_json: &str) -> String { use sha1::{Digest, Sha1}; let mut h = Sha1::new(); h.update(azure_json.as_bytes()); - format!("AZURE:{:x}", h.finalize()) + format!("AZURE:{}", hex::encode(h.finalize())) } /// Validate Azure Storage credentials without Azure SDK crates. 
diff --git a/crates/kingfisher-scanner/src/validation/coinbase.rs b/crates/kingfisher-scanner/src/validation/coinbase.rs index 6b6a945..3d6b7b1 100644 --- a/crates/kingfisher-scanner/src/validation/coinbase.rs +++ b/crates/kingfisher-scanner/src/validation/coinbase.rs @@ -23,7 +23,7 @@ pub fn generate_coinbase_cache_key(cred_name: &str, private_key: &str) -> String h.update(cred_name.as_bytes()); h.update(b"\0"); h.update(private_key.as_bytes()); - format!("COINBASE:{:x}", h.finalize()) + format!("COINBASE:{}", hex::encode(h.finalize())) } pub async fn validate_cdp_api_key( diff --git a/crates/kingfisher-scanner/src/validation/gcp.rs b/crates/kingfisher-scanner/src/validation/gcp.rs index 079fa50..8b67b21 100644 --- a/crates/kingfisher-scanner/src/validation/gcp.rs +++ b/crates/kingfisher-scanner/src/validation/gcp.rs @@ -142,7 +142,7 @@ pub fn generate_gcp_cache_key(gcp_json: &str) -> String { use sha1::{Digest, Sha1}; let mut hasher = Sha1::new(); hasher.update(gcp_json.as_bytes()); - format!("GCP:{:x}", hasher.finalize()) + format!("GCP:{}", hex::encode(hasher.finalize())) } impl GcpValidator { diff --git a/crates/kingfisher-scanner/src/validation/http_validation.rs b/crates/kingfisher-scanner/src/validation/http_validation.rs index 35a863e..fbf31c8 100644 --- a/crates/kingfisher-scanner/src/validation/http_validation.rs +++ b/crates/kingfisher-scanner/src/validation/http_validation.rs @@ -60,7 +60,7 @@ pub fn generate_http_cache_key_parts( hasher.update(b"\0"); } - format!("HTTP:{:x}", hasher.finalize()) + format!("HTTP:{}", hex::encode(hasher.finalize())) } /// Parse an HTTP method from a string. 
diff --git a/crates/kingfisher-scanner/src/validation/mongodb.rs b/crates/kingfisher-scanner/src/validation/mongodb.rs index 63bfe0a..46ab0bf 100644 --- a/crates/kingfisher-scanner/src/validation/mongodb.rs +++ b/crates/kingfisher-scanner/src/validation/mongodb.rs @@ -147,5 +147,5 @@ pub fn generate_mongodb_cache_key(mongodb_uri: &str) -> String { use sha1::{Digest, Sha1}; let mut hasher = Sha1::new(); hasher.update(mongodb_uri.as_bytes()); - format!("MongoDB:{:x}", hasher.finalize()) + format!("MongoDB:{}", hex::encode(hasher.finalize())) } diff --git a/crates/kingfisher-scanner/src/validation/mysql.rs b/crates/kingfisher-scanner/src/validation/mysql.rs index 7be1e1c..23c799c 100644 --- a/crates/kingfisher-scanner/src/validation/mysql.rs +++ b/crates/kingfisher-scanner/src/validation/mysql.rs @@ -56,7 +56,7 @@ pub fn generate_mysql_cache_key(mysql_url: &str) -> String { let mut hasher = Sha1::new(); hasher.update(mysql_url.as_bytes()); - format!("MySQL:{:x}", hasher.finalize()) + format!("MySQL:{}", hex::encode(hasher.finalize())) } fn is_local_host(host: &str) -> bool { diff --git a/crates/kingfisher-scanner/src/validation/postgres.rs b/crates/kingfisher-scanner/src/validation/postgres.rs index 2d204e5..acceb3e 100644 --- a/crates/kingfisher-scanner/src/validation/postgres.rs +++ b/crates/kingfisher-scanner/src/validation/postgres.rs @@ -70,7 +70,7 @@ impl ServerCertVerifier for LaxCertVerifier { pub fn generate_postgres_cache_key(postgres_url: &str) -> String { let mut hasher = Sha1::new(); hasher.update(postgres_url.as_bytes()); - format!("Postgres:{:x}", hasher.finalize()) + format!("Postgres:{}", hex::encode(hasher.finalize())) } pub fn parse_postgres_url(postgres_url: &str) -> Result { diff --git a/docs-site/docs/changelog.md b/docs-site/docs/changelog.md index 05ab499..85a8fa9 100644 --- a/docs-site/docs/changelog.md +++ b/docs-site/docs/changelog.md @@ -7,6 +7,13 @@ description: "Kingfisher release history: new features, rules, bug fixes, and im All 
notable changes to this project will be documented in this file. +## [v1.100.0] +- Archive scanning now reaches inside Android/iOS app packages: added `apk`, `aab`, and `ipa` to the recognized ZIP-based archive formats so secrets embedded in APK/AAB/IPA contents (e.g. `classes*.dex`, `res/values/strings.xml`) are extracted and matched. +- Git repository scans now extract archive blobs encountered in the object database, not just on the filesystem. Previously a `.zip`/`.jar`/`.apk`/`.tar.gz` committed to a repo was scanned as raw compressed bytes, so secrets inside it were invisible. The git enumerator fans each archive entry out as a synthetic `!` blob with the original commit metadata. Honors `--no-extract-archives` for opt-out. +- Performance: ZIP-based git blobs ≤ 64 MB extract entirely in memory (no temp-file round trip), beating the v1.99.0 baseline by ~15% on a 80 GiB monorepo despite scanning ~300K additional archive-content blobs. Larger archives auto-fall-back to a disk-streaming extractor. +- Memory safety: hard caps on archive extraction — 64 MB compressed pre-flight, 256 MB aggregate decompressed per archive (in-memory and disk paths), 512 MB per entry, plus a `PK\x03\x04` magic-byte gate. Worst-case footprint is bounded at ~`num_jobs * 320 MB`. +- Release binary trimmed from 34 MB to 26 MB (~24% smaller). Switched `jsonwebtoken` to its `rust_crypto` backend (eliminates our scanner's pull on `aws-lc-rs`), bumped workspace `hmac` 0.12→0.13, `sha1` 0.10→0.11, `sha2` 0.10→0.11 to deduplicate our internal crypto code with the AWS sigv4 side, and migrated affected call sites in `kingfisher-core`, `kingfisher-rules`, and `kingfisher-scanner` to the digest-0.11 API (`hex::encode` for hex digests, explicit `KeyInit` import for HMAC). 
+ ## [v1.99.0] - Fixed [#371](https://github.com/mongodb/kingfisher/issues/371): `pip install kingfisher-bin` on glibc Linux distros (Ubuntu, Debian, RHEL, Fedora, …) installed a macOS Mach-O binary and failed with `OSError: [Errno 8] Exec format error`. Linux wheels are now tagged `manylinux_2_17_.musllinux_1_2_` (instead of `musllinux_1_2_` only), so pip accepts them on both glibc-2.17+ and musl distros. The `pypi/hatch_build.py` hook now hard-fails when `KINGFISHER_PYPI_WHEEL_TAG` is unset, and the publish workflow refuses to upload any `py3-none-any.whl`, so the v1.92.0-era pure-Python wheel cannot recur. - `--self-update` (alias `--update`) on a scan or other command now **re-execs into the freshly installed binary** so the current invocation completes with the new code and the latest detection rules. Previously the on-disk binary was replaced but the running process kept using the old in-memory version, requiring a second invocation to pick up the changes. On Unix this is a true `exec()` (same PID); on Windows the new binary is spawned and the parent exits with its status code. The explicit `kingfisher self-update` subcommand still updates and exits without re-execing. Self-update now also covers Windows arm64 (the asset was already published; the runtime cfg map gained the missing arm). See `docs/ADVANCED.md` → *Update Checks*. diff --git a/src/decompress.rs b/src/decompress.rs index 72ddeca..c670c27 100644 --- a/src/decompress.rs +++ b/src/decompress.rs @@ -18,9 +18,9 @@ use zip::ZipArchive; /// Formats that are basically a ZIP container. 
pub const ZIP_BASED_FORMATS: &[&str] = &[ - "zip", "zipx", "jar", "war", "ear", "aar", "apk", "aab", "ipa", "jmod", "jhm", "jnlp", - "nupkg", "vsix", "xap", "docx", "xlsx", "pptx", "odt", "ods", "odp", "odg", "odf", "epub", - "gadget", "kmz", "widget", "xpi", "sketch", "pages", "key", "numbers", "hwpx", + "zip", "zipx", "jar", "war", "ear", "aar", "apk", "aab", "ipa", "jmod", "jhm", "jnlp", "nupkg", + "vsix", "xap", "docx", "xlsx", "pptx", "odt", "ods", "odp", "odg", "odf", "epub", "gadget", + "kmz", "widget", "xpi", "sketch", "pages", "key", "numbers", "hwpx", ]; /// Break `..` into `(Some(outer), Some(inner))`. diff --git a/src/main.rs b/src/main.rs index 7752885..d9b0767 100644 --- a/src/main.rs +++ b/src/main.rs @@ -979,11 +979,7 @@ fn build_config_yaml( // round-trip. Pull the raw CLI/env string from `ArgMatches` instead so // the emitted YAML matches what the user actually passed. fn raw_arg_string(matches: &clap::ArgMatches, id: &str) -> Option { - matches - .get_raw(id) - .and_then(|mut v| v.next()) - .and_then(|s| s.to_str()) - .map(str::to_owned) + matches.get_raw(id).and_then(|mut v| v.next()).and_then(|s| s.to_str()).map(str::to_owned) } if user_set(sub_matches, "github_api_url") { git.github_api_url = raw_arg_string(sub_matches, "github_api_url"); @@ -2323,10 +2319,7 @@ alerts: }; let yaml = super::build_config_yaml(&scan_args, &global_args, init_matches).unwrap(); let cfg = parse_str(&yaml).expect("emitted YAML must round-trip"); - assert_eq!( - cfg.git.github_api_url.as_deref(), - Some("https://ghe.corp.example.com/api/v3/"), - ); + assert_eq!(cfg.git.github_api_url.as_deref(), Some("https://ghe.corp.example.com/api/v3/"),); } #[test] diff --git a/src/scanner/enumerate.rs b/src/scanner/enumerate.rs index 701694e..b90b3cd 100644 --- a/src/scanner/enumerate.rs +++ b/src/scanner/enumerate.rs @@ -546,11 +546,7 @@ fn try_extract_git_blob_archive( // correct format (zip-based, gz, tar, ...). 
decompress_file_to_temp // dispatches on extension, so the extension MUST match the actual // bytes — using the in-tree filename is the right move. - let basename = pb - .file_name() - .and_then(|s| s.to_str()) - .unwrap_or("blob") - .to_string(); + let basename = pb.file_name().and_then(|s| s.to_str()).unwrap_or("blob").to_string(); // ── fast path: ZIP-based archives extract entirely in memory ── // @@ -668,10 +664,7 @@ fn try_extract_git_blob_archive( out.push((strip_logical_prefix(logical), bytes)); } Err(e) => { - debug!( - "Failed to read extracted entry {}: {e}", - disk_path.display() - ); + debug!("Failed to read extracted entry {}: {e}", disk_path.display()); } } } @@ -747,9 +740,7 @@ impl<'a> rayon::iter::ParallelIterator for GitRepoResultIter<'a> { let repo_path = Arc::clone(&repo_path); let flag = Arc::clone(&flag); - move |repo: &mut GixRepo, - md: GitBlobMetadata| - -> Result)>> { + move |repo: &mut GixRepo, md: GitBlobMetadata| -> Result)>> { if StdInstant::now() > deadline { if flag.swap(true, Ordering::Relaxed) { bail!("__timeout_silenced__"); @@ -777,18 +768,17 @@ impl<'a> rayon::iter::ParallelIterator for GitRepoResultIter<'a> { Ok(Some(entries)) => { let mut out = Vec::with_capacity(entries.len()); for (entry_logical, entry_bytes) in entries { - let origin = OriginSet::try_from_iter( - md.first_seen.iter().map(|e| { + let origin = + OriginSet::try_from_iter(md.first_seen.iter().map(|e| { Origin::from_git_repo_with_first_commit( Arc::clone(&repo_path), Arc::clone(&e.commit_metadata), entry_logical.clone(), ) - }), - ) - .unwrap_or_else(|| { - Origin::from_git_repo(Arc::clone(&repo_path)).into() - }); + })) + .unwrap_or_else( + || Origin::from_git_repo(Arc::clone(&repo_path)).into(), + ); out.push((origin, Blob::from_bytes(entry_bytes))); } return Ok(out);