From 1636b07810a2147fe8042a0851caa4f2e00a120d Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Mon, 18 May 2026 09:42:04 -0700 Subject: [PATCH] preparing for v1.100.0 --- .gitignore | 1 + CHANGELOG.md | 1 + crates/kingfisher-rules/data/rules/aws.yml | 11 +- .../kingfisher-rules/data/rules/voyageai.yml | 56 ++++- .../kingfisher-scanner/src/validation/aws.rs | 4 +- docs-site/docs/changelog.md | 1 + src/decompress.rs | 68 ++++-- src/scanner/validation.rs | 12 + src/validation.rs | 222 +++++++++++++----- tests/smoke_archive.rs | 6 +- 10 files changed, 295 insertions(+), 87 deletions(-) diff --git a/.gitignore b/.gitignore index 420511a..1ccd1e3 100644 --- a/.gitignore +++ b/.gitignore @@ -23,6 +23,7 @@ logs/* !testdata/html_embedded_vulnerable.html !docs/viewer/index.html !docs-site/overrides/*.html +private-notes/ *.dot fuzz/* !fuzz/Cargo.toml diff --git a/CHANGELOG.md b/CHANGELOG.md index bb649ae..4d81983 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ All notable changes to this project will be documented in this file. ## [v1.100.0] - Archive scanning now reaches inside Android/iOS app packages: added `apk`, `aab`, and `ipa` to the recognized ZIP-based archive formats so secrets embedded in APK/AAB/IPA contents (e.g. `classes*.dex`, `res/values/strings.xml`) are extracted and matched. - Git repository scans now extract archive blobs encountered in the object database, not just on the filesystem. Previously a `.zip`/`.jar`/`.apk`/`.tar.gz` committed to a repo was scanned as raw compressed bytes, so secrets inside it were invisible. The git enumerator fans each archive entry out as a synthetic `!` blob with the original commit metadata. Honors `--no-extract-archives` for opt-out. +- Fixed tar-wrapped archive extraction for `.tgz` and `.tar.*` files, and made dependent credential validation deduplication preserve per-occurrence context so repeated secrets validate with the correct nearby companion value. - Performance: ZIP-based git blobs ≤ 64 MB extract entirely in memory (no temp-file round trip), beating the v1.99.0 baseline by ~15% on a 80 GiB monorepo despite scanning ~300K additional archive-content blobs. Larger archives auto-fall-back to a disk-streaming extractor. - Memory safety: hard caps on archive extraction — 64 MB compressed pre-flight, 256 MB aggregate decompressed per archive (in-memory and disk paths), 512 MB per entry, plus a `PK\x03\x04` magic-byte gate. Worst-case footprint is bounded at ~`num_jobs * 320 MB`. - Release binary trimmed from 34 MB to 26 MB (~24% smaller). Switched `jsonwebtoken` to its `rust_crypto` backend (eliminates our scanner's pull on `aws-lc-rs`), bumped workspace `hmac` 0.12→0.13, `sha1` 0.10→0.11, `sha2` 0.10→0.11 to deduplicate our internal crypto code with the AWS sigv4 side, and migrated affected call sites in `kingfisher-core`, `kingfisher-rules`, and `kingfisher-scanner` to the digest-0.11 API (`hex::encode` for hex digests, explicit `KeyInit` import for HMAC). diff --git a/crates/kingfisher-rules/data/rules/aws.yml b/crates/kingfisher-rules/data/rules/aws.yml index 1bc7f95..b73620d 100644 --- a/crates/kingfisher-rules/data/rules/aws.yml +++ b/crates/kingfisher-rules/data/rules/aws.yml @@ -5,16 +5,15 @@ rules: (?x) \b ( - (?:A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA) + (?:A3T[A-Z0-9]|AKIA|ASIA) [A-Z0-9]{16} ) \b pattern_requirements: - min_digits: 1 ignore_if_contains: - "EXAMPLE" - "TEST" - min_entropy: 3.2 + min_entropy: 3.0 visible: false confidence: medium examples: @@ -25,14 +24,14 @@ rules: pattern: | (?xi) (?: - \b - (?:AWS|AMAZON|AMZN|A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA) + (?:\b|_) + (?:AWS|AMAZON|AMZN|A3T[A-Z0-9]|AKIA|ASIA) (?:.|[\n\r]){0,64}? [^A-Za-z0-9_+!@\#$%^&*()\]./] ([A-Za-z0-9/+]{40}) [^A-Za-z0-9_+!@\#$%^&*()\]./] | - \b(?:AWS|AMAZON|AMZN|A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA) + (?:\b|_)(?:AWS|AMAZON|AMZN|A3T[A-Z0-9]|AKIA|ASIA) (?:.|[\n\r]){0,96}? (?:SECRET|PRIVATE|ACCESS) (?:.|[\n\r]){0,16}? diff --git a/crates/kingfisher-rules/data/rules/voyageai.yml b/crates/kingfisher-rules/data/rules/voyageai.yml index ce2a0a8..9f187c1 100644 --- a/crates/kingfisher-rules/data/rules/voyageai.yml +++ b/crates/kingfisher-rules/data/rules/voyageai.yml @@ -5,12 +5,13 @@ rules: # Matches keys starting with 'pa-' followed by 43 URL-safe base64 characters pattern: | (?x) + \b ( pa-[a-zA-Z0-9\-_]{43} ) \b min_entropy: 4.0 - confidence: high + confidence: medium examples: - pa-r4yuCYCuPhNO-10Lu9aO7dR4jxUWlLmlUjm_NOVVdSs validation: @@ -22,7 +23,56 @@ rules: headers: Authorization: "Bearer {{ TOKEN }}" response_matcher: + # 200 = key has /v1/files permission, 403 = valid key without that permission + # (e.g. an inference-only key). 401 with "Provided API key is invalid." is the + # only response Voyage AI returns for a bad key, so any non-401 status is live. - type: StatusMatch - status: [200] + status: [401] + negative: true + - type: WordMatch + words: + - "Provided API key is invalid" + negative: true references: - - https://docs.voyageai.com/reference \ No newline at end of file + - https://docs.voyageai.com/reference + - https://docs.voyageai.com/docs/api-key-and-installation + + - name: Voyage AI API Key + id: kingfisher.voyageai.api_key.2 + description: Detects Voyage AI API keys (al- prefix variant) used for embedding and retrieval models. + # Matches keys starting with 'al-' followed by 43 URL-safe base64 characters + pattern: | + (?x) + \b + ( + al-[a-zA-Z0-9\-_]{43} + ) + \b + min_entropy: 4.0 + confidence: medium + examples: + - al-Qf7M2bZ8xnLpvE4hRcDsJtAo1KyU93WgIBmXrNVoYTu + validation: + type: Http + content: + request: + method: GET + url: https://api.voyageai.com/v1/files + headers: + Authorization: "Bearer {{ TOKEN }}" + response_matcher: + # 200 = key has /v1/files permission, 403 = valid key without that permission. + # 401 with "Provided API key is invalid." is the only invalid-key response. + - type: StatusMatch + status: [401] + negative: true + - type: WordMatch + words: + - "Provided API key is invalid" + negative: true + references: + - https://docs.voyageai.com/reference + - https://docs.voyageai.com/docs/api-key-and-installation +# NOTE: Revocation is not implemented because Voyage AI does not document a public REST +# endpoint for programmatic API key revocation. All probed admin/key-management paths +# under api.voyageai.com return 404. Keys must be revoked via the Voyage AI dashboard. diff --git a/crates/kingfisher-scanner/src/validation/aws.rs b/crates/kingfisher-scanner/src/validation/aws.rs index 264ca32..d0d3a5e 100644 --- a/crates/kingfisher-scanner/src/validation/aws.rs +++ b/crates/kingfisher-scanner/src/validation/aws.rs @@ -200,9 +200,7 @@ pub fn validate_aws_credentials_input(access_key_id: &str, secret_key: &str) -> return Err("AWS access key ID contains invalid characters".to_string()); } let prefix = &access_key_id[..4]; - let valid_prefix = - matches!(prefix, "AKIA" | "AGPA" | "AIDA" | "AROA" | "AIPA" | "ANPA" | "ANVA" | "ASIA") - || prefix.starts_with("A3T"); + let valid_prefix = matches!(prefix, "AKIA" | "ASIA") || prefix.starts_with("A3T"); if !valid_prefix { return Err("Invalid AWS access key ID format".to_string()); } diff --git a/docs-site/docs/changelog.md b/docs-site/docs/changelog.md index 85a8fa9..42d7283 100644 --- a/docs-site/docs/changelog.md +++ b/docs-site/docs/changelog.md @@ -10,6 +10,7 @@ All notable changes to this project will be documented in this file. ## [v1.100.0] - Archive scanning now reaches inside Android/iOS app packages: added `apk`, `aab`, and `ipa` to the recognized ZIP-based archive formats so secrets embedded in APK/AAB/IPA contents (e.g. `classes*.dex`, `res/values/strings.xml`) are extracted and matched. - Git repository scans now extract archive blobs encountered in the object database, not just on the filesystem. Previously a `.zip`/`.jar`/`.apk`/`.tar.gz` committed to a repo was scanned as raw compressed bytes, so secrets inside it were invisible. The git enumerator fans each archive entry out as a synthetic `!` blob with the original commit metadata. Honors `--no-extract-archives` for opt-out. +- Fixed tar-wrapped archive extraction for `.tgz` and `.tar.*` files, and made dependent credential validation deduplication preserve per-occurrence context so repeated secrets validate with the correct nearby companion value. - Performance: ZIP-based git blobs ≤ 64 MB extract entirely in memory (no temp-file round trip), beating the v1.99.0 baseline by ~15% on a 80 GiB monorepo despite scanning ~300K additional archive-content blobs. Larger archives auto-fall-back to a disk-streaming extractor. - Memory safety: hard caps on archive extraction — 64 MB compressed pre-flight, 256 MB aggregate decompressed per archive (in-memory and disk paths), 512 MB per entry, plus a `PK\x03\x04` magic-byte gate. Worst-case footprint is bounded at ~`num_jobs * 320 MB`. - Release binary trimmed from 34 MB to 26 MB (~24% smaller). Switched `jsonwebtoken` to its `rust_crypto` backend (eliminates our scanner's pull on `aws-lc-rs`), bumped workspace `hmac` 0.12→0.13, `sha1` 0.10→0.11, `sha2` 0.10→0.11 to deduplicate our internal crypto code with the AWS sigv4 side, and migrated affected call sites in `kingfisher-core`, `kingfisher-rules`, and `kingfisher-scanner` to the digest-0.11 API (`hex::encode` for hex digests, explicit `KeyInit` import for HMAC). diff --git a/src/decompress.rs b/src/decompress.rs index c670c27..c5cbc1d 100644 --- a/src/decompress.rs +++ b/src/decompress.rs @@ -23,18 +23,18 @@ pub const ZIP_BASED_FORMATS: &[&str] = &[ "kmz", "widget", "xpi", "sketch", "pages", "key", "numbers", "hwpx", ]; -/// Break `..` into `(Some(outer), Some(inner))`. -/// For `foo.tar.gz` this returns `("tar", "gz")`. -fn split_extensions(path: &Path) -> (Option, Option) { - let ext_inner = path.extension().and_then(|e| e.to_str()).map(|s| s.to_ascii_lowercase()); +fn is_tar_wrapped_compression(path: &Path) -> bool { + let filename = match path.file_name().and_then(|s| s.to_str()) { + Some(name) => name.to_ascii_lowercase(), + None => return false, + }; - let ext_outer = path - .file_stem() - .and_then(|s| Path::new(s).extension()) - .and_then(|e| e.to_str()) - .map(|s| s.to_ascii_lowercase()); - - (ext_outer, ext_inner) + filename.ends_with(".tgz") + || filename.ends_with(".tar.gz") + || filename.ends_with(".tar.gzip") + || filename.ends_with(".tar.bz2") + || filename.ends_with(".tar.bzip2") + || filename.ends_with(".tar.xz") } #[derive(Debug)] @@ -450,7 +450,7 @@ fn decompress_once(path: &Path, base_dir: Option<&Path>) -> Result { + "gz" | "gzip" | "tgz" => { let out_path = make_output_path(path, base_dir, "decomp.tar"); let decoder = GzDecoder::new(BufReader::new(safe_open_for_read(path)?)); return stream_to_file(decoder, &out_path); @@ -487,12 +487,13 @@ pub fn decompress_file(path: &Path, base_dir: Option<&Path>) -> Result; loop { + let should_extract_tar = is_tar_wrapped_compression(current_path); let content = decompress_once(current_path, base_dir)?; // If the step produced a single on-disk file that is itself a .tar, // recurse on that file. if let CompressedContent::RawFile(ref p) = content { - if split_extensions(p).0.as_deref() == Some("tar") { + if should_extract_tar { owned_buf = Some(p.clone()); // own the path current_path = owned_buf.as_ref().unwrap(); continue; @@ -570,7 +571,7 @@ mod tests { use tempfile::tempdir; use zip::{CompressionMethod, ZipWriter, write::SimpleFileOptions}; - use super::{CompressedContent, decompress_once}; + use super::{CompressedContent, decompress_file_to_temp, decompress_once}; /// 1) Fully unpack: /// - 1st decompress `.gz` -- get a `.tar` file @@ -627,6 +628,45 @@ mod tests { Ok(()) } + #[test] + fn smoke_decompress_tgz_archive() -> anyhow::Result<()> { + let dir = tempdir()?; + let tgz = dir.path().join("payload.tgz"); + let github_pat = "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"; // this is not a real secret + + { + let f = File::create(&tgz)?; + let gz = GzEncoder::new(f, Compression::default()); + let mut tar = Builder::new(gz); + + let data = format!("token={github_pat}\n"); + let mut hdr = tar::Header::new_gnu(); + hdr.set_size(data.len() as u64); + hdr.set_mode(0o644); + hdr.set_cksum(); + tar.append_data(&mut hdr, "secret.txt", data.as_bytes())?; + + tar.into_inner()?.finish()?; + } + + let (content, _tmp) = decompress_file_to_temp(&tgz)?; + if let CompressedContent::ArchiveFiles(files) = content { + let mut found = false; + for (logical, path) in files { + if logical.ends_with("payload.tgz!secret.txt") { + let txt = std::fs::read_to_string(&path)?; + assert!(txt.contains(github_pat)); + found = true; + } + } + assert!(found, "did not find secret.txt in tgz ArchiveFiles"); + } else { + panic!("expected ArchiveFiles for tgz archive, got {:?}", content); + } + + Ok(()) + } + /// 2) No-extract flag: just peel the `.gz` layer (no base_dir -- use NamedTempFile), and verify /// you get back a RawFile, whose contents are the tar archive itself. #[test] diff --git a/src/scanner/validation.rs b/src/scanner/validation.rs index 1695ae4..dc2ecaf 100644 --- a/src/scanner/validation.rs +++ b/src/scanner/validation.rs @@ -983,6 +983,18 @@ fn build_cache_key( // For demonstration, we’ll do a simplistic approach // You can adapt from your existing logic let capture0 = om.captures.captures.get(0).map_or(String::new(), |c| c.raw_value().to_string()); + + if !om.rule.syntax().depends_on_rule.is_empty() { + return format!( + "{}|{}|{}|{}|{}", + om.rule.name(), + capture0, + om.blob_id, + om.matching_input_offset_span.start, + om.matching_input_offset_span.end + ); + } + format!("{}|{}|{}", om.rule.name(), capture0, dep_vars_str) } diff --git a/src/validation.rs b/src/validation.rs index be12ecd..8d5a7c7 100644 --- a/src/validation.rs +++ b/src/validation.rs @@ -15,7 +15,7 @@ use http::StatusCode; use liquid::Object; use liquid_core::{Value, ValueView}; use reqwest::{Client, Url, header, header::HeaderValue, multipart}; -use rustc_hash::FxHashMap; +use rustc_hash::{FxHashMap, FxHashSet}; use tokio::{sync::Notify, time}; use tracing::{debug, trace}; @@ -260,7 +260,9 @@ type Cache = kingfisher_scanner::validation::Cache; /// Returns an opaque 64-bit key for internal validation deduplication. /// /// This is an INTERNAL key used only for validation deduplication within a single scan. -/// It uses `captures.get(0)` to get the primary secret value. +/// It uses `captures.get(0)` to get the primary secret value. Rules with dependent +/// variables also include blob location because validation can depend on nearby context +/// such as an AWS access-key ID paired with a secret access key. /// /// **Important**: This is distinct from the EXTERNAL `finding_fingerprint` used for: /// - Baseline comparisons across scans @@ -279,6 +281,13 @@ fn validation_dedup_key(m: &OwnedBlobMatch) -> u64 { if let Some(val) = capture_value { val.hash(&mut hasher); } + + if !m.rule.syntax().depends_on_rule.is_empty() { + m.blob_id.hash(&mut hasher); + m.matching_input_offset_span.start.hash(&mut hasher); + m.matching_input_offset_span.end.hash(&mut hasher); + } + let key = hasher.finish(); trace!( @@ -693,7 +702,7 @@ async fn timed_validate_single_match<'a>( validate_jwt_rule(m, &captured_values, use_lax_tls, clients.allow_internal_ips).await; } Some(Validation::AWS) => { - validate_aws_rule(m, &captured_values, cache).await; + validate_aws_rule(m, &captured_values, dependent_variables, cache).await; } Some(Validation::GCP) => { validate_gcp_rule(m, &globals, cache).await; @@ -1391,6 +1400,7 @@ async fn validate_jwt_rule( async fn validate_aws_rule( m: &mut OwnedBlobMatch, captured_values: &[(String, String, usize, usize)], + dependent_variables: &FxHashMap>, cache: &Cache, ) { let secret = captured_values @@ -1398,10 +1408,8 @@ async fn validate_aws_rule( .find(|(n, ..)| n == "TOKEN") .map(|(_, v, ..)| v.clone()) .unwrap_or_default(); - let akid = - utils::find_closest_variable(captured_values, &secret, "TOKEN", "AKID").unwrap_or_default(); - if akid.is_empty() || secret.is_empty() { + if secret.is_empty() { m.validation_success = false; m.validation_response_body = validation_body::from_string("Missing AWS access-key ID or secret.".to_string()); @@ -1409,77 +1417,171 @@ async fn validate_aws_rule( return; } - let cache_key = aws::generate_aws_cache_key(&akid, &secret); - if let Some(cached) = cache.get(&cache_key) { - let c = cached.value(); - if c.timestamp.elapsed() < Duration::from_secs(VALIDATION_CACHE_SECONDS) { - m.validation_success = c.is_valid; - m.validation_response_body = c.body.clone(); - m.validation_response_status = c.status; - return; - } - } + let akid_candidates = aws_akid_candidates( + captured_values, + dependent_variables.get("AKID"), + m.matching_input_offset_span, + &secret, + ); - if let Some(account_id) = aws::should_skip_aws_validation(&akid) { - m.validation_success = false; - m.validation_response_body = validation_body::from_string(format!( - "(skip list entry) AWS validation not attempted for account {}.", - account_id - )); - m.validation_response_status = StatusCode::PRECONDITION_REQUIRED; - cache.insert( - cache_key, - CachedResponse { - body: m.validation_response_body.clone(), - status: m.validation_response_status, - is_valid: m.validation_success, - timestamp: Instant::now(), - }, - ); - return; - } - - if let Err(e) = aws::validate_aws_credentials_input(&akid, &secret) { + if akid_candidates.is_empty() { m.validation_success = false; m.validation_response_body = - validation_body::from_string(format!("Invalid AWS credentials ({}): {}", akid, e)); + validation_body::from_string("Missing AWS access-key ID or secret.".to_string()); m.validation_response_status = StatusCode::BAD_REQUEST; return; } - match aws::validate_aws_credentials(&akid, &secret).await { - Ok((ok, msg)) => { - m.validation_success = ok; - if ok { - let mut body = format!("{} --- ARN: {}", akid, msg); - if let Ok(acct) = aws::aws_key_to_account_number(&akid) { - body.push_str(&format!(" --- AWS Account Number: {:012}", acct)); + let mut last_body = None; + let mut last_status = StatusCode::UNAUTHORIZED; + + for akid in akid_candidates { + let cache_key = aws::generate_aws_cache_key(&akid, &secret); + if let Some(cached) = cache.get(&cache_key) { + let c = cached.value(); + if c.timestamp.elapsed() < Duration::from_secs(VALIDATION_CACHE_SECONDS) { + if c.is_valid { + m.validation_success = c.is_valid; + m.validation_response_body = c.body.clone(); + m.validation_response_status = c.status; + return; } - m.validation_response_body = validation_body::from_string(body); - m.validation_response_status = StatusCode::OK; - } else { - m.validation_response_body = validation_body::from_string(format!( - "AWS validation error ({}): {}", - akid, msg - )); - m.validation_response_status = StatusCode::UNAUTHORIZED; + last_body = Some(c.body.clone()); + last_status = c.status; + continue; } + } + + if let Some(account_id) = aws::should_skip_aws_validation(&akid) { + let body = validation_body::from_string(format!( + "(skip list entry) AWS validation not attempted for account {}.", + account_id + )); cache.insert( cache_key, CachedResponse { - body: m.validation_response_body.clone(), - status: m.validation_response_status, - is_valid: m.validation_success, + body: body.clone(), + status: StatusCode::PRECONDITION_REQUIRED, + is_valid: false, timestamp: Instant::now(), }, ); + last_body = Some(body); + last_status = StatusCode::PRECONDITION_REQUIRED; + continue; } - Err(e) => { - m.validation_success = false; - m.validation_response_body = - validation_body::from_string(format!("AWS validation error ({}): {}", akid, e)); - m.validation_response_status = StatusCode::BAD_GATEWAY; + + if let Err(e) = aws::validate_aws_credentials_input(&akid, &secret) { + let body = + validation_body::from_string(format!("Invalid AWS credentials ({}): {}", akid, e)); + cache.insert( + cache_key, + CachedResponse { + body: body.clone(), + status: StatusCode::BAD_REQUEST, + is_valid: false, + timestamp: Instant::now(), + }, + ); + last_body = Some(body); + last_status = StatusCode::BAD_REQUEST; + continue; } + + match aws::validate_aws_credentials(&akid, &secret).await { + Ok((ok, msg)) => { + if ok { + m.validation_success = true; + let mut body = format!("{} --- ARN: {}", akid, msg); + if let Ok(acct) = aws::aws_key_to_account_number(&akid) { + body.push_str(&format!(" --- AWS Account Number: {:012}", acct)); + } + m.validation_response_body = validation_body::from_string(body); + m.validation_response_status = StatusCode::OK; + cache.insert( + cache_key, + CachedResponse { + body: m.validation_response_body.clone(), + status: m.validation_response_status, + is_valid: true, + timestamp: Instant::now(), + }, + ); + return; + } + + let body = validation_body::from_string(format!( + "AWS validation error ({}): {}", + akid, msg + )); + cache.insert( + cache_key, + CachedResponse { + body: body.clone(), + status: StatusCode::UNAUTHORIZED, + is_valid: false, + timestamp: Instant::now(), + }, + ); + last_body = Some(body); + last_status = StatusCode::UNAUTHORIZED; + } + Err(e) => { + last_body = Some(validation_body::from_string(format!( + "AWS validation error ({}): {}", + akid, e + ))); + last_status = StatusCode::BAD_GATEWAY; + } + } + } + + m.validation_success = false; + m.validation_response_body = last_body.unwrap_or_else(|| { + validation_body::from_string("AWS validation failed for all nearby access-key IDs.") + }); + m.validation_response_status = last_status; +} + +fn aws_akid_candidates( + captured_values: &[(String, String, usize, usize)], + dependent_akids: Option<&Vec<(String, OffsetSpan)>>, + target_span: OffsetSpan, + secret: &str, +) -> Vec { + let mut candidates = Vec::new(); + + if let Some(closest) = + utils::find_closest_variable(captured_values, &secret.to_string(), "TOKEN", "AKID") + { + candidates.push((0usize, closest)); + } + + if let Some(values) = dependent_akids { + candidates.extend( + values + .iter() + .map(|(value, span)| (dependency_distance(*span, target_span), value.clone())), + ); + } + + candidates.sort_by_key(|(distance, _)| *distance); + + let mut seen = FxHashSet::default(); + candidates + .into_iter() + .filter_map(|(_, value)| if seen.insert(value.clone()) { Some(value) } else { None }) + .take(64) + .collect() +} + +fn dependency_distance(span: OffsetSpan, target_span: OffsetSpan) -> usize { + if span.end <= target_span.start { + target_span.start - span.end + } else if span.start >= target_span.end { + span.start - target_span.end + } else { + 0 } } diff --git a/tests/smoke_archive.rs b/tests/smoke_archive.rs index 4d4f8c6..bb10e01 100644 --- a/tests/smoke_archive.rs +++ b/tests/smoke_archive.rs @@ -21,7 +21,11 @@ fn smoke_scan_tar_gz_archive() -> anyhow::Result<()> { let mut t = Builder::new(gz); let data = format!("token={github_pat}\n"); - t.append_data(&mut tar::Header::new_gnu(), "secret.txt", data.as_bytes())?; + let mut header = tar::Header::new_gnu(); + header.set_size(data.len() as u64); + header.set_mode(0o644); + header.set_cksum(); + t.append_data(&mut header, "secret.txt", data.as_bytes())?; t.into_inner()?.finish()?; }