From a8e01c4a6e4e3a7ec9bafa3bc8911c17fed1bb1d Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Mon, 18 May 2026 18:33:42 -0700 Subject: [PATCH] preparing for v1.100.0 --- src/scanner/enumerate.rs | 29 +++++++++++++++++++++++++- src/scanner/validation.rs | 43 +++++++++++++++++++++++++-------------- src/validation.rs | 2 +- 3 files changed, 57 insertions(+), 17 deletions(-) diff --git a/src/scanner/enumerate.rs b/src/scanner/enumerate.rs index 1e0cd83..f438b84 100644 --- a/src/scanner/enumerate.rs +++ b/src/scanner/enumerate.rs @@ -718,6 +718,12 @@ fn try_extract_git_blob_archive( if entries.is_empty() { Ok(None) } else { Ok(Some(entries)) } } +fn archive_entry_suffix<'a>(entry_logical: &'a str, archive_path: &str) -> Option<&'a str> { + entry_logical.strip_prefix(archive_path).filter(|suffix| suffix.starts_with('!')).or_else( + || entry_logical.split_once('!').map(|(archive, _)| &entry_logical[archive.len()..]), + ) +} + // A marker so the struct itself carries the lifetime. struct GitRepoResultIter<'a> { inner: GitRepoResult, @@ -798,12 +804,21 @@ impl<'a> rayon::iter::ParallelIterator for GitRepoResultIter<'a> { Ok(Some(entries)) => { let mut out = Vec::with_capacity(entries.len()); for (entry_logical, entry_bytes) in entries { + let entry_suffix = + archive_entry_suffix(&entry_logical, &archive_path); let origin = OriginSet::try_from_iter(md.first_seen.iter().map(|e| { + let repo_relative_path = + String::from_utf8_lossy(&e.path).to_string(); + let per_appearance_logical = entry_suffix + .map(|suffix| { + format!("{repo_relative_path}{suffix}") + }) + .unwrap_or_else(|| entry_logical.clone()); Origin::from_git_repo_with_first_commit( Arc::clone(&repo_path), Arc::clone(&e.commit_metadata), - entry_logical.clone(), + per_appearance_logical, ) })) .unwrap_or_else( @@ -1560,6 +1575,18 @@ mod tests { Ok(()) } + #[test] + fn archive_entry_suffix_preserves_entry_component() { + assert_eq!( + super::archive_entry_suffix("dir/archive.zip!nested/secret.txt", "dir/archive.zip"), + Some("!nested/secret.txt") + ); + assert_eq!( + super::archive_entry_suffix("archive.zip!nested/secret.txt", "other/archive.zip"), + Some("!nested/secret.txt") + ); + } + #[test] fn git_blob_archive_extraction_preserves_repo_relative_paths() -> Result<()> { let mut cursor = std::io::Cursor::new(Vec::new()); diff --git a/src/scanner/validation.rs b/src/scanner/validation.rs index a07d7a3..fbdb7d2 100644 --- a/src/scanner/validation.rs +++ b/src/scanner/validation.rs @@ -1001,8 +1001,9 @@ fn maybe_record_access_map(om: &OwnedBlobMatch, collector: Option<&AccessMapColl .map(|(_, value, ..)| value.clone()) .unwrap_or_default(); - let mut akid = utils::find_closest_variable(&captures, &secret, "TOKEN", "AKID") - .unwrap_or_default(); + let mut akid = + utils::find_closest_variable(&captures, secret.as_str(), "TOKEN", "AKID") + .unwrap_or_default(); if akid.is_empty() { akid = extract_akid_from_body(&om.validation_response_body).unwrap_or_default(); @@ -1026,7 +1027,7 @@ fn maybe_record_access_map(om: &OwnedBlobMatch, collector: Option<&AccessMapColl .map(|(_, value, ..)| value.clone()) .unwrap_or_default(); let storage_account = - utils::find_closest_variable(&captures, &storage_key, "TOKEN", "AZURENAME") + utils::find_closest_variable(&captures, storage_key.as_str(), "TOKEN", "AZURENAME") .unwrap_or_default(); let mut storage_account = storage_account; @@ -1081,9 +1082,13 @@ fn maybe_record_access_map(om: &OwnedBlobMatch, collector: Option<&AccessMapColl .find(|(name, ..)| name == "TOKEN") .map(|(_, value, ..)| value.clone()) .unwrap_or_default(); - let mut organization = - utils::find_closest_variable(&captures, &token, "TOKEN", "AZURE_DEVOPS_ORG") - .unwrap_or_default(); + let mut organization = utils::find_closest_variable( + &captures, + token.as_str(), + "TOKEN", + "AZURE_DEVOPS_ORG", + ) + .unwrap_or_default(); if organization.is_empty() { organization = extract_azure_devops_org_from_body(&om.validation_response_body) .unwrap_or_default(); @@ -1100,7 +1105,7 @@ fn maybe_record_access_map(om: &OwnedBlobMatch, collector: Option<&AccessMapColl .map(|(_, value, ..)| value.clone()) .unwrap_or_default(); let access_key = - utils::find_closest_variable(&captures, &secret_key, "TOKEN", "AKID") + utils::find_closest_variable(&captures, secret_key.as_str(), "TOKEN", "AKID") .or_else(|| om.dependent_captures.get("AKID").cloned()) .unwrap_or_default(); @@ -1114,14 +1119,22 @@ fn maybe_record_access_map(om: &OwnedBlobMatch, collector: Option<&AccessMapColl .find(|(name, ..)| name == "TOKEN") .map(|(_, value, ..)| value.clone()) .unwrap_or_default(); - let access_key = - utils::find_closest_variable(&captures, &secret_key, "TOKEN", "STS_AKID") - .or_else(|| om.dependent_captures.get("STS_AKID").cloned()) - .unwrap_or_default(); - let session_token = - utils::find_closest_variable(&captures, &secret_key, "TOKEN", "SECURITY_TOKEN") - .or_else(|| om.dependent_captures.get("SECURITY_TOKEN").cloned()) - .unwrap_or_default(); + let access_key = utils::find_closest_variable( + &captures, + secret_key.as_str(), + "TOKEN", + "STS_AKID", + ) + .or_else(|| om.dependent_captures.get("STS_AKID").cloned()) + .unwrap_or_default(); + let session_token = utils::find_closest_variable( + &captures, + secret_key.as_str(), + "TOKEN", + "SECURITY_TOKEN", + ) + .or_else(|| om.dependent_captures.get("SECURITY_TOKEN").cloned()) + .unwrap_or_default(); if !access_key.is_empty() && !secret_key.is_empty() && !session_token.is_empty() { collector.record_alibaba( diff --git a/src/validation.rs b/src/validation.rs index f8aff4a..d390f8a 100644 --- a/src/validation.rs +++ b/src/validation.rs @@ -1195,7 +1195,7 @@ async fn validate_azure_storage( .map(|(_, v, ..)| v.clone()) .unwrap_or_default(); let storage_account = - utils::find_closest_variable(captured_values, &storage_key, "TOKEN", "AZURENAME") + utils::find_closest_variable(captured_values, storage_key.as_str(), "TOKEN", "AZURENAME") .unwrap_or_default(); if storage_account.is_empty() || storage_key.is_empty() {