diff --git a/Cargo.toml b/Cargo.toml index dc1e875..b348e5e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -148,6 +148,8 @@ flate2 = "1.1" thousands = "0.2.0" crossbeam-skiplist = "0.1.3" tokio-postgres = { version = "0.7", default-features = false, features = ["runtime"] } +# Temporary Git pin: keeps MongoDB SRV validation enabled while using the upstream +# Hickory 0.26 DNS-resolver fix before it is available in a crates.io release. mongodb = { git = "https://github.com/mongodb/mongo-rust-driver", rev = "bdddefc50c4794d51d10b944320d42c6eb216b04", default-features = false, features = ["rustls-tls", "aws-auth", "compat-3-0-0", "dns-resolver"] } mysql_async = { version = "0.36.2", default-features = false, features = ["default-rustls"] } aws-config = { version = "1.8.14", default-features = false, features = ["default-https-client", "rt-tokio", "credentials-process", "sso"] } diff --git a/crates/kingfisher-scanner/Cargo.toml b/crates/kingfisher-scanner/Cargo.toml index 5ffa9ff..71e4856 100644 --- a/crates/kingfisher-scanner/Cargo.toml +++ b/crates/kingfisher-scanner/Cargo.toml @@ -187,6 +187,8 @@ p256 = { version = "0.13.2", optional = true } ed25519-dalek = { version = "2.2", features = ["pkcs8"], optional = true } hex = { workspace = true, optional = true } url = { version = "2.5.7", optional = true } +# Temporary Git pin: keeps MongoDB SRV validation enabled while using the upstream +# Hickory 0.26 DNS-resolver fix before it is available in a crates.io release. mongodb = { git = "https://github.com/mongodb/mongo-rust-driver", rev = "bdddefc50c4794d51d10b944320d42c6eb216b04", default-features = false, features = ["rustls-tls", "aws-auth", "compat-3-0-0", "dns-resolver"], optional = true } mysql_async = { version = "0.36.2", default-features = false, features = ["default-rustls"], optional = true } tokio-postgres = { version = "0.7", default-features = false, features = ["runtime"], optional = true } diff --git a/src/github.rs b/src/github.rs index 94015ba..added70 100644 --- a/src/github.rs +++ b/src/github.rs @@ -9,6 +9,7 @@ use std::{ use anyhow::{Context, Result}; use indicatif::{ProgressBar, ProgressStyle}; use reqwest::StatusCode; +use reqwest::header::HeaderMap; use serde::Deserialize; use serde_json::Value; use tracing::{info, warn}; @@ -170,28 +171,42 @@ async fn ensure_github_success(resp: reqwest::Response, action: &str) -> Result< anyhow::bail!("GitHub API request failed while {action}: HTTP {status} ({url}): {body}"); } +fn github_next_link(headers: &HeaderMap) -> Option { + let raw = headers.get(reqwest::header::LINK)?.to_str().ok()?; + raw.split(',').find_map(|part| { + let (url_part, params) = part.trim().split_once(';')?; + if !params.split(';').any(|param| param.trim() == "rel=\"next\"") { + return None; + } + let url = url_part.trim().strip_prefix('<')?.strip_suffix('>')?; + Url::parse(url).ok() + }) +} + async fn fetch_github_orgs( client: &reqwest::Client, api_base: &Url, token: Option<&str>, ) -> Result> { let mut orgs = Vec::new(); - let mut page = 1; - - loop { + let mut next_url = { let mut url = api_base.join("organizations").context("Failed to build GitHub orgs URL")?; - url.query_pairs_mut().append_pair("per_page", "100").append_pair("page", &page.to_string()); + url.query_pairs_mut().append_pair("per_page", "100"); + Some(url) + }; + + while let Some(url) = next_url { let resp = ensure_github_success( github_get(client, url, token).send().await?, "listing organizations", ) .await?; + next_url = github_next_link(resp.headers()); let page_orgs: Vec = resp.json().await?; if page_orgs.is_empty() { break; } orgs.extend(page_orgs.into_iter().map(|org| org.login)); - page += 1; } Ok(orgs) @@ -726,4 +741,29 @@ mod tests { assert!(should_exclude_repo("https://github.com/owner/project-archive.git", &excludes)); assert!(!should_exclude_repo("https://github.com/owner/project.git", &excludes)); } + + #[test] + fn github_next_link_parses_next_relation() { + let mut headers = HeaderMap::new(); + headers.insert( + reqwest::header::LINK, + r#"; rel="next", ; rel="first""# + .parse() + .unwrap(), + ); + + let next = github_next_link(&headers).unwrap(); + assert_eq!(next.as_str(), "https://api.github.com/organizations?since=42"); + } + + #[test] + fn github_next_link_returns_none_without_next_relation() { + let mut headers = HeaderMap::new(); + headers.insert( + reqwest::header::LINK, + r#"; rel="first""#.parse().unwrap(), + ); + + assert!(github_next_link(&headers).is_none()); + } } diff --git a/src/scanner/enumerate.rs b/src/scanner/enumerate.rs index d34001e..d063ea0 100644 --- a/src/scanner/enumerate.rs +++ b/src/scanner/enumerate.rs @@ -1446,12 +1446,12 @@ fn reference_candidates(reference: &str) -> Vec { #[cfg(test)] mod tests { - use std::fs; use std::path::Path; + use std::{fs, io::Write}; use super::{ FileResult, GitBlobSource, GitDiffConfig, ParallelBlobIterator, enumerate_git_diff_repo, - reference_candidates, + reference_candidates, try_extract_git_blob_archive, }; use anyhow::Result; use bstr::ByteSlice; @@ -1460,6 +1460,7 @@ mod tests { use rayon::iter::ParallelIterator; use rusqlite::Connection; use tempfile::tempdir; + use zip::{CompressionMethod, ZipWriter, write::SimpleFileOptions}; #[test] fn reference_candidates_for_plain_branch() { @@ -1559,6 +1560,28 @@ mod tests { Ok(()) } + #[test] + fn git_blob_archive_extraction_preserves_repo_relative_paths() -> Result<()> { + let mut cursor = std::io::Cursor::new(Vec::new()); + { + let mut zip = ZipWriter::new(&mut cursor); + let options = SimpleFileOptions::default() + .compression_method(CompressionMethod::Deflated) + .unix_permissions(0o644); + zip.start_file("nested/secret.txt", options)?; + zip.write_all(b"token=not-a-real-secret")?; + zip.finish()?; + } + + let entries = try_extract_git_blob_archive("dir/payload.zip", &cursor.into_inner())? + .expect("zip blob should extract"); + + assert_eq!(entries.len(), 1); + assert_eq!(entries[0].0, "dir/payload.zip!nested/secret.txt"); + assert_eq!(entries[0].1, b"token=not-a-real-secret"); + Ok(()) + } + fn collect_file_bytes(file: FileResult) -> Result)>> { let iter = file.into_blob_iter()?.expect("file result should yield a blob"); iter.collect::>() diff --git a/src/validation.rs b/src/validation.rs index 8d5a7c7..0c7d911 100644 --- a/src/validation.rs +++ b/src/validation.rs @@ -1848,6 +1848,47 @@ mod tests { assert_eq!(selected.1, OffsetSpan::from_range(70..80)); } + #[test] + fn aws_akid_candidates_orders_by_proximity_and_deduplicates() { + let captured_values = vec![ + ("TOKEN".to_string(), "secret".to_string(), 100usize, 140usize), + ("AKID".to_string(), "closest_capture".to_string(), 80usize, 90usize), + ]; + let dependent_akids = vec![ + ("far_before".to_string(), OffsetSpan::from_range(10..20)), + ("near_after".to_string(), OffsetSpan::from_range(150..160)), + ("overlap".to_string(), OffsetSpan::from_range(110..120)), + ("closest_capture".to_string(), OffsetSpan::from_range(80..90)), + ]; + + let candidates = aws_akid_candidates( + &captured_values, + Some(&dependent_akids), + OffsetSpan::from_range(100..140), + "secret", + ); + + assert_eq!(candidates, vec!["closest_capture", "overlap", "near_after", "far_before"]); + } + + #[test] + fn aws_akid_candidates_caps_unique_candidates() { + let dependent_akids = (0..70) + .map(|i| (format!("akid{i}"), OffsetSpan::from_range((i * 2)..(i * 2 + 1)))) + .collect::>(); + + let candidates = aws_akid_candidates( + &[], + Some(&dependent_akids), + OffsetSpan::from_range(1_000..1_010), + "secret", + ); + + assert_eq!(candidates.len(), 64); + assert_eq!(candidates.first().map(String::as_str), Some("akid69")); + assert_eq!(candidates.last().map(String::as_str), Some("akid6")); + } + #[test] fn truncate_to_char_boundary_handles_multibyte_characters() { let max_len = 2048;