diff --git a/src/content_type.rs b/src/content_type.rs
index 457ef00..3d9a25d 100644
--- a/src/content_type.rs
+++ b/src/content_type.rs
@@ -1,7 +1,22 @@
-// Requires: tokei = "12" in Cargo.toml
+use once_cell::sync::Lazy;
 use std::path::Path;
 use tokei::LanguageType;
 
+
+// Precompute all (shebang_prefix_bytes, language) pairs once.
+// Sort longest-first so more specific shebangs win.
+static SHEBANG_PREFIXES: Lazy<Vec<(&'static [u8], LanguageType)>> = Lazy::new(|| {
+    let mut v = Vec::new();
+    for &lang in LanguageType::list() {
+        for &sb in lang.shebangs() {
+            v.push((sb.as_bytes(), lang));
+        }
+    }
+    // Longest prefix first to prefer e.g. "#!/usr/bin/env python3" over "#!/usr/bin/env python"
+    v.sort_by(|a, b| b.0.len().cmp(&a.0.len()));
+    v
+});
+
 /// The type of content detected.
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum ContentType {
@@ -127,16 +142,12 @@ impl ContentInspector {
             }
         }
 
-        // 3) Shebang detection (in-memory): compare the first line to known shebangs.
+        // 3) Shebang detection (in-memory): match by longest prefix, byte-wise (no UTF-8 needed).
         if let Some(first_line) = content.split(|&b| b == b'\n').next() {
             if first_line.starts_with(b"#!") {
-                if let Ok(line) = std::str::from_utf8(first_line) {
-                    for &lang in LanguageType::list() {
-                        for &sb in lang.shebangs() {
-                            if line.starts_with(sb) {
-                                return Some(lang.name().to_string());
-                            }
-                        }
+                for (prefix, lang) in SHEBANG_PREFIXES.iter() {
+                    if first_line.starts_with(prefix) {
+                        return Some(lang.name().to_string());
                     }
                 }
             }
diff --git a/src/scanner/enumerate.rs b/src/scanner/enumerate.rs
index f4bac9d..a999f31 100644
--- a/src/scanner/enumerate.rs
+++ b/src/scanner/enumerate.rs
@@ -455,8 +455,8 @@ impl<'a> rayon::iter::ParallelIterator for GitRepoResultIter<'a> {
                 let origin = OriginSet::try_from_iter(md.first_seen.iter().map(|e| {
                     Origin::from_git_repo_with_first_commit(
                         Arc::clone(&repo_path),
-                        Arc::clone(&e.commit_metadata), // ← clone Arc
-                        String::from_utf8_lossy(&e.path).into_owned(),
+                        Arc::clone(&e.commit_metadata),
+                        String::from_utf8_lossy(&e.path).to_string(),
                     )
                 }))
                 .unwrap_or_else(|| Origin::from_git_repo(Arc::clone(&repo_path)).into());