diff --git a/Cargo.toml b/Cargo.toml
index 4e774bf..ac3b6a6 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -65,7 +65,7 @@ smallvec = { version = "1", features = [
 tracing = "0.1.41"
 indicatif = { version = "0.17", features = ["improved_unicode"] }
 rayon = "1.10"
-sha1 = "0.10.6"
+sha1 = { version = "0.10.6", features = ["asm"] }
 hex = "0.4.3"
 vectorscan-rs = "0.0.5"
 regex = "1.11.1"
@@ -151,6 +151,7 @@ tar = "0.4.44"
 xz2 = "0.1.7"
 asar = "0.3.0"
 blake3 = "1.8.2"
+memchr = "2.7"
 memmap2 = "0.9.7"
 futures = "0.3.31"
 dashmap = "6.1.0"
diff --git a/src/blob.rs b/src/blob.rs
index a54a4a8..9530882 100644
--- a/src/blob.rs
+++ b/src/blob.rs
@@ -10,16 +10,20 @@ use anyhow::Result;
 use bstr::{BString, ByteSlice};
 use gix::ObjectId;
 use hex;
+use once_cell::sync::OnceCell;
 use parking_lot::Mutex;
 use rustc_hash::FxHashMap;
 use serde::{Deserialize, Serialize};
 use sha1::{Digest, Sha1};
 use smallvec::SmallVec;
+use std::sync::atomic::{AtomicU64, Ordering};
 
 use crate::git_commit_metadata::CommitMetadata;
 // const LARGE_FILE_THRESHOLD: u64 = 512 * 1024; // 512 KB
 const LARGE_FILE_THRESHOLD: u64 = 0; // always mmap
 
+static NEXT_ID: AtomicU64 = AtomicU64::new(1);
+
 /// The data of a blob, either owned (small files) or memory mapped (large files).
 pub enum BlobData<'a> {
     /// Small blobs – remains as-is.
@@ -75,47 +79,68 @@ pub type BlobAppearanceSet = SmallVec<[BlobAppearance; 1]>;
 /// A Git blob, storing its SHA-1 id and its contents.
 
 pub struct Blob<'a> {
-    pub id: BlobId,
-    pub data: BlobData<'a>,
+    id: OnceCell<BlobId>,
+    data: BlobData<'a>,
+    temp_id: u64,
 }
 
 impl Blob<'_> {
     #[inline]
-
     pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
         let mut file = File::open(&path)?;
         let file_size = file.metadata()?.len();
+        let temp_id = NEXT_ID.fetch_add(1, Ordering::Relaxed);
 
         if file_size > LARGE_FILE_THRESHOLD {
             // Large files: one mmap, zero extra copies.
             let mmap = unsafe { memmap2::Mmap::map(&file)? };
-            let id = BlobId::new(mmap.as_ref());
-            Ok(Blob { id, data: BlobData::Mapped(mmap) })
+            Ok(Blob { id: OnceCell::new(), data: BlobData::Mapped(mmap), temp_id })
         } else {
             // Small files: reuse the same handle and pre-allocate exact capacity
             let mut bytes = Vec::with_capacity(file_size as usize);
             file.read_to_end(&mut bytes)?;
-            let id = BlobId::new(&bytes);
-            Ok(Blob { id, data: BlobData::Owned(bytes) })
+            Ok(Blob { id: OnceCell::new(), data: BlobData::Owned(bytes), temp_id })
         }
     }
+
     /// Returns the blob's bytes as a slice.
     #[inline]
     pub fn bytes(&self) -> &[u8] {
         self.data.as_ref()
     }
 
+    /// Return the blob's SHA-1 `BlobId` by value, computing it from the contents on first use.
+    #[inline]
+    pub fn id(&self) -> BlobId {
+        *self.id_ref()
+    }
+
+    /// Get a reference to the blob's SHA-1 `BlobId`, computing it if necessary.
+    #[inline]
+    pub fn id_ref(&self) -> &BlobId {
+        self.id.get_or_init(|| BlobId::new(self.bytes()))
+    }
+
+    /// Return the temporary identifier assigned on blob creation.
+    #[inline]
+    pub fn temp_id(&self) -> u64 {
+        self.temp_id
+    }
+
     /// Create a new `Blob` from a vector of bytes.
     #[inline]
     pub fn from_bytes(bytes: Vec<u8>) -> Self {
-        let id = BlobId::compute_from_bytes(&bytes);
-        Blob { id, data: BlobData::Owned(bytes) }
+        let temp_id = NEXT_ID.fetch_add(1, Ordering::Relaxed);
+        Blob { id: OnceCell::new(), data: BlobData::Owned(bytes), temp_id }
     }
 
     /// Create a new `Blob` with the given id and data.
     #[inline]
     pub fn new(id: BlobId, bytes: Vec<u8>) -> Self {
-        Blob { id, data: BlobData::Owned(bytes) }
+        // The id is already known: seed the cell so `id()` never rehashes `bytes`.
+        let id = OnceCell::with_value(id);
+        let temp_id = NEXT_ID.fetch_add(1, Ordering::Relaxed);
+        Blob { id, data: BlobData::Owned(bytes), temp_id }
     }
 
     #[inline]
@@ -360,9 +385,6 @@ pub struct BlobMetadata {
     /// The guessed multimedia type of the blob
     pub mime_essence: Option<String>,
 
-    /// The guessed charset of the blob
-    pub charset: Option<String>,
-
     /// The guessed programming language of the blob
     pub language: Option<String>,
 }
@@ -385,8 +407,4 @@ impl BlobMetadata {
         self.mime_essence.as_deref()
     }
 
-    #[inline]
-    pub fn charset(&self) -> Option<&str> {
-        self.charset.as_deref()
-    }
 }
diff --git a/src/location.rs b/src/location.rs
index 4af7021..05a7586 100644
--- a/src/location.rs
+++ b/src/location.rs
@@ -94,8 +94,7 @@ pub struct LocationMapping {
 impl LocationMapping {
     /// Scan once for all `\n` positions.
     pub fn new(input: &[u8]) -> Self {
-        let newline_offsets =
-            input.iter().enumerate().filter_map(|(i, &b)| (b == b'\n').then_some(i)).collect();
+        let newline_offsets = memchr::memchr_iter(b'\n', input).collect();
         LocationMapping { newline_offsets }
     }
 
diff --git a/src/matcher.rs b/src/matcher.rs
index 0ecc7ec..07f53d3 100644
--- a/src/matcher.rs
+++ b/src/matcher.rs
@@ -288,17 +288,6 @@ impl<'a> Matcher<'a> {
         self.local_stats.blobs_scanned += 1;
         self.local_stats.bytes_scanned += blob.bytes().len() as u64;
 
-        // Check if blob was already seen and respect no_dedup flag
-        if !no_dedup {
-            if let Some(had_matches) = self.seen_blobs.get(&blob.id) {
-                return Ok(if had_matches {
-                    ScanResult::SeenWithMatches
-                } else {
-                    ScanResult::SeenSansMatches
-                });
-            }
-        }
-
         // Extract filename from origin
         let filename = origin
             .first()
@@ -316,16 +305,7 @@ impl<'a> Matcher<'a> {
         let mut b64_items = if no_base64 { Vec::new() } else { get_base64_strings(blob.bytes()) };
 
         if self.user_data.raw_matches_scratch.is_empty() && b64_items.is_empty() {
-            // Only record in seen_blobs if deduplication is enabled
-            if !no_dedup {
-                return Ok(match self.seen_blobs.insert(blob.id, false) {
-                    None => ScanResult::New(Vec::new()),
-                    Some(true) => ScanResult::SeenWithMatches,
-                    Some(false) => ScanResult::SeenSansMatches,
-                });
-            } else {
-                return Ok(ScanResult::New(Vec::new()));
-            }
+            return Ok(ScanResult::New(Vec::new()));
         }
 
         let rules_db = self.rules_db;
@@ -472,9 +452,15 @@ impl<'a> Matcher<'a> {
             }
         }
         // Finalize
-        // Only record in seen_blobs if deduplication is enabled
-        if !no_dedup {
-            self.seen_blobs.insert(blob.id, !matches.is_empty());
+        if !no_dedup && !matches.is_empty() {
+            let blob_id = blob.id();
+            if let Some(had_matches) = self.seen_blobs.insert(blob_id, true) {
+                return Ok(if had_matches {
+                    ScanResult::SeenWithMatches
+                } else {
+                    ScanResult::SeenSansMatches
+                });
+            }
         }
 
         // --- opportunistic capacity cap ---------------------------------
@@ -571,7 +557,7 @@ fn filter_match<'b>(
             SerializableCaptures::from_captures(&captures, byte_slice.as_ref(), re, redact);
         matches.push(BlobMatch {
             rule: Arc::clone(&rule),
-            blob_id: &blob.id,
+            blob_id: blob.id_ref(),
             matching_input: only_matching_input,
             matching_input_offset_span,
             captures: groups,
diff --git a/src/reporter/json_format.rs b/src/reporter/json_format.rs
index 7cf90c8..f2b065e 100644
--- a/src/reporter/json_format.rs
+++ b/src/reporter/json_format.rs
@@ -185,7 +185,6 @@ mod tests {
                 id: BlobId::new(b"mock_blob"),
                 num_bytes: 1024,
                 mime_essence: Some("text/plain".to_string()),
-                charset: Some("UTF-8".to_string()),
                 language: Some("Rust".to_string()),
             };
             let dedup = true;
@@ -219,7 +218,6 @@ mod tests {
                 id: BlobId::new(b"mock_blob"),
                 num_bytes: 1024,
                 mime_essence: Some("text/plain".to_string()),
-                charset: Some("UTF-8".to_string()),
                 language: Some("Rust".to_string()),
             },
             m: mock_match,
@@ -256,7 +254,6 @@ mod tests {
                     id: BlobId::new(b"mock_blob"),
                     num_bytes: 1024,
                     mime_essence: Some("text/plain".to_string()),
-                    charset: Some("UTF-8".to_string()),
                     language: Some("Rust".to_string()),
                 },
                 m: mock_match,
diff --git a/src/scanner/enumerate.rs b/src/scanner/enumerate.rs
index 14861ff..970ca5c 100644
--- a/src/scanner/enumerate.rs
+++ b/src/scanner/enumerate.rs
@@ -172,10 +172,15 @@ pub fn enumerate_filesystem_inputs(
                 };
                 if should_skip {
                     progress.suspend(|| {
+                        let path = origin
+                            .first()
+                            .blob_path()
+                            .map(|p| p.display().to_string())
+                            .unwrap_or_else(|| blob.temp_id().to_string());
                         if is_archive {
-                            debug!("Skipping archive: {}", blob.id);
+                            debug!("Skipping archive: {path}");
                         } else {
-                            debug!("Skipping binary blob: {}", blob.id);
+                            debug!("Skipping binary blob: {path}");
                         }
                     });
                     return Ok(());
diff --git a/src/scanner/processing.rs b/src/scanner/processing.rs
index 9447dbf..410233c 100644
--- a/src/scanner/processing.rs
+++ b/src/scanner/processing.rs
@@ -27,8 +27,7 @@ impl<'a> BlobProcessor<'a> {
         redact: bool,
         no_base64: bool,
     ) -> Result<Option<DatastoreMessage>> {
-        let blob_id = blob.id.hex();
-        let _span = debug_span!("matcher", blob_id).entered();
+        let _span = debug_span!("matcher", temp_id = blob.temp_id()).entered();
         let t1 = Instant::now();
         let res = self.matcher.scan_blob(&blob, &origin, None, redact, no_dedup, no_base64)?;
         let scan_us = t1.elapsed().as_micros();
@@ -43,10 +42,9 @@ impl<'a> BlobProcessor<'a> {
             ScanResult::SeenWithMatches => {
                 trace!("({scan_us}us) blob already scanned with matches");
                 let metadata = BlobMetadata {
-                    id: blob.id,
+                    id: blob.id(),
                     num_bytes: blob.len(),
                     mime_essence: None,
-                    charset: None,
                     language: None,
                 };
                 Ok(Some((origin, metadata, Vec::new())))
@@ -63,10 +61,9 @@ impl<'a> BlobProcessor<'a> {
                 }
                 let md = MetadataResult::from_blob_and_origin(&blob, &origin);
                 let metadata = BlobMetadata {
-                    id: blob.id,
+                    id: blob.id(),
                     num_bytes: blob.len(),
                     mime_essence: md.mime_essence,
-                    charset: md.charset,
                     language: md.language,
                 };
 
diff --git a/tests/fingerprint_dedup.rs b/tests/fingerprint_dedup.rs
index 9311ddf..5d5e89f 100644
--- a/tests/fingerprint_dedup.rs
+++ b/tests/fingerprint_dedup.rs
@@ -119,7 +119,6 @@ fn reporter_deduplicates_across_git_commits() -> Result<()> {
                 id: BlobId::new(b"dummy"),
                 num_bytes: 10,
                 mime_essence: None,
-                charset: None,
                 language: None,
             },
             m: m1,
@@ -136,7 +135,6 @@ fn reporter_deduplicates_across_git_commits() -> Result<()> {
                 id: BlobId::new(b"dummy"),
                 num_bytes: 10,
                 mime_essence: None,
-                charset: None,
                 language: None,
             },
             m: m2,