diff --git a/Cargo.toml b/Cargo.toml
index 896bfe5..f80052d 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,3 +1,12 @@
+[workspace]
+members = [
+    ".",
+    "crates/kingfisher-core",
+    "crates/kingfisher-rules",
+    "crates/kingfisher-scanner",
+]
+resolver = "2"
+
 [workspace.package]
 edition = "2021"
 rust-version = "1.90"
@@ -40,6 +49,11 @@ assets = [
 ]
 
 [dependencies]
+# Library crates
+kingfisher-core = { path = "crates/kingfisher-core" }
+kingfisher-rules = { path = "crates/kingfisher-rules" }
+kingfisher-scanner = { path = "crates/kingfisher-scanner" }
+
 clap = { version = "4.5", features = [
     "cargo",
     "derive",
diff --git a/README.md b/README.md
index f92e38a..157c525 100644
--- a/README.md
+++ b/README.md
@@ -39,6 +39,7 @@ Designed for offensive security engineers and blue-teamers alike, Kingfisher hel
 - **Baseline management**: generate and track baselines to suppress known secrets ([docs/BASELINE.md](/docs/BASELINE.md))
 - **Checksum-aware detection**: verifies tokens with built-in checksums (e.g., GitHub, Confluent, Zuplo) — no API calls required
 - **Built-in Report Viewer**: Visualize and triage findings locally with `kingisher view ./report-file.json`
+- **Library crates**: Embed Kingfisher's scanning engine in your own Rust applications ([docs/LIBRARY.md](docs/LIBRARY.md))
 
 **Learn more:** [Introducing Kingfisher: Real‑Time Secret Detection and Validation](https://www.mongodb.com/blog/post/product-release-announcements/introducing-kingfisher-real-time-secret-detection-validation)
 
@@ -58,7 +59,7 @@ NOTE: Replay has been slowed down for demo
 ![alt text](docs/kingfisher-usage-01.gif)
 
 ## Report Viewer Demo
-Explore Kingfisher’s built-in report viewer and its `--access-map`, which can show what the token (AWS, GCP, Azure, GitHub, and GitLab...more coming) can actually access : [Access map outputs and viewer](#access-map-outputs-and-viewer)
+Explore Kingfisher’s built-in report viewer and its `--access-map`, which can show what the token (AWS, GCP, Azure, GitHub, GitLab, and Slack...more coming) can actually access : [Access map outputs and viewer](#access-map-outputs-and-viewer)
 
 Note: when you pass `--view-report`, Kingfisher starts a **localhost-only** web server on port `7890` and opens it in your default browser. You’ll see this near the end of the scan output, and **Kingfisher will keep running** until you stop it.
 
@@ -117,6 +118,7 @@ kingfisher scan /path/to/scan --access-map --view-report
     - [Access map outputs and viewer](#access-map-outputs-and-viewer)
     - [View access-map reports locally](#view-access-map-reports-locally)
     - [Pipe any text directly into Kingfisher by passing `-`](#pipe-any-text-directly-into-kingfisher-by-passing--)
+    - [Direct secret validation with `kingfisher validate`](#direct-secret-validation-with-kingfisher-validate)
     - [Limit maximum file size scanned (`--max-file-size`)](#limit-maximum-file-size-scanned---max-file-size)
     - [Scan using a rule _family_ with one flag](#scan-using-a-rule-family-with-one-flag)
     - [Display rule performance statistics](#display-rule-performance-statistics)
@@ -189,6 +191,7 @@ kingfisher scan /path/to/scan --access-map --view-report
   - [Rule Performance Profiling](#rule-performance-profiling)
   - [CLI Options](#cli-options)
   - [Lineage and Evolution](#lineage-and-evolution)
+- [Library Usage](#library-usage)
 - [Roadmap](#roadmap)
 - [License](#license)
 
@@ -593,7 +596,7 @@ kingfisher scan /path/to/repo --format sarif --output findings.sarif
 
 Finding a leaked credential is only the first step. The critical question isn’t just “Is this a secret?”—it’s “What can an attacker do with it?”
 
-Kingfisher's `--access-map` feature transforms secret detection from a simple alert into a comprehensive threat assessment. Instead of leaving you with a cryptic API key, Kingfisher actively authenticates against your cloud provider (AWS, GCP, Azure Storage, Azure DevOps, GitHub, or GitLab) to map the full extent of the credential's power. 
+Kingfisher's `--access-map` feature transforms secret detection from a simple alert into a comprehensive threat assessment. Instead of leaving you with a cryptic API key, Kingfisher actively authenticates against your cloud provider (AWS, GCP, Azure Storage, Azure DevOps, GitHub, GitLab, or Slack) to map the full extent of the credential's power. 
 
 * Instant Identity Resolution: Immediately identify who the key belongs to—whether it's a specific IAM user, an assumed role, or a service account.
 * Visualize the Blast Radius: See exactly which resources (S3 buckets, EC2 instances, projects, storage containers) are exposed and at risk.
@@ -624,6 +627,77 @@ cat /path/to/file.py | kingfisher scan -
 
 ```
 
+### Direct secret validation with `kingfisher validate`
+
+When you already know a secret's type and have the raw value, use `kingfisher validate` to check if it's still active—without needing the surrounding context that detection rules require.
+
+This is useful for:
+- Re-validating a known secret from a previous scan
+- Checking if a credential is still active before rotation
+- Validating secrets from external sources (password managers, ticketing systems, etc.)
+
+```bash
+# Validate an OpsGenie API key (using rule prefix matching)
+kingfisher validate --rule kingfisher.opsgenie "12345678-9abc-def0-1234-56789abcdef0"
+
+# Validate from stdin
+echo "ghp_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" | kingfisher validate --rule kingfisher.github -
+
+# JSON output for scripting
+kingfisher validate --rule kingfisher.slack "xoxb-..." --format json
+
+# AWS credentials - use --arg to auto-assign additional values
+kingfisher validate --rule kingfisher.aws --arg AKIAIOSFODNN7EXAMPLE \
+  "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"
+
+# Or use --var if you know the variable name
+kingfisher validate --rule kingfisher.aws.2 --var AKID=AKIAIOSFODNN7EXAMPLE \
+  "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"
+
+# GCP service account (pass JSON as secret)
+kingfisher validate --rule kingfisher.gcp "$(cat service-account.json)"
+
+# MongoDB connection string
+kingfisher validate --rule kingfisher.mongodb.3 \
+  "mongodb+srv://user:password@cluster.mongodb.net/db"
+
+# PostgreSQL connection
+kingfisher validate --rule kingfisher.postgres \
+  "postgres://admin:password@db.example.com:5432/mydb"
+
+# JWT token
+kingfisher validate --rule kingfisher.jwt \
+  "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9..."
+```
+
+**Supported validators:** HTTP, AWS, GCP, MongoDB, MySQL, Postgres, JDBC, JWT, Azure Storage, and Coinbase.
+
+**Exit codes:** Returns `0` if any matching rule validates the secret as valid, `1` if all are invalid or an error occurred.
+
+**Passing additional values (`--arg` and `--var`):**
+
+Some validators need more than just the secret. For example, AWS needs both an access key ID and the secret key (see the `dependent_rule` section of the rule definition):
+
+- `--arg VALUE` — Auto-assigns values to template variables (in alphabetical order). Use when you don't know the exact variable name.
+- `--var NAME=VALUE` — Explicitly sets a variable. Use when you know the exact name, or to override `--arg`.
+
+```bash
+# --arg auto-assigns to AKID (the only non-TOKEN variable for AWS)
+kingfisher validate --rule kingfisher.aws --arg AKIAEXAMPLE "secret_key"
+
+# --var for explicit assignment
+kingfisher validate --rule kingfisher.aws --var AKID=AKIAEXAMPLE "secret_key"
+```
+
+**Rule prefix matching:** Use partial rule IDs like `kingfisher.opsgenie` instead of the full `kingfisher.opsgenie.1`. If the prefix matches multiple rules, **all matching rules with compatible variables are tried**:
+
+```bash
+$ kingfisher validate --rule kingfisher.aws --arg AKIAEXAMPLE "secret_key"
+Rule:     AWS Secret Access Key (kingfisher.aws.2)
+Result:   ✓ VALID
+Response: arn:aws:iam::123456789012:user/example
+```
+
 ### Limit maximum file size scanned (`--max-file-size`)
 
 By default, Kingfisher skips files larger than **256 MB**. You can raise or lower this cap per run with `--max-file-size`, which takes a value in **megabytes**.
@@ -1613,6 +1687,41 @@ Since then it has evolved far beyond that starting point, introducing live valid
 - **Unified workflow** with JSON/BSON/SARIF outputs  
 - **Cross-platform builds** for Linux, macOS, and Windows
 
+# Library Usage
+
+Kingfisher's scanning engine is available as a set of Rust library crates that can be embedded into other applications:
+
+| Crate | Description |
+|-------|-------------|
+| `kingfisher-core` | Core types: `Blob`, `BlobId`, `Location`, `Origin`, entropy calculation |
+| `kingfisher-rules` | Rule definitions, YAML parsing, compiled rule database, 200+ builtin rules |
+| `kingfisher-scanner` | High-level scanning API with `Scanner` and `Finding` types |
+
+**Quick example:**
+
+```rust
+use std::sync::Arc;
+use kingfisher_rules::{get_builtin_rules, RulesDatabase, Rule};
+use kingfisher_scanner::Scanner;
+
+// Load builtin rules and compile
+let rules = get_builtin_rules(None)?;
+let rule_vec: Vec<Rule> = rules.iter_rules()
+    .map(|syntax| Rule::new(syntax.clone()))
+    .collect();
+let rules_db = Arc::new(RulesDatabase::from_rules(rule_vec)?);
+
+// Create scanner and scan
+let scanner = Scanner::new(rules_db);
+let findings = scanner.scan_file("config.yml")?;
+
+for finding in findings {
+    println!("{}: {}", finding.rule_name, finding.secret);
+}
+```
+
+For complete documentation, see **[docs/LIBRARY.md](docs/LIBRARY.md)**.
+
 # Roadmap
 
 - More rules
diff --git a/crates/kingfisher-core/Cargo.toml b/crates/kingfisher-core/Cargo.toml
new file mode 100644
index 0000000..dfd9513
--- /dev/null
+++ b/crates/kingfisher-core/Cargo.toml
@@ -0,0 +1,48 @@
+[package]
+name = "kingfisher-core"
+version = "0.1.0"
+description = "Core types and traits for Kingfisher secret scanner"
+edition.workspace = true
+rust-version.workspace = true
+license.workspace = true
+authors.workspace = true
+homepage.workspace = true
+repository.workspace = true
+publish.workspace = true
+
+[dependencies]
+# Serialization
+serde = { version = "1.0", features = ["derive", "rc"] }
+serde_json = "1.0"
+schemars = "0.8"
+
+# Error handling
+anyhow = "1.0"
+thiserror = "1.0"
+
+# Hashing and crypto
+sha1 = "0.10"
+hex = "0.4"
+
+# Memory management
+memmap2 = "0.9"
+once_cell = "1.21"
+parking_lot = "0.12"
+
+# Collections
+smallvec = { version = "1", features = ["const_generics", "const_new", "union"] }
+rustc-hash = "2.1"
+dashmap = "6.1"
+
+# Byte string handling
+bstr = { version = "1.12", features = ["serde"] }
+memchr = "2.7"
+
+# Git types (minimal, for ObjectId and Time)
+gix = { version = "0.73", default-features = false, features = ["serde"] }
+
+# Console formatting
+console = "0.15"
+
+[dev-dependencies]
+pretty_assertions = "1.4"
diff --git a/crates/kingfisher-core/src/blob.rs b/crates/kingfisher-core/src/blob.rs
new file mode 100644
index 0000000..a25e60d
--- /dev/null
+++ b/crates/kingfisher-core/src/blob.rs
@@ -0,0 +1,495 @@
+//! Blob representation for scannable content.
+//!
+//! A [`Blob`] represents content that can be scanned for secrets. It can be
+//! created from:
+//! - In-memory bytes ([`Blob::from_bytes`])
+//! - A file path ([`Blob::from_file`])
+//! - Borrowed data ([`Blob::from_borrowed`])
+//!
+//! Files above an internal size threshold (currently 0 bytes) are memory-mapped.
+
+use std::{
+    convert::TryInto,
+    fs::File,
+    io::{Read, Write},
+    path::Path,
+    sync::{
+        atomic::{AtomicU64, Ordering},
+        Arc,
+    },
+};
+
+use bstr::{BString, ByteSlice};
+use gix::ObjectId;
+use once_cell::sync::OnceCell;
+use parking_lot::Mutex;
+use rustc_hash::FxHashMap;
+use schemars::JsonSchema;
+use serde::{Deserialize, Serialize};
+use sha1::{Digest, Sha1};
+use smallvec::SmallVec;
+
+use crate::error::Result;
+use crate::git_commit_metadata::CommitMetadata;
+
+/// Threshold above which files are memory-mapped instead of read into memory.
+const LARGE_FILE_THRESHOLD: u64 = 0; // Currently: always mmap
+
+/// Global counter for temporary blob IDs.
+static NEXT_ID: AtomicU64 = AtomicU64::new(1);
+
+/// Tracks where a blob was seen in git history.
+#[derive(Clone, Debug, serde::Serialize)]
+pub struct BlobAppearance {
+    /// Metadata about the commit where this blob appeared.
+    pub commit_metadata: Arc<CommitMetadata>,
+
+    /// The path of the blob within the repository.
+    pub path: BString,
+}
+
+impl BlobAppearance {
+    /// Returns the path as a `&Path`, if it's valid UTF-8.
+    #[inline]
+    pub fn path(&self) -> std::result::Result<&Path, bstr::Utf8Error> {
+        self.path.to_path()
+    }
+}
+
+/// A set of [`BlobAppearance`] entries, optimized for the common case of a single appearance.
+pub type BlobAppearanceSet = SmallVec<[BlobAppearance; 1]>;
+
+/// The underlying data storage for a [`Blob`].
+pub enum BlobData<'a> {
+    /// Small blobs stored as owned bytes.
+    Owned(Vec<u8>),
+
+    /// Large blobs that are memory-mapped from disk.
+    Mapped(memmap2::Mmap),
+
+    /// Borrowed bytes (e.g., from a git pack file).
+    Borrowed(&'a [u8]),
+}
+
+impl<'a> AsRef<[u8]> for BlobData<'a> {
+    fn as_ref(&self) -> &[u8] {
+        match self {
+            BlobData::Owned(v) => v,
+            BlobData::Mapped(m) => m,
+            BlobData::Borrowed(slice) => slice,
+        }
+    }
+}
+
+impl<'a> BlobData<'a> {
+    /// Returns the length of the blob data in bytes.
+    #[inline]
+    pub fn len(&self) -> usize {
+        self.as_ref().len()
+    }
+
+    /// Returns true if the blob data is empty.
+    #[inline]
+    pub fn is_empty(&self) -> bool {
+        self.as_ref().is_empty()
+    }
+}
+
+/// A scannable blob of content.
+///
+/// `Blob` is the primary type for representing content to be scanned. It lazily
+/// computes a content-based ID (SHA-1) and supports multiple backing storage types.
+///
+/// # Examples
+///
+/// ```
+/// use kingfisher_core::Blob;
+///
+/// // Create from bytes
+/// let blob = Blob::from_bytes(b"my secret content".to_vec());
+/// assert_eq!(blob.len(), 17);
+///
+/// // Create from file
+/// // let blob = Blob::from_file("path/to/file.txt")?;
+/// ```
+pub struct Blob<'a> {
+    /// Lazily computed content-based ID.
+    id: OnceCell<BlobId>,
+    /// The underlying data.
+    data: BlobData<'a>,
+    /// Temporary ID assigned at creation (for debugging/tracking).
+    temp_id: u64,
+}
+
+impl Blob<'_> {
+    /// Create a new `Blob` by reading from a file.
+    ///
+    /// Files larger than `LARGE_FILE_THRESHOLD` bytes are memory-mapped; others are read into memory.
+    #[inline]
+    pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
+        let mut file = File::open(&path)?;
+        let file_size = file.metadata()?.len();
+        let temp_id = NEXT_ID.fetch_add(1, Ordering::Relaxed);
+
+        if file_size > LARGE_FILE_THRESHOLD {
+            // Zero-copy mmap. SAFETY: assumes the file is not truncated or modified while mapped.
+            let mmap = unsafe { memmap2::Mmap::map(&file)? };
+            Ok(Blob { id: OnceCell::new(), data: BlobData::Mapped(mmap), temp_id })
+        } else {
+            // Small files: read into memory.
+            let mut bytes = Vec::with_capacity(file_size as usize);
+            file.read_to_end(&mut bytes)?;
+            Ok(Blob { id: OnceCell::new(), data: BlobData::Owned(bytes), temp_id })
+        }
+    }
+
+    /// Create a new `Blob` from a vector of bytes.
+    #[inline]
+    pub fn from_bytes(bytes: Vec<u8>) -> Self {
+        let temp_id = NEXT_ID.fetch_add(1, Ordering::Relaxed);
+        Blob { id: OnceCell::new(), data: BlobData::Owned(bytes), temp_id }
+    }
+
+    /// Create a new `Blob` with a pre-computed ID and owned data.
+    #[inline]
+    pub fn new(id: BlobId, bytes: Vec<u8>) -> Self {
+        let temp_id = NEXT_ID.fetch_add(1, Ordering::Relaxed);
+        let cell = OnceCell::new();
+        let _ = cell.set(id);
+        Blob { id: cell, data: BlobData::Owned(bytes), temp_id }
+    }
+
+    /// Returns the blob's content as a byte slice.
+    #[inline]
+    pub fn bytes(&self) -> &[u8] {
+        self.data.as_ref()
+    }
+
+    /// Lazily computes and returns the blob's content-based [`BlobId`].
+    #[inline]
+    pub fn id(&self) -> BlobId {
+        *self.id.get_or_init(|| BlobId::new(self.bytes()))
+    }
+
+    /// Returns a reference to the blob's [`BlobId`], computing it if necessary.
+    #[inline]
+    pub fn id_ref(&self) -> &BlobId {
+        self.id.get_or_init(|| BlobId::new(self.bytes()))
+    }
+
+    /// Returns the temporary ID assigned when this blob was created.
+    #[inline]
+    pub fn temp_id(&self) -> u64 {
+        self.temp_id
+    }
+
+    /// Returns the length of the blob in bytes.
+    #[inline]
+    pub fn len(&self) -> usize {
+        self.bytes().len()
+    }
+
+    /// Returns true if the blob is empty.
+    #[inline]
+    pub fn is_empty(&self) -> bool {
+        self.bytes().is_empty()
+    }
+}
+
+impl<'a> Blob<'a> {
+    /// Create a new `Blob` from borrowed bytes.
+    ///
+    /// This is useful for zero-copy scanning of data that already exists
+    /// in memory (e.g., from a git pack file).
+    #[inline]
+    pub fn from_borrowed(bytes: &'a [u8]) -> Self {
+        let temp_id = NEXT_ID.fetch_add(1, Ordering::Relaxed);
+        Blob { id: OnceCell::new(), data: BlobData::Borrowed(bytes), temp_id }
+    }
+}
+
+impl Drop for Blob<'_> {
+    fn drop(&mut self) {
+        // For owned data, clear and shrink to free memory promptly.
+        if let BlobData::Owned(ref mut v) = self.data {
+            v.clear();
+            v.shrink_to_fit();
+        }
+    }
+}
+
+/// A content-based identifier for a blob, computed as a Git-compatible SHA-1 hash.
+#[derive(PartialEq, Eq, PartialOrd, Ord, Hash, Copy, Clone, Serialize)]
+#[serde(into = "String")]
+pub struct BlobId([u8; 20]);
+
+impl BlobId {
+    /// Creates a zero-filled (default) `BlobId`.
+    pub fn default() -> Self {
+        BlobId([0; 20])
+    }
+
+    /// Computes a `BlobId` from raw bytes.
+    ///
+    /// Inputs over 128 KiB hash only the header plus the first and last 64 KiB (not Git-compatible).
+    #[inline]
+    pub fn new(input: &[u8]) -> Self {
+        const CHUNK: usize = 64 * 1024; // 64KB from start and end
+        let mut hasher = Sha1::new();
+        write!(&mut hasher, "blob {}\0", input.len()).unwrap();
+        if input.len() <= CHUNK * 2 {
+            hasher.update(input);
+        } else {
+            hasher.update(&input[..CHUNK]);
+            hasher.update(&input[input.len() - CHUNK..]);
+        }
+        let digest: [u8; 20] = hasher.finalize().into();
+        BlobId(digest)
+    }
+
+    /// Computes a `BlobId` from the complete bytes (no truncation).
+    pub fn compute_from_bytes(bytes: &[u8]) -> Self {
+        let mut hasher = Sha1::new();
+        write!(&mut hasher, "blob {}\0", bytes.len()).unwrap();
+        hasher.update(bytes);
+        let digest: [u8; 20] = hasher.finalize().into();
+        BlobId(digest)
+    }
+
+    /// Parses a `BlobId` from a hex string.
+    #[inline]
+    pub fn from_hex(v: &str) -> crate::Result<Self> {
+        let bytes = hex::decode(v)?;
+        let arr: [u8; 20] =
+            bytes.as_slice().try_into().map_err(|_| crate::Error::InvalidBlobId(v.to_string()))?;
+        Ok(BlobId(arr))
+    }
+
+    /// Returns the blob ID as a hex string.
+    #[inline]
+    pub fn hex(&self) -> String {
+        hex::encode(self.0)
+    }
+
+    /// Returns the raw bytes of the blob ID.
+    #[inline]
+    pub fn as_bytes(&self) -> &[u8] {
+        &self.0
+    }
+}
+
+impl<'de> Deserialize<'de> for BlobId {
+    fn deserialize<D: serde::Deserializer<'de>>(d: D) -> std::result::Result<Self, D::Error> {
+        struct Vis;
+        impl serde::de::Visitor<'_> for Vis {
+            type Value = BlobId;
+
+            fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
+                formatter.write_str("a 40-character hex string")
+            }
+
+            fn visit_str<E: serde::de::Error>(
+                self,
+                v: &str,
+            ) -> std::result::Result<Self::Value, E> {
+                BlobId::from_hex(v).map_err(|e| serde::de::Error::custom(e))
+            }
+        }
+        d.deserialize_str(Vis)
+    }
+}
+
+impl std::fmt::Debug for BlobId {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "BlobId({})", self.hex())
+    }
+}
+
+impl std::fmt::Display for BlobId {
+    #[inline]
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}", self.hex())
+    }
+}
+
+impl JsonSchema for BlobId {
+    fn schema_name() -> String {
+        "BlobId".into()
+    }
+
+    fn json_schema(gen: &mut schemars::gen::SchemaGenerator) -> schemars::schema::Schema {
+        let mut o = String::json_schema(gen).into_object();
+        // JSON Schema patterns are unanchored searches; anchor to require exactly 40 hex digits.
+        o.string().pattern = Some("^[0-9a-f]{40}$".into());
+        let md = o.metadata();
+        md.description = Some("A hex-encoded blob ID as computed by Git".into());
+        schemars::schema::Schema::Object(o)
+    }
+}
+
+impl From<BlobId> for String {
+    #[inline]
+    fn from(blob_id: BlobId) -> String {
+        blob_id.hex()
+    }
+}
+
+impl TryFrom<&str> for BlobId {
+    type Error = crate::Error;
+
+    #[inline]
+    fn try_from(s: &str) -> std::result::Result<Self, Self::Error> {
+        BlobId::from_hex(s)
+    }
+}
+
+impl<'a> From<&'a gix::ObjectId> for BlobId {
+    #[inline]
+    fn from(id: &'a gix::ObjectId) -> Self {
+        BlobId(id.as_bytes().try_into().expect("oid should be a 20-byte value"))
+    }
+}
+
+impl From<gix::ObjectId> for BlobId {
+    #[inline]
+    fn from(id: gix::ObjectId) -> Self {
+        BlobId(id.as_bytes().try_into().expect("oid should be a 20-byte value"))
+    }
+}
+
+impl<'a> From<&'a BlobId> for gix::ObjectId {
+    #[inline]
+    fn from(blob_id: &'a BlobId) -> Self {
+        gix::hash::ObjectId::try_from(blob_id.as_bytes()).unwrap()
+    }
+}
+
+impl From<BlobId> for gix::ObjectId {
+    #[inline]
+    fn from(blob_id: BlobId) -> Self {
+        gix::hash::ObjectId::try_from(blob_id.as_bytes()).unwrap()
+    }
+}
+
+/// A concurrent map with [`BlobId`] keys, optimized for low contention.
+///
+/// This implementation uses 256 shards (based on the first byte of the blob ID)
+/// to minimize lock contention during parallel scanning.
+pub struct BlobIdMap<V> {
+    maps: [Mutex<FxHashMap<ObjectId, V>>; 256],
+}
+
+impl<V> BlobIdMap<V> {
+    /// Creates a new empty `BlobIdMap`.
+    pub fn new() -> Self {
+        BlobIdMap { maps: std::array::from_fn(|_| Mutex::new(FxHashMap::default())) }
+    }
+
+    /// Inserts a value, returning the previous value if one existed.
+    #[inline]
+    pub fn insert(&self, blob_id: BlobId, v: V) -> Option<V> {
+        let idx = blob_id.as_bytes()[0] as usize;
+        self.maps[idx].lock().insert(blob_id.into(), v)
+    }
+
+    /// Returns true if the map contains the given key.
+    #[inline]
+    pub fn contains_key(&self, blob_id: &BlobId) -> bool {
+        let idx = blob_id.as_bytes()[0] as usize;
+        self.maps[idx].lock().contains_key(&ObjectId::from(blob_id))
+    }
+
+    /// Returns the total number of entries in the map.
+    ///
+    /// Note: This is not a cheap operation as it must lock all shards.
+    pub fn len(&self) -> usize {
+        self.maps.iter().map(|m| m.lock().len()).sum()
+    }
+
+    /// Returns true if the map is empty.
+    pub fn is_empty(&self) -> bool {
+        self.maps.iter().all(|m| m.lock().is_empty())
+    }
+}
+
+impl<V: Copy> BlobIdMap<V> {
+    /// Gets a copy of the value for the given key.
+    #[inline]
+    pub fn get(&self, blob_id: &BlobId) -> Option<V> {
+        let idx = blob_id.as_bytes()[0] as usize;
+        self.maps[idx].lock().get(&ObjectId::from(blob_id)).copied()
+    }
+}
+
+impl<V> Default for BlobIdMap<V> {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+/// Metadata about a blob.
+#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, JsonSchema)]
+pub struct BlobMetadata {
+    /// The blob's content-based ID.
+    pub id: BlobId,
+
+    /// The length of the blob in bytes.
+    pub num_bytes: usize,
+
+    /// The guessed MIME type of the blob (e.g., "text/plain").
+    pub mime_essence: Option<String>,
+
+    /// The guessed programming language of the blob (e.g., "Python").
+    pub language: Option<String>,
+}
+
+impl BlobMetadata {
+    /// Returns the size in bytes.
+    #[inline]
+    pub fn num_bytes(&self) -> usize {
+        self.num_bytes
+    }
+
+    /// Returns the size in megabytes, rounded to 3 decimal places.
+    #[inline]
+    pub fn num_megabytes(&self) -> f64 {
+        let mb = self.num_bytes as f64 / 1_048_576.0;
+        format!("{:.3}", mb).parse::<f64>().unwrap_or(mb)
+    }
+
+    /// Returns the MIME essence if known.
+    #[inline]
+    pub fn mime_essence(&self) -> Option<&str> {
+        self.mime_essence.as_deref()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_blob_id_empty() {
+        assert_eq!(BlobId::new(&[]).hex(), "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391");
+    }
+
+    #[test]
+    fn test_blob_id_small() {
+        assert_eq!(BlobId::new(&vec![0; 1024]).hex(), "06d7405020018ddf3cacee90fd4af10487da3d20");
+    }
+
+    #[test]
+    fn test_blob_from_bytes() {
+        let blob = Blob::from_bytes(b"hello world".to_vec());
+        assert_eq!(blob.len(), 11);
+        assert_eq!(blob.bytes(), b"hello world");
+    }
+
+    #[test]
+    fn test_blob_id_roundtrip() {
+        let original = BlobId::new(b"test data");
+        let hex = original.hex();
+        let parsed = BlobId::from_hex(&hex).unwrap();
+        assert_eq!(original, parsed);
+    }
+}
diff --git a/crates/kingfisher-core/src/bstring_escape.rs b/crates/kingfisher-core/src/bstring_escape.rs
new file mode 100644
index 0000000..26e4c82
--- /dev/null
+++ b/crates/kingfisher-core/src/bstring_escape.rs
@@ -0,0 +1,128 @@
+//! Safe string escaping utilities.
+//!
+//! This module provides utilities for safely displaying byte strings that may
+//! contain non-UTF8 data, ANSI escape codes, or control characters.
+
+use std::{
+    borrow::Cow,
+    fmt::{Display, Formatter},
+};
+
+use console::strip_ansi_codes;
+
+/// Escapes non-printing characters in a string while preserving whitespace.
+///
+/// Bare ESC (`\x1B`) characters are dropped; every other non-whitespace control
+/// character is rewritten as a `\u{…}` escape. Returns borrowed data if no
+/// escaping was needed, avoiding allocations.
+fn escape_nonprinting(s: &str) -> Cow<'_, str> {
+    let needs_escape = |ch: char| ch.is_control() && !ch.is_whitespace();
+
+    // Fast path - return original if no control chars (except whitespace)
+    if !s.chars().any(needs_escape) {
+        return Cow::Borrowed(s);
+    }
+
+    // Allocate with extra capacity for possible escape sequences
+    let mut escaped = String::with_capacity(s.len() * 2);
+
+    for ch in s.chars() {
+        match ch {
+            // Drop bare ESC characters outright (this does not parse ANSI sequences)
+            '\x1B' => {}
+            // Escape non-whitespace control characters as `\u{…}`
+            ch if needs_escape(ch) => escaped.extend(ch.escape_unicode()),
+            // Pass through all other characters unchanged
+            ch => escaped.push(ch),
+        }
+    }
+
+    Cow::Owned(escaped)
+}
+
+/// A wrapper around `&[u8]` that provides safe string formatting.
+///
+/// When displayed, `Escaped` will:
+/// 1. Convert from UTF-8 with replacement of invalid sequences
+/// 2. Remove ANSI control sequences
+/// 3. Escape remaining control characters (except whitespace)
+///
+/// # Examples
+///
+/// ```
+/// use kingfisher_core::Escaped;
+///
+/// let bytes = b"Hello\x00World";
+/// let escaped = Escaped(bytes);
+/// assert_eq!(escaped.to_string(), "Hello\\u{0}World");
+/// ```
+#[derive(Debug, Clone, Copy)]
+pub struct Escaped<'a>(pub &'a [u8]);
+
+impl Display for Escaped<'_> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        // First handle UTF-8 decoding with replacement characters
+        let decoded = String::from_utf8_lossy(self.0);
+        // Then strip ANSI sequences and escape control chars
+        let stripped = strip_ansi_codes(&decoded);
+        let escaped = escape_nonprinting(&stripped);
+        f.write_str(&escaped)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_escape_normal_text() {
+        let input = "Hello, World!";
+        let result = escape_nonprinting(input);
+        assert!(matches!(result, Cow::Borrowed(_)));
+        assert_eq!(result, "Hello, World!");
+    }
+
+    #[test]
+    fn test_escape_with_whitespace() {
+        let input = "Hello\n\t World!";
+        let result = escape_nonprinting(input);
+        assert!(matches!(result, Cow::Borrowed(_)));
+        assert_eq!(result, "Hello\n\t World!");
+    }
+
+    #[test]
+    fn test_escape_control_chars() {
+        let input = "Hello\x00World\x01";
+        let result = escape_nonprinting(input);
+        assert!(matches!(result, Cow::Owned(_)));
+        assert_eq!(result, "Hello\\u{0}World\\u{1}");
+    }
+
+    #[test]
+    fn test_escaped_struct_simple() {
+        let bytes = b"Hello World";
+        let escaped = Escaped(bytes);
+        assert_eq!(escaped.to_string(), "Hello World");
+    }
+
+    #[test]
+    fn test_escaped_struct_ansi_codes() {
+        let bytes = b"\x1b[31mRed\x1b[0m \x1b[32mGreen\x1b[0m";
+        let escaped = Escaped(bytes);
+        assert_eq!(escaped.to_string(), "Red Green");
+    }
+
+    #[test]
+    fn test_escaped_struct_invalid_utf8() {
+        let bytes = b"Hello\xFF\xFEWorld";
+        let escaped = Escaped(bytes);
+        assert_eq!(escaped.to_string(), "Hello\u{FFFD}\u{FFFD}World");
+    }
+
+    #[test]
+    fn test_escaped_struct_empty() {
+        let bytes = b"";
+        let escaped = Escaped(bytes);
+        assert_eq!(escaped.to_string(), "");
+    }
+}
diff --git a/crates/kingfisher-core/src/entropy.rs b/crates/kingfisher-core/src/entropy.rs
new file mode 100644
index 0000000..a3cebc6
--- /dev/null
+++ b/crates/kingfisher-core/src/entropy.rs
@@ -0,0 +1,89 @@
+//! Shannon entropy calculation.
+//!
+//! Entropy is used to filter out low-entropy strings that are unlikely
+//! to be real secrets.
+
+/// Calculates the Shannon entropy of a byte slice.
+///
+/// Returns a value between 0.0 (empty input or a single repeated byte) and 8.0
+/// (all 256 byte values equally likely). Typical secret-detection thresholds are around 3.5-4.5.
+///
+/// # Examples
+///
+/// ```
+/// use kingfisher_core::calculate_shannon_entropy;
+///
+/// // Low entropy (repeated character)
+/// let entropy = calculate_shannon_entropy(b"aaaaaaaaaa");
+/// assert!(entropy < 0.1);
+///
+/// // High entropy (random-looking)
+/// let entropy = calculate_shannon_entropy(b"j2k#9K$mL*p&vN3");
+/// assert!(entropy > 3.5);
+/// ```
+pub fn calculate_shannon_entropy(bytes: &[u8]) -> f32 {
+    if bytes.is_empty() {
+        return 0.0;
+    }
+
+    // Histogram of byte values; `usize` counters cannot overflow for any in-memory slice
+    let mut counts = [0usize; 256];
+    for &byte in bytes {
+        counts[usize::from(byte)] += 1;
+    }
+
+    let total_bytes = bytes.len() as f32;
+
+    // Accumulate -p * log2(p) over the byte values that occur at least once
+    counts.iter().filter(|&&count| count > 0).fold(0.0, |entropy, &count| {
+        let probability = count as f32 / total_bytes;
+        entropy - probability * probability.log2()
+    })
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_entropy_empty() {
+        let entropy = calculate_shannon_entropy(&[]);
+        assert_eq!(entropy, 0.0);
+        assert!(entropy.is_finite());
+    }
+
+    #[test]
+    fn test_entropy_uniform() {
+        // Single repeated byte should return 0.0
+        let entropy = calculate_shannon_entropy(&[65, 65, 65, 65]);
+        assert_eq!(entropy, 0.0);
+        assert!(entropy.is_finite());
+    }
+
+    #[test]
+    fn test_entropy_two_values() {
+        // Even distribution of two bytes should be exactly 1.0
+        let input = &[1, 2, 1, 2];
+        let entropy = calculate_shannon_entropy(input);
+        assert!((entropy - 1.0).abs() < 0.0001);
+        assert!(entropy.is_finite());
+    }
+
+    #[test]
+    fn test_entropy_password() {
+        // Real password example should have mid-range entropy
+        let password = "Password123!".as_bytes();
+        let entropy = calculate_shannon_entropy(password);
+        assert!(entropy > 2.5);
+        assert!(entropy.is_finite());
+    }
+
+    #[test]
+    fn test_entropy_random() {
+        // Random-looking string should have high entropy
+        let random = "j2k#9K$mL*p&vN3".as_bytes();
+        let entropy = calculate_shannon_entropy(random);
+        assert!(entropy > 3.5);
+        assert!(entropy.is_finite());
+    }
+}
diff --git a/crates/kingfisher-core/src/error.rs b/crates/kingfisher-core/src/error.rs
new file mode 100644
index 0000000..b288d3c
--- /dev/null
+++ b/crates/kingfisher-core/src/error.rs
@@ -0,0 +1,36 @@
+//! Error types for kingfisher-core.
+
+use thiserror::Error;
+
+/// The primary error type for kingfisher-core operations.
+#[derive(Error, Debug)]
+pub enum Error {
+    /// An I/O error occurred.
+    #[error("I/O error: {0}")]
+    Io(#[from] std::io::Error),
+
+    /// Failed to parse a blob ID from hex.
+    #[error("Invalid blob ID: {0}")]
+    InvalidBlobId(String),
+
+    /// A hex decoding error occurred.
+    #[error("Hex decode error: {0}")]
+    HexDecode(#[from] hex::FromHexError),
+
+    /// Failed to open or read a Git repository.
+    #[error("Git error: {0}")]
+    Git(String),
+
+    /// A generic error with a message.
+    #[error("{0}")]
+    Other(String),
+}
+
+impl From<gix::open::Error> for Error {
+    fn from(e: gix::open::Error) -> Self {
+        Error::Git(e.to_string())
+    }
+}
+
+/// A specialized Result type for kingfisher-core operations.
+pub type Result<T> = std::result::Result<T, Error>;
diff --git a/crates/kingfisher-core/src/git_commit_metadata.rs b/crates/kingfisher-core/src/git_commit_metadata.rs
new file mode 100644
index 0000000..4268d5d
--- /dev/null
+++ b/crates/kingfisher-core/src/git_commit_metadata.rs
@@ -0,0 +1,159 @@
+//! Git commit metadata types.
+//!
+//! This module provides types for tracking commit information associated
+//! with blobs found in git history.
+
+use gix::{date::Time, ObjectId};
+use schemars::JsonSchema;
+use serde::{Deserialize, Serialize};
+
+// Wrapper for serializing gix::date::Time as text
+#[repr(transparent)]
+#[derive(Serialize, Deserialize, Copy, Clone)]
+#[serde(remote = "Time")]
+struct TextTime(
+    #[serde(
+        getter = "text_time::getter",
+        serialize_with = "text_time::serialize",
+        deserialize_with = "text_time::deserialize"
+    )]
+    Time,
+);
+
+impl From<TextTime> for Time {
+    fn from(v: TextTime) -> Self {
+        v.0
+    }
+}
+
+impl From<Time> for TextTime {
+    fn from(v: Time) -> Self {
+        Self(v)
+    }
+}
+
+mod text_time {
+    use super::*;
+
+    #[inline]
+    pub fn getter(v: &Time) -> &Time {
+        v
+    }
+
+    #[inline]
+    pub fn serialize<S: serde::Serializer>(v: &Time, serializer: S) -> Result<S::Ok, S::Error> {
+        serializer.collect_str(v)
+    }
+
+    pub fn deserialize<'de, D: serde::Deserializer<'de>>(d: D) -> Result<Time, D::Error> {
+        struct Vis;
+        impl<'a> serde::de::Visitor<'a> for Vis {
+            type Value = Time;
+            fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
+                formatter.write_str("a string representing a Git timestamp")
+            }
+            fn visit_str<E: serde::de::Error>(self, v: &str) -> Result<Self::Value, E> {
+                gix::date::parse(v, None).map_err(E::custom)
+            }
+        }
+        d.deserialize_str(Vis)
+    }
+}
+
+impl JsonSchema for TextTime {
+    fn schema_name() -> String {
+        "Time".into()
+    }
+
+    fn json_schema(gen: &mut schemars::gen::SchemaGenerator) -> schemars::schema::Schema {
+        String::json_schema(gen)
+    }
+}
+
+// Wrapper for serializing gix::ObjectId as hex
+#[repr(transparent)]
+#[derive(Serialize, Deserialize, Copy, Clone)]
+#[serde(remote = "ObjectId")]
+struct HexObjectId(
+    #[serde(
+        getter = "hex_object_id::getter",
+        serialize_with = "hex_object_id::serialize",
+        deserialize_with = "hex_object_id::deserialize"
+    )]
+    ObjectId,
+);
+
+impl From<ObjectId> for HexObjectId {
+    fn from(v: ObjectId) -> Self {
+        HexObjectId(v)
+    }
+}
+
+impl From<HexObjectId> for ObjectId {
+    fn from(v: HexObjectId) -> Self {
+        v.0
+    }
+}
+
+mod hex_object_id {
+    use super::*;
+
+    #[inline]
+    pub fn getter(v: &ObjectId) -> &ObjectId {
+        v
+    }
+
+    #[inline]
+    pub fn serialize<S: serde::Serializer>(v: &ObjectId, serializer: S) -> Result<S::Ok, S::Error> {
+        serializer.collect_str(&v.to_hex())
+    }
+
+    pub fn deserialize<'de, D: serde::Deserializer<'de>>(d: D) -> Result<ObjectId, D::Error> {
+        struct Vis;
+        impl<'a> serde::de::Visitor<'a> for Vis {
+            type Value = ObjectId;
+            fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
+                formatter.write_str("a 40-character hex string representing a Git object ID")
+            }
+            fn visit_str<E: serde::de::Error>(self, v: &str) -> Result<Self::Value, E> {
+                ObjectId::from_hex(v.as_bytes()).map_err(E::custom)
+            }
+        }
+        d.deserialize_str(Vis)
+    }
+}
+
+// JSON Schema for `HexObjectId`: a string of exactly 40 lowercase hex digits.
+impl JsonSchema for HexObjectId {
+    fn schema_name() -> String {
+        "ObjectId".into()
+    }
+
+    fn json_schema(gen: &mut schemars::gen::SchemaGenerator) -> schemars::schema::Schema {
+        let mut o = String::json_schema(gen).into_object();
+        // JSON Schema `pattern` is an unanchored search; `^...$` makes it cover the whole string.
+        o.string().pattern = Some("^[0-9a-f]{40}$".into());
+        o.metadata().description = Some("A hex-encoded object ID as computed by Git".into());
+        schemars::schema::Schema::Object(o)
+    }
+}
+
+/// Metadata about a Git commit.
+///
+/// This is used to track the provenance of blobs found in git history.
+#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize, JsonSchema)]
+pub struct CommitMetadata {
+    /// The SHA-1 commit ID.
+    #[serde(with = "HexObjectId")]
+    pub commit_id: ObjectId,
+
+    /// The committer's name.
+    pub committer_name: String,
+
+    /// The committer's email address.
+    pub committer_email: String,
+
+    /// The commit timestamp.
+    #[serde(with = "TextTime")]
+    pub committer_timestamp: Time,
+}
diff --git a/crates/kingfisher-core/src/lib.rs b/crates/kingfisher-core/src/lib.rs
new file mode 100644
index 0000000..dc964f3
--- /dev/null
+++ b/crates/kingfisher-core/src/lib.rs
@@ -0,0 +1,27 @@
+//! `kingfisher-core` provides the foundational types and traits shared across
+//! the Kingfisher secret scanning library.
+//!
+//! This crate contains:
+//! - [`Blob`] - Representation of scannable content (files, buffers, git objects)
+//! - [`Location`] - Source location tracking (byte offsets and line/column)
+//! - [`Origin`] - Provenance tracking (where content came from)
+//! - Utility functions for entropy calculation, string escaping, etc.
+
+pub mod blob;
+pub mod bstring_escape;
+pub mod entropy;
+pub mod error;
+pub mod git_commit_metadata;
+pub mod location;
+pub mod origin;
+
+// Re-export commonly used types at the crate root
+pub use blob::{
+    Blob, BlobAppearance, BlobAppearanceSet, BlobData, BlobId, BlobIdMap, BlobMetadata,
+};
+pub use bstring_escape::Escaped;
+pub use entropy::calculate_shannon_entropy;
+pub use error::{Error, Result};
+pub use git_commit_metadata::CommitMetadata;
+pub use location::{Location, LocationMapping, OffsetSpan, SourcePoint, SourceSpan};
+pub use origin::{CommitOrigin, ExtendedOrigin, FileOrigin, GitRepoOrigin, Origin, OriginSet};
diff --git a/crates/kingfisher-core/src/location.rs b/crates/kingfisher-core/src/location.rs
new file mode 100644
index 0000000..df2ad97
--- /dev/null
+++ b/crates/kingfisher-core/src/location.rs
@@ -0,0 +1,296 @@
+//! Source location tracking.
+//!
+//! This module provides types for tracking locations within source content:
+//! - [`OffsetSpan`] - Byte offset ranges
+//! - [`SourceSpan`] - Line/column ranges
+//! - [`Location`] - Combined byte and source location
+//! - [`LocationMapping`] - Efficient offset-to-line/column conversion
+
+use core::ops::Range;
+use std::cell::RefCell;
+
+use schemars::JsonSchema;
+use serde::{Deserialize, Serialize};
+
+/// A point defined by a byte offset.
+#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize, Copy, Clone)]
+pub struct OffsetPoint(pub usize);
+
+impl OffsetPoint {
+    #[inline]
+    pub fn new(idx: usize) -> Self {
+        OffsetPoint(idx)
+    }
+}
+
+/// A byte-offset span, half-open `[start, end)`; may be empty or inverted (see `is_empty`).
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize, JsonSchema)]
+pub struct OffsetSpan {
+    pub start: usize,
+    pub end: usize,
+}
+
+impl std::fmt::Display for OffsetSpan {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}-{}", self.start, self.end)
+    }
+}
+
+impl OffsetSpan {
+    /// Creates an `OffsetSpan` from two `OffsetPoint`s.
+    #[inline]
+    pub fn from_offsets(start: OffsetPoint, end: OffsetPoint) -> Self {
+        OffsetSpan { start: start.0, end: end.0 }
+    }
+
+    /// Creates an `OffsetSpan` from a `Range<usize>`.
+    #[inline]
+    pub fn from_range(range: Range<usize>) -> Self {
+        OffsetSpan { start: range.start, end: range.end }
+    }
+
+    /// Returns the length in bytes.
+    #[inline]
+    #[must_use]
+    pub fn len(&self) -> usize {
+        self.end.saturating_sub(self.start)
+    }
+
+    /// Returns true if empty or inverted.
+    #[inline]
+    #[must_use]
+    pub fn is_empty(&self) -> bool {
+        self.start >= self.end
+    }
+
+    /// Returns true if `other` lies entirely within `self`.
+    #[inline]
+    #[must_use]
+    pub fn fully_contains(&self, other: &Self) -> bool {
+        self.start <= other.start && other.end <= self.end
+    }
+}
+
+/// A point in source text; `LocationMapping` yields 1-indexed lines and 0-indexed byte columns.
+#[derive(Debug, PartialEq, Eq, Hash, Copy, Clone, Serialize, Deserialize, JsonSchema)]
+pub struct SourcePoint {
+    pub line: usize,
+    pub column: usize,
+}
+
+impl std::fmt::Display for SourcePoint {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}:{}", self.line, self.column)
+    }
+}
+
+/// A span between two source points (closed interval).
+#[derive(Debug, PartialEq, Eq, Hash, Clone, Serialize, Deserialize, JsonSchema)]
+pub struct SourceSpan {
+    pub start: SourcePoint,
+    pub end: SourcePoint,
+}
+
+impl std::fmt::Display for SourceSpan {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}-{}", self.start, self.end)
+    }
+}
+
+/// Efficiently maps byte offsets to line/column positions.
+///
+/// This lazily scans for newlines as needed, avoiding upfront cost for
+/// files where only a few locations are needed.
+pub struct LocationMapping<'a> {
+    bytes: &'a [u8],
+    newline_offsets: RefCell<Vec<usize>>,
+}
+
+impl<'a> LocationMapping<'a> {
+    /// Creates a mapping over `input`; no newline scanning happens until a lookup needs it.
+    pub fn new(input: &'a [u8]) -> Self {
+        LocationMapping { bytes: input, newline_offsets: RefCell::new(Vec::new()) }
+    }
+
+    /// Caches the position of every not-yet-recorded newline located strictly before `offset`.
+    fn ensure_offsets_up_to(&self, offset: usize) {
+        let mut offsets = self.newline_offsets.borrow_mut();
+        // Resume just past the last cached newline; everything before it is already indexed.
+        let start = offsets.last().map_or(0, |&last| last + 1);
+        if offset < start {
+            return;
+        }
+        let end = offset.min(self.bytes.len());
+        offsets.extend(memchr::memchr_iter(b'\n', &self.bytes[start..end]).map(|nl| start + nl));
+    }
+
+    /// Converts `offset` into a 1-indexed line and 0-indexed byte column.
+    ///
+    /// `offsets` must hold, in ascending order, every newline position before `offset`. The
+    /// result does not depend on how much further the cache extends: a newline byte is always
+    /// reported on the line it terminates, and on line 1 the column is the offset itself.
+    fn source_point_from_offsets(offsets: &[usize], offset: usize) -> SourcePoint {
+        // Count of newlines strictly before `offset`; each one pushes the point down a line.
+        let preceding = offsets.partition_point(|&nl| nl < offset);
+        let column = preceding.checked_sub(1).map_or(offset, |i| offset - offsets[i] - 1);
+        SourcePoint { line: preceding + 1, column }
+    }
+
+    /// Maps a byte offset to a `SourcePoint`.
+    pub fn get_source_point(&self, offset: usize) -> SourcePoint {
+        self.ensure_offsets_up_to(offset);
+        let offsets = self.newline_offsets.borrow();
+        Self::source_point_from_offsets(&offsets, offset)
+    }
+
+    /// Maps an `OffsetSpan` to a `SourceSpan` whose end is the span's last byte (inclusive).
+    pub fn get_source_span(&self, span: &OffsetSpan) -> SourceSpan {
+        let last = span.end.saturating_sub(1);
+        // Cover `start` too so an empty or inverted span still sees the newlines before it.
+        self.ensure_offsets_up_to(span.start.max(last));
+        let offsets = self.newline_offsets.borrow();
+        let start = Self::source_point_from_offsets(&offsets, span.start);
+        SourceSpan { start, end: Self::source_point_from_offsets(&offsets, last) }
+    }
+}
+
+/// Compact representation of a source span to reduce per-match footprint.
+#[derive(Debug, Clone, Copy, Deserialize, Serialize, JsonSchema)]
+pub struct CompactSourceSpan {
+    pub start_line: u32,
+    pub start_column: u32,
+    pub end_line: u32,
+    pub end_column: u32,
+}
+
+impl CompactSourceSpan {
+    #[inline]
+    pub fn zero() -> Self {
+        Self { start_line: 0, start_column: 0, end_line: 0, end_column: 0 }
+    }
+
+    #[inline]
+    pub fn from_source_span(span: &SourceSpan) -> Self {
+        Self {
+            start_line: span.start.line.try_into().unwrap_or(0),
+            start_column: span.start.column.try_into().unwrap_or(0),
+            end_line: span.end.line.try_into().unwrap_or(0),
+            end_column: span.end.column.try_into().unwrap_or(0),
+        }
+    }
+
+    #[inline]
+    pub fn to_source_span(self) -> SourceSpan {
+        SourceSpan {
+            start: SourcePoint {
+                line: usize::try_from(self.start_line).unwrap_or(0),
+                column: usize::try_from(self.start_column).unwrap_or(0),
+            },
+            end: SourcePoint {
+                line: usize::try_from(self.end_line).unwrap_or(0),
+                column: usize::try_from(self.end_column).unwrap_or(0),
+            },
+        }
+    }
+}
+
+/// Combined byte-offset and line/column location; `Serialize` is hand-written for this type.
+#[derive(Debug, Clone, Deserialize, JsonSchema)]
+pub struct Location {
+    /// The byte offset span.
+    pub offset_span: OffsetSpan,
+    /// Line/column span, stored compactly; `None` when the input has no `source_span` field.
+    #[serde(
+        default,
+        serialize_with = "serialize_compact_source_span",
+        deserialize_with = "deserialize_compact_source_span"
+    )]
+    #[schemars(with = "SourceSpan")]
+    pub source_span: Option<CompactSourceSpan>,
+}
+
+impl serde::Serialize for Location {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: serde::Serializer,
+    {
+        use serde::ser::SerializeStruct;
+
+        let mut state = serializer.serialize_struct("Location", 2)?;
+        state.serialize_field("offset_span", &self.offset_span)?;
+        let source_span = self.source_span().unwrap_or_else(CompactSourceSpan::zero);
+        state.serialize_field("source_span", &source_span.to_source_span())?;
+        state.end()
+    }
+}
+
+impl Location {
+    /// Creates a new `Location` with both offset and source spans.
+    #[inline]
+    pub fn with_source_span(offset_span: OffsetSpan, source_span: Option<SourceSpan>) -> Self {
+        Self {
+            offset_span,
+            source_span: source_span.as_ref().map(CompactSourceSpan::from_source_span),
+        }
+    }
+
+    /// Returns the compact source span if available.
+    #[inline]
+    pub fn source_span(&self) -> Option<CompactSourceSpan> {
+        self.source_span
+    }
+
+    /// Returns the source span, defaulting to zeros if not available.
+    #[inline]
+    pub fn resolved_source_span(&self) -> SourceSpan {
+        self.source_span.unwrap_or_else(CompactSourceSpan::zero).to_source_span()
+    }
+}
+
+fn serialize_compact_source_span<S>(
+    span: &Option<CompactSourceSpan>,
+    serializer: S,
+) -> Result<S::Ok, S::Error>
+where
+    S: serde::Serializer,
+{
+    let source_span = span.unwrap_or_else(CompactSourceSpan::zero).to_source_span();
+    source_span.serialize(serializer)
+}
+
+fn deserialize_compact_source_span<'de, D>(
+    deserializer: D,
+) -> Result<Option<CompactSourceSpan>, D::Error>
+where
+    D: serde::Deserializer<'de>,
+{
+    let span = SourceSpan::deserialize(deserializer)?;
+    Ok(Some(CompactSourceSpan::from_source_span(&span)))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_offset_span() {
+        let span = OffsetSpan::from_range(10..20);
+        assert_eq!(span.len(), 10);
+        assert!(!span.is_empty());
+    }
+
+    #[test]
+    fn test_location_mapping() {
+        let input = b"line1\nline2\nline3";
+        let mapping = LocationMapping::new(input);
+
+        // First line, first character
+        assert_eq!(mapping.get_source_point(0), SourcePoint { line: 1, column: 0 });
+
+        // First line, last character
+        assert_eq!(mapping.get_source_point(4), SourcePoint { line: 1, column: 4 });
+
+        // Second line, first character
+        assert_eq!(mapping.get_source_point(6), SourcePoint { line: 2, column: 0 });
+    }
+}
diff --git a/crates/kingfisher-core/src/origin.rs b/crates/kingfisher-core/src/origin.rs
new file mode 100644
index 0000000..278b9d7
--- /dev/null
+++ b/crates/kingfisher-core/src/origin.rs
@@ -0,0 +1,310 @@
+//! Provenance tracking for scanned content.
+//!
+//! This module provides types for tracking where content came from:
+//! - [`FileOrigin`] - Content from a file path
+//! - [`GitRepoOrigin`] - Content from a git repository
+//! - [`ExtendedOrigin`] - Content from other sources (Jira, Confluence, etc.)
+//! - [`OriginSet`] - A non-empty collection of origins
+
+use std::{
+    path::{Path, PathBuf},
+    sync::Arc,
+};
+
+use dashmap::DashMap;
+use once_cell::sync::Lazy;
+use rustc_hash::FxHashSet;
+use schemars::JsonSchema;
+use serde::{ser::SerializeSeq, Deserialize, Serialize};
+use smallvec::SmallVec;
+
+use crate::git_commit_metadata::CommitMetadata;
+
+// Cache for git remote URLs to avoid repeated lookups
+static URL_CACHE: Lazy<DashMap<PathBuf, Arc<str>>> = Lazy::new(DashMap::default);
+
+/// Reads `remote.origin.url` from the repository at `repo_path` and returns it as a web URL.
+///
+/// `http://` and `https://` remotes are returned unchanged; scp-style `git@host:path` remotes
+/// are rewritten to `https://host/path`. A repository that cannot be opened, a missing remote,
+/// or any other URL form produces an error.
+fn compute_url(repo_path: &Path) -> anyhow::Result<String> {
+    use bstr::ByteSlice;
+
+    let repo = gix::open(repo_path)?;
+    let config = repo.config_snapshot();
+    let raw =
+        config.string("remote.origin.url").ok_or_else(|| anyhow::anyhow!("No remote URL found"))?;
+
+    // Lossy decoding leaves ASCII untouched, so these prefix checks agree with the raw bytes.
+    let url = String::from_utf8_lossy(raw.as_bytes());
+    if url.starts_with("http://") || url.starts_with("https://") {
+        return Ok(url.into_owned());
+    }
+
+    let Some(scp_like) = url.strip_prefix("git@") else {
+        return Err(anyhow::anyhow!("Unsupported remote URL format: {}", url));
+    };
+    // `git@host:path` -> `https://host/path`; only the first `:` separates host from path.
+    match scp_like.split_once(':') {
+        Some((domain, path)) => Ok(format!("https://{}/{}", domain, path)),
+        None => Err(anyhow::anyhow!("Invalid SSH URL format")),
+    }
+}
+
+/// Returns the remote URL of the repository at `repo_path`, consulting a process-wide cache.
+pub fn get_repo_url(repo_path: &Path) -> anyhow::Result<Arc<str>> {
+    // Fast path: cache hit
+    if let Some(u) = URL_CACHE.get(repo_path) {
+        return Ok(u.clone());
+    }
+
+    // Slow path: compute and cache; an error returns early via `?` and is never cached
+    let url_arc: Arc<str> = compute_url(repo_path)?.into();
+    URL_CACHE.insert(repo_path.to_path_buf(), url_arc.clone());
+    Ok(url_arc)
+}
+
+/// The provenance of a scanned blob.
+#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize, JsonSchema)]
+#[serde(rename_all = "snake_case", tag = "kind")]
+#[allow(clippy::large_enum_variant)]
+pub enum Origin {
+    /// Content from a file on disk.
+    File(FileOrigin),
+    /// Content from a git repository.
+    GitRepo(GitRepoOrigin),
+    /// Content from an extended source (arbitrary JSON metadata).
+    Extended(ExtendedOrigin),
+}
+
+impl Origin {
+    /// Creates an `Origin` for a plain file.
+    pub fn from_file(path: PathBuf) -> Self {
+        Origin::File(FileOrigin::new(path))
+    }
+
+    /// Creates an `Origin` for a blob in a git repository without commit info.
+    pub fn from_git_repo(repo_path: Arc<PathBuf>) -> Self {
+        Origin::GitRepo(GitRepoOrigin { repo_path, first_commit: None })
+    }
+
+    /// Creates an `Origin` for a blob in a git repository with commit info.
+    pub fn from_git_repo_with_first_commit(
+        repo_path: Arc<PathBuf>,
+        commit_metadata: Arc<CommitMetadata>,
+        blob_path: String,
+    ) -> Self {
+        let first_commit = Some(CommitOrigin { commit_metadata, blob_path });
+        Origin::GitRepo(GitRepoOrigin { repo_path, first_commit })
+    }
+
+    /// Creates an `Origin` from arbitrary JSON metadata.
+    pub fn from_extended(value: serde_json::Value) -> Self {
+        Origin::Extended(ExtendedOrigin(value))
+    }
+
+    /// Returns the path of the blob, if available.
+    pub fn blob_path(&self) -> Option<&Path> {
+        match self {
+            Self::File(e) => Some(&e.path),
+            Self::GitRepo(e) => e.first_commit.as_ref().map(|c| Path::new(&c.blob_path)),
+            Self::Extended(e) => e.path(),
+        }
+    }
+
+    /// Returns the full filesystem path to the content, if available.
+    pub fn full_path(&self) -> Option<PathBuf> {
+        match self {
+            Self::File(e) => Some((*e.path).clone()),
+            Self::GitRepo(e) => e.first_commit.as_ref().map(|c| e.repo_path.join(&c.blob_path)),
+            Self::Extended(e) => e.path().map(PathBuf::from),
+        }
+    }
+}
+
+impl std::fmt::Display for Origin {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Origin::File(e) => write!(f, "file {}", e.path.display()),
+            Origin::GitRepo(e) => match &e.first_commit {
+                Some(md) => write!(
+                    f,
+                    "git repo {}: first seen in commit {} as {}",
+                    e.repo_path.display(),
+                    md.commit_metadata.commit_id,
+                    &md.blob_path,
+                ),
+                None => write!(f, "git repo {}", e.repo_path.display()),
+            },
+            Origin::Extended(e) => write!(f, "extended {}", e),
+        }
+    }
+}
+
+/// Origin information for a file on disk.
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema, Hash)]
+pub struct FileOrigin {
+    /// The file path.
+    pub path: Arc<PathBuf>,
+}
+
+impl FileOrigin {
+    /// Creates a new `FileOrigin` from a path.
+    pub fn new<P: Into<PathBuf>>(p: P) -> Self {
+        Self { path: Arc::new(p.into()) }
+    }
+}
+
+/// Origin information for a blob in a git repository.
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema, Hash)]
+pub struct GitRepoOrigin {
+    /// Path to the repository on disk.
+    pub repo_path: Arc<PathBuf>,
+    /// Information about the first commit where this blob was seen.
+    pub first_commit: Option<CommitOrigin>,
+}
+
+/// Information about where a blob was first seen in git history.
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema, Hash)]
+pub struct CommitOrigin {
+    /// Metadata about the commit.
+    pub commit_metadata: Arc<CommitMetadata>,
+    /// The path of the blob within the commit.
+    pub blob_path: String,
+}
+
+/// An extended origin with arbitrary JSON metadata.
+///
+/// This is used for sources like Jira, Confluence, Slack, etc.
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema, Hash)]
+pub struct ExtendedOrigin(pub serde_json::Value);
+
+impl std::fmt::Display for ExtendedOrigin {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        std::fmt::Display::fmt(&self.0, f)
+    }
+}
+
+impl ExtendedOrigin {
+    /// Returns the path from the extended origin, if available.
+    pub fn path(&self) -> Option<&Path> {
+        let p = self.0.get("path")?.as_str()?;
+        Some(Path::new(p))
+    }
+}
+
+/// A non-empty set of [`Origin`] entries.
+///
+/// This is used when a blob has been seen in multiple locations
+/// (e.g., the same content in multiple files or commits).
+#[derive(Debug, Clone)]
+pub struct OriginSet {
+    origin: Origin,
+    more_provenance: SmallVec<[Origin; 1]>,
+}
+
+impl serde::Serialize for OriginSet {
+    fn serialize<S: serde::Serializer>(&self, s: S) -> Result<S::Ok, S::Error> {
+        let mut seq = s.serialize_seq(Some(self.len()))?;
+        for p in self.iter() {
+            seq.serialize_element(p)?;
+        }
+        seq.end()
+    }
+}
+
+impl JsonSchema for OriginSet {
+    fn schema_name() -> String {
+        "OriginSet".into()
+    }
+
+    fn json_schema(gen: &mut schemars::gen::SchemaGenerator) -> schemars::schema::Schema {
+        let s = <Vec<Origin>>::json_schema(gen);
+        let mut o = s.into_object();
+        o.array().min_items = Some(1);
+        let md = o.metadata();
+        md.description = Some("A non-empty set of `Origin` entries".into());
+        schemars::schema::Schema::Object(o)
+    }
+}
+
+impl OriginSet {
+    /// Creates a new `OriginSet` with a single origin.
+    #[inline]
+    pub fn single(origin: Origin) -> Self {
+        Self { origin, more_provenance: SmallVec::new() }
+    }
+
+    /// Creates a new `OriginSet` from `origin` followed by `more_origin`.
+    ///
+    /// Drops `GitRepo` origins without commit info when the same repo also appears with it.
+    pub fn new(origin: Origin, more_origin: Vec<Origin>) -> Self {
+        let mut git_repos_with_detailed: FxHashSet<Arc<PathBuf>> = FxHashSet::default();
+        for p in std::iter::once(&origin).chain(&more_origin) {
+            if let Origin::GitRepo(GitRepoOrigin { repo_path, first_commit: Some(_) }) = p {
+                git_repos_with_detailed.insert(repo_path.clone());
+            }
+        }
+        let mut filtered = std::iter::once(origin).chain(more_origin).filter(|p| match p {
+            Origin::GitRepo(e) => {
+                e.first_commit.is_some() || !git_repos_with_detailed.contains(&e.repo_path)
+            }
+            Origin::File(_) | Origin::Extended(_) => true,
+        });
+        // Only commit-less `GitRepo` entries are dropped, and only when a detailed entry for the
+        // same repo (which is kept) exists, so the filtered sequence is never empty.
+        let first = filtered.next().expect("filtering never removes every origin");
+        Self { origin: first, more_provenance: filtered.collect() }
+    }
+
+    /// Attempts to create an `OriginSet` from an iterator.
+    ///
+    /// Returns `None` if the iterator is empty.
+    #[inline]
+    pub fn try_from_iter<I>(it: I) -> Option<Self>
+    where
+        I: IntoIterator<Item = Origin>,
+    {
+        let mut it = it.into_iter();
+        let provenance = it.next()?;
+        let more_provenance = it.collect();
+        Some(Self::new(provenance, more_provenance))
+    }
+
+    /// Returns the first origin in the set.
+    #[inline]
+    pub fn first(&self) -> &Origin {
+        &self.origin
+    }
+
+    /// Returns the number of origins in the set.
+    #[allow(clippy::len_without_is_empty)]
+    #[inline]
+    pub fn len(&self) -> usize {
+        1 + self.more_provenance.len()
+    }
+
+    /// Returns an iterator over all origins in the set.
+    #[inline]
+    pub fn iter(&self) -> impl Iterator<Item = &Origin> {
+        std::iter::once(&self.origin).chain(&self.more_provenance)
+    }
+}
+
+impl IntoIterator for OriginSet {
+    type IntoIter =
+        std::iter::Chain<std::iter::Once<Origin>, <Vec<Origin> as IntoIterator>::IntoIter>;
+    type Item = Origin;
+
+    #[inline]
+    fn into_iter(self) -> Self::IntoIter {
+        std::iter::once(self.origin).chain(self.more_provenance.into_vec().into_iter())
+    }
+}
+
+impl From<Origin> for OriginSet {
+    fn from(p: Origin) -> Self {
+        Self::single(p)
+    }
+}
diff --git a/crates/kingfisher-rules/Cargo.toml b/crates/kingfisher-rules/Cargo.toml
new file mode 100644
index 0000000..c63ade1
--- /dev/null
+++ b/crates/kingfisher-rules/Cargo.toml
@@ -0,0 +1,64 @@
+[package]
+name = "kingfisher-rules"
+version = "0.1.0"
+description = "Rule definitions and database for Kingfisher secret scanner"
+edition.workspace = true
+rust-version.workspace = true
+license.workspace = true
+authors.workspace = true
+homepage.workspace = true
+repository.workspace = true
+publish.workspace = true
+
+[dependencies]
+# Internal dependencies
+kingfisher-core = { path = "../kingfisher-core" }
+
+# Serialization
+serde = { version = "1.0", features = ["derive"] }
+serde_json = "1.0"
+serde_yaml = "0.9"
+schemars = "0.8"
+
+# Error handling
+anyhow = "1.0"
+thiserror = "1.0"
+
+# Regex
+regex = "1.12"
+lazy_static = "1.5"
+
+# Hashing
+xxhash-rust = { version = "0.8", features = ["xxh3"] }
+
+# Liquid templating (for checksum validation)
+liquid = "0.26"
+liquid-core = "0.26"
+
+# Crypto, encoding, and utility crates for liquid filters
+base64 = "0.22"
+crc32fast = "1.5"
+hmac = "0.12"
+sha1 = "0.10"
+sha2 = "0.10"
+percent-encoding = "2.3"
+time = "0.3"
+uuid = { version = "1.19", features = ["v4"] }
+rand = "0.9"
+
+# Vectorscan for pattern matching
+vectorscan-rs = "0.0.5"
+
+# Logging
+tracing = "0.1"
+
+# File walking
+walkdir = "2.5"
+ignore = "0.4"
+
+# Embedded rule files
+include_dir = "0.7"
+
+[dev-dependencies]
+pretty_assertions = "1.4"
+proptest = "1.6"
diff --git a/data/rules/adafruitio.yml b/crates/kingfisher-rules/data/rules/adafruitio.yml
similarity index 100%
rename from data/rules/adafruitio.yml
rename to crates/kingfisher-rules/data/rules/adafruitio.yml
diff --git a/data/rules/adobe.yml b/crates/kingfisher-rules/data/rules/adobe.yml
similarity index 100%
rename from data/rules/adobe.yml
rename to crates/kingfisher-rules/data/rules/adobe.yml
diff --git a/data/rules/age.yml b/crates/kingfisher-rules/data/rules/age.yml
similarity index 100%
rename from data/rules/age.yml
rename to crates/kingfisher-rules/data/rules/age.yml
diff --git a/data/rules/ai21.yml b/crates/kingfisher-rules/data/rules/ai21.yml
similarity index 100%
rename from data/rules/ai21.yml
rename to crates/kingfisher-rules/data/rules/ai21.yml
diff --git a/data/rules/airbrake.yml b/crates/kingfisher-rules/data/rules/airbrake.yml
similarity index 100%
rename from data/rules/airbrake.yml
rename to crates/kingfisher-rules/data/rules/airbrake.yml
diff --git a/data/rules/airtable.yml b/crates/kingfisher-rules/data/rules/airtable.yml
similarity index 100%
rename from data/rules/airtable.yml
rename to crates/kingfisher-rules/data/rules/airtable.yml
diff --git a/data/rules/aiven.yml b/crates/kingfisher-rules/data/rules/aiven.yml
similarity index 100%
rename from data/rules/aiven.yml
rename to crates/kingfisher-rules/data/rules/aiven.yml
diff --git a/data/rules/alchemy.yml b/crates/kingfisher-rules/data/rules/alchemy.yml
similarity index 100%
rename from data/rules/alchemy.yml
rename to crates/kingfisher-rules/data/rules/alchemy.yml
diff --git a/data/rules/algolia.yml b/crates/kingfisher-rules/data/rules/algolia.yml
similarity index 100%
rename from data/rules/algolia.yml
rename to crates/kingfisher-rules/data/rules/algolia.yml
diff --git a/data/rules/alibaba.yml b/crates/kingfisher-rules/data/rules/alibaba.yml
similarity index 100%
rename from data/rules/alibaba.yml
rename to crates/kingfisher-rules/data/rules/alibaba.yml
diff --git a/data/rules/anthropic.yml b/crates/kingfisher-rules/data/rules/anthropic.yml
similarity index 100%
rename from data/rules/anthropic.yml
rename to crates/kingfisher-rules/data/rules/anthropic.yml
diff --git a/data/rules/anypoint.yml b/crates/kingfisher-rules/data/rules/anypoint.yml
similarity index 100%
rename from data/rules/anypoint.yml
rename to crates/kingfisher-rules/data/rules/anypoint.yml
diff --git a/data/rules/apify.yml b/crates/kingfisher-rules/data/rules/apify.yml
similarity index 100%
rename from data/rules/apify.yml
rename to crates/kingfisher-rules/data/rules/apify.yml
diff --git a/data/rules/apollo.yml b/crates/kingfisher-rules/data/rules/apollo.yml
similarity index 100%
rename from data/rules/apollo.yml
rename to crates/kingfisher-rules/data/rules/apollo.yml
diff --git a/data/rules/artifactory.yml b/crates/kingfisher-rules/data/rules/artifactory.yml
similarity index 100%
rename from data/rules/artifactory.yml
rename to crates/kingfisher-rules/data/rules/artifactory.yml
diff --git a/data/rules/asana.yml b/crates/kingfisher-rules/data/rules/asana.yml
similarity index 100%
rename from data/rules/asana.yml
rename to crates/kingfisher-rules/data/rules/asana.yml
diff --git a/data/rules/assemblyai.yml b/crates/kingfisher-rules/data/rules/assemblyai.yml
similarity index 100%
rename from data/rules/assemblyai.yml
rename to crates/kingfisher-rules/data/rules/assemblyai.yml
diff --git a/data/rules/atlassian.yml b/crates/kingfisher-rules/data/rules/atlassian.yml
similarity index 100%
rename from data/rules/atlassian.yml
rename to crates/kingfisher-rules/data/rules/atlassian.yml
diff --git a/data/rules/auth0.yml b/crates/kingfisher-rules/data/rules/auth0.yml
similarity index 100%
rename from data/rules/auth0.yml
rename to crates/kingfisher-rules/data/rules/auth0.yml
diff --git a/data/rules/authress.yml b/crates/kingfisher-rules/data/rules/authress.yml
similarity index 100%
rename from data/rules/authress.yml
rename to crates/kingfisher-rules/data/rules/authress.yml
diff --git a/data/rules/aws.yml b/crates/kingfisher-rules/data/rules/aws.yml
similarity index 100%
rename from data/rules/aws.yml
rename to crates/kingfisher-rules/data/rules/aws.yml
diff --git a/data/rules/azure.yml b/crates/kingfisher-rules/data/rules/azure.yml
similarity index 100%
rename from data/rules/azure.yml
rename to crates/kingfisher-rules/data/rules/azure.yml
diff --git a/data/rules/azuredevops.yml b/crates/kingfisher-rules/data/rules/azuredevops.yml
similarity index 100%
rename from data/rules/azuredevops.yml
rename to crates/kingfisher-rules/data/rules/azuredevops.yml
diff --git a/data/rules/azureopenai.yml b/crates/kingfisher-rules/data/rules/azureopenai.yml
similarity index 100%
rename from data/rules/azureopenai.yml
rename to crates/kingfisher-rules/data/rules/azureopenai.yml
diff --git a/data/rules/azuresearchquery.yml b/crates/kingfisher-rules/data/rules/azuresearchquery.yml
similarity index 100%
rename from data/rules/azuresearchquery.yml
rename to crates/kingfisher-rules/data/rules/azuresearchquery.yml
diff --git a/data/rules/azurestorage.yml b/crates/kingfisher-rules/data/rules/azurestorage.yml
similarity index 100%
rename from data/rules/azurestorage.yml
rename to crates/kingfisher-rules/data/rules/azurestorage.yml
diff --git a/data/rules/baremetrics.yml b/crates/kingfisher-rules/data/rules/baremetrics.yml
similarity index 100%
rename from data/rules/baremetrics.yml
rename to crates/kingfisher-rules/data/rules/baremetrics.yml
diff --git a/data/rules/baseten.yml b/crates/kingfisher-rules/data/rules/baseten.yml
similarity index 100%
rename from data/rules/baseten.yml
rename to crates/kingfisher-rules/data/rules/baseten.yml
diff --git a/data/rules/beamer.yml b/crates/kingfisher-rules/data/rules/beamer.yml
similarity index 100%
rename from data/rules/beamer.yml
rename to crates/kingfisher-rules/data/rules/beamer.yml
diff --git a/data/rules/bitbucket.yml b/crates/kingfisher-rules/data/rules/bitbucket.yml
similarity index 100%
rename from data/rules/bitbucket.yml
rename to crates/kingfisher-rules/data/rules/bitbucket.yml
diff --git a/data/rules/bitly.yml b/crates/kingfisher-rules/data/rules/bitly.yml
similarity index 100%
rename from data/rules/bitly.yml
rename to crates/kingfisher-rules/data/rules/bitly.yml
diff --git a/data/rules/blynk.yml b/crates/kingfisher-rules/data/rules/blynk.yml
similarity index 100%
rename from data/rules/blynk.yml
rename to crates/kingfisher-rules/data/rules/blynk.yml
diff --git a/data/rules/buildkite.yml b/crates/kingfisher-rules/data/rules/buildkite.yml
similarity index 100%
rename from data/rules/buildkite.yml
rename to crates/kingfisher-rules/data/rules/buildkite.yml
diff --git a/data/rules/cerebras.yml b/crates/kingfisher-rules/data/rules/cerebras.yml
similarity index 100%
rename from data/rules/cerebras.yml
rename to crates/kingfisher-rules/data/rules/cerebras.yml
diff --git a/data/rules/circleci.yml b/crates/kingfisher-rules/data/rules/circleci.yml
similarity index 100%
rename from data/rules/circleci.yml
rename to crates/kingfisher-rules/data/rules/circleci.yml
diff --git a/data/rules/ciscomeraki.yml b/crates/kingfisher-rules/data/rules/ciscomeraki.yml
similarity index 100%
rename from data/rules/ciscomeraki.yml
rename to crates/kingfisher-rules/data/rules/ciscomeraki.yml
diff --git a/data/rules/clarifai.yml b/crates/kingfisher-rules/data/rules/clarifai.yml
similarity index 100%
rename from data/rules/clarifai.yml
rename to crates/kingfisher-rules/data/rules/clarifai.yml
diff --git a/data/rules/clay.yml b/crates/kingfisher-rules/data/rules/clay.yml
similarity index 100%
rename from data/rules/clay.yml
rename to crates/kingfisher-rules/data/rules/clay.yml
diff --git a/data/rules/clearbit.yml b/crates/kingfisher-rules/data/rules/clearbit.yml
similarity index 100%
rename from data/rules/clearbit.yml
rename to crates/kingfisher-rules/data/rules/clearbit.yml
diff --git a/data/rules/clickhouse.yml b/crates/kingfisher-rules/data/rules/clickhouse.yml
similarity index 100%
rename from data/rules/clickhouse.yml
rename to crates/kingfisher-rules/data/rules/clickhouse.yml
diff --git a/data/rules/clojars.yml b/crates/kingfisher-rules/data/rules/clojars.yml
similarity index 100%
rename from data/rules/clojars.yml
rename to crates/kingfisher-rules/data/rules/clojars.yml
diff --git a/data/rules/cloudflare.yml b/crates/kingfisher-rules/data/rules/cloudflare.yml
similarity index 100%
rename from data/rules/cloudflare.yml
rename to crates/kingfisher-rules/data/rules/cloudflare.yml
diff --git a/data/rules/cloudsight.yml b/crates/kingfisher-rules/data/rules/cloudsight.yml
similarity index 100%
rename from data/rules/cloudsight.yml
rename to crates/kingfisher-rules/data/rules/cloudsight.yml
diff --git a/data/rules/codacy.yml b/crates/kingfisher-rules/data/rules/codacy.yml
similarity index 100%
rename from data/rules/codacy.yml
rename to crates/kingfisher-rules/data/rules/codacy.yml
diff --git a/data/rules/codeclimate.yml b/crates/kingfisher-rules/data/rules/codeclimate.yml
similarity index 100%
rename from data/rules/codeclimate.yml
rename to crates/kingfisher-rules/data/rules/codeclimate.yml
diff --git a/data/rules/codecov.yml b/crates/kingfisher-rules/data/rules/codecov.yml
similarity index 100%
rename from data/rules/codecov.yml
rename to crates/kingfisher-rules/data/rules/codecov.yml
diff --git a/data/rules/coderabbit.yml b/crates/kingfisher-rules/data/rules/coderabbit.yml
similarity index 100%
rename from data/rules/coderabbit.yml
rename to crates/kingfisher-rules/data/rules/coderabbit.yml
diff --git a/data/rules/cohere.yml b/crates/kingfisher-rules/data/rules/cohere.yml
similarity index 100%
rename from data/rules/cohere.yml
rename to crates/kingfisher-rules/data/rules/cohere.yml
diff --git a/data/rules/coinbase.yml b/crates/kingfisher-rules/data/rules/coinbase.yml
similarity index 100%
rename from data/rules/coinbase.yml
rename to crates/kingfisher-rules/data/rules/coinbase.yml
diff --git a/data/rules/confluent.yml b/crates/kingfisher-rules/data/rules/confluent.yml
similarity index 100%
rename from data/rules/confluent.yml
rename to crates/kingfisher-rules/data/rules/confluent.yml
diff --git a/data/rules/contentful.yml b/crates/kingfisher-rules/data/rules/contentful.yml
similarity index 100%
rename from data/rules/contentful.yml
rename to crates/kingfisher-rules/data/rules/contentful.yml
diff --git a/data/rules/coveralls.yml b/crates/kingfisher-rules/data/rules/coveralls.yml
similarity index 100%
rename from data/rules/coveralls.yml
rename to crates/kingfisher-rules/data/rules/coveralls.yml
diff --git a/data/rules/coze.yml b/crates/kingfisher-rules/data/rules/coze.yml
similarity index 100%
rename from data/rules/coze.yml
rename to crates/kingfisher-rules/data/rules/coze.yml
diff --git a/data/rules/crates.io.yml b/crates/kingfisher-rules/data/rules/crates.io.yml
similarity index 100%
rename from data/rules/crates.io.yml
rename to crates/kingfisher-rules/data/rules/crates.io.yml
diff --git a/data/rules/credentials.yml b/crates/kingfisher-rules/data/rules/credentials.yml
similarity index 100%
rename from data/rules/credentials.yml
rename to crates/kingfisher-rules/data/rules/credentials.yml
diff --git a/data/rules/curl.yml b/crates/kingfisher-rules/data/rules/curl.yml
similarity index 100%
rename from data/rules/curl.yml
rename to crates/kingfisher-rules/data/rules/curl.yml
diff --git a/data/rules/cursor.yml b/crates/kingfisher-rules/data/rules/cursor.yml
similarity index 100%
rename from data/rules/cursor.yml
rename to crates/kingfisher-rules/data/rules/cursor.yml
diff --git a/data/rules/customerio.yml b/crates/kingfisher-rules/data/rules/customerio.yml
similarity index 100%
rename from data/rules/customerio.yml
rename to crates/kingfisher-rules/data/rules/customerio.yml
diff --git a/data/rules/databricks.yml b/crates/kingfisher-rules/data/rules/databricks.yml
similarity index 100%
rename from data/rules/databricks.yml
rename to crates/kingfisher-rules/data/rules/databricks.yml
diff --git a/data/rules/datadog.yml b/crates/kingfisher-rules/data/rules/datadog.yml
similarity index 100%
rename from data/rules/datadog.yml
rename to crates/kingfisher-rules/data/rules/datadog.yml
diff --git a/data/rules/datagov.yml b/crates/kingfisher-rules/data/rules/datagov.yml
similarity index 100%
rename from data/rules/datagov.yml
rename to crates/kingfisher-rules/data/rules/datagov.yml
diff --git a/data/rules/deepgram.yml b/crates/kingfisher-rules/data/rules/deepgram.yml
similarity index 100%
rename from data/rules/deepgram.yml
rename to crates/kingfisher-rules/data/rules/deepgram.yml
diff --git a/data/rules/deepseek.yml b/crates/kingfisher-rules/data/rules/deepseek.yml
similarity index 100%
rename from data/rules/deepseek.yml
rename to crates/kingfisher-rules/data/rules/deepseek.yml
diff --git a/data/rules/definednetworking.yml b/crates/kingfisher-rules/data/rules/definednetworking.yml
similarity index 100%
rename from data/rules/definednetworking.yml
rename to crates/kingfisher-rules/data/rules/definednetworking.yml
diff --git a/data/rules/dependency_track.yml b/crates/kingfisher-rules/data/rules/dependency_track.yml
similarity index 100%
rename from data/rules/dependency_track.yml
rename to crates/kingfisher-rules/data/rules/dependency_track.yml
diff --git a/data/rules/diffbot.yml b/crates/kingfisher-rules/data/rules/diffbot.yml
similarity index 100%
rename from data/rules/diffbot.yml
rename to crates/kingfisher-rules/data/rules/diffbot.yml
diff --git a/data/rules/digitalocean.yml b/crates/kingfisher-rules/data/rules/digitalocean.yml
similarity index 100%
rename from data/rules/digitalocean.yml
rename to crates/kingfisher-rules/data/rules/digitalocean.yml
diff --git a/data/rules/discord.yml b/crates/kingfisher-rules/data/rules/discord.yml
similarity index 100%
rename from data/rules/discord.yml
rename to crates/kingfisher-rules/data/rules/discord.yml
diff --git a/data/rules/disqus.yml b/crates/kingfisher-rules/data/rules/disqus.yml
similarity index 100%
rename from data/rules/disqus.yml
rename to crates/kingfisher-rules/data/rules/disqus.yml
diff --git a/data/rules/django.yml b/crates/kingfisher-rules/data/rules/django.yml
similarity index 100%
rename from data/rules/django.yml
rename to crates/kingfisher-rules/data/rules/django.yml
diff --git a/data/rules/docker.yml b/crates/kingfisher-rules/data/rules/docker.yml
similarity index 100%
rename from data/rules/docker.yml
rename to crates/kingfisher-rules/data/rules/docker.yml
diff --git a/data/rules/dockerhub.yml b/crates/kingfisher-rules/data/rules/dockerhub.yml
similarity index 100%
rename from data/rules/dockerhub.yml
rename to crates/kingfisher-rules/data/rules/dockerhub.yml
diff --git a/data/rules/doppler.yml b/crates/kingfisher-rules/data/rules/doppler.yml
similarity index 100%
rename from data/rules/doppler.yml
rename to crates/kingfisher-rules/data/rules/doppler.yml
diff --git a/data/rules/droneci.yml b/crates/kingfisher-rules/data/rules/droneci.yml
similarity index 100%
rename from data/rules/droneci.yml
rename to crates/kingfisher-rules/data/rules/droneci.yml
diff --git a/data/rules/dropbox.yml b/crates/kingfisher-rules/data/rules/dropbox.yml
similarity index 100%
rename from data/rules/dropbox.yml
rename to crates/kingfisher-rules/data/rules/dropbox.yml
diff --git a/data/rules/duffel.yml b/crates/kingfisher-rules/data/rules/duffel.yml
similarity index 100%
rename from data/rules/duffel.yml
rename to crates/kingfisher-rules/data/rules/duffel.yml
diff --git a/data/rules/dynatrace.yml b/crates/kingfisher-rules/data/rules/dynatrace.yml
similarity index 100%
rename from data/rules/dynatrace.yml
rename to crates/kingfisher-rules/data/rules/dynatrace.yml
diff --git a/data/rules/easypost.yml b/crates/kingfisher-rules/data/rules/easypost.yml
similarity index 100%
rename from data/rules/easypost.yml
rename to crates/kingfisher-rules/data/rules/easypost.yml
diff --git a/data/rules/elevenlabs.yml b/crates/kingfisher-rules/data/rules/elevenlabs.yml
similarity index 100%
rename from data/rules/elevenlabs.yml
rename to crates/kingfisher-rules/data/rules/elevenlabs.yml
diff --git a/data/rules/endorlabs.yml b/crates/kingfisher-rules/data/rules/endorlabs.yml
similarity index 100%
rename from data/rules/endorlabs.yml
rename to crates/kingfisher-rules/data/rules/endorlabs.yml
diff --git a/data/rules/eraserio.yml b/crates/kingfisher-rules/data/rules/eraserio.yml
similarity index 100%
rename from data/rules/eraserio.yml
rename to crates/kingfisher-rules/data/rules/eraserio.yml
diff --git a/data/rules/eventbrite.yml b/crates/kingfisher-rules/data/rules/eventbrite.yml
similarity index 100%
rename from data/rules/eventbrite.yml
rename to crates/kingfisher-rules/data/rules/eventbrite.yml
diff --git a/data/rules/exaai.yml b/crates/kingfisher-rules/data/rules/exaai.yml
similarity index 100%
rename from data/rules/exaai.yml
rename to crates/kingfisher-rules/data/rules/exaai.yml
diff --git a/data/rules/facebook.yml b/crates/kingfisher-rules/data/rules/facebook.yml
similarity index 100%
rename from data/rules/facebook.yml
rename to crates/kingfisher-rules/data/rules/facebook.yml
diff --git a/data/rules/fastly.yml b/crates/kingfisher-rules/data/rules/fastly.yml
similarity index 100%
rename from data/rules/fastly.yml
rename to crates/kingfisher-rules/data/rules/fastly.yml
diff --git a/data/rules/figma.yml b/crates/kingfisher-rules/data/rules/figma.yml
similarity index 100%
rename from data/rules/figma.yml
rename to crates/kingfisher-rules/data/rules/figma.yml
diff --git a/data/rules/fileio.yml b/crates/kingfisher-rules/data/rules/fileio.yml
similarity index 100%
rename from data/rules/fileio.yml
rename to crates/kingfisher-rules/data/rules/fileio.yml
diff --git a/data/rules/filezilla.yml b/crates/kingfisher-rules/data/rules/filezilla.yml
similarity index 100%
rename from data/rules/filezilla.yml
rename to crates/kingfisher-rules/data/rules/filezilla.yml
diff --git a/data/rules/finicity.yml b/crates/kingfisher-rules/data/rules/finicity.yml
similarity index 100%
rename from data/rules/finicity.yml
rename to crates/kingfisher-rules/data/rules/finicity.yml
diff --git a/data/rules/finnhub.yml b/crates/kingfisher-rules/data/rules/finnhub.yml
similarity index 100%
rename from data/rules/finnhub.yml
rename to crates/kingfisher-rules/data/rules/finnhub.yml
diff --git a/data/rules/firecrawl.yml b/crates/kingfisher-rules/data/rules/firecrawl.yml
similarity index 100%
rename from data/rules/firecrawl.yml
rename to crates/kingfisher-rules/data/rules/firecrawl.yml
diff --git a/data/rules/fireworksai.yml b/crates/kingfisher-rules/data/rules/fireworksai.yml
similarity index 100%
rename from data/rules/fireworksai.yml
rename to crates/kingfisher-rules/data/rules/fireworksai.yml
diff --git a/data/rules/fleetbase.yml b/crates/kingfisher-rules/data/rules/fleetbase.yml
similarity index 100%
rename from data/rules/fleetbase.yml
rename to crates/kingfisher-rules/data/rules/fleetbase.yml
diff --git a/data/rules/flickr.yml b/crates/kingfisher-rules/data/rules/flickr.yml
similarity index 100%
rename from data/rules/flickr.yml
rename to crates/kingfisher-rules/data/rules/flickr.yml
diff --git a/data/rules/flyio.yml b/crates/kingfisher-rules/data/rules/flyio.yml
similarity index 100%
rename from data/rules/flyio.yml
rename to crates/kingfisher-rules/data/rules/flyio.yml
diff --git a/data/rules/foursquare.yml b/crates/kingfisher-rules/data/rules/foursquare.yml
similarity index 100%
rename from data/rules/foursquare.yml
rename to crates/kingfisher-rules/data/rules/foursquare.yml
diff --git a/data/rules/frame.io.yml b/crates/kingfisher-rules/data/rules/frame.io.yml
similarity index 100%
rename from data/rules/frame.io.yml
rename to crates/kingfisher-rules/data/rules/frame.io.yml
diff --git a/data/rules/frameio.yml b/crates/kingfisher-rules/data/rules/frameio.yml
similarity index 100%
rename from data/rules/frameio.yml
rename to crates/kingfisher-rules/data/rules/frameio.yml
diff --git a/data/rules/freshbooks.yml b/crates/kingfisher-rules/data/rules/freshbooks.yml
similarity index 100%
rename from data/rules/freshbooks.yml
rename to crates/kingfisher-rules/data/rules/freshbooks.yml
diff --git a/data/rules/freshdesk.yml b/crates/kingfisher-rules/data/rules/freshdesk.yml
similarity index 100%
rename from data/rules/freshdesk.yml
rename to crates/kingfisher-rules/data/rules/freshdesk.yml
diff --git a/data/rules/friendli.yml b/crates/kingfisher-rules/data/rules/friendli.yml
similarity index 100%
rename from data/rules/friendli.yml
rename to crates/kingfisher-rules/data/rules/friendli.yml
diff --git a/data/rules/gcp.yml b/crates/kingfisher-rules/data/rules/gcp.yml
similarity index 100%
rename from data/rules/gcp.yml
rename to crates/kingfisher-rules/data/rules/gcp.yml
diff --git a/data/rules/generic.yml b/crates/kingfisher-rules/data/rules/generic.yml
similarity index 100%
rename from data/rules/generic.yml
rename to crates/kingfisher-rules/data/rules/generic.yml
diff --git a/data/rules/gitalk.yml b/crates/kingfisher-rules/data/rules/gitalk.yml
similarity index 100%
rename from data/rules/gitalk.yml
rename to crates/kingfisher-rules/data/rules/gitalk.yml
diff --git a/data/rules/github.yml b/crates/kingfisher-rules/data/rules/github.yml
similarity index 100%
rename from data/rules/github.yml
rename to crates/kingfisher-rules/data/rules/github.yml
diff --git a/data/rules/gitlab.yml b/crates/kingfisher-rules/data/rules/gitlab.yml
similarity index 100%
rename from data/rules/gitlab.yml
rename to crates/kingfisher-rules/data/rules/gitlab.yml
diff --git a/data/rules/gitter.yml b/crates/kingfisher-rules/data/rules/gitter.yml
similarity index 100%
rename from data/rules/gitter.yml
rename to crates/kingfisher-rules/data/rules/gitter.yml
diff --git a/data/rules/gocardless.yml b/crates/kingfisher-rules/data/rules/gocardless.yml
similarity index 100%
rename from data/rules/gocardless.yml
rename to crates/kingfisher-rules/data/rules/gocardless.yml
diff --git a/data/rules/google.yml b/crates/kingfisher-rules/data/rules/google.yml
similarity index 100%
rename from data/rules/google.yml
rename to crates/kingfisher-rules/data/rules/google.yml
diff --git a/data/rules/googleoauth2.yml b/crates/kingfisher-rules/data/rules/googleoauth2.yml
similarity index 100%
rename from data/rules/googleoauth2.yml
rename to crates/kingfisher-rules/data/rules/googleoauth2.yml
diff --git a/data/rules/gradle.yml b/crates/kingfisher-rules/data/rules/gradle.yml
similarity index 100%
rename from data/rules/gradle.yml
rename to crates/kingfisher-rules/data/rules/gradle.yml
diff --git a/data/rules/grafana.yml b/crates/kingfisher-rules/data/rules/grafana.yml
similarity index 100%
rename from data/rules/grafana.yml
rename to crates/kingfisher-rules/data/rules/grafana.yml
diff --git a/data/rules/groq.yml b/crates/kingfisher-rules/data/rules/groq.yml
similarity index 100%
rename from data/rules/groq.yml
rename to crates/kingfisher-rules/data/rules/groq.yml
diff --git a/data/rules/guardian.yml b/crates/kingfisher-rules/data/rules/guardian.yml
similarity index 100%
rename from data/rules/guardian.yml
rename to crates/kingfisher-rules/data/rules/guardian.yml
diff --git a/data/rules/gumroad.yml b/crates/kingfisher-rules/data/rules/gumroad.yml
similarity index 100%
rename from data/rules/gumroad.yml
rename to crates/kingfisher-rules/data/rules/gumroad.yml
diff --git a/data/rules/harness.yml b/crates/kingfisher-rules/data/rules/harness.yml
similarity index 100%
rename from data/rules/harness.yml
rename to crates/kingfisher-rules/data/rules/harness.yml
diff --git a/data/rules/hashes.yml b/crates/kingfisher-rules/data/rules/hashes.yml
similarity index 100%
rename from data/rules/hashes.yml
rename to crates/kingfisher-rules/data/rules/hashes.yml
diff --git a/data/rules/hashicorp.yml b/crates/kingfisher-rules/data/rules/hashicorp.yml
similarity index 100%
rename from data/rules/hashicorp.yml
rename to crates/kingfisher-rules/data/rules/hashicorp.yml
diff --git a/data/rules/hereapi.yml b/crates/kingfisher-rules/data/rules/hereapi.yml
similarity index 100%
rename from data/rules/hereapi.yml
rename to crates/kingfisher-rules/data/rules/hereapi.yml
diff --git a/data/rules/heroku.yml b/crates/kingfisher-rules/data/rules/heroku.yml
similarity index 100%
rename from data/rules/heroku.yml
rename to crates/kingfisher-rules/data/rules/heroku.yml
diff --git a/data/rules/honeycomb.yml b/crates/kingfisher-rules/data/rules/honeycomb.yml
similarity index 100%
rename from data/rules/honeycomb.yml
rename to crates/kingfisher-rules/data/rules/honeycomb.yml
diff --git a/data/rules/http.yml b/crates/kingfisher-rules/data/rules/http.yml
similarity index 100%
rename from data/rules/http.yml
rename to crates/kingfisher-rules/data/rules/http.yml
diff --git a/data/rules/hubspot.yml b/crates/kingfisher-rules/data/rules/hubspot.yml
similarity index 100%
rename from data/rules/hubspot.yml
rename to crates/kingfisher-rules/data/rules/hubspot.yml
diff --git a/data/rules/huggingface.yml b/crates/kingfisher-rules/data/rules/huggingface.yml
similarity index 100%
rename from data/rules/huggingface.yml
rename to crates/kingfisher-rules/data/rules/huggingface.yml
diff --git a/data/rules/ibm.yml b/crates/kingfisher-rules/data/rules/ibm.yml
similarity index 100%
rename from data/rules/ibm.yml
rename to crates/kingfisher-rules/data/rules/ibm.yml
diff --git a/data/rules/imagekit.yml b/crates/kingfisher-rules/data/rules/imagekit.yml
similarity index 100%
rename from data/rules/imagekit.yml
rename to crates/kingfisher-rules/data/rules/imagekit.yml
diff --git a/data/rules/infracost.yml b/crates/kingfisher-rules/data/rules/infracost.yml
similarity index 100%
rename from data/rules/infracost.yml
rename to crates/kingfisher-rules/data/rules/infracost.yml
diff --git a/data/rules/infura.yml b/crates/kingfisher-rules/data/rules/infura.yml
similarity index 100%
rename from data/rules/infura.yml
rename to crates/kingfisher-rules/data/rules/infura.yml
diff --git a/data/rules/instantly.yml b/crates/kingfisher-rules/data/rules/instantly.yml
similarity index 100%
rename from data/rules/instantly.yml
rename to crates/kingfisher-rules/data/rules/instantly.yml
diff --git a/data/rules/intercom.yml b/crates/kingfisher-rules/data/rules/intercom.yml
similarity index 100%
rename from data/rules/intercom.yml
rename to crates/kingfisher-rules/data/rules/intercom.yml
diff --git a/data/rules/intra42.yml b/crates/kingfisher-rules/data/rules/intra42.yml
similarity index 100%
rename from data/rules/intra42.yml
rename to crates/kingfisher-rules/data/rules/intra42.yml
diff --git a/data/rules/ionic.yml b/crates/kingfisher-rules/data/rules/ionic.yml
similarity index 100%
rename from data/rules/ionic.yml
rename to crates/kingfisher-rules/data/rules/ionic.yml
diff --git a/data/rules/ipstack.yml b/crates/kingfisher-rules/data/rules/ipstack.yml
similarity index 100%
rename from data/rules/ipstack.yml
rename to crates/kingfisher-rules/data/rules/ipstack.yml
diff --git a/data/rules/jdbc.yml b/crates/kingfisher-rules/data/rules/jdbc.yml
similarity index 100%
rename from data/rules/jdbc.yml
rename to crates/kingfisher-rules/data/rules/jdbc.yml
diff --git a/data/rules/jenkins.yml b/crates/kingfisher-rules/data/rules/jenkins.yml
similarity index 100%
rename from data/rules/jenkins.yml
rename to crates/kingfisher-rules/data/rules/jenkins.yml
diff --git a/data/rules/jina.yml b/crates/kingfisher-rules/data/rules/jina.yml
similarity index 100%
rename from data/rules/jina.yml
rename to crates/kingfisher-rules/data/rules/jina.yml
diff --git a/data/rules/jira.yml b/crates/kingfisher-rules/data/rules/jira.yml
similarity index 100%
rename from data/rules/jira.yml
rename to crates/kingfisher-rules/data/rules/jira.yml
diff --git a/data/rules/jotform.yml b/crates/kingfisher-rules/data/rules/jotform.yml
similarity index 100%
rename from data/rules/jotform.yml
rename to crates/kingfisher-rules/data/rules/jotform.yml
diff --git a/data/rules/jumpcloud.yml b/crates/kingfisher-rules/data/rules/jumpcloud.yml
similarity index 100%
rename from data/rules/jumpcloud.yml
rename to crates/kingfisher-rules/data/rules/jumpcloud.yml
diff --git a/data/rules/jwt.yml b/crates/kingfisher-rules/data/rules/jwt.yml
similarity index 100%
rename from data/rules/jwt.yml
rename to crates/kingfisher-rules/data/rules/jwt.yml
diff --git a/data/rules/kagi.yml b/crates/kingfisher-rules/data/rules/kagi.yml
similarity index 100%
rename from data/rules/kagi.yml
rename to crates/kingfisher-rules/data/rules/kagi.yml
diff --git a/data/rules/kickbox.yml b/crates/kingfisher-rules/data/rules/kickbox.yml
similarity index 100%
rename from data/rules/kickbox.yml
rename to crates/kingfisher-rules/data/rules/kickbox.yml
diff --git a/data/rules/klaviyo.yml b/crates/kingfisher-rules/data/rules/klaviyo.yml
similarity index 100%
rename from data/rules/klaviyo.yml
rename to crates/kingfisher-rules/data/rules/klaviyo.yml
diff --git a/data/rules/klingai.yml b/crates/kingfisher-rules/data/rules/klingai.yml
similarity index 100%
rename from data/rules/klingai.yml
rename to crates/kingfisher-rules/data/rules/klingai.yml
diff --git a/data/rules/langchain.yml b/crates/kingfisher-rules/data/rules/langchain.yml
similarity index 100%
rename from data/rules/langchain.yml
rename to crates/kingfisher-rules/data/rules/langchain.yml
diff --git a/data/rules/lark.yml b/crates/kingfisher-rules/data/rules/lark.yml
similarity index 100%
rename from data/rules/lark.yml
rename to crates/kingfisher-rules/data/rules/lark.yml
diff --git a/data/rules/launchdarkly.yml b/crates/kingfisher-rules/data/rules/launchdarkly.yml
similarity index 100%
rename from data/rules/launchdarkly.yml
rename to crates/kingfisher-rules/data/rules/launchdarkly.yml
diff --git a/data/rules/line.yml b/crates/kingfisher-rules/data/rules/line.yml
similarity index 100%
rename from data/rules/line.yml
rename to crates/kingfisher-rules/data/rules/line.yml
diff --git a/data/rules/linear.yml b/crates/kingfisher-rules/data/rules/linear.yml
similarity index 100%
rename from data/rules/linear.yml
rename to crates/kingfisher-rules/data/rules/linear.yml
diff --git a/data/rules/linkedin.yml b/crates/kingfisher-rules/data/rules/linkedin.yml
similarity index 100%
rename from data/rules/linkedin.yml
rename to crates/kingfisher-rules/data/rules/linkedin.yml
diff --git a/data/rules/lob.yml b/crates/kingfisher-rules/data/rules/lob.yml
similarity index 100%
rename from data/rules/lob.yml
rename to crates/kingfisher-rules/data/rules/lob.yml
diff --git a/data/rules/looker.yml b/crates/kingfisher-rules/data/rules/looker.yml
similarity index 100%
rename from data/rules/looker.yml
rename to crates/kingfisher-rules/data/rules/looker.yml
diff --git a/data/rules/mailchimp.yml b/crates/kingfisher-rules/data/rules/mailchimp.yml
similarity index 100%
rename from data/rules/mailchimp.yml
rename to crates/kingfisher-rules/data/rules/mailchimp.yml
diff --git a/data/rules/mailgun.yml b/crates/kingfisher-rules/data/rules/mailgun.yml
similarity index 100%
rename from data/rules/mailgun.yml
rename to crates/kingfisher-rules/data/rules/mailgun.yml
diff --git a/data/rules/mailjet.yml b/crates/kingfisher-rules/data/rules/mailjet.yml
similarity index 100%
rename from data/rules/mailjet.yml
rename to crates/kingfisher-rules/data/rules/mailjet.yml
diff --git a/data/rules/mandrill.yml b/crates/kingfisher-rules/data/rules/mandrill.yml
similarity index 100%
rename from data/rules/mandrill.yml
rename to crates/kingfisher-rules/data/rules/mandrill.yml
diff --git a/data/rules/mapbox.yml b/crates/kingfisher-rules/data/rules/mapbox.yml
similarity index 100%
rename from data/rules/mapbox.yml
rename to crates/kingfisher-rules/data/rules/mapbox.yml
diff --git a/data/rules/mattermost.yml b/crates/kingfisher-rules/data/rules/mattermost.yml
similarity index 100%
rename from data/rules/mattermost.yml
rename to crates/kingfisher-rules/data/rules/mattermost.yml
diff --git a/data/rules/maxmind.yml b/crates/kingfisher-rules/data/rules/maxmind.yml
similarity index 100%
rename from data/rules/maxmind.yml
rename to crates/kingfisher-rules/data/rules/maxmind.yml
diff --git a/data/rules/mergify.yml b/crates/kingfisher-rules/data/rules/mergify.yml
similarity index 100%
rename from data/rules/mergify.yml
rename to crates/kingfisher-rules/data/rules/mergify.yml
diff --git a/data/rules/messagebird.yml b/crates/kingfisher-rules/data/rules/messagebird.yml
similarity index 100%
rename from data/rules/messagebird.yml
rename to crates/kingfisher-rules/data/rules/messagebird.yml
diff --git a/data/rules/microsoft_teams.yml b/crates/kingfisher-rules/data/rules/microsoft_teams.yml
similarity index 100%
rename from data/rules/microsoft_teams.yml
rename to crates/kingfisher-rules/data/rules/microsoft_teams.yml
diff --git a/data/rules/microsoftteamswebhook.yml b/crates/kingfisher-rules/data/rules/microsoftteamswebhook.yml
similarity index 100%
rename from data/rules/microsoftteamswebhook.yml
rename to crates/kingfisher-rules/data/rules/microsoftteamswebhook.yml
diff --git a/data/rules/mistral.yml b/crates/kingfisher-rules/data/rules/mistral.yml
similarity index 100%
rename from data/rules/mistral.yml
rename to crates/kingfisher-rules/data/rules/mistral.yml
diff --git a/data/rules/monday.yml b/crates/kingfisher-rules/data/rules/monday.yml
similarity index 100%
rename from data/rules/monday.yml
rename to crates/kingfisher-rules/data/rules/monday.yml
diff --git a/data/rules/mongodb.yml b/crates/kingfisher-rules/data/rules/mongodb.yml
similarity index 100%
rename from data/rules/mongodb.yml
rename to crates/kingfisher-rules/data/rules/mongodb.yml
diff --git a/data/rules/mysql.yml b/crates/kingfisher-rules/data/rules/mysql.yml
similarity index 100%
rename from data/rules/mysql.yml
rename to crates/kingfisher-rules/data/rules/mysql.yml
diff --git a/data/rules/nasa.yml b/crates/kingfisher-rules/data/rules/nasa.yml
similarity index 100%
rename from data/rules/nasa.yml
rename to crates/kingfisher-rules/data/rules/nasa.yml
diff --git a/data/rules/netlify.yml b/crates/kingfisher-rules/data/rules/netlify.yml
similarity index 100%
rename from data/rules/netlify.yml
rename to crates/kingfisher-rules/data/rules/netlify.yml
diff --git a/data/rules/netrc.yml b/crates/kingfisher-rules/data/rules/netrc.yml
similarity index 100%
rename from data/rules/netrc.yml
rename to crates/kingfisher-rules/data/rules/netrc.yml
diff --git a/data/rules/newrelic.yml b/crates/kingfisher-rules/data/rules/newrelic.yml
similarity index 100%
rename from data/rules/newrelic.yml
rename to crates/kingfisher-rules/data/rules/newrelic.yml
diff --git a/data/rules/ngrok.yml b/crates/kingfisher-rules/data/rules/ngrok.yml
similarity index 100%
rename from data/rules/ngrok.yml
rename to crates/kingfisher-rules/data/rules/ngrok.yml
diff --git a/data/rules/notion.yml b/crates/kingfisher-rules/data/rules/notion.yml
similarity index 100%
rename from data/rules/notion.yml
rename to crates/kingfisher-rules/data/rules/notion.yml
diff --git a/data/rules/npm.yml b/crates/kingfisher-rules/data/rules/npm.yml
similarity index 100%
rename from data/rules/npm.yml
rename to crates/kingfisher-rules/data/rules/npm.yml
diff --git a/data/rules/nuget.yml b/crates/kingfisher-rules/data/rules/nuget.yml
similarity index 100%
rename from data/rules/nuget.yml
rename to crates/kingfisher-rules/data/rules/nuget.yml
diff --git a/data/rules/nvidia.yml b/crates/kingfisher-rules/data/rules/nvidia.yml
similarity index 100%
rename from data/rules/nvidia.yml
rename to crates/kingfisher-rules/data/rules/nvidia.yml
diff --git a/data/rules/nylas.yml b/crates/kingfisher-rules/data/rules/nylas.yml
similarity index 100%
rename from data/rules/nylas.yml
rename to crates/kingfisher-rules/data/rules/nylas.yml
diff --git a/data/rules/nytimes.yml b/crates/kingfisher-rules/data/rules/nytimes.yml
similarity index 100%
rename from data/rules/nytimes.yml
rename to crates/kingfisher-rules/data/rules/nytimes.yml
diff --git a/data/rules/odbc.yml b/crates/kingfisher-rules/data/rules/odbc.yml
similarity index 100%
rename from data/rules/odbc.yml
rename to crates/kingfisher-rules/data/rules/odbc.yml
diff --git a/data/rules/okta.yml b/crates/kingfisher-rules/data/rules/okta.yml
similarity index 100%
rename from data/rules/okta.yml
rename to crates/kingfisher-rules/data/rules/okta.yml
diff --git a/data/rules/ollama.yml b/crates/kingfisher-rules/data/rules/ollama.yml
similarity index 100%
rename from data/rules/ollama.yml
rename to crates/kingfisher-rules/data/rules/ollama.yml
diff --git a/data/rules/onepassword.yml b/crates/kingfisher-rules/data/rules/onepassword.yml
similarity index 100%
rename from data/rules/onepassword.yml
rename to crates/kingfisher-rules/data/rules/onepassword.yml
diff --git a/data/rules/openai.yml b/crates/kingfisher-rules/data/rules/openai.yml
similarity index 100%
rename from data/rules/openai.yml
rename to crates/kingfisher-rules/data/rules/openai.yml
diff --git a/data/rules/openrouter.yml b/crates/kingfisher-rules/data/rules/openrouter.yml
similarity index 100%
rename from data/rules/openrouter.yml
rename to crates/kingfisher-rules/data/rules/openrouter.yml
diff --git a/data/rules/openweathermap.yml b/crates/kingfisher-rules/data/rules/openweathermap.yml
similarity index 100%
rename from data/rules/openweathermap.yml
rename to crates/kingfisher-rules/data/rules/openweathermap.yml
diff --git a/data/rules/opsgenie.yml b/crates/kingfisher-rules/data/rules/opsgenie.yml
similarity index 100%
rename from data/rules/opsgenie.yml
rename to crates/kingfisher-rules/data/rules/opsgenie.yml
diff --git a/data/rules/optimizely.yml b/crates/kingfisher-rules/data/rules/optimizely.yml
similarity index 100%
rename from data/rules/optimizely.yml
rename to crates/kingfisher-rules/data/rules/optimizely.yml
diff --git a/data/rules/owlbot.yml b/crates/kingfisher-rules/data/rules/owlbot.yml
similarity index 100%
rename from data/rules/owlbot.yml
rename to crates/kingfisher-rules/data/rules/owlbot.yml
diff --git a/data/rules/packagecloud.yml b/crates/kingfisher-rules/data/rules/packagecloud.yml
similarity index 100%
rename from data/rules/packagecloud.yml
rename to crates/kingfisher-rules/data/rules/packagecloud.yml
diff --git a/data/rules/pagerdutyapikey.yml b/crates/kingfisher-rules/data/rules/pagerdutyapikey.yml
similarity index 100%
rename from data/rules/pagerdutyapikey.yml
rename to crates/kingfisher-rules/data/rules/pagerdutyapikey.yml
diff --git a/data/rules/particle.io.yml b/crates/kingfisher-rules/data/rules/particle.io.yml
similarity index 100%
rename from data/rules/particle.io.yml
rename to crates/kingfisher-rules/data/rules/particle.io.yml
diff --git a/data/rules/pastebin.yml b/crates/kingfisher-rules/data/rules/pastebin.yml
similarity index 100%
rename from data/rules/pastebin.yml
rename to crates/kingfisher-rules/data/rules/pastebin.yml
diff --git a/data/rules/paypal.yml b/crates/kingfisher-rules/data/rules/paypal.yml
similarity index 100%
rename from data/rules/paypal.yml
rename to crates/kingfisher-rules/data/rules/paypal.yml
diff --git a/data/rules/paystack.yml b/crates/kingfisher-rules/data/rules/paystack.yml
similarity index 100%
rename from data/rules/paystack.yml
rename to crates/kingfisher-rules/data/rules/paystack.yml
diff --git a/data/rules/pdflayer.yml b/crates/kingfisher-rules/data/rules/pdflayer.yml
similarity index 100%
rename from data/rules/pdflayer.yml
rename to crates/kingfisher-rules/data/rules/pdflayer.yml
diff --git a/data/rules/pem.yml b/crates/kingfisher-rules/data/rules/pem.yml
similarity index 100%
rename from data/rules/pem.yml
rename to crates/kingfisher-rules/data/rules/pem.yml
diff --git a/data/rules/perplexity.yml b/crates/kingfisher-rules/data/rules/perplexity.yml
similarity index 100%
rename from data/rules/perplexity.yml
rename to crates/kingfisher-rules/data/rules/perplexity.yml
diff --git a/data/rules/phpmailer.yml b/crates/kingfisher-rules/data/rules/phpmailer.yml
similarity index 100%
rename from data/rules/phpmailer.yml
rename to crates/kingfisher-rules/data/rules/phpmailer.yml
diff --git a/data/rules/plaid.yml b/crates/kingfisher-rules/data/rules/plaid.yml
similarity index 100%
rename from data/rules/plaid.yml
rename to crates/kingfisher-rules/data/rules/plaid.yml
diff --git a/data/rules/planetscale.yml b/crates/kingfisher-rules/data/rules/planetscale.yml
similarity index 100%
rename from data/rules/planetscale.yml
rename to crates/kingfisher-rules/data/rules/planetscale.yml
diff --git a/data/rules/postgres.yml b/crates/kingfisher-rules/data/rules/postgres.yml
similarity index 100%
rename from data/rules/postgres.yml
rename to crates/kingfisher-rules/data/rules/postgres.yml
diff --git a/data/rules/posthog.yml b/crates/kingfisher-rules/data/rules/posthog.yml
similarity index 100%
rename from data/rules/posthog.yml
rename to crates/kingfisher-rules/data/rules/posthog.yml
diff --git a/data/rules/postman.yml b/crates/kingfisher-rules/data/rules/postman.yml
similarity index 100%
rename from data/rules/postman.yml
rename to crates/kingfisher-rules/data/rules/postman.yml
diff --git a/data/rules/postmark.yml b/crates/kingfisher-rules/data/rules/postmark.yml
similarity index 100%
rename from data/rules/postmark.yml
rename to crates/kingfisher-rules/data/rules/postmark.yml
diff --git a/data/rules/prefect.yml b/crates/kingfisher-rules/data/rules/prefect.yml
similarity index 100%
rename from data/rules/prefect.yml
rename to crates/kingfisher-rules/data/rules/prefect.yml
diff --git a/data/rules/privkey.yml b/crates/kingfisher-rules/data/rules/privkey.yml
similarity index 100%
rename from data/rules/privkey.yml
rename to crates/kingfisher-rules/data/rules/privkey.yml
diff --git a/data/rules/psexec.yml b/crates/kingfisher-rules/data/rules/psexec.yml
similarity index 100%
rename from data/rules/psexec.yml
rename to crates/kingfisher-rules/data/rules/psexec.yml
diff --git a/data/rules/pubnub.yml b/crates/kingfisher-rules/data/rules/pubnub.yml
similarity index 100%
rename from data/rules/pubnub.yml
rename to crates/kingfisher-rules/data/rules/pubnub.yml
diff --git a/data/rules/pulumi.yml b/crates/kingfisher-rules/data/rules/pulumi.yml
similarity index 100%
rename from data/rules/pulumi.yml
rename to crates/kingfisher-rules/data/rules/pulumi.yml
diff --git a/data/rules/pypi.yml b/crates/kingfisher-rules/data/rules/pypi.yml
similarity index 100%
rename from data/rules/pypi.yml
rename to crates/kingfisher-rules/data/rules/pypi.yml
diff --git a/data/rules/rabbitmq.yml b/crates/kingfisher-rules/data/rules/rabbitmq.yml
similarity index 100%
rename from data/rules/rabbitmq.yml
rename to crates/kingfisher-rules/data/rules/rabbitmq.yml
diff --git a/data/rules/rapidapi.yml b/crates/kingfisher-rules/data/rules/rapidapi.yml
similarity index 100%
rename from data/rules/rapidapi.yml
rename to crates/kingfisher-rules/data/rules/rapidapi.yml
diff --git a/data/rules/react.yml b/crates/kingfisher-rules/data/rules/react.yml
similarity index 100%
rename from data/rules/react.yml
rename to crates/kingfisher-rules/data/rules/react.yml
diff --git a/data/rules/readme.yml b/crates/kingfisher-rules/data/rules/readme.yml
similarity index 100%
rename from data/rules/readme.yml
rename to crates/kingfisher-rules/data/rules/readme.yml
diff --git a/data/rules/recaptcha.yml b/crates/kingfisher-rules/data/rules/recaptcha.yml
similarity index 100%
rename from data/rules/recaptcha.yml
rename to crates/kingfisher-rules/data/rules/recaptcha.yml
diff --git a/data/rules/replicate.yml b/crates/kingfisher-rules/data/rules/replicate.yml
similarity index 100%
rename from data/rules/replicate.yml
rename to crates/kingfisher-rules/data/rules/replicate.yml
diff --git a/data/rules/resend.yml b/crates/kingfisher-rules/data/rules/resend.yml
similarity index 100%
rename from data/rules/resend.yml
rename to crates/kingfisher-rules/data/rules/resend.yml
diff --git a/data/rules/retellai.yml b/crates/kingfisher-rules/data/rules/retellai.yml
similarity index 100%
rename from data/rules/retellai.yml
rename to crates/kingfisher-rules/data/rules/retellai.yml
diff --git a/data/rules/riot.yml b/crates/kingfisher-rules/data/rules/riot.yml
similarity index 100%
rename from data/rules/riot.yml
rename to crates/kingfisher-rules/data/rules/riot.yml
diff --git a/data/rules/rubygems.yml b/crates/kingfisher-rules/data/rules/rubygems.yml
similarity index 100%
rename from data/rules/rubygems.yml
rename to crates/kingfisher-rules/data/rules/rubygems.yml
diff --git a/data/rules/runway.yml b/crates/kingfisher-rules/data/rules/runway.yml
similarity index 100%
rename from data/rules/runway.yml
rename to crates/kingfisher-rules/data/rules/runway.yml
diff --git a/data/rules/salesforce.yml b/crates/kingfisher-rules/data/rules/salesforce.yml
similarity index 100%
rename from data/rules/salesforce.yml
rename to crates/kingfisher-rules/data/rules/salesforce.yml
diff --git a/data/rules/sauce.yml b/crates/kingfisher-rules/data/rules/sauce.yml
similarity index 100%
rename from data/rules/sauce.yml
rename to crates/kingfisher-rules/data/rules/sauce.yml
diff --git a/data/rules/scale.yml b/crates/kingfisher-rules/data/rules/scale.yml
similarity index 100%
rename from data/rules/scale.yml
rename to crates/kingfisher-rules/data/rules/scale.yml
diff --git a/data/rules/scalingo.yml b/crates/kingfisher-rules/data/rules/scalingo.yml
similarity index 100%
rename from data/rules/scalingo.yml
rename to crates/kingfisher-rules/data/rules/scalingo.yml
diff --git a/data/rules/scraperapi.yml b/crates/kingfisher-rules/data/rules/scraperapi.yml
similarity index 100%
rename from data/rules/scraperapi.yml
rename to crates/kingfisher-rules/data/rules/scraperapi.yml
diff --git a/data/rules/segment.yml b/crates/kingfisher-rules/data/rules/segment.yml
similarity index 100%
rename from data/rules/segment.yml
rename to crates/kingfisher-rules/data/rules/segment.yml
diff --git a/data/rules/sendbird.yml b/crates/kingfisher-rules/data/rules/sendbird.yml
similarity index 100%
rename from data/rules/sendbird.yml
rename to crates/kingfisher-rules/data/rules/sendbird.yml
diff --git a/data/rules/sendgrid.yml b/crates/kingfisher-rules/data/rules/sendgrid.yml
similarity index 100%
rename from data/rules/sendgrid.yml
rename to crates/kingfisher-rules/data/rules/sendgrid.yml
diff --git a/data/rules/sendinblue.yml b/crates/kingfisher-rules/data/rules/sendinblue.yml
similarity index 100%
rename from data/rules/sendinblue.yml
rename to crates/kingfisher-rules/data/rules/sendinblue.yml
diff --git a/data/rules/sentry.yml b/crates/kingfisher-rules/data/rules/sentry.yml
similarity index 100%
rename from data/rules/sentry.yml
rename to crates/kingfisher-rules/data/rules/sentry.yml
diff --git a/data/rules/shippo.yml b/crates/kingfisher-rules/data/rules/shippo.yml
similarity index 100%
rename from data/rules/shippo.yml
rename to crates/kingfisher-rules/data/rules/shippo.yml
diff --git a/data/rules/shodan.yml b/crates/kingfisher-rules/data/rules/shodan.yml
similarity index 100%
rename from data/rules/shodan.yml
rename to crates/kingfisher-rules/data/rules/shodan.yml
diff --git a/data/rules/shopify.yml b/crates/kingfisher-rules/data/rules/shopify.yml
similarity index 100%
rename from data/rules/shopify.yml
rename to crates/kingfisher-rules/data/rules/shopify.yml
diff --git a/data/rules/slack.yml b/crates/kingfisher-rules/data/rules/slack.yml
similarity index 100%
rename from data/rules/slack.yml
rename to crates/kingfisher-rules/data/rules/slack.yml
diff --git a/data/rules/snyk.yml b/crates/kingfisher-rules/data/rules/snyk.yml
similarity index 100%
rename from data/rules/snyk.yml
rename to crates/kingfisher-rules/data/rules/snyk.yml
diff --git a/data/rules/sonarcloud.yml b/crates/kingfisher-rules/data/rules/sonarcloud.yml
similarity index 100%
rename from data/rules/sonarcloud.yml
rename to crates/kingfisher-rules/data/rules/sonarcloud.yml
diff --git a/data/rules/sonarqube.yml b/crates/kingfisher-rules/data/rules/sonarqube.yml
similarity index 100%
rename from data/rules/sonarqube.yml
rename to crates/kingfisher-rules/data/rules/sonarqube.yml
diff --git a/data/rules/sourcegraph.yml b/crates/kingfisher-rules/data/rules/sourcegraph.yml
similarity index 100%
rename from data/rules/sourcegraph.yml
rename to crates/kingfisher-rules/data/rules/sourcegraph.yml
diff --git a/data/rules/square.yml b/crates/kingfisher-rules/data/rules/square.yml
similarity index 100%
rename from data/rules/square.yml
rename to crates/kingfisher-rules/data/rules/square.yml
diff --git a/data/rules/sslmate.yml b/crates/kingfisher-rules/data/rules/sslmate.yml
similarity index 100%
rename from data/rules/sslmate.yml
rename to crates/kingfisher-rules/data/rules/sslmate.yml
diff --git a/data/rules/stabilityai.yml b/crates/kingfisher-rules/data/rules/stabilityai.yml
similarity index 100%
rename from data/rules/stabilityai.yml
rename to crates/kingfisher-rules/data/rules/stabilityai.yml
diff --git a/data/rules/stackhawk.yml b/crates/kingfisher-rules/data/rules/stackhawk.yml
similarity index 100%
rename from data/rules/stackhawk.yml
rename to crates/kingfisher-rules/data/rules/stackhawk.yml
diff --git a/data/rules/statuspage.yml b/crates/kingfisher-rules/data/rules/statuspage.yml
similarity index 100%
rename from data/rules/statuspage.yml
rename to crates/kingfisher-rules/data/rules/statuspage.yml
diff --git a/data/rules/stripe.yml b/crates/kingfisher-rules/data/rules/stripe.yml
similarity index 100%
rename from data/rules/stripe.yml
rename to crates/kingfisher-rules/data/rules/stripe.yml
diff --git a/data/rules/supabase.yml b/crates/kingfisher-rules/data/rules/supabase.yml
similarity index 100%
rename from data/rules/supabase.yml
rename to crates/kingfisher-rules/data/rules/supabase.yml
diff --git a/data/rules/tailscale.yml b/crates/kingfisher-rules/data/rules/tailscale.yml
similarity index 100%
rename from data/rules/tailscale.yml
rename to crates/kingfisher-rules/data/rules/tailscale.yml
diff --git a/data/rules/tavily.yml b/crates/kingfisher-rules/data/rules/tavily.yml
similarity index 100%
rename from data/rules/tavily.yml
rename to crates/kingfisher-rules/data/rules/tavily.yml
diff --git a/data/rules/teamcity.yml b/crates/kingfisher-rules/data/rules/teamcity.yml
similarity index 100%
rename from data/rules/teamcity.yml
rename to crates/kingfisher-rules/data/rules/teamcity.yml
diff --git a/data/rules/telegram.yml b/crates/kingfisher-rules/data/rules/telegram.yml
similarity index 100%
rename from data/rules/telegram.yml
rename to crates/kingfisher-rules/data/rules/telegram.yml
diff --git a/data/rules/thingsboard.yml b/crates/kingfisher-rules/data/rules/thingsboard.yml
similarity index 100%
rename from data/rules/thingsboard.yml
rename to crates/kingfisher-rules/data/rules/thingsboard.yml
diff --git a/data/rules/togetherai.yml b/crates/kingfisher-rules/data/rules/togetherai.yml
similarity index 100%
rename from data/rules/togetherai.yml
rename to crates/kingfisher-rules/data/rules/togetherai.yml
diff --git a/data/rules/travisci.yml b/crates/kingfisher-rules/data/rules/travisci.yml
similarity index 100%
rename from data/rules/travisci.yml
rename to crates/kingfisher-rules/data/rules/travisci.yml
diff --git a/data/rules/truenas.yml b/crates/kingfisher-rules/data/rules/truenas.yml
similarity index 100%
rename from data/rules/truenas.yml
rename to crates/kingfisher-rules/data/rules/truenas.yml
diff --git a/data/rules/twilio.yml b/crates/kingfisher-rules/data/rules/twilio.yml
similarity index 100%
rename from data/rules/twilio.yml
rename to crates/kingfisher-rules/data/rules/twilio.yml
diff --git a/data/rules/twitch.yml b/crates/kingfisher-rules/data/rules/twitch.yml
similarity index 100%
rename from data/rules/twitch.yml
rename to crates/kingfisher-rules/data/rules/twitch.yml
diff --git a/data/rules/twitter.yml b/crates/kingfisher-rules/data/rules/twitter.yml
similarity index 100%
rename from data/rules/twitter.yml
rename to crates/kingfisher-rules/data/rules/twitter.yml
diff --git a/data/rules/typeform.yml b/crates/kingfisher-rules/data/rules/typeform.yml
similarity index 100%
rename from data/rules/typeform.yml
rename to crates/kingfisher-rules/data/rules/typeform.yml
diff --git a/data/rules/uri.yml b/crates/kingfisher-rules/data/rules/uri.yml
similarity index 100%
rename from data/rules/uri.yml
rename to crates/kingfisher-rules/data/rules/uri.yml
diff --git a/data/rules/vastai.yml b/crates/kingfisher-rules/data/rules/vastai.yml
similarity index 100%
rename from data/rules/vastai.yml
rename to crates/kingfisher-rules/data/rules/vastai.yml
diff --git a/data/rules/vercel.yml b/crates/kingfisher-rules/data/rules/vercel.yml
similarity index 100%
rename from data/rules/vercel.yml
rename to crates/kingfisher-rules/data/rules/vercel.yml
diff --git a/data/rules/vmware.yml b/crates/kingfisher-rules/data/rules/vmware.yml
similarity index 100%
rename from data/rules/vmware.yml
rename to crates/kingfisher-rules/data/rules/vmware.yml
diff --git a/data/rules/voyageai.yml b/crates/kingfisher-rules/data/rules/voyageai.yml
similarity index 100%
rename from data/rules/voyageai.yml
rename to crates/kingfisher-rules/data/rules/voyageai.yml
diff --git a/data/rules/weightsandbiases.yml b/crates/kingfisher-rules/data/rules/weightsandbiases.yml
similarity index 100%
rename from data/rules/weightsandbiases.yml
rename to crates/kingfisher-rules/data/rules/weightsandbiases.yml
diff --git a/data/rules/wireguard.yml b/crates/kingfisher-rules/data/rules/wireguard.yml
similarity index 100%
rename from data/rules/wireguard.yml
rename to crates/kingfisher-rules/data/rules/wireguard.yml
diff --git a/data/rules/xAI.yml b/crates/kingfisher-rules/data/rules/xAI.yml
similarity index 100%
rename from data/rules/xAI.yml
rename to crates/kingfisher-rules/data/rules/xAI.yml
diff --git a/data/rules/yandex.yml b/crates/kingfisher-rules/data/rules/yandex.yml
similarity index 100%
rename from data/rules/yandex.yml
rename to crates/kingfisher-rules/data/rules/yandex.yml
diff --git a/data/rules/yelp.yml b/crates/kingfisher-rules/data/rules/yelp.yml
similarity index 100%
rename from data/rules/yelp.yml
rename to crates/kingfisher-rules/data/rules/yelp.yml
diff --git a/data/rules/youtube.yml b/crates/kingfisher-rules/data/rules/youtube.yml
similarity index 100%
rename from data/rules/youtube.yml
rename to crates/kingfisher-rules/data/rules/youtube.yml
diff --git a/data/rules/zhipu.yml b/crates/kingfisher-rules/data/rules/zhipu.yml
similarity index 100%
rename from data/rules/zhipu.yml
rename to crates/kingfisher-rules/data/rules/zhipu.yml
diff --git a/data/rules/zohocrm.yml b/crates/kingfisher-rules/data/rules/zohocrm.yml
similarity index 100%
rename from data/rules/zohocrm.yml
rename to crates/kingfisher-rules/data/rules/zohocrm.yml
diff --git a/data/rules/zuplo.yml b/crates/kingfisher-rules/data/rules/zuplo.yml
similarity index 100%
rename from data/rules/zuplo.yml
rename to crates/kingfisher-rules/data/rules/zuplo.yml
diff --git a/crates/kingfisher-rules/src/defaults.rs b/crates/kingfisher-rules/src/defaults.rs
new file mode 100644
index 0000000..8ce6a38
--- /dev/null
+++ b/crates/kingfisher-rules/src/defaults.rs
@@ -0,0 +1,83 @@
+//! Builtin rules embedded in the kingfisher-rules crate.
+//!
+//! The crate's `data` directory is embedded at compile time via `include_dir!`.
+//! Every `.yml` / `.yaml` file inside it is passed to [`Rules::from_paths_and_contents`].
+
+use std::path::Path;
+
+use anyhow::Result;
+use include_dir::{include_dir, Dir, DirEntry};
+
+use crate::rule::Confidence;
+use crate::rules::Rules;
+
+/// The embedded rules directory (the crate's `data` directory, captured at compile time).
+pub static DEFAULT_RULES_DIR: Dir<'_> = include_dir!("$CARGO_MANIFEST_DIR/data");
+
+/// Returns `true` if `path` has a `yml` or `yaml` extension (exact, case-sensitive match).
+fn is_yaml_file(path: &Path) -> bool {
+    path.extension().is_some_and(|ext| ext == "yml" || ext == "yaml")
+}
+
+/// Collect every YAML file under `dir`, descending into subdirectories.
+///
+/// Each element pairs the file's embedded path with its raw bytes.
+fn load_yaml_files<'a>(dir: &'a Dir<'a>) -> Vec<(&'a Path, &'a [u8])> {
+    let mut files = Vec::new();
+    collect_yaml_files(dir, &mut files);
+    files
+}
+
+/// Recursive worker for [`load_yaml_files`]: appends YAML files found under `dir` to `files`.
+fn collect_yaml_files<'a>(dir: &'a Dir<'a>, files: &mut Vec<(&'a Path, &'a [u8])>) {
+    for entry in dir.entries() {
+        match entry {
+            DirEntry::Dir(subdir) => collect_yaml_files(subdir, files),
+            DirEntry::File(file) => {
+                if is_yaml_file(file.path()) {
+                    files.push((file.path(), file.contents()));
+                }
+            }
+        }
+    }
+}
+
+/// Load the default YAML rule files, returning their pathnames and contents.
+///
+/// The list is sorted by path, so the order does not depend on how entries were embedded.
+fn get_default_rule_files() -> Vec<(&'static Path, &'static [u8])> {
+    let mut yaml_files = load_yaml_files(&DEFAULT_RULES_DIR);
+    yaml_files.sort_by_key(|t| t.0);
+    yaml_files
+}
+
+/// Load the builtin rules from the embedded YAML files.
+///
+/// This loads all rules that meet or exceed the given confidence level.
+/// If no confidence is specified, defaults to `Confidence::Medium`.
+///
+/// # Errors
+///
+/// Propagates any error returned by [`Rules::from_paths_and_contents`].
+pub fn get_builtin_rules(confidence: Option<Confidence>) -> Result<Rules> {
+    let confidence = confidence.unwrap_or(Confidence::Medium);
+    Rules::from_paths_and_contents(get_default_rule_files(), confidence)
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn test_get_default_rules() {
+        assert!(get_builtin_rules(None).unwrap().num_rules() >= 100);
+    }
+
+    #[test]
+    fn test_is_yaml_file() {
+        assert!(is_yaml_file(Path::new("rules/aws.yml")));
+        assert!(is_yaml_file(Path::new("rules/aws.yaml")));
+        assert!(!is_yaml_file(Path::new("README.md")));
+        assert!(!is_yaml_file(Path::new("Makefile")));
+    }
+}
diff --git a/crates/kingfisher-rules/src/lib.rs b/crates/kingfisher-rules/src/lib.rs
new file mode 100644
index 0000000..fd343cb
--- /dev/null
+++ b/crates/kingfisher-rules/src/lib.rs
@@ -0,0 +1,35 @@
+//! Rule definitions and database for the Kingfisher secret scanner.
+//!
+//! This crate provides:
+//! - [`Rule`] and [`RuleSyntax`] - Rule definitions
+//! - [`RulesDatabase`] - Compiled rules ready for scanning
+//! - [`Confidence`] - Rule confidence levels
+//! - [`Rules`] - Rule collection and loading
+//! - YAML parsing for rule files
+//! - Builtin rules embedded in the crate
+
+pub mod defaults;
+pub mod liquid_filters;
+pub mod rule;
+pub mod rules;
+pub mod rules_database;
+
+// Re-export rule types
+pub use rule::{
+    ChecksumActual, ChecksumRequirement, Confidence, DependsOnRule, HttpRequest, HttpValidation,
+    MultipartConfig, MultipartPart, PatternRequirementContext, PatternRequirements,
+    PatternValidationResult, ReportResponseData, ResponseMatcher, Rule, RuleSyntax, Validation,
+    RULE_COMMENTS_PATTERN,
+};
+
+// Re-export Rules collection
+pub use rules::{Rules, RulesError};
+
+// Re-export RulesDatabase
+pub use rules_database::{format_regex_pattern, RulesDatabase};
+
+// Re-export defaults
+pub use defaults::get_builtin_rules;
+
+// Re-export liquid_filters registration
+pub use liquid_filters::register_all as register_liquid_filters;
diff --git a/crates/kingfisher-rules/src/liquid_filters.rs b/crates/kingfisher-rules/src/liquid_filters.rs
new file mode 100644
index 0000000..15a6608
--- /dev/null
+++ b/crates/kingfisher-rules/src/liquid_filters.rs
@@ -0,0 +1,1159 @@
+//! Collection of small Liquid filters that make HTTP validations & API-signing templates easy
+
+use base64::{engine::general_purpose, Engine};
+use crc32fast::Hasher;
+use hmac::{Hmac, Mac};
+use liquid_core::{
+    Display_filter, Error as LiquidError, Expression, Filter, FilterParameters, FilterReflection,
+    FromFilterParameters, ParseFilter, Result, Runtime, Value, ValueView,
+};
+
+use percent_encoding::{utf8_percent_encode, NON_ALPHANUMERIC};
+use rand::{distr::Alphanumeric, Rng};
+use sha1::Sha1;
+use sha2::{Digest, Sha256, Sha384};
+use time::{format_description::well_known::Iso8601, OffsetDateTime};
+use uuid::Uuid;
+
+// -----------------------------------------------------------------------------
+// Helper macro – declares a filter struct plus its `Display` and `Filter` impls
+// -----------------------------------------------------------------------------
+// `$body` is invoked as `$body(input)` in arm 1 and as `$body(self, input)` in arm 2.
+macro_rules! static_filter {
+    // ── arm 1: unit struct; `$display` is both the filter name and its description ──
+    (
+        $(#[$outer:meta])*
+        $name:ident, $display:literal, $body:expr
+    ) => {
+        $(#[$outer])*
+        #[derive(Debug, Clone, FilterReflection, ParseFilter, Default)]
+        #[filter(name = $display, description = $display, parsed($name))]
+        pub struct $name;
+
+        impl std::fmt::Display for $name {
+            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+                write!(f, $display)
+            }
+        }
+        impl Filter for $name {
+            fn evaluate(
+                &self,
+                input: &dyn ValueView,
+                _runtime: &dyn Runtime,
+            ) -> Result<Value, LiquidError> {
+                Ok(Value::scalar($body(input)))
+            }
+        }
+    };
+
+    // ── arm 2: struct with named `pub` fields; `$body` additionally receives `&self` ──
+(
+    $(#[$outer:meta])*
+    $name:ident { $( $(#[$f_meta:meta])* $field:ident : $ty:ty ),+ $(,)? },
+    $display:literal,
+    $body:expr
+) => {
+    $(#[$outer])*
+    #[derive(Debug, Clone, Default, FilterReflection, ParseFilter)]   // same derives as arm 1
+    #[filter(name = $display, description = $display, parsed($name))]
+    pub struct $name { $( $(#[$f_meta])* pub $field : $ty ),+ }
+
+    impl std::fmt::Display for $name {
+        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+            write!(f, $display)
+        }
+    }
+    impl Filter for $name {
+        fn evaluate(
+            &self,
+            input: &dyn ValueView,
+            _runtime: &dyn Runtime,
+        ) -> Result<Value, LiquidError> {
+            Ok(Value::scalar($body(self, input)))
+        }
+    }
+};
+}
+
+#[derive(Debug, FilterParameters)]
+struct ReplaceArgs {
+    #[parameter(description = "The substring to search for.", arg_type = "str")]
+    from: Expression,
+    #[parameter(description = "The string to replace it with.", arg_type = "str")]
+    to: Expression,
+}
+
+#[derive(Clone, ParseFilter, FilterReflection, Default)]
+#[filter(
+    name = "replace",
+    description = "Replaces every occurrence of a substring with another.",
+    parameters(ReplaceArgs),
+    parsed(ReplaceFilter)
+)]
+pub struct Replace;
+
+#[derive(Debug, FromFilterParameters, Display_filter)]
+#[name = "replace"]
+struct ReplaceFilter {
+    #[parameters]
+    args: ReplaceArgs,
+}
+
+impl Filter for ReplaceFilter {
+    // Substitutes every occurrence of `from` with `to` in the stringified input.
+    fn evaluate(&self, input: &dyn ValueView, runtime: &dyn Runtime) -> Result<Value> {
+        let args = self.args.evaluate(runtime)?;
+        let (needle, substitute) = (args.from.to_kstr(), args.to.to_kstr());
+        let replaced = input.to_kstr().replace(needle.as_str(), substitute.as_str());
+        Ok(Value::scalar(replaced))
+    }
+}
+
+#[derive(Debug, FilterParameters)]
+struct LstripCharsArgs {
+    #[parameter(
+        description = "Characters to remove from the start of the input.",
+        arg_type = "str"
+    )]
+    chars: Expression,
+}
+
+#[derive(Clone, ParseFilter, FilterReflection, Default)]
+#[filter(
+    name = "lstrip_chars",
+    description = "Removes the provided characters from the beginning of the string.",
+    parameters(LstripCharsArgs),
+    parsed(LstripCharsFilter)
+)]
+pub struct LstripChars;
+
+#[derive(Debug, FromFilterParameters, Display_filter)]
+#[name = "lstrip_chars"]
+struct LstripCharsFilter {
+    #[parameters]
+    args: LstripCharsArgs,
+}
+
+impl Filter for LstripCharsFilter {
+    // Drops leading characters for as long as they belong to the `chars` set.
+    fn evaluate(&self, input: &dyn ValueView, runtime: &dyn Runtime) -> Result<Value> {
+        let args = self.args.evaluate(runtime)?;
+        let strip_set: Vec<char> = args.chars.to_string().chars().collect();
+        let text = input.to_kstr();
+        Ok(Value::scalar(text.trim_start_matches(strip_set.as_slice()).to_string()))
+    }
+}
+
+// ── HMAC args ─────────────────────────────────────
+#[derive(Debug, FilterParameters)]
+struct HmacArgs {
+    #[parameter(description = "HMAC key", arg_type = "str")]
+    key: Expression,
+}
+
+#[derive(Clone, ParseFilter, FilterReflection, Default)]
+#[filter(
+    name = "hmac_sha256",
+    description = "HMAC-SHA256 – returns Base64.",
+    parameters(HmacArgs),
+    parsed(HmacSha256Filter)
+)]
+pub struct HmacSha256;
+
+#[derive(Debug, FromFilterParameters, Display_filter)]
+#[name = "hmac_sha256"]
+struct HmacSha256Filter {
+    #[parameters]
+    args: HmacArgs,
+}
+
+impl Filter for HmacSha256Filter {
+    /// Computes HMAC-SHA256 over the stringified input and returns it Base64-encoded
+    /// (standard alphabet, padded). The evaluated `key` argument supplies the key bytes.
+    fn evaluate(&self, input: &dyn ValueView, runtime: &dyn Runtime) -> Result<Value> {
+        let args = self.args.evaluate(runtime)?;
+        let key = args.key.to_kstr();
+
+        // Report a key-setup failure as a Liquid error rather than panicking via `unwrap`.
+        let mut mac = Hmac::<Sha256>::new_from_slice(key.as_bytes())
+            .map_err(|err| LiquidError::with_msg(err.to_string()))?;
+        mac.update(input.to_kstr().as_bytes());
+        Ok(Value::scalar(general_purpose::STANDARD.encode(mac.finalize().into_bytes())))
+    }
+}
+
+// ── HMAC-SHA1 ─────────────────────────────────────────────
+#[derive(Debug, FilterParameters)]
+struct HmacSha1Args {
+    #[parameter(description = "HMAC key", arg_type = "str")]
+    key: Expression,
+}
+
+#[derive(Clone, ParseFilter, FilterReflection, Default)]
+#[filter(
+    name = "hmac_sha1",
+    description = "HMAC-SHA1 – returns Base64.",
+    parameters(HmacSha1Args),
+    parsed(HmacSha1Filter)
+)]
+pub struct HmacSha1;
+
+#[derive(Debug, FromFilterParameters, Display_filter)]
+#[name = "hmac_sha1"]
+struct HmacSha1Filter {
+    #[parameters]
+    args: HmacSha1Args,
+}
+
+impl Filter for HmacSha1Filter {
+    /// Computes HMAC-SHA1 over the stringified input and returns it Base64-encoded
+    /// (standard alphabet, padded). The evaluated `key` argument supplies the key bytes.
+    fn evaluate(&self, input: &dyn ValueView, runtime: &dyn Runtime) -> Result<Value> {
+        let args = self.args.evaluate(runtime)?;
+        let key = args.key.to_kstr();
+
+        // Report a key-setup failure as a Liquid error rather than panicking via `unwrap`.
+        let mut mac = Hmac::<Sha1>::new_from_slice(key.as_bytes())
+            .map_err(|err| LiquidError::with_msg(err.to_string()))?;
+        mac.update(input.to_kstr().as_bytes());
+        Ok(Value::scalar(general_purpose::STANDARD.encode(mac.finalize().into_bytes())))
+    }
+}
+
+// ── HMAC-SHA384 ─────────────────────────────────────────────
+#[derive(Debug, FilterParameters)]
+struct Hmac384Args {
+    #[parameter(description = "HMAC key", arg_type = "str")]
+    key: Expression,
+}
+
+#[derive(Clone, ParseFilter, FilterReflection, Default)]
+#[filter(
+    name = "hmac_sha384",
+    description = "HMAC-SHA384 – returns Base64.",
+    parameters(Hmac384Args),
+    parsed(HmacSha384Filter)
+)]
+pub struct HmacSha384;
+
+#[derive(Debug, FromFilterParameters, Display_filter)]
+#[name = "hmac_sha384"]
+struct HmacSha384Filter {
+    #[parameters]
+    args: Hmac384Args,
+}
+
+impl Filter for HmacSha384Filter {
+    /// Computes HMAC-SHA384 over the stringified input and returns it Base64-encoded
+    /// (standard alphabet, padded). The evaluated `key` argument supplies the key bytes.
+    fn evaluate(&self, input: &dyn ValueView, runtime: &dyn Runtime) -> Result<Value> {
+        let args = self.args.evaluate(runtime)?;
+        let key = args.key.to_kstr();
+
+        // Report a key-setup failure as a Liquid error rather than panicking via `unwrap`.
+        let mut mac = Hmac::<Sha384>::new_from_slice(key.as_bytes())
+            .map_err(|err| LiquidError::with_msg(err.to_string()))?;
+        mac.update(input.to_kstr().as_bytes());
+        Ok(Value::scalar(general_purpose::STANDARD.encode(mac.finalize().into_bytes())))
+    }
+}
+
+// ── random_string ────────────────────────────────
+#[derive(Debug, FilterParameters)]
+struct RandomStringArgs {
+    #[parameter(description = "Desired output length", arg_type = "integer")]
+    len: Option<Expression>,
+}
+
+#[derive(Clone, ParseFilter, FilterReflection, Default)]
+#[filter(
+    name = "random_string",
+    description = "Random alphanumeric string (default 32 chars).",
+    parameters(RandomStringArgs),
+    parsed(RandomString)
+)]
+pub struct RandomStringFilter;
+
+#[derive(Debug, FromFilterParameters, Display_filter)]
+#[name = "random_string"]
+struct RandomString {
+    #[parameters]
+    args: RandomStringArgs,
+}
+
+impl Filter for RandomString {
+    /// Produces a random alphanumeric string.
+    ///
+    /// Length resolution: the `len` argument if usable, else the input when it parses as an
+    /// unsigned integer, else 32.
+    fn evaluate(&self, input: &dyn ValueView, runtime: &dyn Runtime) -> Result<Value> {
+        let args = self.args.evaluate(runtime)?;
+        let from_arg = args.len.and_then(|raw| value_to_usize(&Value::scalar(raw)));
+        let length = match from_arg {
+            Some(explicit) => explicit,
+            None => input.to_kstr().parse().unwrap_or(32),
+        };
+
+        let generated: String =
+            rand::rng().sample_iter(&Alphanumeric).take(length).map(char::from).collect();
+        Ok(Value::scalar(generated))
+    }
+}
+
+#[derive(Debug, FilterParameters)]
+struct SuffixArgs {
+    #[parameter(description = "Number of trailing characters to keep", arg_type = "integer")]
+    len: Option<Expression>,
+}
+
+#[derive(Clone, ParseFilter, FilterReflection, Default)]
+#[filter(
+    name = "suffix",
+    description = "Return the suffix (last N characters) of the provided string.",
+    parameters(SuffixArgs),
+    parsed(Suffix)
+)]
+pub struct SuffixFilter;
+
+#[derive(Debug, FromFilterParameters, Display_filter)]
+#[name = "suffix"]
+struct Suffix {
+    #[parameters]
+    args: SuffixArgs,
+}
+
+impl Filter for Suffix {
+    /// Keeps the last `len` characters (Unicode scalar values) of the stringified input.
+    ///
+    /// A missing `len` keeps the whole input, a `len` of 0 yields an empty string, and a
+    /// `len` longer than the input returns the input unchanged.
+    fn evaluate(&self, input: &dyn ValueView, runtime: &dyn Runtime) -> Result<Value> {
+        let args = self.args.evaluate(runtime)?;
+        let text = input.to_kstr();
+        let total = text.chars().count();
+
+        // With no explicit length, ask for everything.
+        let wanted = match args.len {
+            Some(raw) => value_to_usize(&Value::scalar(raw)).unwrap_or(total),
+            None => total,
+        };
+
+        // Skipping `total - wanted` characters leaves exactly the trailing `wanted` ones.
+        let tail: String = text.chars().skip(total.saturating_sub(wanted)).collect();
+        Ok(Value::scalar(tail))
+    }
+}
+
+#[derive(Debug, FilterParameters)]
+struct PrefixArgs {
+    #[parameter(description = "Number of leading characters to keep", arg_type = "integer")]
+    len: Option<Expression>,
+}
+
+#[derive(Clone, ParseFilter, FilterReflection, Default)]
+#[filter(
+    name = "prefix",
+    description = "Return the prefix (first N characters) of the provided string.",
+    parameters(PrefixArgs),
+    parsed(Prefix)
+)]
+pub struct PrefixFilter;
+
+#[derive(Debug, FromFilterParameters, Display_filter)]
+#[name = "prefix"]
+struct Prefix {
+    #[parameters]
+    args: PrefixArgs,
+}
+
+impl Filter for Prefix {
+    /// Keeps the first `len` characters (Unicode scalar values) of the stringified input.
+    ///
+    /// A missing `len` keeps the whole input, a `len` of 0 yields an empty string, and a
+    /// `len` longer than the input returns the input unchanged.
+    fn evaluate(&self, input: &dyn ValueView, runtime: &dyn Runtime) -> Result<Value> {
+        let args = self.args.evaluate(runtime)?;
+        let text = input.to_kstr();
+        let total = text.chars().count();
+
+        // With no explicit length, ask for everything.
+        let wanted = match args.len {
+            Some(raw) => value_to_usize(&Value::scalar(raw)).unwrap_or(total),
+            None => total,
+        };
+
+        // `take` stops at the end of the input, so an oversized `wanted` needs no clamping.
+        Ok(Value::scalar(text.chars().take(wanted).collect::<String>()))
+    }
+}
+
+#[derive(Debug, Clone, Default, FilterReflection, ParseFilter)]
+#[filter(
+    name = "b64enc",
+    description = "Encodes the input string using Base64 encoding",
+    parsed(B64EncFilter)
+)]
+// `parsed(...)` names this same unit struct, so it acts as both parser and filter.
+
+pub struct B64EncFilter;
+
+impl std::fmt::Display for B64EncFilter {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str("b64enc") // the filter's template name
+    }
+}
+
+impl Filter for B64EncFilter {
+    /// Base64-encodes (standard alphabet, padded) the UTF-8 bytes of the stringified input.
+    fn evaluate(
+        &self,
+        input: &dyn ValueView,
+        _runtime: &dyn Runtime,
+    ) -> Result<Value, LiquidError> {
+        let text = input.to_kstr();
+        Ok(Value::scalar(general_purpose::STANDARD.encode(text.as_bytes())))
+    }
+}
+
+#[derive(Debug, Clone, Default, FilterReflection, ParseFilter)]
+#[filter(name = "b64dec", description = "Decodes a Base64 string", parsed(B64DecFilter))]
+pub struct B64DecFilter;
+
+impl std::fmt::Display for B64DecFilter {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str("b64dec") // the filter's template name
+    }
+}
+
+impl Filter for B64DecFilter {
+    /// Decodes standard Base64; bad input is a Liquid error, non-UTF-8 output is lossy.
+    fn evaluate(
+        &self,
+        input: &dyn ValueView,
+        _runtime: &dyn Runtime,
+    ) -> Result<Value, LiquidError> {
+        general_purpose::STANDARD
+            .decode(input.to_kstr().as_bytes())
+            .map(|bytes| Value::scalar(String::from_utf8_lossy(&bytes).to_string()))
+            .map_err(|err| LiquidError::with_msg(err.to_string()))
+    }
+}
+
+// -----------------------------------------------------------------------------
+// Authentication & Security
+// -----------------------------------------------------------------------------
+
+// {{ value | sha256 }} -- lowercase hex digest
+static_filter!(
+    /// SHA-256 of the stringified input, rendered as lowercase hex.
+    Sha256Filter, "sha256",
+    |input: &dyn ValueView| -> String {
+        // One-shot digest of the single input chunk.
+        let digest = Sha256::digest(input.to_kstr().as_bytes());
+        format!("{digest:x}")
+    }
+);
+
+static_filter!(
+    /// CRC32 of the stringified input, returned as a non-negative decimal integer.
+    Crc32Filter,
+    "crc32",
+    |input: &dyn ValueView| -> i64 {
+        // One-shot helper; the u32 checksum converts to i64 without loss.
+        let checksum = crc32fast::hash(input.to_kstr().as_bytes());
+        i64::from(checksum)
+    }
+);
+
+#[derive(Debug, FilterParameters)]
+struct Crc32DecArgs {
+    #[parameter(
+        description = "Number of trailing decimal digits to return (zero padded)",
+        arg_type = "integer"
+    )]
+    digits: Option<Expression>,
+}
+
+#[derive(Clone, ParseFilter, FilterReflection, Default)]
+#[filter(
+    name = "crc32_dec",
+    description = "Compute the CRC32 and optionally return the last N decimal digits.",
+    parameters(Crc32DecArgs),
+    parsed(Crc32Dec)
+)]
+pub struct Crc32DecFilter;
+
+#[derive(Debug, FromFilterParameters, Display_filter)]
+#[name = "crc32_dec"]
+struct Crc32Dec {
+    #[parameters]
+    args: Crc32DecArgs,
+}
+
+impl Filter for Crc32Dec {
+    /// Renders the CRC32 of the stringified input as a decimal string.
+    ///
+    /// Without `digits` (or with `digits` of 0) the full checksum is returned. Otherwise only
+    /// the last `digits` decimal digits are kept, zero-padded on the left to that width;
+    /// `digits` is capped at 38.
+    ///
+    /// For `"hello"`: `crc32_dec` → `907060870`, `crc32_dec: 6` → `060870`.
+    fn evaluate(&self, input: &dyn ValueView, runtime: &dyn Runtime) -> Result<Value> {
+        let args = self.args.evaluate(runtime)?;
+
+        // Checksum of the input's string form.
+        let mut crc = Hasher::new();
+        crc.update(input.to_kstr().as_bytes());
+        // Widened to u128 so that a modulus of up to 10^38 below cannot overflow.
+        let checksum = u128::from(crc.finalize());
+
+        // 0 doubles as "no truncation requested".
+        let width = args
+            .digits
+            .and_then(|raw| value_to_usize(&Value::scalar(raw)))
+            .map_or(0, |requested| requested.min(38));
+
+        let rendered = if width == 0 {
+            checksum.to_string()
+        } else {
+            // The remainder has at most `width` digits, so `{:0>width$}` only ever pads
+            // (it never truncates) and the result is exactly `width` characters long.
+            let modulus = 10u128.pow(width as u32);
+            format!("{:0>width$}", checksum % modulus)
+        };
+
+        Ok(Value::scalar(rendered))
+    }
+}
+
+#[derive(Debug, FilterParameters)]
+struct Crc32HexArgs {
+    #[parameter(
+        description = "Number of trailing hexadecimal digits to return (zero padded)",
+        arg_type = "integer"
+    )]
+    digits: Option<Expression>,
+}
+
+#[derive(Clone, ParseFilter, FilterReflection, Default)]
+#[filter(
+    name = "crc32_hex",
+    description = "Compute the CRC32 and optionally return the last N hexadecimal digits.",
+    parameters(Crc32HexArgs),
+    parsed(Crc32Hex)
+)]
+pub struct Crc32HexFilter;
+
+#[derive(Debug, FromFilterParameters, Display_filter)]
+#[name = "crc32_hex"]
+struct Crc32Hex {
+    #[parameters]
+    args: Crc32HexArgs,
+}
+
+impl Filter for Crc32Hex {
+    /// Renders the CRC32 of the stringified input as lowercase hexadecimal.
+    ///
+    /// Without `digits` (or with `digits` of 0) the full 8-digit checksum is returned. A
+    /// request of up to 8 keeps that many trailing digits; a larger one left-pads with
+    /// zeros and is capped at 32 characters.
+    ///
+    /// For `"hello"`: `crc32_hex` → `3610a686`, `crc32_hex: 4` → `a686`,
+    /// and `crc32_hex: 10` → `003610a686`.
+    fn evaluate(&self, input: &dyn ValueView, runtime: &dyn Runtime) -> Result<Value> {
+        let args = self.args.evaluate(runtime)?;
+
+        // Checksum of the input's string form, always rendered as exactly 8 hex digits.
+        let mut crc = Hasher::new();
+        crc.update(input.to_kstr().as_bytes());
+        let hex = format!("{:08x}", crc.finalize());
+
+        // 0 doubles as "no truncation requested".
+        let width = args
+            .digits
+            .and_then(|raw| value_to_usize(&Value::scalar(raw)))
+            .map_or(0, |requested| requested.min(32));
+
+        let rendered = if width == 0 {
+            hex
+        } else if width <= hex.len() {
+            // Short request: keep only the trailing `width` digits.
+            hex[hex.len() - width..].to_string()
+        } else {
+            // Long request: zero-pad on the left up to `width` characters.
+            format!("{hex:0>width$}")
+        };
+
+        Ok(Value::scalar(rendered))
+    }
+}
+
+#[derive(Debug, FilterParameters)]
+struct Crc32LeB64Args {
+    #[parameter(
+        description = "Number of leading characters from the Base64 string to keep",
+        arg_type = "integer"
+    )]
+    len: Option<Expression>,
+}
+
+#[derive(Clone, ParseFilter, FilterReflection, Default)]
+#[filter(
+    name = "crc32_le_b64",
+    description = "Compute the CRC32, encode little-endian bytes as Base64, optionally truncating.",
+    parameters(Crc32LeB64Args),
+    parsed(Crc32LeB64)
+)]
+pub struct Crc32LeB64Filter;
+
+#[derive(Debug, FromFilterParameters, Display_filter)]
+#[name = "crc32_le_b64"]
+struct Crc32LeB64 {
+    #[parameters]
+    args: Crc32LeB64Args,
+}
+
+impl Filter for Crc32LeB64 {
+    /// Base64-encodes the CRC32 of the stringified input, taken as four little-endian bytes.
+    ///
+    /// When `len` is given, only the first `len` characters of the Base64 text are returned.
+    fn evaluate(&self, input: &dyn ValueView, runtime: &dyn Runtime) -> Result<Value> {
+        let args = self.args.evaluate(runtime)?;
+
+        let mut crc = Hasher::new();
+        crc.update(input.to_kstr().as_bytes());
+        let encoded = general_purpose::STANDARD.encode(crc.finalize().to_le_bytes());
+
+        let limit = args.len.and_then(|raw| value_to_usize(&Value::scalar(raw)));
+        let output = match limit {
+            Some(keep) => encoded.chars().take(keep).collect::<String>(),
+            None => encoded,
+        };
+
+        Ok(Value::scalar(output))
+    }
+}
+
+#[derive(Debug, FilterParameters)]
+struct Base62Args {
+    #[parameter(
+        description = "Pad the encoded value to at least this width",
+        arg_type = "integer"
+    )]
+    width: Option<Expression>,
+}
+
+#[derive(Clone, ParseFilter, FilterReflection, Default)]
+#[filter(
+    name = "base62",
+    description = "Encode the provided integer value using Base62.",
+    parameters(Base62Args),
+    parsed(Base62)
+)]
+pub struct Base62Filter;
+
+#[derive(Debug, FromFilterParameters, Display_filter)]
+#[name = "base62"]
+struct Base62 {
+    #[parameters]
+    args: Base62Args,
+}
+
+impl Filter for Base62 {
+    /// Base62-encodes the input after coercing it to a non-negative integer.
+    ///
+    /// Integers and floats are used numerically (negatives become 0, floats are floored),
+    /// booleans map to 0/1, and anything else is parsed as `u64`, falling back to 0. With a
+    /// `width` argument the result is left-padded with `'0'` to at least that many characters.
+    fn evaluate(&self, input: &dyn ValueView, runtime: &dyn Runtime) -> Result<Value> {
+        let args = self.args.evaluate(runtime)?;
+
+        // Coerce the input; when every conversion fails the value is 0.
+        let number = input
+            .as_scalar()
+            .and_then(|scalar| {
+                if let Some(int) = scalar.to_integer() {
+                    return Some(int.max(0) as u64);
+                }
+                if let Some(float) = scalar.to_float() {
+                    return Some(if float.is_sign_negative() { 0 } else { float.floor() as u64 });
+                }
+                match scalar.to_bool() {
+                    Some(flag) => Some(u64::from(flag)),
+                    None => scalar.to_kstr().parse::<u64>().ok(),
+                }
+            })
+            .or_else(|| input.to_kstr().parse::<u64>().ok())
+            .unwrap_or(0);
+
+        let mut encoded = encode_base62(number);
+        let width = args.width.and_then(|raw| value_to_usize(&Value::scalar(raw)));
+        // `checked_sub` yields `None` when the encoding is already longer than `width`.
+        if let Some(missing) = width.and_then(|w| w.checked_sub(encoded.len())) {
+            encoded.insert_str(0, &"0".repeat(missing));
+        }
+
+        Ok(Value::scalar(encoded))
+    }
+}
+
+/// Base62-encodes `value` using the alphabet `0-9`, `A-Z`, `a-z` (most significant digit
+/// first); zero encodes as `"0"`.
+fn encode_base62(mut value: u64) -> String {
+    const ALPHABET: &[u8; 62] = b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
+    // Least-significant digit first; the unconditional first pass makes 0 encode as "0".
+    let mut digits = vec![char::from(ALPHABET[(value % 62) as usize])];
+    value /= 62;
+    while value > 0 {
+        digits.push(char::from(ALPHABET[(value % 62) as usize]));
+        value /= 62;
+    }
+    digits.into_iter().rev().collect()
+}
+
+/// Coerces a Liquid value to `usize`: negatives clamp to 0, floats are floored, booleans map
+/// to 0/1, and other scalars are parsed from their string form (`None` if that fails).
+fn value_to_usize(value: &Value) -> Option<usize> {
+    let view = value.as_view();
+    let from_scalar = view.as_scalar().and_then(|scalar| {
+        if let Some(int) = scalar.to_integer() {
+            // Saturating: a bare `as` cast would wrap large values on 32-bit targets.
+            return Some(usize::try_from(int.max(0)).unwrap_or(usize::MAX));
+        }
+        if let Some(float) = scalar.to_float() {
+            return Some(if float.is_sign_negative() { 0 } else { float.floor() as usize });
+        }
+        scalar.to_bool().map(usize::from).or_else(|| scalar.to_kstr().parse().ok())
+    });
+    from_scalar.or_else(|| view.to_kstr().parse().ok())
+}
+
+#[derive(Debug, FilterParameters)]
+struct Base36Args {
+    #[parameter(
+        description = "Pad the encoded value to at least this width",
+        arg_type = "integer"
+    )]
+    width: Option<Expression>,
+}
+
+#[derive(Clone, ParseFilter, FilterReflection, Default)]
+#[filter(
+    name = "base36",
+    description = "Encode the provided integer value using Base36.",
+    parameters(Base36Args),
+    parsed(Base36)
+)]
+pub struct Base36Filter;
+
+#[derive(Debug, FromFilterParameters, Display_filter)]
+#[name = "base36"]
+struct Base36 {
+    #[parameters]
+    args: Base36Args,
+}
+
+impl Filter for Base36 {
+    /// Base36-encodes the input after coercing it to a non-negative integer.
+    ///
+    /// Integers and floats are used numerically (negatives become 0, floats are floored),
+    /// booleans map to 0/1, and anything else is parsed as `u64`, falling back to 0. With a
+    /// `width` argument the result is left-padded with `'0'` to at least that many characters.
+    fn evaluate(&self, input: &dyn ValueView, runtime: &dyn Runtime) -> Result<Value> {
+        let args = self.args.evaluate(runtime)?;
+
+        // Coerce the input; when every conversion fails the value is 0.
+        let number = input
+            .as_scalar()
+            .and_then(|scalar| {
+                if let Some(int) = scalar.to_integer() {
+                    return Some(int.max(0) as u64);
+                }
+                if let Some(float) = scalar.to_float() {
+                    return Some(if float.is_sign_negative() { 0 } else { float.floor() as u64 });
+                }
+                match scalar.to_bool() {
+                    Some(flag) => Some(u64::from(flag)),
+                    None => scalar.to_kstr().parse::<u64>().ok(),
+                }
+            })
+            .or_else(|| input.to_kstr().parse::<u64>().ok())
+            .unwrap_or(0);
+
+        let mut encoded = encode_base36(number);
+        let width = args.width.and_then(|raw| value_to_usize(&Value::scalar(raw)));
+        // `checked_sub` yields `None` when the encoding is already longer than `width`.
+        if let Some(missing) = width.and_then(|w| w.checked_sub(encoded.len())) {
+            encoded.insert_str(0, &"0".repeat(missing));
+        }
+
+        Ok(Value::scalar(encoded))
+    }
+}
+
+/// Base36-encodes `value` using the alphabet `0-9`, `a-z` (most significant digit first);
+/// zero encodes as `"0"`.
+fn encode_base36(mut value: u64) -> String {
+    const ALPHABET: &[u8; 36] = b"0123456789abcdefghijklmnopqrstuvwxyz";
+    // Least-significant digit first; the unconditional first pass makes 0 encode as "0".
+    let mut digits = vec![char::from(ALPHABET[(value % 36) as usize])];
+    value /= 36;
+    while value > 0 {
+        digits.push(char::from(ALPHABET[(value % 36) as usize]));
+        value /= 36;
+    }
+    digits.into_iter().rev().collect()
+}
+
+// {{ value | b64url_enc }} – URL-safe Base64 without padding
+static_filter!(
+    /// Encodes the input's UTF-8 bytes as URL-safe Base64 with no `=` padding.
+    B64UrlEncFilter, "b64url_enc",
+    |input: &dyn ValueView| -> String {
+        general_purpose::URL_SAFE_NO_PAD.encode(input.to_kstr().as_bytes())
+    }
+);
+
+// {{ algo | jwt_header }} – input is the algorithm name, e.g. "HS256"
+static_filter!(
+    /// Builds a JSON header with `typ` = "JWT" and `alg` = input, Base64URL-encoded (no padding).
+    JwtHeaderFilter, "jwt_header",
+    |input: &dyn ValueView| -> String {
+        let alg = input.to_kstr();
+        let json = serde_json::json!({ "typ": "JWT", "alg": alg });
+        general_purpose::URL_SAFE_NO_PAD.encode(json.to_string())
+    }
+);
+
+// -----------------------------------------------------------------------------
+// Data Formatting
+// -----------------------------------------------------------------------------
+
+// {{ value | url_encode }} – uses the `NON_ALPHANUMERIC` percent-encoding set
+static_filter!(
+    /// Percent-encodes the stringified input for use in a URL.
+    UrlEncodeFilter, "url_encode",
+    |input: &dyn ValueView| -> String {
+        utf8_percent_encode(&input.to_kstr(), NON_ALPHANUMERIC).to_string()
+    }
+);
+
+// {{ value | json_escape }} – yields an empty string if serialization fails
+static_filter!(
+    /// Serializes the input as a JSON string literal; the result includes the enclosing quotes.
+    JsonEscapeFilter, "json_escape",
+    |input: &dyn ValueView| -> String {
+        serde_json::to_string(&input.to_kstr().to_string()).unwrap_or_default()
+    }
+);
+
+// {{ "" | unix_timestamp }} – the piped input is ignored
+static_filter!(
+    /// Current Unix time in whole seconds, taken from the UTC clock.
+    UnixTimestampFilter, "unix_timestamp",
+    |_input: &dyn ValueView| -> i64 {
+        OffsetDateTime::now_utc().unix_timestamp()
+    }
+);
+
+// {{ "" | iso_timestamp_no_frac }} – the piped input is ignored
+static_filter!(
+    /// Current UTC timestamp in ISO-8601 form with the fractional seconds removed.
+    IsoTimestampNoFracFilter, "iso_timestamp_no_frac",
+    |_input: &dyn ValueView| -> String {
+        let stamp = OffsetDateTime::now_utc()
+            .format(&Iso8601::DEFAULT)
+            .unwrap_or_else(|_| "1970-01-01T00:00:00Z".into());
+
+        // Cut at the first '.', which drops the fraction and whatever follows it, then
+        // re-append the 'Z' designator. A stamp without a '.' is returned untouched.
+        if let Some((whole_seconds, _rest)) = stamp.split_once('.') {
+            return format!("{whole_seconds}Z");
+        }
+
+        stamp
+    }
+);
+
+// {{ "" | iso_timestamp }} – the piped input is ignored
+static_filter!(
+    /// Current UTC time formatted with `Iso8601::DEFAULT`; epoch string if formatting fails.
+    IsoTimestampFilter, "iso_timestamp",
+    |_input: &dyn ValueView| -> String {
+        OffsetDateTime::now_utc()
+            .format(&Iso8601::DEFAULT)
+            .unwrap_or_else(|_| "1970-01-01T00:00:00Z".into())
+    }
+);
+
+// -----------------------------------------------------------------------------
+// Request Uniqueness
+// -----------------------------------------------------------------------------
+
+// {{ "" | uuid }} – the piped input is ignored
+static_filter!(
+    /// Generates a fresh random (version 4) UUID on every evaluation.
+    UuidFilter, "uuid",
+    |_input: &dyn ValueView| -> String { Uuid::new_v4().to_string() }
+);
+
+pub fn register_all(builder: liquid::ParserBuilder) -> liquid::ParserBuilder {
+    builder
+        // all filters defined above – a mix of zero-argument and parameterised ones
+        .filter(Replace::default())
+        .filter(B64UrlEncFilter::default())
+        .filter(Sha256Filter::default())
+        .filter(UrlEncodeFilter::default())
+        .filter(JsonEscapeFilter::default())
+        .filter(UnixTimestampFilter::default())
+        .filter(IsoTimestampFilter::default())
+        .filter(IsoTimestampNoFracFilter::default())
+        .filter(UuidFilter::default())
+        .filter(JwtHeaderFilter::default())
+        .filter(B64EncFilter::default())
+        .filter(B64DecFilter::default())
+        .filter(RandomStringFilter::default())
+        .filter(SuffixFilter::default())
+        .filter(PrefixFilter::default())
+        .filter(LstripChars::default())
+        .filter(Crc32Filter::default())
+        .filter(Crc32DecFilter::default())
+        .filter(Crc32HexFilter::default())
+        .filter(Crc32LeB64Filter::default())
+        .filter(Base62Filter::default())
+        .filter(Base36Filter::default())
+        .filter(HmacSha256::default())
+        .filter(HmacSha1::default())
+        .filter(HmacSha384::default())
+}
+
+#[cfg(test)]
+mod tests {
+    use base64::{engine::general_purpose, Engine as _};
+    use hmac::{Hmac, Mac};
+    use liquid::{object, ParserBuilder};
+    use percent_encoding::{utf8_percent_encode, NON_ALPHANUMERIC};
+    use regex::Regex;
+    use sha1::Sha1;
+    use sha2::{Digest, Sha256, Sha384};
+    use time::OffsetDateTime;
+
+    use super::*;
+
+    fn parser() -> liquid::Parser {
+        // Build a Liquid parser with stdlib + all custom filters
+        register_all(ParserBuilder::with_stdlib()).build().unwrap()
+    }
+
+    fn render(src: &str) -> String {
+        parser().parse(src).unwrap().render(&object!({})).unwrap()
+    }
+
+    // -------------------------------------------------------------------------
+    // Simple one-liner helpers
+    // -------------------------------------------------------------------------
+    #[test]
+    fn b64enc_filter() {
+        assert_eq!(render(r#"{{ "hello" | b64enc }}"#), "aGVsbG8=");
+    }
+
+    #[test]
+    fn b64dec_filter() {
+        assert_eq!(render(r#"{{ "aGVsbG8=" | b64dec }}"#), "hello");
+    }
+
+    #[test]
+    fn sha256_filter() {
+        let expect = format!("{:x}", Sha256::digest(b"hello"));
+        assert_eq!(render(r#"{{ "hello" | sha256 }}"#), expect);
+    }
+
+    #[test]
+    fn suffix_filter() {
+        assert_eq!(render(r#"{{ "abcdef" | suffix: 3 }}"#), "def");
+        assert_eq!(render(r#"{{ "short" | suffix: 10 }}"#), "short");
+        assert_eq!(render(r#"{{ "value" | suffix: 0 }}"#), "");
+    }
+
+    #[test]
+    fn prefix_filter() {
+        assert_eq!(render(r#"{{ "abcdef" | prefix: 3 }}"#), "abc");
+        assert_eq!(render(r#"{{ "short" | prefix: 10 }}"#), "short");
+        assert_eq!(render(r#"{{ "value" | prefix: 0 }}"#), "");
+    }
+
+    #[test]
+    fn crc32_and_base62_filters() {
+        assert_eq!(render(r#"{{ "hello" | crc32 }}"#), "907060870");
+        assert_eq!(render(r#"{{ "hello" | crc32 | base62 }}"#), "zNvy2");
+        assert_eq!(render(r#"{{ "hello" | crc32 | base62: 6 }}"#), "0zNvy2");
+    }
+
+    #[test]
+    fn base36_filter() {
+        assert_eq!(render(r#"{{ 123456 | base36 }}"#), "2n9c");
+        assert_eq!(render(r#"{{ 123456 | base36: 6 }}"#), "002n9c");
+    }
+
+    #[test]
+    fn crc32_dec_filter() {
+        assert_eq!(render(r#"{{ "hello" | crc32_dec }}"#), "907060870");
+        assert_eq!(render(r#"{{ "hello" | crc32_dec: 6 }}"#), "060870");
+    }
+
+    #[test]
+    fn crc32_hex_filter() {
+        assert_eq!(render(r#"{{ "hello" | crc32_hex }}"#), "3610a686"); // lowercase hex
+        assert_eq!(render(r#"{{ "hello" | crc32_hex: 4 }}"#), "a686"); // keeps the last 4 digits
+        assert_eq!(render(r#"{{ "hello" | crc32_hex: 10 }}"#), "003610a686"); // left zero-padded
+    }
+
+    #[test]
+    fn crc32_le_b64_filter() {
+        assert_eq!(render(r#"{{ "hello" | crc32_le_b64 }}"#), "hqYQNg=="); // LE bytes 86 a6 10 36
+        assert_eq!(render(r#"{{ "hello" | crc32_le_b64: 6 }}"#), "hqYQNg"); // first 6 chars only
+    }
+
+    #[test]
+    fn hmac_sha1_filter() {
+        let key = b"key1"; // the filter argument is the key, the piped value is the message
+        let data = b"data";
+        let mut mac = Hmac::<Sha1>::new_from_slice(key).unwrap();
+        mac.update(data);
+        let expect = general_purpose::STANDARD.encode(mac.finalize().into_bytes());
+
+        assert_eq!(render(r#"{{ "data" | hmac_sha1: "key1" }}"#), expect);
+    }
+
+    #[test]
+    fn b64url_enc_filter() {
+        assert_eq!(
+            render(r#"{{ "++??" | b64url_enc }}"#),
+            general_purpose::URL_SAFE_NO_PAD.encode("++??") // URL-safe alphabet, unpadded
+        );
+    }
+
+    #[test]
+    fn url_encode_filter() {
+        assert_eq!(
+            render(r#"{{ "hello world!" | url_encode }}"#), // expect "hello%20world%21"
+            utf8_percent_encode("hello world!", NON_ALPHANUMERIC).to_string()
+        );
+    }
+
+    #[test]
+    fn json_escape_filter() {
+        assert_eq!(render(r#"{{ '"hi"' | json_escape }}"#), r#""\"hi\"""#); // outer quotes kept
+    }
+
+    // -------------------------------------------------------------------------
+    // JWT header
+    // -------------------------------------------------------------------------
+    #[test]
+    fn jwt_header_filter() {
+        let result = render(r#"{{ "HS256" | jwt_header }}"#); // the piped value is the algorithm
+        let decoded = general_purpose::URL_SAFE_NO_PAD.decode(&result).unwrap();
+        let json: serde_json::Value = serde_json::from_slice(&decoded).unwrap();
+        assert_eq!(json["typ"], "JWT");
+        assert_eq!(json["alg"], "HS256");
+    }
+
+    // -------------------------------------------------------------------------
+    // HMAC helpers
+    // -------------------------------------------------------------------------
+    #[test]
+    fn hmac_sha256_filter() {
+        let key = b"secret";
+        let data = b"hi!";
+        // expected value: Base64 of the MAC over `data` under `key`, computed independently
+        let mut mac = Hmac::<Sha256>::new_from_slice(key).unwrap();
+        mac.update(data);
+        let expect = general_purpose::STANDARD.encode(mac.finalize().into_bytes());
+
+        assert_eq!(render(r#"{{ "hi!" | hmac_sha256: "secret" }}"#), expect);
+    }
+
+    #[test]
+    fn hmac_sha384_filter() {
+        let key = b"topsecret"; // the filter argument is the key, the piped value is the message
+        let data = b"payload";
+        let mut mac = Hmac::<Sha384>::new_from_slice(key).unwrap();
+        mac.update(data);
+        let expect = general_purpose::STANDARD.encode(mac.finalize().into_bytes());
+
+        assert_eq!(render(r#"{{ "payload" | hmac_sha384: "topsecret" }}"#), expect);
+    }
+
+    // -------------------------------------------------------------------------
+    // Random string
+    // -------------------------------------------------------------------------
+    #[test]
+    fn random_string_filter_default_len() {
+        let out = render(r#"{{ "" | random_string }}"#); // empty input: default length
+        assert_eq!(out.len(), 32);
+        assert!(out.chars().all(|c| c.is_ascii_alphanumeric()));
+    }
+
+    #[test]
+    fn random_string_filter_custom_len() {
+        let out = render(r#"{{ 10 | random_string }}"#); // numeric input is the length
+        assert_eq!(out.len(), 10);
+    }
+
+    // -------------------------------------------------------------------------
+    // Time helpers
+    // -------------------------------------------------------------------------
+    #[test]
+    fn unix_timestamp_filter_is_nowish() {
+        let tmpl_val: i64 = render(r#"{{ "" | unix_timestamp }}"#).parse().unwrap();
+        let now = OffsetDateTime::now_utc().unix_timestamp();
+        assert!((now - tmpl_val).abs() < 5, "timestamp differs by >5 s"); // render-to-check slack
+    }
+
+    #[test]
+    fn iso_timestamp_filter_parses() {
+        let out = render(r#"{{ "" | iso_timestamp }}"#);
+        // The value is time-dependent, so only check that it parses as ISO-8601
+        assert!(OffsetDateTime::parse(&out, &Iso8601::DEFAULT).is_ok());
+    }
+
+    // -------------------------------------------------------------------------
+    // UUID
+    // -------------------------------------------------------------------------
+    #[test]
+    fn uuid_filter_format() {
+        let uuid_re =
+            Regex::new(r"^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$")
+                .unwrap();
+        let v = render(r#"{{ "" | uuid }}"#); // lowercase v4 UUID, variant 8/9/a/b
+        assert!(uuid_re.is_match(&v));
+    }
+    // -------------------------------------------------------------------------
+    // Replace filter
+    // -------------------------------------------------------------------------
+    #[test]
+    fn replace_filter() { // arguments: search text, then replacement
+        assert_eq!(render(r#"{{ "hello world" | replace: "world", "mars" }}"#), "hello mars");
+    }
+
+    #[test]
+    fn lstrip_chars_single() {
+        assert_eq!(render(r#"{{ "000abc" | lstrip_chars: "0" }}"#), "abc"); // every leading 0
+    }
+
+    #[test]
+    fn lstrip_chars_multiple_chars() { // the argument is a set of characters, not a prefix
+        assert_eq!(render(r#"{{ "-=--token" | lstrip_chars: "-=" }}"#), "token");
+    }
+
+    // -------------------------------------------------------------------------
+    // iso_timestamp_no_frac filter
+    // -------------------------------------------------------------------------
+    #[test]
+    fn iso_timestamp_no_frac_filter() {
+        let ts = render(r#"{{ "" | iso_timestamp_no_frac }}"#); // only the shape is checked
+        assert!(!ts.contains('.'), "timestamp should not include fractional seconds: {ts}");
+        // Dropping the fraction must still leave a value that parses as ISO-8601
+        assert!(OffsetDateTime::parse(&ts, &Iso8601::DEFAULT).is_ok());
+    }
+}
diff --git a/src/rules/rule.rs b/crates/kingfisher-rules/src/rule.rs
similarity index 100%
rename from src/rules/rule.rs
rename to crates/kingfisher-rules/src/rule.rs
diff --git a/crates/kingfisher-rules/src/rules.rs b/crates/kingfisher-rules/src/rules.rs
new file mode 100644
index 0000000..f088577
--- /dev/null
+++ b/crates/kingfisher-rules/src/rules.rs
@@ -0,0 +1,245 @@
+//! Rule collection and loading utilities.
+
+use anyhow::{bail, Context, Result};
+use ignore::{types::TypesBuilder, WalkBuilder};
+use serde::Deserialize;
+use thiserror::Error;
+use tracing::{debug, debug_span, error};
+
+use std::{collections::BTreeMap, fs::File, io::BufReader, path::Path};
+
+pub use crate::rule::{Confidence, RuleSyntax, Validation};
+use serde::de::DeserializeOwned;
+
+/// Errors produced while discovering, reading, and validating rule YAML files.
+#[derive(Debug, Error)]
+pub enum RulesError {
+    /// The payload is interpolated verbatim; callers in this file pass a full message.
+    #[error("Failed to parse YAML file at path: {0}")]
+    YamlParseError(String),
+    #[error("Invalid input: {0} is neither a file nor a directory")]
+    InvalidInputError(String),
+    #[error("File system error: {0}")]
+    FileSystemError(#[from] std::io::Error),
+    /// Building the `yaml` file-type matcher for a directory walk failed.
+    #[error("Error building YAML types: {0}")]
+    YamlTypesBuildError(String),
+    /// Raised by `Rules::from_paths_and_contents` for any YAML error that carries a position.
+    #[error("Invalid ResponseMatcher variant in file: {0}, at line: {1}, column: {2}")]
+    InvalidResponseMatcherVariant(String, usize, usize),
+    /// An HTTP validation has no `response_matcher`, or an empty one.
+    #[error("HTTP validation for rule `{rule_id}` in file {path} missing response_matcher")]
+    MissingResponseMatcher { path: String, rule_id: String },
+}
+
+#[derive(Clone, Default)]
+pub struct Rules {
+    pub rules: BTreeMap<String, RuleSyntax>, // rule id -> rule; iteration follows key order
+}
+
+#[derive(Deserialize)]
+struct RawRules { // on-disk document shape: a top-level `rules` list
+    rules: Vec<RuleSyntax>,
+}
+
+impl Rules {
+    pub fn new() -> Self {
+        Self::default() // the derived `Default` yields an empty map
+    }
+
+    pub fn update(&mut self, other: Rules) {
+        self.rules.extend(other.rules); // on an id clash the rule from `other` replaces ours
+    }
+
+    /// Builds a rule set from in-memory YAML documents given as `(path, bytes)` pairs.
+    ///
+    /// `path` only labels log and error messages. Rules whose confidence is not at least
+    /// `confidence` are skipped, and a later rule replaces an earlier one with the same id.
+    /// Fails on the first document that does not parse and on the first kept rule whose
+    /// HTTP validation has a missing or empty `response_matcher`.
+    pub fn from_paths_and_contents<'a, I: IntoIterator<Item = (&'a Path, &'a [u8])>>(
+        iterable: I,
+        confidence: Confidence,
+    ) -> Result<Self> {
+        let mut rules = Self::new();
+        for (path, contents) in iterable {
+            let parsed = match serde_yaml::from_slice::<RawRules>(contents) {
+                Ok(parsed) => parsed,
+                Err(e) => {
+                    let Some(location) = e.location() else {
+                        error!("Failed to parse rules YAML from {}: {}", path.display(), e);
+                        bail!(RulesError::YamlParseError(format!(
+                            "Failed to load rules YAML from {}: {}",
+                            path.display(),
+                            e
+                        )));
+                    };
+                    error!(
+                        "Failed to parse rules YAML from {}: {}, at line: {}, column: {}",
+                        path.display(),
+                        e,
+                        location.line(),
+                        location.column()
+                    );
+                    // Every positioned parse error is reported through this variant.
+                    bail!(RulesError::InvalidResponseMatcherVariant(
+                        path.display().to_string(),
+                        location.line(),
+                        location.column(),
+                    ));
+                }
+            };
+            for rule_syntax in parsed.rules {
+                if !rule_syntax.confidence.is_at_least(&confidence) {
+                    continue;
+                }
+                if let Some(Validation::Http(http_val)) = &rule_syntax.validation {
+                    if http_val.request.response_matcher.as_ref().is_none_or(|m| m.is_empty()) {
+                        bail!(RulesError::MissingResponseMatcher {
+                            path: path.display().to_string(),
+                            rule_id: rule_syntax.id.clone(),
+                        });
+                    }
+                }
+                rules.rules.insert(rule_syntax.id.clone(), rule_syntax);
+            }
+        }
+        Ok(rules)
+    }
+
+    /// Loads rules from a mix of YAML files and directories, merging them in iteration order.
+    /// A path that is neither a file nor a directory aborts with `InvalidInputError`.
+    pub fn from_paths<P: AsRef<Path>, I: IntoIterator<Item = P>>(
+        paths: I,
+        confidence: Confidence,
+    ) -> Result<Self> {
+        let (mut rules, mut num_paths) = (Rules::new(), 0);
+        for input in paths {
+            num_paths += 1;
+            rules.update(match input.as_ref() {
+                p if p.is_file() => Rules::from_yaml_file(p, confidence)?,
+                p if p.is_dir() => Rules::from_directory(p, confidence)?,
+                p => {
+                    error!("Invalid input type: {} is neither a file nor a directory", p.display());
+                    bail!(RulesError::InvalidInputError(p.display().to_string()));
+                }
+            });
+        }
+        debug!("Loaded {} rules from {} paths", rules.num_rules(), num_paths);
+        Ok(rules)
+    }
+
+    /// Loads one rules YAML file, skipping rules that do not meet `confidence`. Read/parse
+    /// failures and HTTP validations without a non-empty `response_matcher` are errors.
+    pub fn from_yaml_file<P: AsRef<Path>>(path: P, confidence: Confidence) -> Result<Self> {
+        let path = path.as_ref();
+        let _span = debug_span!("Rules::from_yaml_file", "{}", path.display()).entered();
+        let parsed = match load_yaml_file::<RawRules, _>(path) {
+            Ok(parsed) => parsed,
+            Err(e) => {
+                error!("Failed to load rules YAML from {}: {}", path.display(), e);
+                bail!(RulesError::YamlParseError(format!(
+                    "Failed to load rules YAML from {}: {}",
+                    path.display(),
+                    e
+                )));
+            }
+        };
+        let mut rules = Rules::new();
+        for rule_syntax in parsed.rules {
+            if !rule_syntax.confidence.is_at_least(&confidence) {
+                continue;
+            }
+            if let Some(Validation::Http(http_val)) = &rule_syntax.validation {
+                if http_val.request.response_matcher.as_ref().is_none_or(|m| m.is_empty()) {
+                    bail!(RulesError::MissingResponseMatcher {
+                        path: path.display().to_string(),
+                        rule_id: rule_syntax.id.clone(),
+                    });
+                }
+            }
+            rules.rules.insert(rule_syntax.id.clone(), rule_syntax);
+        }
+        debug!("Loaded {} rules from {}", rules.num_rules(), path.display());
+        Ok(rules)
+    }
+
+    /// Loads every listed YAML file in order; the first failing file aborts the whole load.
+    pub fn from_yaml_files<P: AsRef<Path>, I: IntoIterator<Item = P>>(
+        paths: I,
+        confidence: Confidence,
+    ) -> Result<Self> {
+        let (mut rules, mut num_paths) = (Rules::new(), 0);
+        for path in paths {
+            rules.update(Rules::from_yaml_file(path.as_ref(), confidence)?);
+            num_paths += 1;
+        }
+        debug!("Loaded {} rules from {} YAML files", rules.num_rules(), num_paths);
+        Ok(rules)
+    }
+
+    /// Walks `path` for `yaml`-typed files (symlinks followed, the walker's standard filters
+    /// turned off) and loads them in sorted path order via `from_yaml_files`.
+    pub fn from_directory<P: AsRef<Path>>(path: P, confidence: Confidence) -> Result<Self> {
+        let path = path.as_ref();
+        let _span = debug_span!("Rules::from_directory", "{}", path.display()).entered();
+        let yaml_types =
+            TypesBuilder::new().add_defaults().select("yaml").build().map_err(|e| {
+                error!("Failed to build YAML types: {}", e);
+                RulesError::YamlTypesBuildError(e.to_string())
+            })?;
+        let mut yaml_files: Vec<_> = WalkBuilder::new(path)
+            .types(yaml_types)
+            .follow_links(true)
+            .standard_filters(false)
+            .build()
+            .filter_map(|entry| match entry {
+                Ok(entry) => {
+                    let keep = entry.file_type().is_some_and(|t| !t.is_dir());
+                    keep.then(|| entry.into_path())
+                }
+                Err(e) => {
+                    debug!("Failed to read directory entry: {}", e);
+                    None
+                }
+            })
+            .collect();
+        yaml_files.sort();
+        debug!("Found {} YAML files in {}", yaml_files.len(), path.display());
+        Self::from_yaml_files(&yaml_files, confidence)
+    }
+
+    #[inline]
+    pub fn num_rules(&self) -> usize {
+        self.rules.len() // number of distinct keys held in the map
+    }
+
+    #[inline]
+    pub fn is_empty(&self) -> bool {
+        self.rules.is_empty() // true when no rule is held
+    }
+
+    #[inline]
+    pub fn iter_rules(&self) -> std::collections::btree_map::Values<'_, String, RuleSyntax> {
+        self.rules.values() // borrows the rules in ascending key order
+    }
+}
+
+impl IntoIterator for Rules {
+    type Item = RuleSyntax;
+    type IntoIter = std::collections::btree_map::IntoValues<String, RuleSyntax>;
+    /// Consumes the collection and yields the rules by value, in ascending key order.
+    fn into_iter(self) -> Self::IntoIter {
+        self.rules.into_values()
+    }
+}
+
+/// Opens `path` and deserializes its YAML content into `T`; errors carry the path as context.
+pub fn load_yaml_file<T: DeserializeOwned, P: AsRef<Path>>(path: P) -> Result<T> {
+    let path = path.as_ref();
+    let reader = File::open(path)
+        .map(BufReader::new)
+        .with_context(|| format!("Failed to open YAML file: {}", path.display()))?;
+    serde_yaml::from_reader(reader)
+        .with_context(|| format!("Failed to parse YAML from file: {}", path.display()))
+}
diff --git a/crates/kingfisher-rules/src/rules_database.rs b/crates/kingfisher-rules/src/rules_database.rs
new file mode 100644
index 0000000..91cf538
--- /dev/null
+++ b/crates/kingfisher-rules/src/rules_database.rs
@@ -0,0 +1,252 @@
+use std::{sync::Arc, time::Instant};
+
+use anyhow::{anyhow, bail, Result};
+use regex::bytes::Regex;
+use tracing::{debug, debug_span, error};
+use vectorscan_rs::{BlockDatabase, Flag, Pattern};
+
+use crate::rule::{Rule, RULE_COMMENTS_PATTERN};
+
+pub struct RulesDatabase {
+    /// Compiled rules; `anchored_regexes[i]` is the regex built from `rules[i]`.
+    pub(crate) rules: Vec<Arc<Rule>>,
+    pub(crate) anchored_regexes: Vec<Regex>,
+    pub(crate) vsdb: BlockDatabase, // patterns are registered with their `rules` index as id
+}
+
+/// Removes everything matched by `RULE_COMMENTS_PATTERN`, then trims every remaining line
+/// and concatenates the pieces (no separator) so a multi-line pattern becomes one line.
+pub fn format_regex_pattern(pattern: &str) -> String {
+    let stripped = RULE_COMMENTS_PATTERN.replace_all(pattern, "");
+    let mut flattened = String::with_capacity(stripped.len());
+    // Blank lines trim to "" and so add nothing; no explicit filter is required.
+    for piece in stripped.lines().map(str::trim) {
+        flattened.push_str(piece);
+    }
+    flattened
+}
+
+impl RulesDatabase {
+    /// Returns the anchored regex of the first rule whose syntax id equals `rule_id`, relying
+    /// on `rules` and `anchored_regexes` sharing the same order.
+    pub fn get_regex_by_rule_id(&self, rule_id: &str) -> Option<&Regex> {
+        let index = self.rules.iter().position(|rule| rule.syntax().id == rule_id)?;
+        self.anchored_regexes.get(index)
+    }
+
+    pub fn get_rule_by_finding_fingerprint(&self, finding_fingerprint: &str) -> Option<Arc<Rule>> {
+        self.rules.iter().find(|r| r.finding_sha1_fingerprint() == finding_fingerprint).cloned()
+    } // linear scan; the first rule with a matching fingerprint is returned as a cloned `Arc`
+
+    pub fn get_rule_by_text_id(&self, text_id: &str) -> Option<Arc<Rule>> {
+        self.rules.iter().find(|r| r.id() == text_id).cloned() // first match, linear scan
+    }
+
+    pub fn get_rule_by_name(&self, name: &str) -> Option<Arc<Rule>> {
+        self.rules.iter().find(|r| r.name() == name).cloned() // first match, linear scan
+    }
+
+    /// Compiles `rules` into one Vectorscan block database (each pattern registered under its
+    /// rule's index) plus one anchored regex per rule. An empty rule list is an error; if the
+    /// combined database fails to build, the rules are recompiled one by one so the failure
+    /// report names the offending rules.
+    pub fn from_rules(rules: Vec<Rule>) -> Result<Self> {
+        let rules: Vec<Arc<Rule>> = rules.into_iter().map(Arc::new).collect();
+        let _span = debug_span!("RulesDatabase::from_rules").entered();
+        if rules.is_empty() {
+            bail!("No rules to compile");
+        }
+        let patterns: Vec<Pattern> = rules
+            .iter()
+            .enumerate()
+            .map(|(id, rule)| {
+                let bytes = rule.syntax().pattern.clone().into_bytes();
+                Pattern::new(bytes, Flag::default(), Some(id.try_into().unwrap()))
+            })
+            .collect();
+        let t1 = Instant::now();
+        let vsdb = match BlockDatabase::new(patterns) {
+            Ok(vsdb) => vsdb,
+            Err(e) => {
+                error!(
+                    "Failed to create BlockDatabase: {}. Attempting to compile rules individually.",
+                    e
+                );
+                return Self::compile_rules_individually(rules)
+                    .map_err(|err| anyhow!("Failed to compile rules: {}\n{}", e, err));
+            }
+        };
+        let d1 = t1.elapsed().as_secs_f64();
+        let (anchored_regexes, d2) = Self::compile_regexes(&rules)?;
+        debug!("Compiled {} rules: vectorscan {}s; regex {}s", rules.len(), d1, d2);
+        Ok(RulesDatabase { rules, vsdb, anchored_regexes })
+    }
+
+    /// Fallback used by `from_rules` after the combined database failed to build.
+    ///
+    /// Every rule is compiled on its own, first as a single-pattern Vectorscan database and
+    /// then as a regex, so that each failing rule can be named together with its error.
+    ///
+    /// # Errors
+    /// If at least one rule failed, all collected messages are logged and returned as one
+    /// error. Otherwise the combined database is built again from the collected patterns,
+    /// and a failure of that build is returned as-is.
+    fn compile_rules_individually(rules: Vec<Arc<Rule>>) -> Result<Self> {
+        let mut compiled_rules = Vec::new();
+        let mut compiled_patterns = Vec::new();
+        let mut compiled_regexes = Vec::new();
+        let mut error_messages = Vec::new();
+        for (id, rule) in rules.into_iter().enumerate() {
+            // A `Pattern` is moved into the database that compiles it, so the probe and the
+            // final build each need their own copy.
+            let make_pattern = || {
+                let bytes = rule.syntax().pattern.clone().into_bytes();
+                Pattern::new(bytes, Flag::default(), Some(id.try_into().unwrap()))
+            };
+            if let Err(e) = BlockDatabase::new(vec![make_pattern()]) {
+                error_messages.push(format!(
+                    "Failed to compile vectorscan pattern for rule '{}' (ID: {}): {}",
+                    rule.name(),
+                    rule.id(),
+                    e
+                ));
+                continue;
+            }
+            // Kept for the combined build at the end of this function.
+            compiled_patterns.push(make_pattern());
+            match rule.syntax().as_regex() {
+                Ok(regex) => {
+                    compiled_regexes.push(regex);
+                    compiled_rules.push(rule);
+                }
+                Err(e) => error_messages.push(format!(
+                    "Failed to compile Regex for rule '{}' (ID: {}): {}",
+                    rule.name(),
+                    rule.id(),
+                    e
+                )),
+            }
+        }
+        // Any failure is fatal: report all of them at once instead of a partial database.
+        if !error_messages.is_empty() {
+            error!(
+                "Errors occurred while compiling rules individually:\n{}",
+                error_messages.join("\n")
+            );
+            bail!("Failed to compile the following rules:\n{}", error_messages.join("\n"));
+        }
+        // Every rule compiled on its own; retry the combined build.
+        let vsdb = BlockDatabase::new(compiled_patterns)?;
+        Ok(RulesDatabase { rules: compiled_rules, vsdb, anchored_regexes: compiled_regexes })
+    }
+
+    /// Builds the anchored regex of every rule, preserving rule order so that index `i` of
+    /// the result belongs to `rules[i]`.
+    ///
+    /// Returns the regexes together with the elapsed seconds the compilation took.
+    ///
+    /// # Errors
+    /// Stops at the first rule whose regex does not compile; the failure is logged and
+    /// returned with the rule's name and id.
+    fn compile_regexes(rules: &[Arc<Rule>]) -> Result<(Vec<Regex>, f64)> {
+        let started = Instant::now();
+        let mut anchored_regexes = Vec::with_capacity(rules.len());
+        for rule in rules {
+            let regex = rule.syntax().as_regex().map_err(|e| {
+                let message = format!(
+                    "Failed to compile Regex for rule '{}' (ID: {}): {}",
+                    rule.name(),
+                    rule.id(),
+                    e
+                );
+                error!("{}", message);
+                anyhow!(message)
+            })?;
+            anchored_regexes.push(regex);
+        }
+        Ok((anchored_regexes, started.elapsed().as_secs_f64()))
+    }
+
+    #[inline]
+    pub fn num_rules(&self) -> usize {
+        self.rules.len() // one anchored regex is stored per rule, so this is their count too
+    }
+
+    #[inline]
+    pub fn get_rule(&self, index: usize) -> Option<Arc<Rule>> {
+        self.rules.get(index).cloned() // `None` when `index` is out of range
+    }
+
+    pub fn rules(&self) -> &[Arc<Rule>] {
+        &self.rules // same order as `anchored_regexes`
+    }
+
+    /// Returns the compiled Vectorscan database; its pattern ids are indices into `rules()`.
+    #[inline]
+    pub fn vectorscan_db(&self) -> &BlockDatabase {
+        &self.vsdb
+    }
+
+    /// Returns the anchored regexes, stored in the same order as `rules()`.
+    #[inline]
+    pub fn anchored_regexes(&self) -> &[Regex] {
+        &self.anchored_regexes
+    }
+}
+#[cfg(test)]
+mod test_vectorscan {
+    use pretty_assertions::assert_eq;
+
+    use super::*;
+    #[test]
+    pub fn test_vectorscan_sanity() -> Result<()> {
+        use vectorscan_rs::{BlockDatabase, BlockScanner, Pattern, Scan};
+        let input = b"some test data for vectorscan"; // "test" occupies bytes 5..9
+        let pattern = Pattern::new(b"test".to_vec(), Flag::CASELESS | Flag::SOM_LEFTMOST, None);
+        let db: BlockDatabase = BlockDatabase::new(vec![pattern])?;
+        let mut scanner = BlockScanner::new(&db)?;
+        let mut matches: Vec<(u64, u64)> = vec![];
+        scanner.scan(input, |id: u32, from: u64, to: u64, _flags: u32| {
+            println!("found pattern #{} @ [{}, {})", id, from, to);
+            matches.push((from, to));
+            Scan::Continue // do not stop at the first hit
+        })?;
+        assert_eq!(matches, vec![(5, 9)]); // exactly one match, as a half-open byte range
+        Ok(())
+    }
+}
+#[cfg(test)]
+// Checks that `format_regex_pattern` flattens a verbose, commented, multi-line pattern.
+mod test_regex_cleaning {
+    use super::*;
+    #[test]
+    fn test_format_regex_pattern() {
+        let input = r#"(?x)
+            (?i)
+            (?:
+              \\b
+              (?:AWS|AMAZON|AMZN|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA)
+              (?:\\.|[\\n\\r]){0,32}?  (?# THIS IS A COMMENTCOMMENTCOMMENTCOMMENTCOMMENTCOMMENTCOMMENT)
+              (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) # THIS IS A COMMENT THAT SHOULD NOT BE USED BUT MIGHT BE
+              (?:\\.|[\\n\\r]){0,32}?
+              \\b
+              (
+                [A-Za-z0-9/+=]{40}
+              )
+              \\b
+            |
+              \\b
+              (?:SECRET|PRIVATE|ACCESS)
+              (?:\\.|[\\n\\r]){0,16}?
+              (?:KEY|TOKEN)
+              (?:\\.|[\\n\\r]){0,32}?
+              \\b
+              (
+                [A-Za-z0-9/+=]{40}
+              )
+              \\b
+            )"#;
+        let data = format_regex_pattern(input);
+        // The helper joins trimmed lines with no separator, so no line break may survive.
+        assert!(!data.is_empty() && !data.contains('\n'), "pattern was not flattened: {data}");
+    }
+}
diff --git a/crates/kingfisher-scanner/Cargo.toml b/crates/kingfisher-scanner/Cargo.toml
new file mode 100644
index 0000000..563ab7e
--- /dev/null
+++ b/crates/kingfisher-scanner/Cargo.toml
@@ -0,0 +1,120 @@
+[package]
+name = "kingfisher-scanner"
+version = "0.1.0"
+description = "High-level scanning API for Kingfisher secret scanner"
+edition.workspace = true
+rust-version.workspace = true
+license.workspace = true
+authors.workspace = true
+homepage.workspace = true
+repository.workspace = true
+publish.workspace = true
+
+[features]
+default = []
+
+# Core validation support (includes HTTP validation)
+validation = ["validation-http"]
+
+# HTTP-based validation for API tokens
+validation-http = [
+    "dep:reqwest",
+    "dep:tokio",
+    "dep:liquid",
+    "dep:liquid-core",
+    "dep:quick-xml",
+    "dep:sha1",
+]
+
+# AWS credential validation
+validation-aws = [
+    "validation-http",
+    "dep:aws-config",
+    "dep:aws-credential-types",
+    "dep:aws-sdk-sts",
+    "dep:aws-types",
+    "dep:aws-smithy-http-client",
+    "dep:aws-smithy-runtime-api",
+    "dep:aws-smithy-types",
+    "dep:base32",
+    "dep:byteorder",
+    "dep:rand",
+]
+
+# All validation features
+validation-all = [
+    "validation",
+    "validation-aws",
+]
+
+[dependencies]
+# Internal dependencies
+kingfisher-core = { path = "../kingfisher-core" }
+kingfisher-rules = { path = "../kingfisher-rules" }
+
+# Error handling
+anyhow = "1.0"
+thiserror = "1.0"
+
+# Serialization
+serde = { version = "1.0", features = ["derive"] }
+serde_json = "1.0"
+schemars = "0.8"
+
+# Regex
+regex = "1.12"
+
+# Hashing
+xxhash-rust = { version = "0.8", features = ["xxh3"] }
+
+# Vectorscan
+vectorscan-rs = "0.0.5"
+
+# Collections
+smallvec = { version = "1", features = ["const_generics", "const_new", "union"] }
+rustc-hash = "2.1"
+
+# Concurrency
+parking_lot = "0.12"
+thread_local = "1.1"
+once_cell = "1.21"
+
+# HTTP status codes
+http = "1.4"
+
+# Byte strings
+bstr = { version = "1.12", features = ["serde"] }
+
+# Base64 for decoding
+base64 = "0.22"
+
+# Logging
+tracing = "0.1"
+
+# ---- Optional validation dependencies ----
+
+# HTTP validation
+reqwest = { version = "0.12", default-features = false, features = [
+    "json", "gzip", "brotli", "deflate", "stream", "rustls-tls", "rustls-tls-native-roots", "multipart"
+], optional = true }
+tokio = { version = "1.48", features = ["net", "time", "sync"], optional = true }
+liquid = { version = "0.26", optional = true }
+liquid-core = { version = "0.26", optional = true }
+quick-xml = { version = "0.38", features = ["serde", "serialize"], optional = true }
+sha1 = { version = "0.10", optional = true }
+
+# AWS validation
+aws-config = { version = "1.8", optional = true }
+aws-credential-types = { version = "1.2", optional = true }
+aws-sdk-sts = { version = "1.95", optional = true }
+aws-types = { version = "1.3", optional = true }
+aws-smithy-http-client = { version = "1.1", optional = true }
+aws-smithy-runtime-api = { version = "1.9", optional = true }
+aws-smithy-types = { version = "1.3", optional = true }
+base32 = { version = "0.5", optional = true }
+byteorder = { version = "1.5", optional = true }
+rand = { version = "0.9", optional = true }
+
+[dev-dependencies]
+pretty_assertions = "1.4"
+tempfile = "3.23"
diff --git a/crates/kingfisher-scanner/src/finding.rs b/crates/kingfisher-scanner/src/finding.rs
new file mode 100644
index 0000000..3ac5765
--- /dev/null
+++ b/crates/kingfisher-scanner/src/finding.rs
@@ -0,0 +1,278 @@
+//! Finding types representing detected secrets.
+
+use std::collections::HashMap;
+use std::sync::Arc;
+
+use kingfisher_core::{BlobId, Location};
+use kingfisher_rules::{Confidence, Rule};
+use parking_lot::RwLock;
+use schemars::JsonSchema;
+use serde::{Deserialize, Serialize};
+use smallvec::SmallVec;
+
+// Lock-protected, lazily created set of leaked `&'static str`s that backs `intern`.
+static STRING_POOL: once_cell::sync::Lazy<RwLock<std::collections::HashSet<&'static str>>> =
+    once_cell::sync::Lazy::new(|| RwLock::new(std::collections::HashSet::new()));
+
+/// Returns a `&'static str` equal to `s`, reusing an already interned copy when one exists.
+///
+/// A string seen for the first time is copied and leaked to obtain the `'static`
+/// lifetime; this module never removes entries, so each distinct input stays allocated.
+pub fn intern(s: &str) -> &'static str {
+    // Fast path under the shared lock. The read guard is a temporary of this
+    // statement, so it is released before the write lock is requested below.
+    let cached = STRING_POOL.read().get(s).copied();
+    if let Some(hit) = cached {
+        return hit;
+    }
+
+    // Slow path under the exclusive lock. Look again first: another thread may
+    // have inserted `s` between the two lock acquisitions.
+    let mut writer = STRING_POOL.write();
+    match writer.get(s).copied() {
+        Some(hit) => hit,
+        None => {
+            // Copy into a boxed str and leak it to get the 'static reference.
+            let fresh: &'static str = Box::leak(Box::<str>::from(s));
+            writer.insert(fresh);
+            fresh
+        }
+    }
+}
+
+/// A secret finding detected by the scanner.
+///
+/// This is the main output type from scanning operations. It carries the
+/// matched secret, its location, metadata copied from the matching rule,
+/// the named capture groups, and the id of the blob it was found in.
+#[derive(Debug, Clone, Serialize, JsonSchema)]
+pub struct Finding {
+    /// The rule that matched (left out of serialized output and of the JSON schema).
+    #[serde(skip_serializing)]
+    #[schemars(skip)]
+    pub rule: Arc<Rule>,
+
+    /// The rule's unique identifier.
+    pub rule_id: String,
+
+    /// The rule's human-readable name.
+    pub rule_name: String,
+
+    /// The matched secret value (masked when the scanner runs with `redact_secrets`).
+    pub secret: String,
+
+    /// Location information (byte offsets and line/column).
+    pub location: FindingLocation,
+
+    /// Confidence level of the finding.
+    pub confidence: Confidence,
+
+    /// Shannon entropy of the matched secret.
+    pub entropy: f32,
+
+    /// 64-bit fingerprint used for deduplication.
+    pub fingerprint: u64,
+
+    /// Named capture groups from the regex match.
+    #[serde(default)]
+    pub captures: HashMap<String, String>,
+
+    /// Whether the secret was found in Base64-encoded content.
+    pub is_base64_encoded: bool,
+
+    /// The blob ID where this finding was detected.
+    pub blob_id: BlobId,
+}
+
+impl Finding {
+    /// Borrows the [`Rule`] whose pattern produced this finding.
+    pub fn rule(&self) -> &Rule {
+        self.rule.as_ref()
+    }
+
+    /// Reports whether `confidence` is [`Confidence::High`].
+    pub fn is_high_confidence(&self) -> bool {
+        matches!(self.confidence, Confidence::High)
+    }
+
+    /// Start line of the finding (`location.line`).
+    pub fn line(&self) -> usize {
+        self.location.line
+    }
+
+    /// Start column of the finding (`location.column`).
+    pub fn column(&self) -> usize {
+        self.location.column
+    }
+}
+
+/// Byte-offset span and line/column span of a finding.
+#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
+pub struct FindingLocation {
+    /// Start byte offset (0-indexed).
+    pub start_offset: usize,
+
+    /// End byte offset (exclusive).
+    pub end_offset: usize,
+
+    /// Start line number (1-indexed); `from_offsets` leaves it at 0.
+    pub line: usize,
+
+    /// Start column (0-indexed).
+    pub column: usize,
+
+    /// End line number (1-indexed); `from_offsets` leaves it at 0.
+    pub end_line: usize,
+
+    /// End column (0-indexed).
+    pub end_column: usize,
+}
+
+impl FindingLocation {
+    /// Builds a location from a `Location`.
+    ///
+    /// Byte offsets come from `offset_span`; line/column data from `resolved_source_span()`.
+    pub fn from_location(location: &Location) -> Self {
+        let span = location.resolved_source_span();
+        let (begin, finish) = (&span.start, &span.end);
+        Self::new(
+            location.offset_span.start,
+            location.offset_span.end,
+            begin.line,
+            begin.column,
+            finish.line,
+            finish.column,
+        )
+    }
+
+    /// Builds a location that carries only byte offsets.
+    ///
+    /// All line and column fields are set to 0.
+    pub fn from_offsets(start: usize, end: usize) -> Self {
+        Self::new(start, end, 0, 0, 0, 0)
+    }
+
+    /// Builds a location from explicit offsets and line/column values.
+    ///
+    /// The arguments are stored as given; no validation is performed.
+    pub fn new(
+        start_offset: usize,
+        end_offset: usize,
+        line: usize,
+        column: usize,
+        end_line: usize,
+        end_column: usize,
+    ) -> Self {
+        Self { start_offset, end_offset, line, column, end_line, end_column }
+    }
+}
+
+impl From<&Location> for FindingLocation {
+    fn from(loc: &Location) -> Self {
+        FindingLocation::from_location(loc)
+    }
+}
+
+/// A single regex capture; serialized by the hand-written `Serialize` impl in this file.
+#[derive(Debug, Clone, JsonSchema)]
+pub struct SerializableCapture {
+    /// The name of the capture group (if named).
+    pub name: Option<&'static str>,
+    /// The capture group number (`from_captures` uses 0 for the full match, 1+ for groups).
+    pub match_number: i32,
+    /// Start byte offset of the capture.
+    pub start: usize,
+    /// End byte offset of the capture.
+    pub end: usize,
+    /// The captured text; `from_captures` decodes it lossily as UTF-8 and interns it.
+    #[serde(skip_serializing, skip_deserializing)]
+    pub value: &'static str,
+}
+
+impl SerializableCapture {
+    /// Gives back the stored capture text unchanged.
+    pub fn raw_value(&self) -> &'static str {
+        self.value
+    }
+
+    /// Value used for serialization; currently the raw value, borrowed and unredacted.
+    pub fn display_value(&self) -> std::borrow::Cow<'static, str> {
+        self.value.into()
+    }
+}
+
+impl serde::Serialize for SerializableCapture {
+    /// Writes `name`, `match_number`, `start`, `end`, then `value` (from `display_value()`).
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: serde::Serializer,
+    {
+        use serde::ser::SerializeStruct as _;
+        let Self { name, match_number, start, end, .. } = self;
+        let mut out = serializer.serialize_struct("SerializableCapture", 5)?;
+        out.serialize_field("name", name)?;
+        out.serialize_field("match_number", match_number)?;
+        out.serialize_field("start", start)?;
+        out.serialize_field("end", end)?;
+        out.serialize_field("value", &self.display_value())?;
+        out.end()
+    }
+}
+
+/// A collection of serializable captures from a regex match.
+#[derive(Debug, Clone, Serialize, JsonSchema)]
+pub struct SerializableCaptures {
+    /// Recorded captures (inline storage for up to 2); the JSON schema describes a `Vec`.
+    #[schemars(with = "Vec<SerializableCapture>")]
+    pub captures: SmallVec<[SerializableCapture; 2]>,
+}
+
+impl SerializableCaptures {
+    /// Builds the capture list for a single regex match.
+    ///
+    /// If the match has explicit groups, every group from 1 upward that
+    /// participated in the match is recorded and group 0 is left out. If the
+    /// pattern has no explicit groups, only group 0 (the full match) is
+    /// recorded.
+    ///
+    /// Group names and captured text are interned with [`intern`]; captured
+    /// bytes are decoded with `String::from_utf8_lossy` first. The `_input`
+    /// argument is not used.
+    ///
+    /// `match_number` is the group index; an index that does not fit in `i32`
+    /// is stored as 0.
+    pub fn from_captures(
+        captures: &regex::bytes::Captures,
+        _input: &[u8],
+        re: &regex::bytes::Regex,
+    ) -> Self {
+        // Interned group names, indexed by group number (unnamed groups are `None`).
+        let names: SmallVec<[Option<&'static str>; 4]> =
+            re.capture_names().map(|group_name| group_name.map(intern)).collect();
+
+        // Turns group `idx` into a record, or `None` when the group did not
+        // take part in this match.
+        let record_for = |idx: usize| {
+            let group = captures.get(idx)?;
+            let text = String::from_utf8_lossy(group.as_bytes());
+            Some(SerializableCapture {
+                name: names.get(idx).copied().flatten(),
+                match_number: i32::try_from(idx).unwrap_or(0),
+                start: group.start(),
+                end: group.end(),
+                value: intern(text.as_ref()),
+            })
+        };
+
+        // `Captures::len` counts group 0, so a length above 1 means explicit
+        // groups exist; in that case start at 1, otherwise use group 0 alone.
+        let group_count = captures.len();
+        let first_group = if group_count > 1 { 1 } else { 0 };
+
+        // Groups that did not participate are skipped by `filter_map`.
+        let collected: SmallVec<[SerializableCapture; 2]> =
+            (first_group..group_count).filter_map(record_for).collect();
+
+        SerializableCaptures { captures: collected }
+    }
+}
diff --git a/crates/kingfisher-scanner/src/lib.rs b/crates/kingfisher-scanner/src/lib.rs
new file mode 100644
index 0000000..6780d9e
--- /dev/null
+++ b/crates/kingfisher-scanner/src/lib.rs
@@ -0,0 +1,67 @@
+//! High-level scanning API for the Kingfisher secret scanner.
+//!
+//! This crate provides a clean, ergonomic API for scanning content for secrets.
+//!
+//! # Quick Start
+//!
+//! ```ignore
+//! use kingfisher_scanner::{Scanner, ScannerConfig};
+//! use kingfisher_rules::{RulesDatabase, Rule, RuleSyntax, Confidence};
+//! use std::sync::Arc;
+//!
+//! // Create a simple rule
+//! let rules = vec![Rule::new(RuleSyntax {
+//!     id: "test.api_key".to_string(),
+//!     name: "Test API Key".to_string(),
+//!     pattern: r#"api_key\s*=\s*['"]([a-zA-Z0-9]{32})['"]"#.to_string(),
+//!     min_entropy: 3.0,
+//!     confidence: Confidence::Medium,
+//!     visible: true,
+//!     examples: vec![],
+//!     negative_examples: vec![],
+//!     references: vec![],
+//!     validation: None,
+//!     depends_on_rule: vec![],
+//!     pattern_requirements: None,
+//! })];
+//!
+//! // Compile the rules
+//! let rules_db = Arc::new(RulesDatabase::from_rules(rules).unwrap());
+//!
+//! // Create scanner
+//! let scanner = Scanner::new(rules_db);
+//!
+//! // Scan content
+//! let findings = scanner.scan_bytes(b"api_key = 'abcdefghijklmnopqrstuvwxyz123456'");
+//! ```
+//!
+//! # Features
+//!
+//! - **Buffer scanning**: Scan in-memory bytes directly
+//! - **File scanning**: Scan files from disk with automatic memory mapping
+//! - **Base64 decoding**: Automatically detect and decode Base64-encoded secrets
+//! - **Deduplication**: Skip duplicate findings across multiple scans
+//! - **Thread safety**: Safe to use from multiple threads
+//!
+//! # Optional Features
+//!
+//! - **validation**: Enable credential validation support
+//! - **validation-http**: HTTP-based validation (included in `validation`)
+//! - **validation-aws**: AWS credential validation via STS
+//! - **validation-all**: Enable all validation features
+
+mod finding;
+mod scanner;
+mod scanner_pool;
+
+// Validation module (feature-gated)
+#[cfg(any(feature = "validation", feature = "validation-http", feature = "validation-aws"))]
+pub mod validation;
+
+pub use finding::{intern, Finding, FindingLocation, SerializableCapture, SerializableCaptures};
+pub use scanner::{Scanner, ScannerConfig};
+pub use scanner_pool::ScannerPool;
+
+// Re-export commonly needed types from dependencies
+pub use kingfisher_core::{Blob, BlobId, Location, OffsetSpan, SourcePoint, SourceSpan};
+pub use kingfisher_rules::{Confidence, Rule, RulesDatabase};
diff --git a/crates/kingfisher-scanner/src/scanner.rs b/crates/kingfisher-scanner/src/scanner.rs
new file mode 100644
index 0000000..ac97560
--- /dev/null
+++ b/crates/kingfisher-scanner/src/scanner.rs
@@ -0,0 +1,595 @@
+//! High-level scanner API.
+
+use std::collections::HashMap;
+use std::path::Path;
+use std::sync::Arc;
+
+use anyhow::Result;
+use base64::{engine::general_purpose, Engine};
+use kingfisher_core::{calculate_shannon_entropy, Blob, BlobIdMap, LocationMapping, OffsetSpan};
+use kingfisher_rules::RulesDatabase;
+use regex::bytes::Regex;
+use rustc_hash::{FxHashMap, FxHashSet};
+use tracing::debug;
+use xxhash_rust::xxh3::xxh3_64;
+
+use crate::finding::{Finding, FindingLocation};
+use crate::scanner_pool::ScannerPool;
+
+/// Configuration options for the scanner.
+#[derive(Debug, Clone)]
+pub struct ScannerConfig {
+    /// Whether to decode and scan Base64 content.
+    pub enable_base64_decoding: bool,
+
+    /// Whether to skip blobs that an earlier scan recorded as containing findings.
+    pub enable_dedup: bool,
+
+    /// Override the minimum entropy threshold for all rules.
+    pub min_entropy_override: Option<f32>,
+
+    /// Language hint for tree-sitter parsing (e.g., "python"); unused in this file, TODO confirm.
+    pub language_hint: Option<String>,
+
+    /// Whether to redact secrets in findings.
+    pub redact_secrets: bool,
+
+    /// Maximum depth for Base64 decoding. NOTE(review): not read by the scanner in this file.
+    pub max_base64_depth: usize,
+}
+
+impl Default for ScannerConfig {
+    fn default() -> Self {
+        ScannerConfig {
+            max_base64_depth: 2,
+            redact_secrets: false, // secrets are reported in clear text by default
+            language_hint: None,
+            min_entropy_override: None, // each rule's own threshold applies
+            enable_dedup: true,
+            enable_base64_decoding: true,
+        }
+    }
+}
+
+/// A high-level scanner for detecting secrets in content.
+///
+/// The `Scanner` provides a clean API for scanning bytes, files, or blobs
+/// for secrets using compiled rules.
+///
+/// # Thread Safety
+///
+/// The `Scanner` is meant to be shared across threads using `Arc`.
+/// Vectorscan scanners are per-thread (`ScannerPool`); the `seen_blobs` dedup cache is shared.
+///
+/// # Examples
+///
+/// ```no_run
+/// use kingfisher_scanner::{Scanner, ScannerConfig, RulesDatabase};
+/// use std::sync::Arc;
+///
+/// // Assuming you have a compiled RulesDatabase
+/// // let rules_db = Arc::new(RulesDatabase::from_rules(rules)?);
+/// // let scanner = Scanner::new(rules_db);
+/// //
+/// // // Scan bytes
+/// // let findings = scanner.scan_bytes(b"api_key = 'secret123'");
+/// //
+/// // // Scan a file
+/// // let findings = scanner.scan_file("config.yml")?;
+/// ```
+pub struct Scanner {
+    rules_db: Arc<RulesDatabase>, // compiled rules, looked up by Vectorscan pattern id
+    scanner_pool: Arc<ScannerPool>, // per-thread Vectorscan scanners
+    config: ScannerConfig,
+    seen_blobs: BlobIdMap<bool>, // ids of blobs that produced findings (when dedup is on)
+}
+
+impl Scanner {
+    /// Creates a scanner that uses `ScannerConfig::default()`.
+    pub fn new(rules_db: Arc<RulesDatabase>) -> Self {
+        Scanner::with_config(rules_db, Default::default())
+    }
+
+    /// Creates a scanner from `rules_db` and `config`, with a fresh pool and an empty dedup map.
+    pub fn with_config(rules_db: Arc<RulesDatabase>, config: ScannerConfig) -> Self {
+        let pool = ScannerPool::new(Arc::new(rules_db.vectorscan_db().clone()));
+        Self { rules_db, scanner_pool: Arc::new(pool), config, seen_blobs: BlobIdMap::new() }
+    }
+
+    /// Scans a byte slice for secrets.
+    ///
+    /// The bytes are copied into a new `Blob`, which is then passed to `scan_blob`.
+    /// An error from `scan_blob` is discarded and reported as an empty result.
+    ///
+    /// # Examples
+    ///
+    /// ```no_run
+    /// # use kingfisher_scanner::Scanner;
+    /// # use std::sync::Arc;
+    /// # fn example(scanner: &Scanner) {
+    /// let content = b"password = 'super_secret_password_12345'";
+    /// let findings = scanner.scan_bytes(content);
+    /// for finding in findings {
+    ///     println!("Found {} at line {}", finding.rule_name, finding.line());
+    /// }
+    /// # }
+    /// ```
+    pub fn scan_bytes(&self, bytes: &[u8]) -> Vec<Finding> {
+        let owned = Blob::from_bytes(bytes.to_owned());
+        self.scan_blob(&owned).unwrap_or_else(|_| Vec::new())
+    }
+
+    /// Scans the file at `path` for secrets.
+    ///
+    /// The file is loaded with `Blob::from_file` and the result is handed to `scan_blob`.
+    /// Any memory mapping of large files would happen inside `Blob::from_file`, not here.
+    ///
+    /// # Errors
+    ///
+    /// Propagates any error from `Blob::from_file` or from `scan_blob`.
+    pub fn scan_file<P: AsRef<Path>>(&self, path: P) -> Result<Vec<Finding>> {
+        self.scan_blob(&Blob::from_file(path)?)
+    }
+
+    /// Scans a blob for secrets.
+    ///
+    /// Core scanning method: Vectorscan pre-filter, per-rule regex confirmation, an
+    /// entropy gate, then (if enabled) a Base64 pass. Every visible return path is `Ok`.
+    pub fn scan_blob(&self, blob: &Blob) -> Result<Vec<Finding>> {
+        // Dedup: a blob id already present in `seen_blobs` is skipped without scanning
+        if self.config.enable_dedup {
+            let blob_id = blob.id();
+            if self.seen_blobs.contains_key(&blob_id) {
+                return Ok(Vec::new());
+            }
+        }
+
+        let bytes = blob.bytes();
+        if bytes.is_empty() {
+            return Ok(Vec::new());
+        }
+
+        // Run Vectorscan to collect candidate matches as (pattern id, start, end)
+        let mut raw_matches = Vec::new();
+        self.scanner_pool.with(|scanner| {
+            let _ = scanner.scan(bytes, |rule_id, from, to, _flags| { // scan result is discarded
+                raw_matches.push((rule_id as usize, from as usize, to as usize));
+                vectorscan_rs::Scan::Continue
+            });
+        });
+
+        // Early exit if no matches (only when the Base64 pass, which needs no candidates, is off)
+        if raw_matches.is_empty() && !self.config.enable_base64_decoding {
+            return Ok(Vec::new());
+        }
+
+        // Create location mapping for line/column info
+        let loc_mapping = LocationMapping::new(bytes);
+
+        // Confirm each candidate with its rule's regex, walking candidates in reverse order
+        let mut findings = Vec::new();
+        let mut seen_matches: FxHashSet<u64> = FxHashSet::default();
+        let mut previous_spans: FxHashMap<usize, Vec<OffsetSpan>> = FxHashMap::default();
+
+        for (rule_id, start, end) in raw_matches.into_iter().rev() {
+            let rule = match self.rules_db.get_rule(rule_id) {
+                Some(r) => r,
+                None => continue,
+            };
+
+            let anchored_regex = match rule.syntax().as_regex() {
+                Ok(r) => r,
+                Err(_) => continue,
+            };
+
+            let current_span = OffsetSpan::from_range(start..end);
+
+            // Skip candidates already covered by a recorded span of the same rule
+            if !self.record_span(&mut previous_spans, rule_id, current_span) {
+                continue;
+            }
+
+            let haystack = &bytes[start..end]; // the regex sees only the candidate span
+
+            for captures in anchored_regex.captures_iter(haystack) {
+                let full_capture = match captures.get(0) {
+                    Some(c) => c,
+                    None => continue,
+                };
+
+                // Get the primary secret value
+                let secret_capture =
+                    self.get_secret_capture(&anchored_regex, &captures, full_capture);
+                let secret_bytes = secret_capture.as_bytes();
+
+                // Entropy gate: matches at or below the threshold are dropped
+                let min_entropy = self.config.min_entropy_override.unwrap_or(rule.min_entropy());
+                let entropy = calculate_shannon_entropy(secret_bytes);
+                if entropy <= min_entropy {
+                    debug!("Skipping low entropy match: {:.2} <= {:.2}", entropy, min_entropy);
+                    continue;
+                }
+
+                // Per-scan dedup key: secret bytes + rule id + absolute offsets in the blob
+                let match_key = self.compute_match_key(
+                    secret_bytes,
+                    rule.id().as_bytes(),
+                    start + secret_capture.start(),
+                    start + secret_capture.end(),
+                );
+                if !seen_matches.insert(match_key) {
+                    continue;
+                }
+
+                // Build the finding (capture offsets are relative to `haystack`, hence `start +`)
+                let offset_span = OffsetSpan::from_range(
+                    (start + secret_capture.start())..(start + secret_capture.end()),
+                );
+                let source_span = loc_mapping.get_source_span(&offset_span);
+
+                let secret = if self.config.redact_secrets {
+                    self.redact(secret_bytes)
+                } else {
+                    String::from_utf8_lossy(secret_bytes).to_string()
+                };
+
+                // Extract named captures
+                let mut capture_map = HashMap::new();
+                for name in anchored_regex.capture_names().flatten() {
+                    if let Some(cap) = captures.name(name) {
+                        let value = String::from_utf8_lossy(cap.as_bytes()).to_string();
+                        capture_map.insert(name.to_string(), value);
+                    }
+                }
+
+                let fingerprint = self.compute_fingerprint(
+                    &secret, // NOTE(review): already masked when `redact_secrets` is set
+                    &blob.id().to_string(),
+                    offset_span.start as u64,
+                    offset_span.end as u64,
+                );
+
+                findings.push(Finding {
+                    rule: rule.clone(),
+                    rule_id: rule.id().to_string(),
+                    rule_name: rule.name().to_string(),
+                    secret,
+                    location: FindingLocation::new(
+                        offset_span.start,
+                        offset_span.end,
+                        source_span.start.line,
+                        source_span.start.column,
+                        source_span.end.line,
+                        source_span.end.column,
+                    ),
+                    confidence: rule.confidence(),
+                    entropy,
+                    fingerprint,
+                    captures: capture_map,
+                    is_base64_encoded: false,
+                    blob_id: blob.id(),
+                });
+            }
+        }
+
+        // Scan Base64-encoded content (shares `seen_matches` with the pass above)
+        if self.config.enable_base64_decoding {
+            let b64_findings = self.scan_base64_content(blob, &loc_mapping, &mut seen_matches);
+            findings.extend(b64_findings);
+        }
+
+        // Record the blob for dedup; blobs that yielded no findings are not recorded
+        if self.config.enable_dedup && !findings.is_empty() {
+            self.seen_blobs.insert(blob.id(), true);
+        }
+
+        Ok(findings)
+    }
+
+    /// Intended to reset the deduplication state; currently does nothing.
+    ///
+    /// The body is empty, so blobs already recorded in `seen_blobs` stay
+    /// recorded. To rescan such content today, construct a new `Scanner`.
+    pub fn reset_dedup(&self) {
+        // TODO: not implemented. BlobIdMap is said to have no clear method, so nothing is reset.
+        // Either add a clear method to BlobIdMap or switch to a container that can be emptied.
+    }
+
+    /// Chooses which capture group of a match is reported as the secret.
+    ///
+    /// Priority, first hit wins:
+    /// 1. a group named `TOKEN` (ASCII case-insensitive) that participated in the match;
+    /// 2. the first named group that participated in the match;
+    /// 3. positional group 1;
+    /// 4. the full match (`full_capture`).
+    fn get_secret_capture<'a>(
+        &self,
+        regex: &Regex,
+        captures: &regex::bytes::Captures<'a>,
+        full_capture: regex::bytes::Match<'a>,
+    ) -> regex::bytes::Match<'a> {
+        // Yields (group index, group name) for every named group, in pattern
+        // order. Called twice because each priority level needs a fresh pass.
+        let named_groups = || {
+            regex.capture_names().enumerate().filter_map(|(idx, name)| name.map(|n| (idx, n)))
+        };
+
+        let token_group = named_groups()
+            .filter(|(_, name)| name.eq_ignore_ascii_case("TOKEN"))
+            .find_map(|(idx, _)| captures.get(idx));
+        if let Some(found) = token_group {
+            return found;
+        }
+
+        let first_named = named_groups().find_map(|(idx, _)| captures.get(idx));
+        if let Some(found) = first_named {
+            return found;
+        }
+
+        // No named group participated: positional group 1, else the whole match.
+        captures.get(1).unwrap_or(full_capture)
+    }
+
+    /// Records `span` for `rule_id`, keeping that rule's spans sorted by `start`.
+    ///
+    /// Returns `false` (span not recorded) when a neighbouring recorded span fully
+    /// contains it; recorded spans that `span` fully contains are dropped.
+    fn record_span(
+        &self,
+        map: &mut FxHashMap<usize, Vec<OffsetSpan>>,
+        rule_id: usize,
+        span: OffsetSpan,
+    ) -> bool {
+        let spans = map.entry(rule_id).or_default();
+        // Slot where `span` belongs in start order.
+        let mut idx = spans.binary_search_by(|s| s.start.cmp(&span.start)).unwrap_or_else(|i| i);
+
+        // Only the immediate neighbours are checked for containing the new span.
+        let covered_by_prev = idx > 0 && spans[idx - 1].fully_contains(&span);
+        let covered_by_next = idx < spans.len() && spans[idx].fully_contains(&span);
+        if covered_by_prev || covered_by_next {
+            return false;
+        }
+        // Drop the run of following spans that the new span swallows, in one pass.
+        let swallowed = spans[idx..].iter().take_while(|s| span.fully_contains(s)).count();
+        spans.drain(idx..idx + swallowed);
+        // Removing a swallowed predecessor shifts the insertion slot left by one.
+        if idx > 0 && span.fully_contains(&spans[idx - 1]) {
+            idx -= 1;
+            spans.remove(idx);
+        }
+        spans.insert(idx, span);
+        true
+    }
+
+    /// Hashes (content, rule id, start, end) with `FxHasher` into the key used to
+    /// drop repeated matches within one scan.
+    fn compute_match_key(&self, content: &[u8], rule_id: &[u8], start: usize, end: usize) -> u64 {
+        use std::hash::{Hash, Hasher};
+        // A tuple hashes its fields in order, so the hasher sees the same input sequence.
+        let mut state = rustc_hash::FxHasher::default();
+        (content, rule_id, start, end).hash(&mut state);
+        state.finish()
+    }
+
+    /// Computes the finding fingerprint: xxh3-64 over the bytes of `value`, then `blob_id`,
+    /// then `start` and `end` as little-endian `u64`s.
+    fn compute_fingerprint(&self, value: &str, blob_id: &str, start: u64, end: u64) -> u64 {
+        let (start_le, end_le) = (start.to_le_bytes(), end.to_le_bytes());
+        let pieces: [&[u8]; 4] = [value.as_bytes(), blob_id.as_bytes(), &start_le, &end_le];
+        let joined = pieces.concat();
+        xxh3_64(&joined)
+    }
+
+    /// Masks a secret: at most 8 bytes → all `*`; longer → its first 4 bytes + `...****`.
+    /// The prefix is shortened to a char boundary, where `&s[..4]` would have panicked.
+    fn redact(&self, bytes: &[u8]) -> String {
+        let s = String::from_utf8_lossy(bytes);
+        // Largest cut point <= 4 that is a char boundary (0 always qualifies).
+        let cut = (0..=4).rev().find(|&i| s.is_char_boundary(i)).unwrap_or(0);
+        if s.len() <= 8 { "*".repeat(s.len()) } else { format!("{}...****", &s[..cut]) }
+    }
+
+    fn scan_base64_content(
+        &self,
+        blob: &Blob,
+        loc_mapping: &LocationMapping,
+        seen_matches: &mut FxHashSet<u64>,
+    ) -> Vec<Finding> {
+        let mut findings = Vec::new();
+        let bytes = blob.bytes();
+
+        // Find Base64-encoded runs in the blob and decode them
+        let b64_items = self.find_base64_strings(bytes);
+
+        for item in b64_items {
+            // Run every rule's regex over the decoded bytes (there is no Vectorscan pre-filter here)
+            for (_rule_id, rule) in self.rules_db.rules().iter().enumerate() {
+                let regex = match rule.syntax().as_regex() {
+                    Ok(r) => r,
+                    Err(_) => continue,
+                };
+
+                for captures in regex.captures_iter(&item.decoded) {
+                    let full_capture = match captures.get(0) {
+                        Some(c) => c,
+                        None => continue,
+                    };
+
+                    let secret_capture = self.get_secret_capture(&regex, &captures, full_capture);
+                    let secret_bytes = secret_capture.as_bytes();
+
+                    let min_entropy =
+                        self.config.min_entropy_override.unwrap_or(rule.min_entropy());
+                    let entropy = calculate_shannon_entropy(secret_bytes);
+                    if entropy <= min_entropy { // same gate as the plain-text pass
+                        continue;
+                    }
+
+                    let match_key = self.compute_match_key(
+                        secret_bytes,
+                        rule.id().as_bytes(),
+                        item.pos_start, // keyed on the encoded run's span, not the decoded offset
+                        item.pos_end,
+                    );
+                    if !seen_matches.insert(match_key) {
+                        continue;
+                    }
+
+                    let offset_span = OffsetSpan::from_range(item.pos_start..item.pos_end); // location = encoded run
+                    let source_span = loc_mapping.get_source_span(&offset_span);
+
+                    let secret = if self.config.redact_secrets {
+                        self.redact(secret_bytes)
+                    } else {
+                        String::from_utf8_lossy(secret_bytes).to_string()
+                    };
+
+                    let mut capture_map = HashMap::new();
+                    for name in regex.capture_names().flatten() {
+                        if let Some(cap) = captures.name(name) {
+                            capture_map.insert(
+                                name.to_string(),
+                                String::from_utf8_lossy(cap.as_bytes()).to_string(),
+                            );
+                        }
+                    }
+
+                    let fingerprint = self.compute_fingerprint(
+                        &secret, // NOTE(review): already masked when `redact_secrets` is set
+                        &blob.id().to_string(),
+                        offset_span.start as u64,
+                        offset_span.end as u64,
+                    );
+
+                    findings.push(Finding {
+                        rule: rule.clone(),
+                        rule_id: rule.id().to_string(),
+                        rule_name: rule.name().to_string(),
+                        secret,
+                        location: FindingLocation::new(
+                            offset_span.start,
+                            offset_span.end,
+                            source_span.start.line,
+                            source_span.start.column,
+                            source_span.end.line,
+                            source_span.end.column,
+                        ),
+                        confidence: rule.confidence(),
+                        entropy,
+                        fingerprint,
+                        captures: capture_map,
+                        is_base64_encoded: true,
+                        blob_id: blob.id(),
+                    });
+                }
+            }
+        }
+
+        findings
+    }
+
+    fn find_base64_strings(&self, input: &[u8]) -> Vec<DecodedData> {
+        let mut results = Vec::new();
+        let mut i = 0;
+
+        while i < input.len() {
+            // Skip bytes outside the Base64 alphabet (this includes stray `=`)
+            while i < input.len() && !Self::is_base64_byte(input[i]) {
+                i += 1;
+            }
+            let start = i;
+
+            // Consume the run of Base64 alphabet bytes
+            while i < input.len() && Self::is_base64_byte(input[i]) {
+                i += 1;
+            }
+
+            // Accept up to two trailing `=` padding bytes as part of the run
+            let mut eq_count = 0;
+            while i < input.len() && input[i] == b'=' && eq_count < 2 {
+                i += 1;
+                eq_count += 1;
+            }
+            let end = i; // exclusive; includes any padding consumed above
+
+            let len = end - start;
+            if len >= 32 && len % 4 == 0 { // only runs of 32+ bytes with a multiple-of-4 length
+                let base64_slice = &input[start..end];
+
+                // Try the standard alphabet first, then URL-safe with and without padding
+                let decode_result = general_purpose::STANDARD
+                    .decode(base64_slice)
+                    .or_else(|_| general_purpose::URL_SAFE.decode(base64_slice))
+                    .or_else(|_| general_purpose::URL_SAFE_NO_PAD.decode(base64_slice));
+
+                if let Ok(decoded) = decode_result {
+                    if decoded.is_ascii() { // non-ASCII decodes are discarded
+                        results.push(DecodedData { decoded, pos_start: start, pos_end: end });
+                    }
+                }
+            }
+        }
+
+        results
+    }
+
+    #[inline]
+    fn is_base64_byte(b: u8) -> bool {
+        b.is_ascii_alphanumeric() || matches!(b, b'+' | b'/' | b'-' | b'_') // `=` is not included
+    }
+}
+
+struct DecodedData { // one decoded Base64 run produced by `find_base64_strings`
+    decoded: Vec<u8>, // decoded bytes (only ASCII results are kept)
+    pos_start: usize, // start offset of the encoded text in the scanned input
+    pos_end: usize, // end offset (exclusive) of the encoded text, padding included
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use kingfisher_rules::{Confidence, Rule, RuleSyntax};
+
+    /// Builds a scanner with one rule: `secret_` + 4 lowercase letters + 4 digits.
+    fn create_test_scanner() -> Scanner {
+        let syntax = RuleSyntax {
+            id: "test.secret".to_string(),
+            name: "Test Secret".to_string(),
+            pattern: r"secret_[a-z]{4}[0-9]{4}".to_string(),
+            min_entropy: 2.0,
+            confidence: Confidence::Medium,
+            visible: true,
+            examples: vec![],
+            negative_examples: vec![],
+            references: vec![],
+            validation: None,
+            depends_on_rule: vec![],
+            pattern_requirements: None,
+        };
+
+        let compiled = RulesDatabase::from_rules(vec![Rule::new(syntax)]).unwrap();
+        Scanner::new(Arc::new(compiled))
+    }
+
+    #[test]
+    fn test_scan_bytes_finds_secret() {
+        // The pattern has no capture groups, so the whole match is the secret.
+        let hits = create_test_scanner().scan_bytes(b"my secret_abcd1234 is here");
+        assert_eq!(hits.len(), 1);
+        assert_eq!(hits[0].secret, "secret_abcd1234");
+    }
+
+    #[test]
+    fn test_scan_bytes_no_match() {
+        let hits = create_test_scanner().scan_bytes(b"nothing secret here");
+        assert!(hits.is_empty());
+    }
+
+    #[test]
+    fn test_scan_bytes_multiple_matches() {
+        let input = b"first secret_aaaa1111 and second secret_bbbb2222";
+        let hits = create_test_scanner().scan_bytes(input);
+        assert_eq!(hits.len(), 2);
+    }
+}
diff --git a/crates/kingfisher-scanner/src/scanner_pool.rs b/crates/kingfisher-scanner/src/scanner_pool.rs
new file mode 100644
index 0000000..31f13d4
--- /dev/null
+++ b/crates/kingfisher-scanner/src/scanner_pool.rs
@@ -0,0 +1,50 @@
+//! Thread-local scanner pool for efficient multi-threaded scanning.
+
+use std::cell::UnsafeCell;
+use std::sync::Arc;
+
+use thread_local::ThreadLocal;
+use vectorscan_rs::{BlockDatabase, BlockScanner};
+
+/// A pool of Vectorscan block scanners; each thread gets its own instance to avoid contention.
+///
+/// `scanners` is declared before `db` so the scanners drop before the database they borrow.
+pub struct ScannerPool {
+    scanners: ThreadLocal<UnsafeCell<Option<BlockScanner<'static>>>>,
+    db: Arc<BlockDatabase>,
+}
+
+// SAFETY: `with` only reaches the calling thread's own cell. NOTE(review): these impls also let the pool, and with it every thread's scanner, be dropped on another thread — confirm `BlockScanner` allows that.
+unsafe impl Send for ScannerPool {}
+unsafe impl Sync for ScannerPool {}
+
+impl ScannerPool {
+    /// Wraps a compiled Vectorscan database; per-thread scanners are created lazily by `with`.
+    pub fn new(db: Arc<BlockDatabase>) -> Self {
+        Self { db, scanners: ThreadLocal::new() }
+    }
+
+    /// Runs `f` with the calling thread's scanner, creating that scanner on first use.
+    ///
+    /// No lock is taken during scanning. Panics if the scanner cannot be created.
+    /// Do not call `with` on the same pool from inside `f`: that would hand out a
+    /// second `&mut` to the same scanner.
+    pub fn with<F, R>(&self, f: F) -> R
+    where
+        F: FnOnce(&mut BlockScanner<'_>) -> R,
+    {
+        let slot = self.scanners.get_or(|| UnsafeCell::new(None)).get();
+
+        // SAFETY: `ThreadLocal` gives each thread its own cell, so no other thread can
+        // hold a reference into this slot.
+        let scanner = unsafe { &mut *slot }.get_or_insert_with(|| {
+            // SAFETY: the lifetime is extended to `'static`, relying on the `Arc` held by
+            // `self` keeping the database alive while the scanner stays inside this pool.
+            let db: &'static BlockDatabase =
+                unsafe { std::mem::transmute::<&BlockDatabase, &'static BlockDatabase>(&self.db) };
+            BlockScanner::new(db).expect("Failed to create BlockScanner")
+        });
+
+        f(scanner)
+    }
+}
diff --git a/crates/kingfisher-scanner/src/validation/aws.rs b/crates/kingfisher-scanner/src/validation/aws.rs
new file mode 100644
index 0000000..928d86a
--- /dev/null
+++ b/crates/kingfisher-scanner/src/validation/aws.rs
@@ -0,0 +1,358 @@
+//! AWS credential validation via STS GetCallerIdentity.
+//!
+//! This module provides functionality to validate AWS access keys by making
+//! an STS GetCallerIdentity call.
+
+use std::{collections::HashSet, sync::RwLock, time::Duration};
+
+use anyhow::{anyhow, Result};
+use aws_config::{retry::RetryConfig, BehaviorVersion, SdkConfig};
+use aws_credential_types::Credentials;
+use aws_sdk_sts::{
+    config::Builder as StsConfigBuilder, error::SdkError,
+    operation::get_caller_identity::GetCallerIdentityError, Client as StsClient,
+};
+use aws_smithy_http_client::{
+    proxy::ProxyConfig, tls, Builder as HttpClientBuilder, ConnectorBuilder,
+};
+use aws_smithy_runtime_api::{
+    box_error::BoxError,
+    client::{
+        http::SharedHttpClient,
+        interceptors::{context::BeforeTransmitInterceptorContextMut, Intercept},
+        runtime_components::RuntimeComponents,
+    },
+};
+use aws_smithy_types::config_bag::ConfigBag;
+use aws_types::region::Region;
+use base32::Alphabet;
+use byteorder::{BigEndian, ByteOrder};
+use http::{
+    header::{HeaderValue, USER_AGENT},
+    StatusCode,
+};
+use once_cell::sync::{Lazy, OnceCell};
+use rand::{rng, Rng};
+use regex::Regex;
+use tokio::{
+    sync::Semaphore,
+    time::{sleep, timeout},
+};
+
+use super::GLOBAL_USER_AGENT;
+
+static AWS_VALIDATION_SEMAPHORE: OnceCell<Semaphore> = OnceCell::new();
+
+/// Built-in list of known canary/honeypot AWS account IDs that should be skipped.
+const BUILTIN_SKIP_ACCOUNT_IDS: &[&str] = &[
+    "052310077262",
+    "171436882533",
+    "528757803018",
+    "534261010715",
+    "538784191382",
+    "595918472158",
+    "729780141977",
+    "893192397702",
+    "992382622183",
+];
+
+static AWS_SKIP_ACCOUNT_IDS: Lazy<RwLock<HashSet<String>>> = Lazy::new(|| {
+    // Seed the skip list with the built-in account IDs.
+    let builtin = BUILTIN_SKIP_ACCOUNT_IDS.iter().copied().map(String::from);
+    RwLock::new(builtin.collect())
+});
+
+/// Builds the HTTP client for AWS calls: rustls (aws-lc) TLS and proxy settings from the environment.
+fn build_http_client() -> SharedHttpClient {
+    HttpClientBuilder::new().build_with_connector_fn(|settings, runtime_components| {
+        let tls_provider = tls::Provider::Rustls(tls::rustls_provider::CryptoMode::AwsLc);
+        let mut connector = ConnectorBuilder::default().tls_provider(tls_provider);
+        connector.set_connector_settings(settings.cloned());
+        if let Some(components) = runtime_components {
+            connector.set_sleep_impl(components.sleep_impl());
+        }
+        connector.set_proxy_config(Some(ProxyConfig::from_env()));
+        connector.build()
+    })
+}
+
+/// Loads an SDK config pinned to `us-east-1` that uses `credentials`, the custom HTTP
+/// client, and adaptive retries capped at three attempts.
+async fn build_base_config(credentials: Credentials) -> SdkConfig {
+    let loader = aws_config::defaults(BehaviorVersion::latest())
+        .region(Region::new("us-east-1"))
+        .credentials_provider(credentials)
+        .http_client(build_http_client())
+        .retry_config(RetryConfig::adaptive().with_max_attempts(3));
+    loader.load().await
+}
+
+/// Extracts a 12-digit AWS account ID from `input` (a bare ID or text such as an ARN).
+/// Only a run of exactly 12 ASCII digits qualifies; longer digit runs are never truncated.
+fn extract_account_id(input: &str) -> Option<String> {
+    static ACCOUNT_ID_RE: Lazy<Regex> = Lazy::new(|| {
+        Regex::new(r"(?:^|[^0-9])([0-9]{12})(?:[^0-9]|$)").expect("valid regex")
+    });
+    let caps = ACCOUNT_ID_RE.captures(input.trim())?;
+    caps.get(1).map(|m| m.as_str().to_string())
+}
+
+/// Set the maximum number of concurrent AWS validations (at least 1, so `0` cannot stall them all). Call before first use.
+pub fn set_aws_validation_concurrency(max: usize) {
+    AWS_VALIDATION_SEMAPHORE.set(Semaphore::new(max.clamp(1, Semaphore::MAX_PERMITS))).ok();
+}
+
+fn aws_validation_semaphore() -> &'static Semaphore { // shared limiter for AWS validations
+    AWS_VALIDATION_SEMAPHORE.get_or_init(|| Semaphore::new(15)) // falls back to 15 permits when no limit was set
+}
+
+/// Configure the list of AWS account IDs to skip during validation.
+///
+/// The previous list is replaced: the built-in canary/honeypot account IDs are always
+/// re-added, followed by every entry of `ids` that yields a 12-digit account ID. Blank
+/// entries are ignored; other unusable entries are logged and dropped.
+pub fn set_aws_skip_account_ids<I, S>(ids: I)
+where
+    I: IntoIterator<Item = S>,
+    S: Into<String>,
+{
+    // A poisoned lock still guards usable data, so take the guard either way.
+    let mut skip_list = AWS_SKIP_ACCOUNT_IDS.write().unwrap_or_else(|p| p.into_inner());
+    skip_list.clear();
+    skip_list.extend(BUILTIN_SKIP_ACCOUNT_IDS.iter().map(|id| id.to_string()));
+
+    // Caller-supplied entries are added on top of the built-in ones.
+    for value in ids.into_iter().map(Into::<String>::into) {
+        if value.trim().is_empty() {
+            continue;
+        }
+        match extract_account_id(&value) {
+            Some(normalized) => {
+                skip_list.insert(normalized);
+            }
+            None => tracing::warn!("Ignoring invalid AWS account ID in skip list: {value}"),
+        }
+    }
+}
+
+/// Check if an AWS access key should be skipped based on the skip list.
+///
+/// Returns `Some(account_id)` when the account encoded in `access_key_id` is on the skip
+/// list, and `None` when it is not or no account can be derived from the key.
+pub fn should_skip_aws_validation(access_key_id: &str) -> Option<String> {
+    // Recover a poisoned lock, as `set_aws_skip_account_ids` does: returning `None` here
+    // would silently disable the skip list and let canary keys be validated.
+    let guard = AWS_SKIP_ACCOUNT_IDS.read().unwrap_or_else(|poisoned| poisoned.into_inner());
+    if guard.is_empty() {
+        return None;
+    }
+
+    // Keys that do not decode to an account number can never be on the list.
+    let account = aws_key_to_account_number(access_key_id).ok()?;
+    guard.contains(&account).then_some(account)
+}
+
+/// Interceptor that overwrites the outgoing `User-Agent` header with `GLOBAL_USER_AGENT`.
+#[derive(Debug)]
+struct UaInterceptor;
+
+impl Intercept for UaInterceptor {
+    fn name(&self) -> &'static str {
+        "ua"
+    }
+
+    fn modify_before_transmit(
+        &self,
+        context: &mut BeforeTransmitInterceptorContextMut<'_>,
+        _rc: &RuntimeComponents,
+        _cfg: &mut ConfigBag,
+    ) -> std::result::Result<(), BoxError> {
+        // Build the value first so a malformed user agent leaves the request untouched.
+        let user_agent = HeaderValue::from_str(GLOBAL_USER_AGENT.as_str())
+            .map_err(|e| format!("invalid USER_AGENT header: {e}"))?;
+        context.request_mut().headers_mut().insert(USER_AGENT, user_agent);
+        Ok(())
+    }
+}
+
+/// Cache key for one AWS key pair: `AWS:` + hex SHA-1 of the key ID, a NUL byte, and the secret.
+pub fn generate_aws_cache_key(aws_access_key_id: &str, aws_secret_access_key: &str) -> String {
+    use sha1::{Digest, Sha1};
+    let mut sha = Sha1::new();
+    for part in [aws_access_key_id.as_bytes(), &b"\0"[..], aws_secret_access_key.as_bytes()] {
+        sha.update(part);
+    }
+    format!("AWS:{:x}", sha.finalize())
+}
+
+/// Cheap format check run before any network validation of an AWS key pair.
+///
+/// The access key ID must be 20 ASCII-alphanumeric bytes starting with `AKIA`, and the
+/// secret key at least 40 bytes long. The error names the first check that failed.
+pub fn validate_aws_credentials_input(access_key_id: &str, secret_key: &str) -> Result<(), String> {
+    let id_shape_ok = access_key_id.len() == 20 && access_key_id.starts_with("AKIA");
+    let failure = if !id_shape_ok {
+        Some("Invalid AWS access key ID format")
+    } else if secret_key.len() < 40 {
+        Some("Invalid AWS secret key format")
+    } else if !access_key_id.bytes().all(|b| b.is_ascii_alphanumeric()) {
+        Some("AWS access key ID contains invalid characters")
+    } else {
+        None
+    };
+    failure.map_or(Ok(()), |msg| Err(msg.to_string()))
+}
+
+fn is_throttling_or_transient(e: &SdkError<GetCallerIdentityError>) -> bool {
+    match e {
+        SdkError::ServiceError(ctx) => {
+            let code = ctx.err().meta().code().unwrap_or_default();
+            let status: StatusCode = ctx.raw().status().into();
+            code.contains("Throttl")
+                || status == StatusCode::TOO_MANY_REQUESTS
+                || status == StatusCode::SERVICE_UNAVAILABLE
+        }
+        SdkError::DispatchFailure(df) => df.is_timeout() || df.is_io(),
+        SdkError::ResponseError(ctx) => {
+            let status: StatusCode = ctx.raw().status().into();
+            status == StatusCode::TOO_MANY_REQUESTS || status == StatusCode::SERVICE_UNAVAILABLE
+        }
+        _ => false,
+    }
+}
+
+/// Validate AWS credentials by calling STS GetCallerIdentity.
+///
+/// Returns `Ok((true, arn))` on success and `Ok((false, error))` for a non-transient failure; a transient failure or timeout on the last attempt returns `Err`.
+pub async fn validate_aws_credentials(
+    aws_access_key_id: &str,
+    aws_secret_access_key: &str,
+) -> Result<(bool, String)> {
+    let _permit = aws_validation_semaphore().acquire().await.expect("semaphore closed"); // held until return, bounding concurrent validations
+
+    // Wrap the key pair as static credentials (no session token, no expiry)
+    let credentials = Credentials::new(
+        aws_access_key_id,
+        aws_secret_access_key,
+        None,     // session token
+        None,     // expiry
+        "static", // provider name
+    );
+    let config = build_base_config(credentials).await;
+
+    // Build an STS client whose requests carry the shared User-Agent
+    let sts_config = StsConfigBuilder::from(&config).interceptor(UaInterceptor).build();
+    let sts_client = StsClient::from_conf(sts_config);
+
+    const MAX_ATTEMPTS: usize = 3;
+    const ATTEMPT_TIMEOUT: Duration = Duration::from_secs(5);
+
+    for attempt in 1..=MAX_ATTEMPTS {
+        let result = timeout(ATTEMPT_TIMEOUT, sts_client.get_caller_identity().send()).await; // outer `Err` means the attempt timed out
+        match result {
+            Ok(Ok(identity)) => {
+                let arn = identity.arn.unwrap_or_else(|| "Unknown".to_string());
+                return Ok((true, arn));
+            }
+            Ok(Err(e)) => {
+                if is_throttling_or_transient(&e) {
+                    if attempt == MAX_ATTEMPTS {
+                        return Err(anyhow!("AWS validation failed: {}", e)); // still transient on the last attempt
+                    }
+                } else {
+                    return Ok((false, e.to_string())); // non-transient: reported as invalid, not retried
+                }
+            }
+            Err(_) => {
+                if attempt == MAX_ATTEMPTS {
+                    return Err(anyhow!("AWS validation timed out"));
+                }
+            }
+        }
+        let max_delay = 100u64 * 2u64.pow((attempt - 1) as u32); // jitter ceiling in ms: 100 after attempt 1, 200 after attempt 2
+        let sleep_ms = rng().random_range(0..=max_delay);
+        sleep(Duration::from_millis(sleep_ms)).await;
+    }
+    Err(anyhow!("AWS validation failed")) // not reached: the last attempt always returns inside the loop
+}
+
+/// Converts an AWS Key ID to an AWS Account Number.
+///
+/// It assumes that the Key ID has a specific format and extracts the account
+/// number encoded within it.
+///
+/// Reference: <https://medium.com/@TalBeerySec/a-short-note-on-aws-key-id-f88cc4317489>
+///
+/// # Errors
+///
+/// Fails when the key is shorter than 5 bytes, is not ASCII, has `I` or `J` as its
+/// fifth character, or does not base32-decode to at least 6 bytes.
+pub fn aws_key_to_account_number(aws_key_id: &str) -> Result<String, Box<dyn std::error::Error>> {
+    if aws_key_id.len() < 5 {
+        return Err("AWSKeyID is too short".into());
+    }
+    // Key IDs are ASCII; rejecting anything else keeps the byte-index slicing below
+    // from panicking in the middle of a multi-byte character.
+    if !aws_key_id.is_ascii() {
+        return Err("AWSKeyID contains non-ASCII characters".into());
+    }
+
+    // No account number can be retrieved when the 5th character is 'I' or 'J'
+    if matches!(aws_key_id.as_bytes()[4], b'I' | b'J') {
+        let err_msg =
+            format!("Not possible to retrieve account number for {} keys", &aws_key_id[..5]);
+        return Err(err_msg.into());
+    }
+
+    // Drop the 4-character prefix and base32-decode the remainder
+    let decoded =
+        base32::decode(Alphabet::Rfc4648 { padding: false }, &aws_key_id[4..].to_uppercase())
+            .ok_or("Error decoding AWSKeyID")?;
+
+    if decoded.len() < 6 {
+        return Err("Decoded AWSKeyID is too short".into());
+    }
+
+    // Place the first 6 decoded bytes in the low 48 bits of a big-endian u64
+    let mut data = [0u8; 8];
+    data[2..8].copy_from_slice(&decoded[0..6]);
+    let z = BigEndian::read_u64(&data);
+
+    // The account number is bits 7..=46 of that value
+    const MASK: u64 = 0x7FFFFFFFFF80;
+    let account_num = (z & MASK) >> 7;
+
+    // Return the account number formatted as a 12-digit string
+    Ok(format!("{:012}", account_num))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_aws_key_to_account_number() {
+        // Known key ID / account pair carried over from the original implementation
+        let account = aws_key_to_account_number("AKIAXYZDQCEN4B6JSJQI")
+            .expect("well-formed key ID should decode");
+        assert_eq!(account, "534261010715");
+    }
+
+    #[test]
+    fn test_invalid_key_length() {
+        // Four characters is below the five-character minimum
+        assert!(aws_key_to_account_number("AKIA").is_err());
+    }
+
+    #[test]
+    fn test_validate_credentials_format() {
+        let good_id = "AKIAIOSFODNN7EXAMPLE";
+        let good_secret = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY";
+
+        // A well-formed pair passes; a bad ID or a short secret is rejected
+        assert!(validate_aws_credentials_input(good_id, good_secret).is_ok());
+        assert!(validate_aws_credentials_input("short", "secret").is_err());
+        assert!(validate_aws_credentials_input(good_id, "short").is_err());
+    }
+}
diff --git a/crates/kingfisher-scanner/src/validation/http_validation.rs b/crates/kingfisher-scanner/src/validation/http_validation.rs
new file mode 100644
index 0000000..1ddb959
--- /dev/null
+++ b/crates/kingfisher-scanner/src/validation/http_validation.rs
@@ -0,0 +1,455 @@
+//! HTTP-based credential validation.
+//!
+//! This module provides utilities for validating credentials via HTTP requests.
+
+use std::collections::BTreeMap;
+use std::future::Future;
+use std::str::FromStr;
+use std::time::Duration;
+
+use anyhow::{anyhow, Error, Result};
+use http::StatusCode;
+use liquid::Object;
+use quick_xml::de::from_str as xml_from_str;
+use reqwest::{
+    header,
+    header::{HeaderMap, HeaderName, HeaderValue},
+    Client, Method, RequestBuilder, Response, Url,
+};
+use serde::de::IgnoredAny;
+use sha1::{Digest, Sha1};
+use tokio::{net::lookup_host, time::sleep};
+use tracing::debug;
+
+use super::GLOBAL_USER_AGENT;
+use kingfisher_rules::ResponseMatcher;
+
+/// Build a deterministic cache key from the immutable parts of an HTTP request.
+///
+/// * `method`   – HTTP verb; upper-cased before hashing, so its case does not matter
+/// * `url`      – fully-qualified URL (any query string should already be present)
+/// * `headers`  – *logical* headers you intend to send (template-rendered)
+/// * `body`     – optional request body
+/// Returns `HTTP:` followed by the hex SHA-1 digest of those parts.
+pub fn generate_http_cache_key_parts(
+    method: &str,
+    url: &Url,
+    headers: &BTreeMap<String, String>,
+    body: Option<&str>,
+) -> String {
+    let mut sha = Sha1::new();
+
+    // Verb and URL, each terminated by a NUL byte
+    sha.update(method.to_uppercase().as_bytes());
+    sha.update(b"\0");
+    sha.update(url.as_str().as_bytes());
+    sha.update(b"\0");
+
+    // `BTreeMap` iterates in key order, so the headers are hashed in a stable order
+    headers.iter().for_each(|(name, value)| {
+        sha.update(name.as_bytes());
+        sha.update(b":");
+        sha.update(value.as_bytes());
+        sha.update(b"\0");
+    });
+
+    // A body, when present, is hashed after a `BODY` tag
+    if let Some(payload) = body {
+        sha.update(b"BODY\0");
+        sha.update(payload.as_bytes());
+        sha.update(b"\0");
+    }
+
+    format!("HTTP:{:x}", sha.finalize())
+}
+
+/// Parse an HTTP method from a string.
+pub fn parse_http_method(method_str: &str) -> Result<Method, String> {
+    Method::from_str(method_str).map_err(|_| format!("Invalid HTTP method: {}", method_str))
+}
+
+/// Build a reqwest `RequestBuilder`: default headers, overridden by the rendered custom `headers`, plus the Liquid-rendered `body` when one is given.
+pub fn build_request_builder(
+    client: &Client,
+    method_str: &str,
+    url: &Url,
+    headers: &BTreeMap<String, String>,
+    body: &Option<String>,
+    timeout: Duration,
+    parser: &liquid::Parser,
+    globals: &liquid::Object,
+) -> Result<RequestBuilder, String> {
+    let method = parse_http_method(method_str).map_err(|err_msg| {
+        debug!("{}", err_msg); // log the rejected method, then propagate the same message
+        err_msg
+    })?;
+    let mut request_builder = client.request(method, url.clone()).timeout(timeout);
+    let custom_headers = process_headers(headers, parser, globals, url)
+        .map_err(|e| format!("Error processing headers: {}", e))?;
+
+    // Default headers, sent unless a custom header of the same name replaces them
+    let user_agent = GLOBAL_USER_AGENT.as_str();
+    let standard_headers = [
+        (header::USER_AGENT, user_agent),
+        (
+            header::ACCEPT,
+            "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
+        ),
+        (header::ACCEPT_LANGUAGE, "en-US,en;q=0.5"),
+        (header::ACCEPT_ENCODING, "gzip, deflate, br"),
+        (header::CONNECTION, "keep-alive"),
+    ];
+
+    let mut combined_headers = HeaderMap::new();
+    for (name, value) in &standard_headers {
+        if let Ok(hv) = HeaderValue::from_str(value) { // a default that is not a valid header value is dropped
+            combined_headers.insert(name.clone(), hv);
+        }
+    }
+    for (name, value) in custom_headers.iter() {
+        combined_headers.insert(name.clone(), value.clone()); // inserted after the defaults, so custom values win
+    }
+    request_builder = request_builder.headers(combined_headers);
+
+    // The body is a Liquid template: parse it, then render it with `globals`
+    if let Some(body_template) = body {
+        let template = parser
+            .parse(body_template)
+            .map_err(|e| format!("Error parsing body template: {}", e))?;
+        let rendered_body = template
+            .render(globals)
+            .map_err(|e| format!("Error rendering body template: {}", e))?;
+        request_builder = request_builder.body(rendered_body);
+    }
+
+    Ok(request_builder)
+}
+
+/// Render each header value as a Liquid template and collect the valid results into a `HeaderMap`; entries that fail to parse, render, or validate are debug-logged and skipped, so this never returns `Err`.
+pub fn process_headers(
+    headers: &BTreeMap<String, String>,
+    parser: &liquid::Parser,
+    globals: &Object,
+    url: &Url,
+) -> Result<HeaderMap> {
+    let mut headers_map = HeaderMap::new();
+    for (key, value) in headers {
+        let template = match parser.parse(value) {
+            Ok(t) => t,
+            Err(e) => {
+                debug!("Error parsing Liquid template for '{}': {}", key, e);
+                continue; // skip this header, keep processing the rest
+            }
+        };
+
+        let header_value = match template.render(globals) {
+            Ok(s) => s,
+            Err(e) => {
+                debug!(
+                    "Failed to render header template. URL = <{}> | Key '{}': {}",
+                    url.as_str(),
+                    key,
+                    e
+                );
+                continue;
+            }
+        };
+
+        let cleaned_key = key.trim().replace(&['\n', '\r'][..], ""); // trim, then drop any CR/LF left inside
+        let cleaned_value = header_value.trim().replace(&['\n', '\r'][..], "");
+
+        let name = match HeaderName::from_str(&cleaned_key) {
+            Ok(n) => n,
+            Err(e) => {
+                debug!(
+                    "Invalid header name. URL = <{}> | Key '{}': {}",
+                    url.as_str(),
+                    cleaned_key,
+                    e
+                );
+                continue;
+            }
+        };
+
+        let value = match HeaderValue::from_str(&cleaned_value) {
+            Ok(v) => v,
+            Err(e) => {
+                debug!(
+                    "Invalid header value. URL = <{}> | Value '{}': {}",
+                    url.as_str(),
+                    cleaned_value,
+                    e
+                );
+                continue;
+            }
+        };
+        headers_map.insert(name, value);
+    }
+    Ok(headers_map)
+}
+
+/// Exponential-backoff retry helper: runs `operation` up to `max_retries + 1` times, sleeping
+/// `backoff_min * 2^n` (capped at `backoff_max`) before retry `n`; returns the first result
+/// `is_retryable` rejects, or a "Max retries reached" error when every attempt was retryable.
+async fn retry_with_backoff<F, Fut, T>(
+    mut operation: F,
+    is_retryable: impl Fn(&Result<T, Error>, usize) -> bool,
+    max_retries: usize,
+    backoff_min: Duration,
+    backoff_max: Duration,
+) -> Result<T, Error>
+where
+    F: FnMut() -> Fut,
+    Fut: Future<Output = Result<T, Error>>,
+{
+    for attempt in 0..=max_retries {
+        let result = operation().await;
+        if !is_retryable(&result, attempt) {
+            return result;
+        }
+        if attempt < max_retries {
+            // Saturate rather than overflow `2^n` when `max_retries` is 32 or more.
+            let factor = 2u32.saturating_pow(u32::try_from(attempt + 1).unwrap_or(u32::MAX));
+            sleep(backoff_min.saturating_mul(factor).min(backoff_max)).await;
+        }
+    }
+    Err(anyhow!("Max retries reached"))
+}
+
+/// Retry a multipart request with exponential backoff.
+///
+/// `build_request` is invoked afresh for every attempt and the resulting request is sent.
+/// Transport errors and 502/503/504 responses are retried; any other response is returned
+/// as soon as it arrives.
+///
+/// # Errors
+///
+/// Returns an error when every one of the `max_retries + 1` attempts ended in a
+/// retryable outcome.
+pub async fn retry_multipart_request<F, Fut>(
+    mut build_request: F,
+    max_retries: usize,
+    backoff_min: Duration,
+    backoff_max: Duration,
+) -> Result<Response, Error>
+where
+    F: FnMut() -> Fut,
+    Fut: Future<Output = RequestBuilder>,
+{
+    // One attempt: await a freshly built request, then send it.
+    let send_fresh = move || {
+        let pending_builder = build_request();
+        async move { pending_builder.await.send().await.map_err(Error::from) }
+    };
+
+    // Failed sends and gateway-style statuses are worth another attempt.
+    let should_retry = |outcome: &Result<Response, Error>, _attempt: usize| match outcome {
+        Err(_) => true,
+        Ok(resp) => matches!(
+            resp.status(),
+            StatusCode::BAD_GATEWAY
+                | StatusCode::SERVICE_UNAVAILABLE
+                | StatusCode::GATEWAY_TIMEOUT
+        ),
+    };
+
+    retry_with_backoff(send_fresh, should_retry, max_retries, backoff_min, backoff_max).await
+}
+
+/// Retry an HTTP request with exponential backoff (transport errors and 502/503/504 responses).
+/// A request whose body cannot be cloned (e.g. a stream) cannot be replayed, so it is sent
+/// exactly once instead of panicking.
+pub async fn retry_request(
+    request_builder: RequestBuilder,
+    max_retries: u32,
+    backoff_min: Duration,
+    backoff_max: Duration,
+) -> Result<Response, Error> {
+    if request_builder.try_clone().is_none() {
+        return request_builder.send().await.map_err(Error::from);
+    }
+
+    retry_with_backoff(
+        move || {
+            let rb = request_builder.try_clone().expect("request was verified to be clonable");
+            async move { rb.send().await.map_err(Error::from) }
+        },
+        |res: &Result<Response, Error>, _attempt| match res {
+            Err(_) => true,
+            Ok(resp) => matches!(
+                resp.status(),
+                StatusCode::BAD_GATEWAY
+                    | StatusCode::SERVICE_UNAVAILABLE
+                    | StatusCode::GATEWAY_TIMEOUT
+            ),
+        },
+        max_retries as usize,
+        backoff_min,
+        backoff_max,
+    )
+    .await
+}
+
+/// Return `true` when the body is very likely HTML.
+///
+/// Both signals are required: an HTML/XHTML `Content-Type` header, and a body whose first
+/// 1024 bytes (after leading whitespace) start with `<` and contain `<html`, ignoring case.
+fn body_looks_like_html(body: &str, headers: &HeaderMap) -> bool {
+    let header_says_html = match headers.get("content-type").and_then(|v| v.to_str().ok()) {
+        Some(raw) => {
+            let content_type = raw.to_ascii_lowercase();
+            content_type.contains("text/html") || content_type.contains("application/xhtml")
+        }
+        None => false,
+    };
+
+    // Cut the probe at the last char boundary at or below 1024 bytes so slicing cannot
+    // land inside a multi-byte character.
+    let cut = (0..=body.len().min(1024)).rev().find(|&i| body.is_char_boundary(i)).unwrap_or(0);
+    let probe = body[..cut].trim_start().to_ascii_lowercase();
+    let body_looks_htmlish = probe.starts_with('<') && probe.contains("<html");
+
+    // The header alone or the body alone is not enough.
+    header_says_html && body_looks_htmlish
+}
+
+/// Validate a response against every matcher (word, status, header, JSON, XML) and the HTML guard; all must pass, and a matcher kind with no entries passes by default.
+pub fn validate_response(
+    matchers: &[ResponseMatcher],
+    body: &str,
+    status: &StatusCode,
+    headers: &HeaderMap,
+    html_allowed: bool,
+) -> bool {
+    let word_ok = matchers // every word matcher must pass
+        .iter()
+        .filter_map(|m| {
+            if let ResponseMatcher::WordMatch { words, match_all_words, negative, .. } = m {
+                let raw = if *match_all_words {
+                    words.iter().all(|w| body.contains(w))
+                } else {
+                    words.iter().any(|w| body.contains(w))
+                };
+                Some(if *negative { !raw } else { raw }) // `negative` inverts the outcome
+            } else {
+                None
+            }
+        })
+        .all(|b| b);
+
+    let status_ok = matchers // every status matcher must pass
+        .iter()
+        .filter_map(|m| {
+            if let ResponseMatcher::StatusMatch {
+                status: expected,
+                match_all_status,
+                negative,
+                ..
+            } = m
+            {
+                let raw = if *match_all_status {
+                    expected.iter().all(|s| s.to_string() == status.as_str())
+                } else {
+                    expected.iter().any(|s| s.to_string() == status.as_str())
+                };
+                Some(if *negative { !raw } else { raw })
+            } else {
+                None
+            }
+        })
+        .all(|b| b);
+
+    let header_ok = matchers // every header matcher must pass
+        .iter()
+        .filter_map(|m| {
+            if let ResponseMatcher::HeaderMatch { header, expected, match_all_values, .. } = m {
+                let val = headers
+                    .get(header)
+                    .and_then(|v| v.to_str().ok())
+                    .unwrap_or_default() // a missing or non-text header is compared as ""
+                    .to_ascii_lowercase();
+                Some(if *match_all_values {
+                    expected.iter().all(|e| val.contains(&e.to_ascii_lowercase())) // case-insensitive substring match
+                } else {
+                    expected.iter().any(|e| val.contains(&e.to_ascii_lowercase()))
+                })
+            } else {
+                None
+            }
+        })
+        .all(|b| b);
+
+    let json_ok = matchers // body must parse as JSON when a JsonValid matcher is present
+        .iter()
+        .filter_map(|m| {
+            if matches!(m, ResponseMatcher::JsonValid { .. }) {
+                Some(serde_json::from_str::<serde_json::Value>(body).is_ok())
+            } else {
+                None
+            }
+        })
+        .all(|b| b);
+
+    let xml_ok = matchers // body must deserialize as XML when an XmlValid matcher is present
+        .iter()
+        .filter_map(|m| {
+            if matches!(m, ResponseMatcher::XmlValid { .. }) {
+                Some(xml_from_str::<IgnoredAny>(body).is_ok())
+            } else {
+                None
+            }
+        })
+        .all(|b| b);
+
+    let html_detected = body_looks_like_html(body, headers);
+    let html_ok = html_allowed || !html_detected; // an HTML-looking response fails unless HTML is allowed
+
+    word_ok && status_ok && header_ok && json_ok && xml_ok && html_ok
+}
+
+/// Checks that the URL's host resolves via DNS to at least one address.
+pub async fn check_url_resolvable(url: &Url) -> Result<(), Box<dyn std::error::Error>> {
+    let host = url.host_str().ok_or("No host in URL")?;
+    let default_port = if url.scheme() == "https" { 443 } else { 80 };
+    let mut addrs = lookup_host(format!("{}:{}", host, url.port().unwrap_or(default_port))).await?;
+    addrs.next().map(|_| ()).ok_or_else(|| "Failed to resolve URL".into())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_cache_key_includes_body() {
+        let url: Url = "https://example.com/api".parse().unwrap();
+        let mut headers = BTreeMap::new();
+        headers.insert("Content-Type".to_string(), "application/json".to_string());
+
+        let key_for =
+            |body: Option<&str>| generate_http_cache_key_parts("POST", &url, &headers, body);
+        let key_no_body = key_for(None);
+        let key_body_a = key_for(Some(r#"{"value": "abc"}"#));
+        let key_body_b = key_for(Some(r#"{"value": "xyz"}"#));
+
+        assert_ne!(key_no_body, key_body_a);
+        assert_ne!(key_no_body, key_body_b);
+        assert_ne!(key_body_a, key_body_b);
+    }
+
+    #[test]
+    fn test_validate_response_word_match() {
+        let word_matcher = ResponseMatcher::WordMatch {
+            r#type: String::from("word-match"),
+            words: vec![String::from("test")],
+            match_all_words: true,
+            negative: false,
+        };
+
+        // Plain-text body, no response headers, HTML not allowed
+        let (status, headers) = (StatusCode::OK, HeaderMap::new());
+        let matched = validate_response(&[word_matcher], "This is a test", &status, &headers, false);
+
+        // The body contains the only required word
+        assert!(matched);
+    }
+}
diff --git a/crates/kingfisher-scanner/src/validation/mod.rs b/crates/kingfisher-scanner/src/validation/mod.rs
new file mode 100644
index 0000000..856a7b6
--- /dev/null
+++ b/crates/kingfisher-scanner/src/validation/mod.rs
@@ -0,0 +1,131 @@
+//! Credential validation module for Kingfisher.
+//!
+//! This module provides functionality for validating detected secrets by checking
+//! if they are still active/valid. Validation is gated behind the `validation` feature.
+//!
+//! # Features
+//!
+//! Enable validation features in your `Cargo.toml`:
+//!
+//! ```toml
+//! [dependencies]
+//! kingfisher-scanner = { version = "0.1", features = ["validation"] }
+//! ```
+//!
+//! # Available Validators
+//!
+//! - **HTTP**: Generic HTTP-based validation via configurable requests
+//! - **AWS**: AWS credential validation via STS (requires `validation-aws` feature)
+//! - **GCP**: GCP service account validation (requires `validation-gcp` feature)
+//! - **Azure**: Azure Storage credential validation (requires `validation-azure` feature)
+//! - **Databases**: MongoDB, MySQL, Postgres, JDBC (requires `validation-database` feature)
+//! - **JWT**: JWT token validation (requires `validation-jwt` feature)
+
+mod utils;
+mod validation_body;
+
+#[cfg(feature = "validation-http")]
+mod http_validation;
+
+#[cfg(feature = "validation-aws")]
+pub mod aws;
+
+// Re-exports
+pub use utils::{find_closest_variable, process_captures};
+pub use validation_body::{as_str, clone_as_string, from_string, ValidationResponseBody};
+
+#[cfg(feature = "validation-http")]
+pub use http_validation::{
+    build_request_builder, check_url_resolvable, generate_http_cache_key_parts, parse_http_method,
+    process_headers, retry_multipart_request, retry_request, validate_response,
+};
+
+#[cfg(feature = "validation-aws")]
+pub use aws::{
+    aws_key_to_account_number, generate_aws_cache_key, set_aws_skip_account_ids,
+    set_aws_validation_concurrency, should_skip_aws_validation, validate_aws_credentials,
+    validate_aws_credentials_input,
+};
+
+use once_cell::sync::OnceCell;
+use std::time::{Duration, Instant};
+
+/// User agent string used for HTTP validation requests.
+#[cfg(feature = "validation-http")]
+pub static GLOBAL_USER_AGENT: once_cell::sync::Lazy<String> =
+    once_cell::sync::Lazy::new(build_user_agent);
+
+#[cfg(feature = "validation-http")]
+static USER_AGENT_SUFFIX: OnceCell<String> = OnceCell::new();
+
+#[cfg(feature = "validation-http")]
+const BROWSER_USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) \
+         AppleWebKit/537.36 (KHTML, like Gecko) \
+         Chrome/140.0.0.0 Safari/537.36";
+
+#[cfg(feature = "validation-http")]
+fn build_user_agent() -> String {
+    // `<package name>/<version>`, then the optional configured suffix, then the browser string.
+    let product = concat!(env!("CARGO_PKG_NAME"), "/", env!("CARGO_PKG_VERSION"));
+    match USER_AGENT_SUFFIX.get() {
+        Some(extra) => format!("{product} {extra} {BROWSER_USER_AGENT}"),
+        None => format!("{product} {BROWSER_USER_AGENT}"),
+    }
+}
+
+/// Set the suffix that follows the package name/version in the validation user-agent.
+///
+/// The suffix lands ahead of the browser portion of the string. `None`, empty and
+/// whitespace-only values are ignored, and only the first accepted value is kept. Call
+/// this once near program start, before [`GLOBAL_USER_AGENT`] is first accessed.
+#[cfg(feature = "validation-http")]
+pub fn set_user_agent_suffix<S: Into<String>>(suffix: Option<S>) {
+    let Some(raw) = suffix.map(Into::<String>::into) else {
+        return;
+    };
+    let cleaned = raw.trim();
+    if !cleaned.is_empty() {
+        let _ = USER_AGENT_SUFFIX.set(cleaned.to_string());
+    }
+}
+
+/// Cache duration for validation results (20 minutes).
+pub const VALIDATION_CACHE_SECONDS: u64 = 1200;
+
+/// A validation outcome stored together with the instant at which it was recorded.
+#[derive(Clone, Debug)]
+pub struct CachedResponse {
+    /// The response body from validation.
+    pub body: ValidationResponseBody,
+    /// The HTTP status code.
+    pub status: http::StatusCode,
+    /// Whether the credential was valid (unrelated to cache freshness).
+    pub is_valid: bool,
+    /// When this result was cached; [`CachedResponse::new`] sets it to `Instant::now()`.
+    pub timestamp: Instant,
+}
+
+impl CachedResponse {
+    /// Create a cached response stamped with the current instant.
+    pub fn new(body: ValidationResponseBody, status: http::StatusCode, is_valid: bool) -> Self {
+        Self { body, status, is_valid, timestamp: Instant::now() }
+    }
+
+    /// Return `true` while less than `cache_duration` has elapsed since `timestamp`.
+    pub fn is_still_valid(&self, cache_duration: Duration) -> bool {
+        self.timestamp.elapsed() < cache_duration
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn cached_response_expiry() {
+        let response = CachedResponse::new(from_string("test"), http::StatusCode::OK, true);
+        // A fresh entry fits a positive window; nothing is ever younger than a zero window.
+        assert!(response.is_still_valid(Duration::from_secs(1)));
+        assert!(!response.is_still_valid(Duration::ZERO));
+    }
+}
diff --git a/crates/kingfisher-scanner/src/validation/utils.rs b/crates/kingfisher-scanner/src/validation/utils.rs
new file mode 100644
index 0000000..7756047
--- /dev/null
+++ b/crates/kingfisher-scanner/src/validation/utils.rs
@@ -0,0 +1,159 @@
+//! Utility functions for validation.
+
+use crate::finding::SerializableCaptures;
+
+/// Flatten regex captures into `(NAME, value, start, end)` tuples.
+///
+/// * Every named capture is kept, with its name upper-cased
+/// * Only the **first** unnamed capture is kept, labelled "TOKEN"; later ones are dropped
+pub fn process_captures(captures: &SerializableCaptures) -> Vec<(String, String, usize, usize)> {
+    let mut token_taken = false;
+    let mut selected = Vec::new();
+
+    for cap in captures.captures.iter() {
+        let label = match &cap.name {
+            Some(name) => name.to_uppercase(),
+            // Ignore any additional unnamed captures (e.g., from unintended groups)
+            None if token_taken => continue,
+            None => {
+                token_taken = true;
+                "TOKEN".to_string()
+            }
+        };
+        selected.push((label, cap.raw_value().to_string(), cap.start, cap.end));
+    }
+
+    selected
+}
+
+/// Find the value of the `search_variable_name` capture closest to a target capture.
+///
+/// This is useful for finding related captures like an AWS access key ID
+/// that should be paired with a secret key.
+///
+/// Every capture named `target_variable_name` whose value equals `target_value` acts as
+/// an anchor. Candidates ending at or before an anchor are preferred, then candidates
+/// overlapping it, then candidates starting at or after its end.
+///
+/// # Arguments
+///
+/// * `captures` - `(name, value, start, end)` tuples, e.g. from [`process_captures`]
+/// * `target_value` - value of the capture to anchor on
+/// * `target_variable_name` - name of the capture to anchor on
+/// * `search_variable_name` - name of the capture whose value is returned
+///
+/// # Returns
+///
+/// The value of the best-placed candidate, or `None` when no anchor or candidate exists.
+pub fn find_closest_variable(
+    captures: &[(String, String, usize, usize)],
+    target_value: &str,
+    target_variable_name: &str,
+    search_variable_name: &str,
+) -> Option<String> {
+    /// Record `value` in `best` only when `distance` is strictly smaller than the current
+    /// best, so the first capture seen wins ties.
+    fn keep_closer<'a>(best: &mut Option<(usize, &'a str)>, distance: usize, value: &'a str) {
+        if !matches!(*best, Some((current, _)) if current <= distance) {
+            *best = Some((distance, value));
+        }
+    }
+
+    // Collect the positions of the target variable for the provided value so we can
+    // compare relative offsets with candidate variables.
+    let target_positions: Vec<(usize, usize)> = captures
+        .iter()
+        .filter(|(name, value, _, _)| name == target_variable_name && value == target_value)
+        .map(|(_, _, start, end)| (*start, *end))
+        .collect();
+
+    if target_positions.is_empty() {
+        return None;
+    }
+
+    // Prefer candidates that appear before the target value (same logical block), but
+    // fall back to overlapping values and then to those that appear after the target
+    // value when no better match exists. This avoids pairing with the next block when
+    // multiple credentials are close together in the same file.
+    //
+    // Candidates are tracked by reference; only the winning value is copied on return.
+    let mut best_before: Option<(usize, &str)> = None;
+    let mut best_overlap: Option<&str> = None;
+    let mut best_after: Option<(usize, &str)> = None;
+
+    for (target_start, target_end) in target_positions.iter().copied() {
+        for (name, value, start, end) in captures {
+            if name != search_variable_name {
+                continue;
+            }
+
+            if *end <= target_start {
+                // Candidate is before the target; choose the one closest to the target start.
+                keep_closer(&mut best_before, target_start - *end, value);
+            } else if *start >= target_end {
+                // Candidate is after the target; choose the one closest to the target end.
+                keep_closer(&mut best_after, *start - target_end, value);
+            } else if best_overlap.is_none() {
+                // Candidate overlaps the target – treat as an exact match. Every overlap has
+                // distance zero, so the first one found is kept.
+                best_overlap = Some(value.as_str());
+            }
+        }
+    }
+
+    // Priority order: before, then overlapping, then after.
+    best_before
+        .map(|(_, value)| value)
+        .or(best_overlap)
+        .or(best_after.map(|(_, value)| value))
+        .map(str::to_owned)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::finding::{SerializableCapture, SerializableCaptures};
+    use smallvec::smallvec;
+
+    #[test]
+    fn single_unnamed_capture_is_returned() {
+        let unnamed = SerializableCapture {
+            name: None,
+            match_number: 0,
+            start: 1,
+            end: 4,
+            value: crate::finding::intern("abc"),
+        };
+        let captures = SerializableCaptures { captures: smallvec![unnamed] };
+
+        let expected = vec![("TOKEN".to_string(), "abc".to_string(), 1usize, 4usize)];
+        assert_eq!(process_captures(&captures), expected);
+    }
+
+    #[test]
+    fn prefers_closest_preceding_variable() {
+        // One AKID on each side of the TOKEN; the preceding one must be chosen.
+        let captures = vec![
+            ("TOKEN".to_string(), "secret".to_string(), 75usize, 115usize),
+            ("AKID".to_string(), "preceding".to_string(), 30usize, 50usize),
+            ("AKID".to_string(), "following".to_string(), 180usize, 200usize),
+        ];
+        let target = "secret".to_string();
+
+        let paired = find_closest_variable(&captures, &target, "TOKEN", "AKID");
+        assert_eq!(paired.as_deref(), Some("preceding"));
+    }
+
+    #[test]
+    fn falls_back_to_following_when_no_preceding() {
+        // The only AKID starts after the TOKEN ends.
+        let captures = vec![
+            ("TOKEN".to_string(), "secret".to_string(), 10usize, 50usize),
+            ("AKID".to_string(), "after".to_string(), 60usize, 80usize),
+        ];
+        let target = "secret".to_string();
+
+        let paired = find_closest_variable(&captures, &target, "TOKEN", "AKID");
+        assert_eq!(paired.as_deref(), Some("after"));
+    }
+}
diff --git a/crates/kingfisher-scanner/src/validation/validation_body.rs b/crates/kingfisher-scanner/src/validation/validation_body.rs
new file mode 100644
index 0000000..1f03573
--- /dev/null
+++ b/crates/kingfisher-scanner/src/validation/validation_body.rs
@@ -0,0 +1,80 @@
+//! Storage and serialization for validation response bodies.
+
+#![allow(dead_code)] // Public API for serde attributes in downstream crates
+
+use schemars::{gen::SchemaGenerator, schema::Schema, JsonSchema};
+use serde::{Deserialize, Deserializer, Serializer};
+use std::borrow::Cow;
+
+/// Storage for validation response payloads.
+/// `None` avoids heap allocation when validation is disabled or produces no body.
+pub type ValidationResponseBody = Option<Box<str>>;
+
+/// Build a `ValidationResponseBody` from anything convertible into a `String`.
+///
+/// An empty string yields `None`; any other input is stored as a boxed `str`.
+#[inline]
+pub fn from_string(body: impl Into<String>) -> ValidationResponseBody {
+    let owned: String = body.into();
+    // Empty bodies are represented as `None` rather than as an empty boxed string.
+    let has_content = !owned.is_empty();
+    has_content.then(|| owned.into_boxed_str())
+}
+
+/// Borrow the response body as a string slice; `None` reads as the empty string.
+#[inline]
+pub fn as_str(body: &ValidationResponseBody) -> &str {
+    body.as_ref().map_or("", |text| &**text)
+}
+
+/// Copy the response body into an owned `String` (empty when there is no body).
+#[inline]
+pub fn clone_as_string(body: &ValidationResponseBody) -> String {
+    String::from(as_str(body))
+}
+
+/// Serialize a ValidationResponseBody as a plain string (`None` is written as `""`).
+pub fn serialize<S: Serializer>(
+    body: &ValidationResponseBody,
+    serializer: S,
+) -> Result<S::Ok, S::Error> {
+    serializer.serialize_str(body.as_deref().unwrap_or(""))
+}
+
+/// Deserialize a ValidationResponseBody from a string (`""` is read back as `None`).
+pub fn deserialize<'de, D>(deserializer: D) -> Result<ValidationResponseBody, D::Error>
+where
+    D: Deserializer<'de>,
+{
+    let text = <Cow<'de, str> as Deserialize<'de>>::deserialize(deserializer)?;
+    Ok(from_string(text))
+}
+
+/// Describe a ValidationResponseBody in JSON schema using the schema of `String`.
+pub fn schema(gen: &mut SchemaGenerator) -> Schema {
+    <String as JsonSchema>::json_schema(gen)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn empty_string_returns_none() {
+        let empty: ValidationResponseBody = from_string("");
+        assert_eq!(empty, None);
+    }
+
+    #[test]
+    fn non_empty_string_returns_some() {
+        let stored = from_string("test");
+        assert_eq!(stored.as_deref(), Some("test"));
+        assert_eq!(as_str(&stored), "test");
+    }
+
+    #[test]
+    fn clone_as_string_works() {
+        let stored = from_string("hello");
+        assert_eq!(clone_as_string(&stored), String::from("hello"));
+    }
+}
diff --git a/docs/LIBRARY.md b/docs/LIBRARY.md
new file mode 100644
index 0000000..e8c0c30
--- /dev/null
+++ b/docs/LIBRARY.md
@@ -0,0 +1,642 @@
+# Kingfisher Library Crates
+
+Kingfisher's functionality is available as a set of Rust library crates that can be embedded into other applications. This guide covers how to use these crates for secret scanning in your own Rust projects.
+
+## Crate Overview
+
+| Crate | Description |
+|-------|-------------|
+| `kingfisher-core` | Core types: `Blob`, `BlobId`, `Location`, `Origin`, entropy calculation |
+| `kingfisher-rules` | Rule definitions, YAML parsing, compiled rule database, builtin rules |
+| `kingfisher-scanner` | High-level scanning API with `Scanner` and `Finding` types |
+
+### Optional Features
+
+The `kingfisher-scanner` crate supports optional validation features:
+
+| Feature | Description |
+|---------|-------------|
+| `validation` | Core validation support (includes HTTP validation) |
+| `validation-http` | HTTP-based validation for API tokens |
+| `validation-aws` | AWS credential validation via STS GetCallerIdentity |
+| `validation-all` | Enable all validation features |
+
+## Quick Start
+
+Add the crates to your `Cargo.toml`:
+
+```toml
+[dependencies]
+kingfisher-core = { git = "https://github.com/mongodb/kingfisher" }
+kingfisher-rules = { git = "https://github.com/mongodb/kingfisher" }
+kingfisher-scanner = { git = "https://github.com/mongodb/kingfisher" }
+```
+
+### Basic File Scanning
+
+```rust
+use std::sync::Arc;
+use kingfisher_core::Blob;
+use kingfisher_rules::{get_builtin_rules, RulesDatabase, Rule};
+use kingfisher_scanner::Scanner;
+
+fn main() -> anyhow::Result<()> {
+    // 1. Load the builtin rules
+    let rules = get_builtin_rules(None)?;
+    
+    // 2. Convert to Rule objects and compile into a database
+    let rule_vec: Vec<Rule> = rules.iter_rules()
+        .map(|syntax| Rule::new(syntax.clone()))
+        .collect();
+    let rules_db = Arc::new(RulesDatabase::from_rules(rule_vec)?);
+    
+    // 3. Create a scanner
+    let scanner = Scanner::new(rules_db);
+    
+    // 4. Scan a file
+    let findings = scanner.scan_file("path/to/file.txt")?;
+    
+    for finding in findings {
+        println!("Found {} at line {}", 
+            finding.rule_name, 
+            finding.location.start_line);
+    }
+    
+    Ok(())
+}
+```
+
+### Scanning In-Memory Content
+
+```rust
+use std::sync::Arc;
+use kingfisher_rules::{get_builtin_rules, RulesDatabase, Rule};
+use kingfisher_scanner::Scanner;
+
+fn scan_content(content: &[u8]) -> anyhow::Result<()> {
+    let rules = get_builtin_rules(None)?;
+    let rule_vec: Vec<Rule> = rules.iter_rules()
+        .map(|syntax| Rule::new(syntax.clone()))
+        .collect();
+    let rules_db = Arc::new(RulesDatabase::from_rules(rule_vec)?);
+    
+    let scanner = Scanner::new(rules_db);
+    
+    // Scan bytes directly - no file I/O needed
+    let findings = scanner.scan_bytes(content);
+    
+    for finding in &findings {
+        println!("Secret: {} ({})", finding.rule_name, finding.confidence);
+    }
+    
+    Ok(())
+}
+```
+
+---
+
+## kingfisher-core
+
+Core types and utilities for working with scannable content.
+
+### Blob - Content Abstraction
+
+`Blob` represents content that can be scanned. It supports:
+- **File-backed content** with memory mapping for large files
+- **In-memory content** for programmatic use
+- **Borrowed content** for zero-copy scanning
+
+```rust
+use kingfisher_core::{Blob, BlobId};
+
+// From a file (memory-mapped for efficiency)
+let blob = Blob::from_file("secret.txt")?;
+
+// From owned bytes
+let blob = Blob::from_bytes(vec![0x41, 0x42, 0x43]);
+
+// Access the content
+let bytes: &[u8] = blob.bytes();
+let id: BlobId = blob.id();  // SHA-1 based identifier
+```
+
+### BlobId - Content Identity
+
+`BlobId` provides a unique identifier for content, computed using a SHA-1 hash (compatible with Git's blob IDs):
+
+```rust
+use kingfisher_core::BlobId;
+
+let id = BlobId::new(b"hello world");
+println!("Blob ID: {}", id.hex());  // 40-character hex string
+
+// Parse from hex
+let id = BlobId::from_hex("2aae6c35c94fcfb415dbe95f408b9ce91ee846ed")?;
+```
+
+### Location - Source Positions
+
+Track positions within scanned content:
+
+```rust
+use kingfisher_core::{LocationMapping, SourceSpan};
+
+let content = b"line1\nline2\nline3";
+let mapping = LocationMapping::new(content);
+
+// Convert byte offset to line/column
+let point = mapping.get_source_point(7);  // Returns (line: 2, column: 2)
+
+// Get a span
+let span = mapping.get_source_span(6..11);  // "line2"
+```
+
+### Entropy Calculation
+
+Calculate Shannon entropy to filter high-randomness content:
+
+```rust
+use kingfisher_core::calculate_shannon_entropy;
+
+let entropy = calculate_shannon_entropy(b"AKIAIOSFODNN7EXAMPLE");
+println!("Entropy: {:.2} bits", entropy);  // ~4.0 for random-looking strings
+```
+
+### Origin - Provenance Tracking
+
+Track where content came from:
+
+```rust
+use kingfisher_core::{Origin, FileOrigin, GitRepoOrigin};
+use std::path::PathBuf;
+
+// File origin
+let origin = Origin::File(FileOrigin {
+    path: PathBuf::from("/path/to/file.txt"),
+});
+
+// Git repository origin
+let origin = Origin::GitRepo(GitRepoOrigin {
+    repo_path: PathBuf::from("/path/to/repo"),
+    remote_url: Some("https://github.com/org/repo".into()),
+});
+```
+
+---
+
+## kingfisher-rules
+
+Rule definitions, YAML parsing, and the compiled rule database.
+
+### Loading Builtin Rules
+
+Kingfisher comes with 400+ builtin rules for common secret types:
+
+```rust
+use kingfisher_rules::{get_builtin_rules, Confidence};
+
+// Load all rules with Medium confidence or higher (default)
+let rules = get_builtin_rules(None)?;
+
+// Load only High confidence rules
+let rules = get_builtin_rules(Some(Confidence::High))?;
+
+println!("Loaded {} rules", rules.num_rules());
+```
+
+### Loading Custom Rules
+
+Load rules from YAML files or directories:
+
+```rust
+use kingfisher_rules::{Rules, Confidence};
+
+// From a single file
+let rules = Rules::from_paths(&["my-rules.yml"], Confidence::Medium)?;
+
+// From a directory (recursively finds .yml files)
+let rules = Rules::from_paths(&["rules/"], Confidence::Medium)?;
+
+// Merge multiple sources
+let mut rules = Rules::new();
+rules.update(Rules::from_paths(&["builtin/"], Confidence::Medium)?);
+rules.update(Rules::from_paths(&["custom/"], Confidence::Medium)?);
+```
+
+### Rule Syntax YAML Format
+
+```yaml
+rules:
+  - name: My Custom API Key
+    id: custom.myapi.1
+    pattern: |
+      (?i)
+      myapi[_-]?key\s*[:=]\s*
+      ["']?([A-Za-z0-9]{32})["']?
+    min_entropy: 3.5
+    confidence: high
+    examples:
+      - 'MYAPI_KEY=abc123def456ghi789jkl012mno345pq'
+    validation:
+      type: Http
+      content:
+        request:
+          method: GET
+          url: https://api.example.com/validate
+          headers:
+            Authorization: Bearer {{ TOKEN }}
+          response_matcher:
+            - type: StatusMatch
+              status: [200]
+```
+
+### Compiling Rules
+
+The `RulesDatabase` compiles rules for efficient multi-pattern matching:
+
+```rust
+use std::sync::Arc;
+use kingfisher_rules::{get_builtin_rules, RulesDatabase, Rule};
+
+let rules = get_builtin_rules(None)?;
+
+// Convert RuleSyntax to Rule objects
+let rule_vec: Vec<Rule> = rules.iter_rules()
+    .map(|syntax| Rule::new(syntax.clone()))
+    .collect();
+
+// Compile into a database (uses Vectorscan for fast matching)
+let db = Arc::new(RulesDatabase::from_rules(rule_vec)?);
+
+// Access compiled rules
+println!("Compiled {} rules", db.num_rules());
+
+// Look up rules by ID
+if let Some(rule) = db.get_rule_by_text_id("kingfisher.aws.1") {
+    println!("Found rule: {}", rule.name());
+}
+```
+
+### Confidence Levels
+
+Rules have confidence levels indicating detection accuracy:
+
+```rust
+use kingfisher_rules::Confidence;
+
+// Available levels (in order)
+// Confidence::Low    - May have false positives
+// Confidence::Medium - Balanced (default)
+// Confidence::High   - High accuracy
+
+let conf = Confidence::High;
+if conf.is_at_least(&Confidence::Medium) {
+    println!("Confidence is medium or higher");
+}
+```
+
+### Liquid Filters for Validation
+
+The crate includes Liquid template filters for HTTP validation:
+
+```rust
+use kingfisher_rules::register_liquid_filters;
+use liquid::ParserBuilder;
+
+let parser = register_liquid_filters(ParserBuilder::with_stdlib())
+    .build()?;
+
+let template = parser.parse("{{ secret | sha256 }}")?;
+```
+
+Available filters:
+- **Encoding**: `b64enc`, `b64dec`, `b64url_enc`, `url_encode`, `json_escape`
+- **Hashing**: `sha256`, `crc32`, `crc32_dec`, `crc32_hex`
+- **HMAC**: `hmac_sha256`, `hmac_sha384`, `hmac_sha1`
+- **Base-N**: `base62`, `base36`
+- **Strings**: `prefix`, `suffix`, `replace`, `lstrip_chars`, `random_string`
+- **Time**: `unix_timestamp`, `iso_timestamp`, `iso_timestamp_no_frac`
+- **Other**: `uuid`, `jwt_header`
+
+---
+
+## kingfisher-scanner
+
+High-level scanning API that combines core types and rules.
+
+### Scanner Configuration
+
+```rust
+use std::sync::Arc;
+use kingfisher_rules::{get_builtin_rules, RulesDatabase, Rule};
+use kingfisher_scanner::{Scanner, ScannerConfig};
+
+let rules = get_builtin_rules(None)?;
+let rule_vec: Vec<Rule> = rules.iter_rules()
+    .map(|syntax| Rule::new(syntax.clone()))
+    .collect();
+let rules_db = Arc::new(RulesDatabase::from_rules(rule_vec)?);
+
+// Default configuration
+let scanner = Scanner::new(Arc::clone(&rules_db));
+
+// Custom configuration
+let config = ScannerConfig {
+    enable_base64_decoding: true,   // Decode and scan base64 content
+    enable_dedup: true,             // Skip duplicate blobs
+    min_entropy_override: Some(3.0), // Override minimum entropy
+    redact_secrets: false,          // Don't redact in findings
+    max_base64_depth: 2,            // Max nested base64 decoding
+};
+let scanner = Scanner::with_config(Arc::clone(&rules_db), config);
+```
+
+### Scanning Methods
+
+```rust
+// Scan raw bytes
+let findings = scanner.scan_bytes(b"AWS_SECRET_KEY=AKIAIOSFODNN7EXAMPLE");
+
+// Scan a file
+let findings = scanner.scan_file("config.yml")?;
+
+// Scan a Blob
+use kingfisher_core::Blob;
+let blob = Blob::from_file("secrets.env")?;
+let findings = scanner.scan_blob(&blob)?;
+```
+
+### Working with Findings
+
+```rust
+use kingfisher_scanner::Finding;
+
+for finding in findings {
+    println!("Rule: {} ({})", finding.rule_name, finding.rule_id);
+    println!("Secret: {}", finding.secret);
+    println!("Location: line {} col {} - line {} col {}",
+        finding.location.start_line,
+        finding.location.start_column,
+        finding.location.end_line,
+        finding.location.end_column);
+    println!("Entropy: {:.2}", finding.entropy);
+    println!("Confidence: {:?}", finding.confidence);
+    println!("Fingerprint: {}", finding.fingerprint);
+    
+    // Named captures from the regex
+    for capture in &finding.captures {
+        println!("  {}: {}", capture.name, capture.value);
+    }
+}
+```
+
+### Parallel Scanning
+
+The scanner is thread-safe and uses a thread-local scanner pool:
+
+```rust
+use std::sync::Arc;
+use rayon::prelude::*;
+
+let scanner = Arc::new(Scanner::new(rules_db));
+
+let files = vec!["file1.txt", "file2.txt", "file3.txt"];
+
+let all_findings: Vec<_> = files.par_iter()
+    .flat_map(|file| {
+        scanner.scan_file(file).unwrap_or_default()
+    })
+    .collect();
+```
+
+---
+
+## Complete Example
+
+Here's a complete example that scans a directory for secrets:
+
+```rust
+use std::sync::Arc;
+use std::path::Path;
+use walkdir::WalkDir;
+use kingfisher_core::Blob;
+use kingfisher_rules::{get_builtin_rules, RulesDatabase, Rule, Confidence};
+use kingfisher_scanner::{Scanner, ScannerConfig};
+
+fn main() -> anyhow::Result<()> {
+    // Load high-confidence rules only
+    let rules = get_builtin_rules(Some(Confidence::High))?;
+    println!("Loaded {} high-confidence rules", rules.num_rules());
+    
+    // Compile rules
+    let rule_vec: Vec<Rule> = rules.iter_rules()
+        .map(|syntax| Rule::new(syntax.clone()))
+        .collect();
+    let rules_db = Arc::new(RulesDatabase::from_rules(rule_vec)?);
+    
+    // Configure scanner
+    let config = ScannerConfig {
+        enable_base64_decoding: true,
+        enable_dedup: true,
+        redact_secrets: true,  // Redact secrets in output
+        ..Default::default()
+    };
+    let scanner = Scanner::with_config(rules_db, config);
+    
+    // Scan directory
+    let dir = Path::new("./src");
+    let mut total_findings = 0;
+    
+    for entry in WalkDir::new(dir)
+        .into_iter()
+        .filter_map(|e| e.ok())
+        .filter(|e| e.file_type().is_file())
+    {
+        let path = entry.path();
+        
+        match scanner.scan_file(path) {
+            Ok(findings) if !findings.is_empty() => {
+                println!("\n{}", path.display());
+                for finding in &findings {
+                    println!("  [{}] {} at line {}",
+                        finding.rule_id,
+                        finding.rule_name,
+                        finding.location.start_line);
+                }
+                total_findings += findings.len();
+            }
+            Err(e) => eprintln!("Error scanning {}: {}", path.display(), e),
+            _ => {}
+        }
+    }
+    
+    println!("\nTotal findings: {}", total_findings);
+    Ok(())
+}
+```
+
+---
+
+## Credential Validation (Optional)
+
+The `kingfisher-scanner` crate includes optional credential validation support. This allows you to check if detected secrets are still active/valid.
+
+### Enabling Validation
+
+Add the validation feature to your `Cargo.toml`:
+
+```toml
+[dependencies]
+kingfisher-scanner = { git = "https://github.com/mongodb/kingfisher", features = ["validation"] }
+```
+
+### Available Features
+
+| Feature | Description |
+|---------|-------------|
+| `validation` | Core validation support with HTTP validation |
+| `validation-http` | HTTP-based validation for API tokens |
+| `validation-aws` | AWS credential validation via STS |
+| `validation-all` | Enable all validation features |
+
+### HTTP Validation Example
+
+```rust
+use kingfisher_scanner::validation::{
+    build_request_builder, validate_response, CachedResponse,
+    from_string, GLOBAL_USER_AGENT,
+};
+use kingfisher_rules::ResponseMatcher;
+use reqwest::Client;
+use std::collections::BTreeMap;
+use std::time::Duration;
+
+async fn validate_api_token(token: &str) -> bool {
+    let client = Client::builder()
+        .timeout(Duration::from_secs(10))
+        .build()
+        .unwrap();
+    
+    let parser = liquid::ParserBuilder::with_stdlib().build().unwrap();
+    let mut globals = liquid::Object::new();
+    globals.insert("TOKEN".into(), liquid::model::Value::scalar(token.to_string()));
+    
+    let url = reqwest::Url::parse("https://api.example.com/validate").unwrap();
+    let mut headers = BTreeMap::new();
+    headers.insert("Authorization".to_string(), "Bearer {{ TOKEN }}".to_string());
+    
+    let request = build_request_builder(
+        &client,
+        "GET",
+        &url,
+        &headers,
+        &None,
+        Duration::from_secs(10),
+        &parser,
+        &globals,
+    ).unwrap();
+    
+    match request.send().await {
+        Ok(resp) => {
+            // `text()` consumes the response, so copy the status and headers out first.
+            let (status, headers) = (resp.status(), resp.headers().clone());
+            let body = resp.text().await.unwrap_or_default();
+            // Define matchers for valid response
+            let matchers = vec![
+                ResponseMatcher::StatusMatch {
+                    r#type: "status-match".to_string(),
+                    status: vec![200],
+                    match_all_status: false,
+                    negative: false,
+                },
+            ];
+            
+            validate_response(&matchers, &body, &status, &headers, false)
+        }
+        Err(_) => false,
+    }
+}
+```
+
+### AWS Credential Validation
+
+Enable the `validation-aws` feature to validate AWS credentials:
+
+```toml
+[dependencies]
+kingfisher-scanner = { git = "https://github.com/mongodb/kingfisher", features = ["validation-aws"] }
+```
+
+```rust
+use kingfisher_scanner::validation::{
+    validate_aws_credentials, validate_aws_credentials_input,
+    aws_key_to_account_number, set_aws_skip_account_ids,
+};
+
+async fn check_aws_key(access_key_id: &str, secret_key: &str) {
+    // Validate format first
+    if let Err(e) = validate_aws_credentials_input(access_key_id, secret_key) {
+        println!("Invalid format: {}", e);
+        return;
+    }
+    
+    // Extract account number from the key
+    if let Ok(account) = aws_key_to_account_number(access_key_id) {
+        println!("AWS Account: {}", account);
+    }
+    
+    // Validate credentials via STS
+    match validate_aws_credentials(access_key_id, secret_key).await {
+        Ok((true, arn)) => println!("Valid! ARN: {}", arn),
+        Ok((false, msg)) => println!("Invalid: {}", msg),
+        Err(e) => println!("Error: {}", e),
+    }
+}
+
+// Skip validation for known canary/honeypot accounts
+fn setup_skip_list() {
+    set_aws_skip_account_ids(vec![
+        "111122223333",  // Test account
+        "444455556666",  // Canary account
+    ]);
+}
+```
+
+### Validation Response Types
+
+```rust
+use kingfisher_scanner::validation::{
+    CachedResponse, ValidationResponseBody,
+    from_string, as_str, VALIDATION_CACHE_SECONDS,
+};
+use http::StatusCode;
+use std::time::Duration;
+
+// Create a validation response body
+let body = from_string("Credential is valid");
+
+// Create a cached response
+let cached = CachedResponse::new(
+    body,
+    StatusCode::OK,
+    true,  // is_valid
+);
+
+// Check if cache is still fresh
+let cache_duration = Duration::from_secs(VALIDATION_CACHE_SECONDS);
+if cached.is_still_valid(cache_duration) {
+    println!("Using cached result: valid={}", cached.is_valid);
+}
+```
+
+---
+
+## API Stability
+
+These crates are currently internal to Kingfisher. The API may change between versions. For stable integration, pin to a specific git commit or tag.
+
+## See Also
+
+- [Main README](../README.md) - CLI usage and installation
+- [Rule Format](FINGERPRINT.md) - Rule definition details
+- [Changelog](../CHANGELOG.md) - Version history
diff --git a/src/blob.rs b/src/blob.rs
index f1cbe0e..bfa9581 100644
--- a/src/blob.rs
+++ b/src/blob.rs
@@ -1,417 +1,7 @@
-use std::{
-    convert::TryInto,
-    fs::File,
-    io::{Read, Write},
-    path::Path,
-    sync::Arc,
+//! Blob types for representing scannable content.
+//!
+//! This module re-exports types from [`kingfisher_core::blob`].
+
+pub use kingfisher_core::blob::{
+    Blob, BlobAppearance, BlobAppearanceSet, BlobData, BlobId, BlobIdMap, BlobMetadata,
 };
-
-use anyhow::Result;
-use bstr::{BString, ByteSlice};
-use gix::ObjectId;
-use hex;
-use once_cell::sync::OnceCell;
-use parking_lot::Mutex;
-use rustc_hash::FxHashMap;
-use serde::{Deserialize, Serialize};
-use sha1::{Digest, Sha1};
-use smallvec::SmallVec;
-use std::sync::atomic::{AtomicU64, Ordering};
-
-use crate::git_commit_metadata::CommitMetadata;
-// const LARGE_FILE_THRESHOLD: u64 = 512 * 1024; // 512 KB
-const LARGE_FILE_THRESHOLD: u64 = 0; // always mmap
-
-static NEXT_ID: AtomicU64 = AtomicU64::new(1);
-
-/// The data of a blob, either owned (small files) or memory mapped (large files).
-pub enum BlobData<'a> {
-    /// Small blobs – remains as-is.
-    Owned(Vec<u8>),
-
-    /// Large blobs read from disk with `memmap2`.
-    Mapped(memmap2::Mmap),
-
-    /// Bytes that already live inside gix’s pack-file mmap;
-    /// we only keep a pointer and length.
-    Borrowed(&'a [u8]),
-}
-
-impl<'a> AsRef<[u8]> for BlobData<'a> {
-    fn as_ref(&self) -> &[u8] {
-        match self {
-            BlobData::Owned(v) => v,
-            BlobData::Mapped(m) => m,
-            BlobData::Borrowed(slice) => slice,
-        }
-    }
-}
-
-impl<'a> BlobData<'a> {
-    #[inline]
-    pub fn len(&self) -> usize {
-        self.as_ref().len()
-    }
-    #[inline]
-    pub fn is_empty(&self) -> bool {
-        self.as_ref().is_empty()
-    }
-}
-/// Where was a particular blob seen?
-#[derive(Clone, Debug, serde::Serialize)]
-pub struct BlobAppearance {
-    pub commit_metadata: Arc<CommitMetadata>,
-
-    /// The path given to the blob
-    pub path: BString,
-}
-impl BlobAppearance {
-    #[inline]
-    pub fn path(&self) -> Result<&Path, bstr::Utf8Error> {
-        self.path.to_path()
-    }
-}
-/// A set of `BlobAppearance` entries
-pub type BlobAppearanceSet = SmallVec<[BlobAppearance; 1]>;
-// -------------------------------------------------------------------------------------------------
-// Blob
-// -------------------------------------------------------------------------------------------------
-/// A Git blob, storing its SHA-1 id and its contents.
-
-pub struct Blob<'a> {
-    id: OnceCell<BlobId>,
-    data: BlobData<'a>,
-    temp_id: u64,
-}
-
-impl Blob<'_> {
-    #[inline]
-    pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
-        let mut file = File::open(&path)?;
-        let file_size = file.metadata()?.len();
-        let temp_id = NEXT_ID.fetch_add(1, Ordering::Relaxed);
-
-        if file_size > LARGE_FILE_THRESHOLD {
-            // Large files: one mmap, zero extra copies.
-            let mmap = unsafe { memmap2::Mmap::map(&file)? };
-            Ok(Blob { id: OnceCell::new(), data: BlobData::Mapped(mmap), temp_id })
-        } else {
-            // Small files: reuse the same handle and pre-allocate exact capacity
-            let mut bytes = Vec::with_capacity(file_size as usize);
-            file.read_to_end(&mut bytes)?;
-            Ok(Blob { id: OnceCell::new(), data: BlobData::Owned(bytes), temp_id })
-        }
-    }
-
-    /// Returns the blob's bytes as a slice.
-    #[inline]
-    pub fn bytes(&self) -> &[u8] {
-        self.data.as_ref()
-    }
-
-    /// Lazily compute and return the blob's SHA-1 `BlobId`.
-    #[inline]
-    pub fn id(&self) -> BlobId {
-        *self.id.get_or_init(|| BlobId::new(self.bytes()))
-    }
-
-    /// Get a reference to the blob's SHA-1 `BlobId`, computing it if necessary.
-    #[inline]
-    pub fn id_ref(&self) -> &BlobId {
-        self.id.get_or_init(|| BlobId::new(self.bytes()))
-    }
-
-    /// Return the temporary identifier assigned on blob creation.
-    #[inline]
-    pub fn temp_id(&self) -> u64 {
-        self.temp_id
-    }
-
-    /// Create a new `Blob` from a vector of bytes.
-    #[inline]
-    pub fn from_bytes(bytes: Vec<u8>) -> Self {
-        let temp_id = NEXT_ID.fetch_add(1, Ordering::Relaxed);
-        Blob { id: OnceCell::new(), data: BlobData::Owned(bytes), temp_id }
-    }
-
-    /// Create a new `Blob` with the given id and data.
-    #[inline]
-    pub fn new(id: BlobId, bytes: Vec<u8>) -> Self {
-        let temp_id = NEXT_ID.fetch_add(1, Ordering::Relaxed);
-        let cell = OnceCell::new();
-        let _ = cell.set(id);
-        Blob { id: cell, data: BlobData::Owned(bytes), temp_id }
-    }
-
-    #[inline]
-    pub fn len(&self) -> usize {
-        self.bytes().len()
-    }
-
-    #[inline]
-    pub fn is_empty(&self) -> bool {
-        self.bytes().is_empty()
-    }
-}
-
-impl Drop for Blob<'_> {
-    fn drop(&mut self) {
-        // For owned data, clear the Vec. For memory-mapped data, the mmap will be unmapped
-        // automatically.
-        if let BlobData::Owned(ref mut v) = self.data {
-            v.clear();
-            v.shrink_to_fit();
-        }
-    }
-}
-/// A finite map with `BlobId` values as keys, designed for concurrent
-/// modification.
-///
-/// This implementation imposes an equivalence relation on blob IDs, assigning
-/// each to one of 256 classes (based on its first byte). Each class is
-/// represented by a standard `HashMap` protected by a `Mutex`. Since blob IDs
-/// are SHA-1 digests, and hence effectively random, the odds that two
-/// random blob IDs appear in the same class is 1/256.
-///
-/// We can model this as a generalized birthday problem. With 256
-/// mutex-protected hash maps, (i.e., "days in the year" or "possible
-/// birthdays"), you would need 20 threads (i.e., "people") accessing the set
-/// simultaneously to exceed 50% probability of 2 threads contending.
-///
-/// Or in other words, there should be relatively little contention on that
-/// global data structure even when using lots of threads.
-pub struct BlobIdMap<V> {
-    maps: [Mutex<FxHashMap<ObjectId, V>>; 256],
-}
-
-impl<V> BlobIdMap<V> {
-    pub fn new() -> Self {
-        BlobIdMap { maps: std::array::from_fn(|_| Mutex::new(FxHashMap::default())) }
-    }
-
-    /// Add the given `BlobId` to the map.
-    ///
-    /// Returns the old value mapped to the `BlobId`, if any.
-    #[inline]
-    pub fn insert(&self, blob_id: BlobId, v: V) -> Option<V> {
-        let idx = blob_id.as_bytes()[0] as usize;
-        self.maps[idx].lock().insert(blob_id.into(), v)
-    }
-
-    /// Check if the given `BlobId` is in the map without modifying it.
-    #[inline]
-    pub fn contains_key(&self, blob_id: &BlobId) -> bool {
-        let idx = blob_id.as_bytes()[0] as usize;
-        self.maps[idx].lock().contains_key(&ObjectId::from(blob_id))
-    }
-
-    /// Return the total number of blob IDs contained in the map.
-    ///
-    /// Note: this is not a cheap operation.
-    pub fn len(&self) -> usize {
-        self.maps.iter().map(|m| m.lock().len()).sum()
-    }
-
-    /// Is the map empty?
-    ///
-    /// Note: this is not a cheap operation.
-    pub fn is_empty(&self) -> bool {
-        self.maps.iter().all(|m| m.lock().is_empty())
-    }
-}
-impl<V: Copy> BlobIdMap<V> {
-    /// Get the value mapped to the given `BlobId`.
-    #[inline]
-    pub fn get(&self, blob_id: &BlobId) -> Option<V>
-    where
-        V: Copy,
-    {
-        let idx = blob_id.as_bytes()[0] as usize;
-        self.maps[idx].lock().get(&ObjectId::from(blob_id)).copied()
-    }
-}
-impl<V> Default for BlobIdMap<V> {
-    fn default() -> Self {
-        Self::new()
-    }
-}
-// -------------------------------------------------------------------------------------------------
-// BlobId
-// -------------------------------------------------------------------------------------------------
-#[derive(PartialEq, Eq, PartialOrd, Ord, Hash, Copy, Clone, Serialize)]
-#[serde(into = "String")]
-pub struct BlobId([u8; 20]);
-impl BlobId {
-    // Create a new method to get a default (zero-filled) BlobId
-    pub fn default() -> Self {
-        BlobId([0; 20])
-    }
-
-    pub fn compute_from_bytes(bytes: &[u8]) -> Self {
-        let mut hasher = Sha1::new();
-        write!(&mut hasher, "blob {}\0", bytes.len()).unwrap();
-        hasher.update(bytes);
-        let digest: [u8; 20] = hasher.finalize().into();
-        BlobId(digest)
-    }
-}
-impl<'de> Deserialize<'de> for BlobId {
-    fn deserialize<D: serde::Deserializer<'de>>(d: D) -> Result<Self, D::Error> {
-        struct Vis;
-        impl serde::de::Visitor<'_> for Vis {
-            type Value = BlobId;
-
-            fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
-                formatter.write_str("a string")
-            }
-
-            fn visit_str<E: serde::de::Error>(self, v: &str) -> Result<Self::Value, E> {
-                BlobId::from_hex(v).map_err(|e| serde::de::Error::custom(e))
-            }
-        }
-        d.deserialize_str(Vis)
-    }
-}
-impl std::fmt::Debug for BlobId {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "BlobId({})", self.hex())
-    }
-}
-impl schemars::JsonSchema for BlobId {
-    fn schema_name() -> String {
-        "BlobId".into()
-    }
-
-    fn json_schema(gen: &mut schemars::gen::SchemaGenerator) -> schemars::schema::Schema {
-        let s = String::json_schema(gen);
-        let mut o = s.into_object();
-        o.string().pattern = Some("[0-9a-f]{40}".into());
-        let md = o.metadata();
-        md.description = Some("A hex-encoded blob ID as computed by Git".into());
-        schemars::schema::Schema::Object(o)
-    }
-}
-impl BlobId {
-    /// Create a new BlobId computed from the given input.
-    #[inline]
-    pub fn new(input: &[u8]) -> Self {
-        const CHUNK: usize = 64 * 1024; // 64KB from start and end
-        let mut hasher = Sha1::new();
-        write!(&mut hasher, "blob {}\0", input.len()).unwrap();
-        if input.len() <= CHUNK * 2 {
-            hasher.update(input);
-        } else {
-            hasher.update(&input[..CHUNK]);
-            hasher.update(&input[input.len() - CHUNK..]);
-        }
-        let digest: [u8; 20] = hasher.finalize().into();
-        BlobId(digest)
-    }
-
-    #[inline]
-    pub fn from_hex(v: &str) -> Result<Self> {
-        Ok(BlobId(hex::decode(v)?.as_slice().try_into()?))
-    }
-
-    #[inline]
-    pub fn hex(&self) -> String {
-        hex::encode(self.0)
-    }
-
-    #[inline]
-    pub fn as_bytes(&self) -> &[u8] {
-        &self.0
-    }
-}
-impl From<BlobId> for String {
-    #[inline]
-    fn from(blob_id: BlobId) -> String {
-        blob_id.hex()
-    }
-}
-impl TryFrom<&str> for BlobId {
-    type Error = anyhow::Error;
-
-    #[inline]
-    fn try_from(s: &str) -> Result<Self, Self::Error> {
-        BlobId::from_hex(s)
-    }
-}
-impl std::fmt::Display for BlobId {
-    #[inline]
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "{}", self.hex())
-    }
-}
-impl<'a> From<&'a gix::ObjectId> for BlobId {
-    #[inline]
-    fn from(id: &'a gix::ObjectId) -> Self {
-        BlobId(id.as_bytes().try_into().expect("oid should be a 20-byte value"))
-    }
-}
-impl From<gix::ObjectId> for BlobId {
-    #[inline]
-    fn from(id: gix::ObjectId) -> Self {
-        BlobId(id.as_bytes().try_into().expect("oid should be a 20-byte value"))
-    }
-}
-impl<'a> From<&'a BlobId> for gix::ObjectId {
-    #[inline]
-    fn from(blob_id: &'a BlobId) -> Self {
-        gix::hash::ObjectId::try_from(blob_id.as_bytes()).unwrap()
-    }
-}
-impl From<BlobId> for gix::ObjectId {
-    #[inline]
-    fn from(blob_id: BlobId) -> Self {
-        gix::hash::ObjectId::try_from(blob_id.as_bytes()).unwrap()
-    }
-}
-// -------------------------------------------------------------------------------------------------
-// test
-// -------------------------------------------------------------------------------------------------
-#[cfg(test)]
-mod test {
-    use pretty_assertions::assert_eq;
-
-    use super::*;
-    #[test]
-    fn simple() {
-        assert_eq!(BlobId::new(&vec![0; 0]).hex(), "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391");
-        assert_eq!(BlobId::new(&vec![0; 1024]).hex(), "06d7405020018ddf3cacee90fd4af10487da3d20");
-    }
-}
-/// Metadata about a blob
-#[derive(Debug, Clone, PartialEq, Eq, Hash, serde::Serialize, schemars::JsonSchema)]
-pub struct BlobMetadata {
-    /// The blob ID this metadata applies to
-    pub id: BlobId,
-
-    /// The length in bytes of the blob
-    pub num_bytes: usize,
-
-    /// The guessed multimedia type of the blob
-    pub mime_essence: Option<String>,
-
-    /// The guessed programming language of the blob
-    pub language: Option<String>,
-}
-impl BlobMetadata {
-    /// Get the length of the blob in bytes.
-    #[inline]
-    pub fn num_bytes(&self) -> usize {
-        self.num_bytes
-    }
-
-    /// Get the size of the blob in megabytes, rounded to 3 significant digits.
-    #[inline]
-    pub fn num_megabytes(&self) -> f64 {
-        let mb = self.num_bytes as f64 / 1_048_576.0;
-        format!("{:.3}", mb).parse::<f64>().unwrap_or(mb)
-    }
-
-    #[inline]
-    pub fn mime_essence(&self) -> Option<&str> {
-        self.mime_essence.as_deref()
-    }
-}
diff --git a/src/bstring_escape.rs b/src/bstring_escape.rs
index 8b0fa4d..edeb8b1 100644
--- a/src/bstring_escape.rs
+++ b/src/bstring_escape.rs
@@ -1,137 +1,5 @@
-use std::{
-    borrow::Cow,
-    fmt::{Display, Formatter},
-};
+//! Safe string escaping utilities.
+//!
+//! This module re-exports from [`kingfisher_core::bstring_escape`].
 
-use console::strip_ansi_codes;
-/// Escapes non-printing characters in a string while preserving whitespace.
-/// Returns borrowed data if no escaping was needed, avoiding allocations.
-fn escape_nonprinting(s: &str) -> Cow<'_, str> {
-    // Fast path - return original if no control chars (except whitespace)
-    if s.chars().all(|ch| !ch.is_control() || ch.is_whitespace()) {
-        return Cow::Borrowed(s);
-    }
-    // Allocate with extra capacity for possible escape sequences
-    let mut escaped = String::with_capacity(s.len() * 2);
-    let mut chars = s.chars().peekable();
-    while let Some(ch) = chars.next() {
-        match ch {
-            // Handle ANSI escape sequences
-            '\x1B' => continue,
-            // Escape non-whitespace control characters
-            ch if ch.is_control() && !ch.is_whitespace() => {
-                use std::fmt::Write;
-                write!(escaped, "{}", ch.escape_unicode()).expect("string writing must succeed");
-            }
-            // Pass through all other characters unchanged
-            ch => escaped.push(ch),
-        }
-    }
-    Cow::Owned(escaped)
-}
-/// A newtype around `&[u8]` that provides safe string formatting by:
-/// 1. Converting from UTF-8 with replacement of invalid sequences
-/// 2. Removing ANSI control sequences
-/// 3. Escaping remaining control characters (except whitespace)
-#[derive(Debug, Clone, Copy)]
-pub struct Escaped<'a>(pub &'a [u8]);
-impl Display for Escaped<'_> {
-    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
-        // First handle UTF-8 decoding with replacement characters
-        let decoded = String::from_utf8_lossy(self.0);
-        // Then strip ANSI sequences and escape control chars
-        let stripped = strip_ansi_codes(&decoded);
-        let escaped = escape_nonprinting(&stripped);
-        f.write_str(&escaped)
-    }
-}
-#[cfg(test)]
-mod tests {
-    use super::*;
-    #[test]
-    fn test_escape_nonprinting_normal_text() {
-        let input = "Hello, World!";
-        let result = escape_nonprinting(input);
-        assert!(matches!(result, Cow::Borrowed(_)), "Normal text should use borrowed data");
-        assert_eq!(result, "Hello, World!");
-    }
-    #[test]
-    fn test_escape_nonprinting_with_whitespace() {
-        let input = "Hello\n\t World!";
-        let result = escape_nonprinting(input);
-        assert!(
-            matches!(result, Cow::Borrowed(_)),
-            "Text with whitespace should use borrowed data"
-        );
-        assert_eq!(result, "Hello\n\t World!");
-    }
-    #[test]
-    fn test_escape_nonprinting_control_chars() {
-        let input = "Hello\x00World\x01";
-        let result = escape_nonprinting(input);
-        assert!(matches!(result, Cow::Owned(_)), "Text with control chars should be escaped");
-        assert_eq!(result, "Hello\\u{0}World\\u{1}");
-    }
-    #[test]
-    fn test_escape_nonprinting_mixed_content() {
-        let input = "Test\x00\n\x01\tEnd";
-        let result = escape_nonprinting(input);
-        assert!(matches!(result, Cow::Owned(_)));
-        assert_eq!(result, "Test\\u{0}\n\\u{1}\tEnd");
-    }
-    #[test]
-    fn test_escaped_struct_simple() {
-        let bytes = b"Hello World";
-        let escaped = Escaped(bytes);
-        assert_eq!(escaped.to_string(), "Hello World");
-    }
-    #[test]
-    fn test_escaped_struct_ansi_codes() {
-        let bytes = b"\x1b[31mRed\x1b[0m \x1b[32mGreen\x1b[0m";
-        let escaped = Escaped(bytes);
-        assert_eq!(escaped.to_string(), "Red Green");
-    }
-    #[test]
-    fn test_escaped_struct_invalid_utf8() {
-        let bytes = b"Hello\xFF\xFEWorld";
-        let escaped = Escaped(bytes);
-        assert_eq!(escaped.to_string(), "Hello\u{FFFD}\u{FFFD}World");
-    }
-    #[test]
-    fn test_escaped_struct_control_chars_and_ansi() {
-        let bytes = b"\x1b[31mHello\x00World\x1b[0m";
-        let escaped = Escaped(bytes);
-        assert_eq!(escaped.to_string(), "Hello\\u{0}World");
-    }
-    #[test]
-    fn test_escaped_struct_empty() {
-        let bytes = b"";
-        let escaped = Escaped(bytes);
-        assert_eq!(escaped.to_string(), "");
-    }
-    #[test]
-    fn test_escaped_struct_all_whitespace() {
-        let bytes = b"\n\t \r\n";
-        let escaped = Escaped(bytes);
-        assert_eq!(escaped.to_string(), "\n\t \r\n");
-    }
-    #[test]
-    fn test_escaped_struct_complex_mix() {
-        let bytes = b"\x1b[1mBold\x00\xFF\n\tText\x1b[0m";
-        let escaped = Escaped(bytes);
-        assert_eq!(escaped.to_string(), "Bold\\u{0}\u{FFFD}\n\tText");
-    }
-    #[test]
-    fn test_escape_nonprinting_emoji() {
-        let input = "Hello 👋 World!";
-        let result = escape_nonprinting(input);
-        assert!(matches!(result, Cow::Borrowed(_)));
-        assert_eq!(result, "Hello 👋 World!");
-    }
-    #[test]
-    fn test_escaped_struct_multibyte_chars() {
-        let bytes = "Hello 世界!".as_bytes();
-        let escaped = Escaped(bytes);
-        assert_eq!(escaped.to_string(), "Hello 世界!");
-    }
-}
+pub use kingfisher_core::bstring_escape::Escaped;
diff --git a/src/cli/commands/mod.rs b/src/cli/commands/mod.rs
index 2281a0d..17f187c 100644
--- a/src/cli/commands/mod.rs
+++ b/src/cli/commands/mod.rs
@@ -9,4 +9,5 @@ pub mod inputs;
 pub mod output;
 pub mod rules;
 pub mod scan;
+pub mod validate;
 pub mod view;
diff --git a/src/cli/commands/validate.rs b/src/cli/commands/validate.rs
new file mode 100644
index 0000000..e8c1bbf
--- /dev/null
+++ b/src/cli/commands/validate.rs
@@ -0,0 +1,55 @@
+use clap::{Args, ValueHint};
+use std::path::PathBuf;
+
+/// Directly validate a known secret against a rule's validator
+#[derive(Args, Debug, Clone)]
+pub struct ValidateArgs {
+    /// Rule ID or prefix to use for validation (e.g., kingfisher.opsgenie.1 or kingfisher.opsgenie)
+    #[arg(long, required = true)]
+    pub rule: String, // mandatory: the attribute above sets `required = true`
+
+    /// The secret value to validate (use '-' to read from stdin)
+    #[arg(value_name = "SECRET")]
+    pub secret: Option<String>, // positional; `None` when omitted on the command line
+
+    /// Additional values for validation, auto-assigned to template variables.
+    /// Values are assigned to non-TOKEN variables in alphabetical order.
+    /// Example: --arg AKIAEXAMPLE assigns to the first required variable.
+    #[arg(long = "arg", value_name = "VALUE")]
+    pub args: Vec<String>, // filled from --arg (flag name differs from the field name)
+
+    /// Named variables for validation template (e.g., --var AKID=xxx).
+    /// Use when you know the exact variable name. Overrides --arg assignments.
+    #[arg(long = "var", value_name = "NAME=VALUE")]
+    pub variables: Vec<String>, // raw NAME=VALUE strings from --var; not split or checked here
+
+    /// Timeout for validation requests in seconds (1-60)
+    #[arg(
+        long = "timeout",
+        default_value_t = 10,
+        value_name = "SECONDS",
+        value_parser = clap::value_parser!(u64).range(1..=60)
+    )]
+    pub timeout: u64, // seconds; clap enforces 1..=60 and defaults to 10
+
+    /// Number of retries for validation requests (0-5)
+    #[arg(
+        long = "retries",
+        default_value_t = 1,
+        value_name = "N",
+        value_parser = clap::value_parser!(u32).range(0..=5)
+    )]
+    pub retries: u32, // clap enforces 0..=5 and defaults to 1
+
+    /// Path to custom rules file or directory
+    #[arg(long = "rules-path", value_hint = ValueHint::AnyPath)]
+    pub rules_path: Vec<PathBuf>, // filled from --rules-path
+
+    /// Skip loading builtin rules (use only custom rules from --rules-path)
+    #[arg(long = "no-builtins", default_value_t = false)]
+    pub no_builtins: bool, // false unless --no-builtins is passed
+
+    /// Output format: text or json
+    #[arg(long, default_value = "text", value_parser = ["text", "json"])]
+    pub format: String, // restricted by clap to "text" or "json"; defaults to "text"
+}
diff --git a/src/cli/global.rs b/src/cli/global.rs
index bdd52e2..f41f207 100644
--- a/src/cli/global.rs
+++ b/src/cli/global.rs
@@ -7,7 +7,8 @@ use sysinfo::{MemoryRefreshKind, RefreshKind, System};
 use tracing::Level;
 
 use crate::cli::commands::{
-    access_map::AccessMapArgs, rules::RulesArgs, scan::ScanCommandArgs, view::ViewArgs,
+    access_map::AccessMapArgs, rules::RulesArgs, scan::ScanCommandArgs, validate::ValidateArgs,
+    view::ViewArgs,
 };
 
 #[deny(missing_docs)]
@@ -64,6 +65,9 @@ pub enum Command {
     #[command(alias = "rule")]
     Rules(RulesArgs),
 
+    /// Directly validate a known secret against a rule's validator (bypasses pattern matching)
+    Validate(ValidateArgs),
+
     /// Map a cloud credential to its identity, permissions, and blast radius
     #[command(name = "access-map", alias = "access_map")]
     AccessMap(AccessMapArgs),
diff --git a/src/defaults.rs b/src/defaults.rs
index e307258..4291656 100644
--- a/src/defaults.rs
+++ b/src/defaults.rs
@@ -1,35 +1,5 @@
-use std::path::Path;
+//! Builtin rules loading.
+//!
+//! This module re-exports from [`kingfisher_rules::defaults`].
 
-use anyhow::Result;
-use include_dir::{include_dir, Dir};
-
-use crate::rules::{rule::Confidence, Rules};
-
-pub static DEFAULT_RULES_DIR: Dir<'_> = include_dir!("$CARGO_MANIFEST_DIR/data");
-
-fn load_yaml_files<'a>(dir: &Dir<'a>) -> Vec<(&'a Path, &'a [u8])> {
-    dir.find("**/*.yml")
-        .expect("Constant glob should compile")
-        .filter_map(|e| e.as_file())
-        .map(|f| (f.path(), f.contents()))
-        .collect()
-}
-/// Load the default YAML rule files, returning their pathnames and contents.
-fn get_default_rule_files() -> Vec<(&'static Path, &'static [u8])> {
-    let mut yaml_files = load_yaml_files(&DEFAULT_RULES_DIR);
-    yaml_files.sort_by_key(|t| t.0);
-    yaml_files
-}
-/// Load the default rules and rulesets.
-pub fn get_builtin_rules(confidence: Option<Confidence>) -> Result<Rules> {
-    let confidence = confidence.unwrap_or(Confidence::Medium);
-    Rules::from_paths_and_contents(get_default_rule_files(), confidence)
-}
-#[cfg(test)]
-mod test {
-    use super::*;
-    #[test]
-    fn test_get_default_rules() {
-        assert!(get_builtin_rules(None).unwrap().num_rules() >= 100);
-    }
-}
+pub use kingfisher_rules::defaults::{get_builtin_rules, DEFAULT_RULES_DIR};
diff --git a/src/direct_validate.rs b/src/direct_validate.rs
new file mode 100644
index 0000000..71cae0c
--- /dev/null
+++ b/src/direct_validate.rs
@@ -0,0 +1,850 @@
+//! Direct secret validation without pattern matching.
+//!
+//! This module provides functionality to validate a known secret directly against
+//! a rule's validator, bypassing the normal pattern-matching detection phase.
+
+use std::{
+    collections::{BTreeMap, BTreeSet},
+    io::{self, Read},
+    sync::Arc,
+    time::Duration,
+};
+
+use anyhow::{anyhow, bail, Context, Result};
+use crossbeam_skiplist::SkipMap;
+use liquid::Object;
+use liquid_core::{Value, ValueView};
+use regex::Regex;
+use reqwest::Client;
+use serde::Serialize;
+use tracing::debug;
+
+use crate::{
+    cli::{commands::validate::ValidateArgs, global::GlobalArgs},
+    liquid_filters::register_all,
+    rule_loader::RuleLoader,
+    rules::{rule::Rule, HttpValidation, Validation},
+    validation::{
+        aws::validate_aws_credentials,
+        azure::validate_azure_storage_credentials,
+        coinbase::validate_cdp_api_key,
+        gcp::GcpValidator,
+        httpvalidation::{build_request_builder, retry_request, validate_response},
+        jdbc::validate_jdbc,
+        jwt::validate_jwt,
+        mongodb::validate_mongodb,
+        mysql::validate_mysql,
+        postgres::validate_postgres,
+        GLOBAL_USER_AGENT,
+    },
+    validation_body,
+};
+
+/// Outcome of validating one secret against one rule; serializable through serde.
+#[derive(Debug, Clone, Serialize)]
+pub struct DirectValidationResult {
+    /// ID of the rule whose validator ran (the HTTP helper leaves it empty for its caller to set).
+    pub rule_id: String,
+    /// Name of that rule (likewise left empty by the HTTP helper).
+    pub rule_name: String,
+    /// `true` when the validator judged the secret to be valid.
+    pub is_valid: bool,
+    /// HTTP status code of the validation response; presumably `None` for non-HTTP validators.
+    pub status_code: Option<u16>,
+    /// Response body (HTTP bodies are shortened to 500 bytes plus `...`) or an error message.
+    pub message: String,
+}
+
+/// Resolve a rule selector to every rule it designates.
+///
+/// Selects IDs equal to `selector` or extending it with `.`; errors if nothing is selected.
+fn find_rules_by_selector<'a>(
+    selector: &str,
+    rules: &'a BTreeMap<String, Rule>,
+) -> Result<Vec<&'a Rule>> {
+    // After removing the selector, the leftover must be empty (exact ID) or begin with '.'
+    let is_selected = |id: &str| {
+        id.strip_prefix(selector).is_some_and(|rest| rest.is_empty() || rest.starts_with('.'))
+    };
+
+    let selected: Vec<&'a Rule> = rules
+        .iter()
+        .filter(|(id, _)| is_selected(id.as_str()))
+        .map(|(_, rule)| rule)
+        .collect();
+
+    if selected.is_empty() {
+        bail!(
+            "No rule found matching '{}'. Use `kingfisher rules list` to see available rules.",
+            selector
+        );
+    }
+
+    Ok(selected)
+}
+
+/// Look up `name` in the Liquid globals and return its value converted to an owned string.
+fn get_global_var(globals: &Object, name: &str) -> Option<String> {
+    globals.get(name).map(|value| value.to_kstr().to_string())
+}
+
+/// Collect upper-cased Liquid variable names (`{{ VAR }}`, `{{ VAR | filter }}`) found in `text`.
+fn extract_template_vars(text: &str) -> BTreeSet<String> {
+    static VAR_RE: std::sync::OnceLock<Regex> = std::sync::OnceLock::new(); // compile once
+    let re = VAR_RE.get_or_init(|| {
+        Regex::new(r"\{\{\s*([A-Za-z_][A-Za-z0-9_]*)\s*(?:\|[^}]*)?\}\}").expect("valid regex")
+    });
+    re.captures_iter(text).filter_map(|cap| cap.get(1).map(|m| m.as_str().to_uppercase())).collect()
+}
+
+/// Determine which template variables a rule's validator expects to be supplied.
+///
+/// * `Validation::Http`: names are discovered by scanning the request URL, every header
+///   name and value, and the optional body for `{{ VAR }}` references.
+/// * `Validation::AWS`: `AKID` and `TOKEN`.
+/// * `Validation::AzureStorage`: `TOKEN` and `STORAGE_ACCOUNT`.
+/// * `Validation::Coinbase`: `TOKEN` and `CRED_NAME`.
+/// * Every other validator kind: `TOKEN` only.
+///
+/// The result is a `BTreeSet`, so the names come back sorted and without duplicates.
+fn extract_validation_vars(validation: &Validation) -> BTreeSet<String> {
+    // Non-HTTP validators use a fixed list of names; the HTTP arm returns early with the
+    // names found in its templates.
+    let fixed_names: &[&str] = match validation {
+        Validation::Http(http) => {
+            let request = &http.request;
+
+            // Start from the variables referenced by the URL template
+            let mut found = extract_template_vars(&request.url);
+
+            // Header names and header values may both contain references
+            for (header_name, header_value) in &request.headers {
+                found.extend(extract_template_vars(header_name));
+                found.extend(extract_template_vars(header_value));
+            }
+
+            // The body is optional
+            if let Some(body) = &request.body {
+                found.extend(extract_template_vars(body));
+            }
+
+            // Nothing fixed to add for HTTP rules
+            return found;
+        }
+
+        // Validators that take TOKEN together with one companion variable
+        Validation::AWS => &["AKID", "TOKEN"],
+        Validation::AzureStorage => &["TOKEN", "STORAGE_ACCOUNT"],
+        Validation::Coinbase => &["TOKEN", "CRED_NAME"],
+
+        // Validators that only take TOKEN
+        Validation::GCP
+        | Validation::MongoDB
+        | Validation::MySQL
+        | Validation::Postgres
+        | Validation::Jdbc
+        | Validation::JWT
+        | Validation::Raw(_) => &["TOKEN"],
+    };
+
+    // Copy the static names into owned strings
+    let mut vars = BTreeSet::new();
+    for name in fixed_names {
+        vars.insert((*name).to_string());
+    }
+
+    vars
+}
+
+/// Assemble the Liquid globals used to render a rule's validation templates.
+///
+/// - `secret`: stored under `TOKEN`
+/// - `args`: positional values paired, in order, with the sorted non-`TOKEN` template variables
+/// - `variables`: explicit `NAME=VALUE` pairs; applied last, so they win over `args`
+/// - `template_vars`: variable names referenced by the validation template
+///
+/// Fails when a `variables` entry has no `=` or an empty name.
+fn build_globals(
+    secret: &str,
+    args: &[String],
+    variables: &[String],
+    template_vars: &BTreeSet<String>,
+) -> Result<Object> {
+    let mut globals = Object::new();
+
+    // The secret under test is always exposed as TOKEN
+    globals.insert("TOKEN".into(), Value::scalar(secret.to_string()));
+
+    // Candidates for positional assignment: every template variable except TOKEN
+    // (a BTreeSet iterates in sorted order, which gives the alphabetical pairing)
+    let positional_targets = template_vars.iter().filter(|name| name.as_str() != "TOKEN");
+
+    // `zip` stops at the shorter side, so surplus --arg values are left unassigned
+    for (var_name, arg_value) in positional_targets.zip(args) {
+        debug!("Auto-assigning --arg '{}' to variable '{}'", arg_value, var_name);
+        globals.insert(var_name.clone().into(), Value::scalar(arg_value.clone()));
+    }
+
+    // Explicit --var NAME=VALUE entries are written last and therefore override the above
+    for entry in variables {
+        let Some((raw_name, raw_value)) = entry.split_once('=') else {
+            bail!("Invalid variable format '{}'. Expected NAME=VALUE", entry);
+        };
+
+        // Names are normalised to upper case; surrounding whitespace is dropped from both parts
+        let name = raw_name.trim().to_uppercase();
+        if name.is_empty() {
+            bail!("Variable name cannot be empty in '{}'", entry);
+        }
+
+        let value = raw_value.trim().to_string();
+        globals.insert(name.into(), Value::scalar(value));
+    }
+
+    Ok(globals)
+}
+
+/// Obtain the secret to validate, either literally from the CLI argument or from stdin.
+/// An argument of `-` means: read all of stdin and trim surrounding whitespace.
+fn read_secret(secret_arg: Option<&str>) -> Result<String> {
+    let Some(given) = secret_arg else {
+        bail!("No secret provided. Pass a secret as an argument or use '-' to read from stdin.");
+    };
+
+    if given != "-" {
+        return Ok(given.to_string());
+    }
+
+    let mut piped = String::new();
+    io::stdin().read_to_string(&mut piped).context("Failed to read secret from stdin")?;
+    Ok(piped.trim().to_string())
+}
+
+/// Expand `url_template` with `globals` through Liquid and parse the output as a URL.
+async fn render_and_parse_url(
+    parser: &liquid::Parser,
+    globals: &Object,
+    url_template: &str,
+) -> Result<reqwest::Url> {
+    let rendered = parser
+        .parse(url_template)
+        .map_err(|err| anyhow!("Failed to parse URL template: {}", err))?
+        .render(globals)
+        .map_err(|err| anyhow!("Failed to render URL template: {}", err))?;
+
+    reqwest::Url::parse(&rendered).map_err(|err| anyhow!("Invalid URL '{}': {}", rendered, err))
+}
+
+/// Send the rule's HTTP validation request and judge the response with its matchers.
+///
+/// The returned `rule_id`/`rule_name` are empty (the caller fills them in); `message` is the
+/// response body, cut to at most 500 bytes (plus `...`) on a UTF-8 character boundary.
+async fn execute_http_validation(
+    http_validation: &HttpValidation,
+    globals: &Object,
+    client: &Client,
+    parser: &liquid::Parser,
+    timeout: Duration,
+    retries: u32,
+) -> Result<DirectValidationResult> {
+    let url = render_and_parse_url(parser, globals, &http_validation.request.url).await?;
+    debug!("Validating against URL: {}", url);
+
+    // Build the request
+    let request_builder = build_request_builder(
+        client,
+        &http_validation.request.method,
+        &url,
+        &http_validation.request.headers,
+        &http_validation.request.body,
+        timeout,
+        parser,
+        globals,
+    )
+    .map_err(|e| anyhow!("Failed to build request: {}", e))?;
+
+    // Execute the request with retries
+    let (backoff_min, backoff_max) = (Duration::from_millis(100), Duration::from_secs(2));
+    let response = retry_request(request_builder, retries, backoff_min, backoff_max)
+        .await
+        .map_err(|e| anyhow!("Request failed: {}", e))?;
+
+    let status = response.status();
+    let headers = response.headers().clone();
+    let body =
+        response.text().await.unwrap_or_else(|e| format!("Failed to read response body: {}", e));
+
+    // Truncate for display, cutting on a char boundary: slicing mid-character would panic
+    let cut = (0..=500).rev().find(|&i| body.is_char_boundary(i)).unwrap_or(0);
+    let display_body = if body.len() > 500 { format!("{}...", &body[..cut]) } else { body.clone() };
+
+    // Validate the response
+    let matchers = http_validation.request.response_matcher.as_deref().unwrap_or(&[]);
+    let html_allowed = http_validation.request.response_is_html;
+    let is_valid = validate_response(matchers, &body, &status, &headers, html_allowed);
+
+    Ok(DirectValidationResult {
+        rule_id: String::new(), // Will be filled in by caller
+        rule_name: String::new(),
+        is_valid,
+        status_code: Some(status.as_u16()),
+        message: display_body,
+    })
+}
+
+/// Validate one secret directly against every rule matched by the `args.rule` selector.
+///
+/// Rules lacking a validation section are skipped, as are (when several rules match) rules with fewer variables than `--arg` values.
+/// Errors on an empty secret, a failed HTTP validation, a missing required variable, or when no rule yielded a result.
+pub async fn run_direct_validation(
+    args: &ValidateArgs,
+    global_args: &GlobalArgs,
+) -> Result<Vec<DirectValidationResult>> {
+    // Read the secret (the argument value, or stdin when the argument is "-")
+    let secret = read_secret(args.secret.as_deref())?;
+
+    if secret.is_empty() {
+        bail!("Secret cannot be empty");
+    }
+
+    // Load rules: builtins unless `args.no_builtins` is set, plus the paths in `args.rules_path`
+    let loader = RuleLoader::new()
+        .load_builtins(!args.no_builtins)
+        .additional_rule_load_paths(&args.rules_path);
+
+    // Create minimal scan args for rule loading
+    let scan_args = create_minimal_scan_args();
+    let loaded = loader.load(&scan_args)?;
+
+    // Find all matching rules
+    let matching_rules = find_rules_by_selector(&args.rule, loaded.id_to_rule())?;
+    let num_matching_rules = matching_rules.len();
+
+    if num_matching_rules > 1 {
+        debug!("Rule selector '{}' matches {} rules, trying all", args.rule, num_matching_rules);
+    }
+
+    // Build HTTP client (shared by every rule tried below)
+    let client = Client::builder()
+        .danger_accept_invalid_certs(global_args.ignore_certs)
+        .timeout(Duration::from_secs(args.timeout))
+        .user_agent(GLOBAL_USER_AGENT.as_str())
+        .gzip(true)
+        .deflate(true)
+        .brotli(true)
+        .build()
+        .context("Failed to build HTTP client")?;
+
+    // Build Liquid parser: the stdlib plus whatever `register_all` adds
+    let parser = register_all(liquid::ParserBuilder::with_stdlib()).build()?;
+
+    let timeout = Duration::from_secs(args.timeout); // same duration as the client timeout; passed to the HTTP validation path
+
+    let mut results = Vec::new();
+
+    // Try each matching rule
+    for rule in matching_rules {
+        let rule_id = rule.id().to_string();
+        let rule_name = rule.name().to_string();
+
+        debug!("Trying rule: {} ({})", rule_name, rule_id);
+
+        // Check if the rule has validation
+        let validation = match rule.syntax().validation.as_ref() {
+            Some(v) => v,
+            None => {
+                debug!("Rule '{}' has no validation defined, skipping", rule_id);
+                continue;
+            }
+        };
+
+        // Extract template variables from validation and build globals
+        let template_vars = extract_validation_vars(validation);
+
+        // Every variable except TOKEN counts as a slot for a positional --arg value (see the check below)
+        let non_token_vars: Vec<&String> = template_vars.iter().filter(|v| *v != "TOKEN").collect();
+
+        // If more --arg values than variables, skip this rule when trying multiple rules
+        if args.args.len() > non_token_vars.len() {
+            if num_matching_rules > 1 {
+                debug!(
+                    "Rule '{}' expects {} variable(s) but {} --arg value(s) provided, skipping",
+                    rule_id,
+                    non_token_vars.len(),
+                    args.args.len()
+                );
+                continue;
+            } else {
+                // Single rule match - give a clear error
+                let var_list = if non_token_vars.is_empty() {
+                    "none".to_string()
+                } else {
+                    non_token_vars.iter().map(|s| s.as_str()).collect::<Vec<_>>().join(", ")
+                };
+                bail!(
+                    "Too many --arg values provided. Rule '{}' expects {} additional variable(s): {}",
+                    rule_id,
+                    non_token_vars.len(),
+                    var_list
+                );
+            }
+        }
+
+        let globals = build_globals(&secret, &args.args, &args.variables, &template_vars)?;
+
+        // Log auto-assignment info for debugging
+        if !non_token_vars.is_empty() && !args.args.is_empty() {
+            debug!(
+                "Rule '{}' uses variables: {:?}, auto-assigned from --arg: {:?}",
+                rule_id, non_token_vars, args.args
+            );
+        }
+
+        // Execute validation based on type
+        let mut result = match validation {
+            Validation::Http(http_validation) => {
+                execute_http_validation(
+                    http_validation,
+                    &globals,
+                    &client,
+                    &parser,
+                    timeout,
+                    args.retries,
+                )
+                .await? // an error here propagates and ends the whole run, including any remaining rules
+            }
+
+            Validation::AWS => {
+                // AWS needs AKID (ACCESS_KEY_ID is accepted as a fallback name) and TOKEN (secret access key)
+                let akid = get_global_var(&globals, "AKID")
+                .or_else(|| get_global_var(&globals, "ACCESS_KEY_ID"))
+                .ok_or_else(|| anyhow!(
+                    "AWS validation requires AKID variable. Use: --var AKID=<access_key_id> <secret_access_key>"
+                ))?;
+
+                match validate_aws_credentials(&akid, &secret).await {
+                    Ok((is_valid, message)) => DirectValidationResult {
+                        rule_id: String::new(),
+                        rule_name: String::new(),
+                        is_valid,
+                        status_code: None,
+                        message,
+                    },
+                    Err(e) => DirectValidationResult {
+                        rule_id: String::new(),
+                        rule_name: String::new(),
+                        is_valid: false,
+                        status_code: None,
+                        message: format!("AWS validation error: {}", e),
+                    },
+                }
+            }
+
+            Validation::GCP => {
+                // GCP expects the full service account JSON as the secret
+                match GcpValidator::new() {
+                    Ok(validator) => {
+                        match validator.validate_gcp_credentials(secret.as_bytes()).await {
+                            Ok((is_valid, metadata)) => DirectValidationResult {
+                                rule_id: String::new(),
+                                rule_name: String::new(),
+                                is_valid,
+                                status_code: None,
+                                message: if metadata.is_empty() {
+                                    "GCP credential validation completed".to_string()
+                                } else {
+                                    metadata.join(", ")
+                                },
+                            },
+                            Err(e) => DirectValidationResult {
+                                rule_id: String::new(),
+                                rule_name: String::new(),
+                                is_valid: false,
+                                status_code: None,
+                                message: format!("GCP validation error: {}", e),
+                            },
+                        }
+                    }
+                    Err(e) => DirectValidationResult {
+                        rule_id: String::new(),
+                        rule_name: String::new(),
+                        is_valid: false,
+                        status_code: None,
+                        message: format!("Failed to initialize GCP validator: {}", e),
+                    },
+                }
+            }
+
+            Validation::MongoDB => {
+                // MongoDB expects a connection URI as the secret
+                match validate_mongodb(&secret).await {
+                    Ok((is_valid, message)) => DirectValidationResult {
+                        rule_id: String::new(),
+                        rule_name: String::new(),
+                        is_valid,
+                        status_code: None,
+                        message,
+                    },
+                    Err(e) => DirectValidationResult {
+                        rule_id: String::new(),
+                        rule_name: String::new(),
+                        is_valid: false,
+                        status_code: None,
+                        message: format!("MongoDB validation error: {}", e),
+                    },
+                }
+            }
+
+            Validation::MySQL => {
+                // MySQL expects a connection URL as the secret
+                match validate_mysql(&secret).await {
+                    Ok((is_valid, metadata)) => DirectValidationResult {
+                        rule_id: String::new(),
+                        rule_name: String::new(),
+                        is_valid,
+                        status_code: None,
+                        message: if metadata.is_empty() {
+                            "MySQL validation completed".to_string()
+                        } else {
+                            metadata.join(", ")
+                        },
+                    },
+                    Err(e) => DirectValidationResult {
+                        rule_id: String::new(),
+                        rule_name: String::new(),
+                        is_valid: false,
+                        status_code: None,
+                        message: format!("MySQL validation error: {}", e),
+                    },
+                }
+            }
+
+            Validation::Postgres => {
+                // Postgres expects a connection URL as the secret
+                match validate_postgres(&secret).await {
+                    Ok((is_valid, metadata)) => DirectValidationResult {
+                        rule_id: String::new(),
+                        rule_name: String::new(),
+                        is_valid,
+                        status_code: None,
+                        message: if metadata.is_empty() {
+                            "Postgres validation completed".to_string()
+                        } else {
+                            metadata.join(", ")
+                        },
+                    },
+                    Err(e) => DirectValidationResult {
+                        rule_id: String::new(),
+                        rule_name: String::new(),
+                        is_valid: false,
+                        status_code: None,
+                        message: format!("Postgres validation error: {}", e),
+                    },
+                }
+            }
+
+            Validation::Jdbc => {
+                // JDBC expects a JDBC connection string as the secret
+                match validate_jdbc(&secret).await {
+                    Ok(outcome) => DirectValidationResult {
+                        rule_id: String::new(),
+                        rule_name: String::new(),
+                        is_valid: outcome.valid,
+                        status_code: Some(outcome.status.as_u16()),
+                        message: outcome.message,
+                    },
+                    Err(e) => DirectValidationResult {
+                        rule_id: String::new(),
+                        rule_name: String::new(),
+                        is_valid: false,
+                        status_code: None,
+                        message: format!("JDBC validation error: {}", e),
+                    },
+                }
+            }
+
+            Validation::JWT => {
+                // JWT expects a JWT token as the secret
+                match validate_jwt(&secret).await {
+                    Ok((is_valid, message)) => DirectValidationResult {
+                        rule_id: String::new(),
+                        rule_name: String::new(),
+                        is_valid,
+                        status_code: None,
+                        message,
+                    },
+                    Err(e) => DirectValidationResult {
+                        rule_id: String::new(),
+                        rule_name: String::new(),
+                        is_valid: false,
+                        status_code: None,
+                        message: format!("JWT validation error: {}", e),
+                    },
+                }
+            }
+
+            Validation::AzureStorage => {
+                // Azure Storage expects JSON with storage_account and storage_key
+                // Or use --var STORAGE_ACCOUNT=xxx and pass the storage key as the secret
+                let azure_json = if secret.starts_with('{') {
+                    // Secret is treated as ready-made JSON because it starts with '{'
+                    secret.clone()
+                } else {
+                    // Build JSON from variables
+                    let storage_account = get_global_var(&globals, "STORAGE_ACCOUNT")
+                    .ok_or_else(|| anyhow!(
+                        "Azure Storage validation requires either JSON input or --var STORAGE_ACCOUNT=<account_name> <storage_key>"
+                    ))?;
+                    serde_json::json!({
+                        "storage_account": storage_account,
+                        "storage_key": secret
+                    })
+                    .to_string()
+                };
+
+                let cache: Arc<SkipMap<String, crate::validation::CachedResponse>> =
+                    Arc::new(SkipMap::new()); // fresh, empty cache created for this call only
+                match validate_azure_storage_credentials(&azure_json, &cache).await {
+                    Ok((is_valid, body)) => DirectValidationResult {
+                        rule_id: String::new(),
+                        rule_name: String::new(),
+                        is_valid,
+                        status_code: None,
+                        message: validation_body::clone_as_string(&body),
+                    },
+                    Err(e) => DirectValidationResult {
+                        rule_id: String::new(),
+                        rule_name: String::new(),
+                        is_valid: false,
+                        status_code: None,
+                        message: format!("Azure Storage validation error: {}", e),
+                    },
+                }
+            }
+
+            Validation::Coinbase => {
+                // Coinbase needs credential name (CRED_NAME, or KEY_ID as a fallback) and private key PEM
+                let cred_name = get_global_var(&globals, "CRED_NAME")
+                .or_else(|| get_global_var(&globals, "KEY_ID"))
+                .ok_or_else(|| anyhow!(
+                    "Coinbase validation requires CRED_NAME variable. Use: --var CRED_NAME=<key_id> <private_key_pem>"
+                ))?;
+
+                let cache: Arc<SkipMap<String, crate::validation::CachedResponse>> =
+                    Arc::new(SkipMap::new()); // fresh, empty cache created for this call only
+                match validate_cdp_api_key(&cred_name, &secret, &client, &parser, &cache).await {
+                    Ok((is_valid, body)) => DirectValidationResult {
+                        rule_id: String::new(),
+                        rule_name: String::new(),
+                        is_valid,
+                        status_code: None,
+                        message: validation_body::clone_as_string(&body),
+                    },
+                    Err(e) => DirectValidationResult {
+                        rule_id: String::new(),
+                        rule_name: String::new(),
+                        is_valid: false,
+                        status_code: None,
+                        message: format!("Coinbase validation error: {}", e),
+                    },
+                }
+            }
+
+            Validation::Raw(_) => DirectValidationResult {
+                rule_id: String::new(),
+                rule_name: String::new(),
+                is_valid: false,
+                status_code: None,
+                message: "Raw validation type is not supported via direct validation.".to_string(),
+            },
+        };
+
+        result.rule_id = rule_id; // every arm above leaves the identifiers empty; they are filled in once here
+        result.rule_name = rule_name;
+        results.push(result);
+    }
+
+    if results.is_empty() { // no rule produced a result (none matched, or every match was skipped)
+        bail!(
+            "No rules with validation found matching '{}'. \
+             Use `kingfisher rules list` to see available rules.",
+            args.rule
+        );
+    }
+
+    Ok(results)
+}
+
+/// Build a `ScanArgs` filled with fixed placeholder values; `run_direct_validation` passes it to `RuleLoader::load`.
+fn create_minimal_scan_args() -> crate::cli::commands::scan::ScanArgs {
+    use crate::cli::commands::{
+        azure::AzureRepoType,
+        bitbucket::BitbucketAuthArgs,
+        bitbucket::BitbucketRepoType,
+        gitea::GiteaRepoType,
+        github::{GitCloneMode, GitHistoryMode, GitHubRepoType},
+        gitlab::GitLabRepoType,
+        inputs::{ContentFilteringArgs, InputSpecifierArgs},
+        output::{OutputArgs, ReportOutputFormat},
+        rules::RuleSpecifierArgs,
+        scan::{ConfidenceLevel, ScanArgs},
+    };
+    use url::Url;
+
+    ScanArgs {
+        num_jobs: 1,
+        rules: RuleSpecifierArgs {
+            rules_path: Vec::new(),
+            rule: vec!["all".into()], // presumably selects every rule; the caller narrows by selector afterwards — confirm
+            load_builtins: true, // NOTE(review): fixed here, while the caller sets the loader's builtins flag from `args.no_builtins` — confirm which applies
+        },
+        input_specifier_args: InputSpecifierArgs {
+            path_inputs: Vec::new(),
+            git_url: Vec::new(),
+            git_clone_dir: None,
+            keep_clones: false,
+            repo_clone_limit: None,
+            include_contributors: false,
+            github_user: Vec::new(),
+            github_organization: Vec::new(),
+            github_exclude: Vec::new(),
+            all_github_organizations: false,
+            github_api_url: Url::parse("https://api.github.com/").unwrap(), // constant, well-formed literal: this and the unwraps below are not expected to fail
+            github_repo_type: GitHubRepoType::Source,
+            gitlab_user: Vec::new(),
+            gitlab_group: Vec::new(),
+            gitlab_exclude: Vec::new(),
+            all_gitlab_groups: false,
+            gitlab_api_url: Url::parse("https://gitlab.com/").unwrap(),
+            gitlab_repo_type: GitLabRepoType::All,
+            gitlab_include_subgroups: false,
+            huggingface_user: Vec::new(),
+            huggingface_organization: Vec::new(),
+            huggingface_model: Vec::new(),
+            huggingface_dataset: Vec::new(),
+            huggingface_space: Vec::new(),
+            huggingface_exclude: Vec::new(),
+            gitea_user: Vec::new(),
+            gitea_organization: Vec::new(),
+            gitea_exclude: Vec::new(),
+            all_gitea_organizations: false,
+            gitea_api_url: Url::parse("https://gitea.com/api/v1/").unwrap(),
+            gitea_repo_type: GiteaRepoType::Source,
+            bitbucket_user: Vec::new(),
+            bitbucket_workspace: Vec::new(),
+            bitbucket_project: Vec::new(),
+            bitbucket_exclude: Vec::new(),
+            all_bitbucket_workspaces: false,
+            bitbucket_api_url: Url::parse("https://api.bitbucket.org/2.0/").unwrap(),
+            bitbucket_repo_type: BitbucketRepoType::Source,
+            bitbucket_auth: BitbucketAuthArgs::default(),
+            azure_organization: Vec::new(),
+            azure_project: Vec::new(),
+            azure_exclude: Vec::new(),
+            all_azure_projects: false,
+            azure_base_url: Url::parse("https://dev.azure.com/").unwrap(),
+            azure_repo_type: AzureRepoType::Source,
+            jira_url: None,
+            jql: None,
+            confluence_url: None,
+            cql: None,
+            max_results: 100,
+            s3_bucket: None,
+            s3_prefix: None,
+            role_arn: None,
+            aws_local_profile: None,
+            gcs_bucket: None,
+            gcs_prefix: None,
+            gcs_service_account: None,
+            slack_query: None,
+            slack_api_url: Url::parse("https://slack.com/api/").unwrap(),
+            docker_image: Vec::new(),
+            git_clone: GitCloneMode::Bare,
+            git_history: GitHistoryMode::Full,
+            commit_metadata: true,
+            repo_artifacts: false,
+            scan_nested_repos: true,
+            since_commit: None,
+            branch: None,
+            branch_root: false,
+            branch_root_commit: None,
+            staged: false,
+        },
+        extra_ignore_comments: Vec::new(),
+        content_filtering_args: ContentFilteringArgs {
+            max_file_size_mb: 25.0,
+            no_extract_archives: true,
+            extraction_depth: 2,
+            exclude: Vec::new(),
+            no_binary: true,
+        },
+        confidence: ConfidenceLevel::Low, // Load all rules regardless of confidence
+        no_validate: true,
+        access_map: false,
+        rule_stats: false,
+        only_valid: false,
+        min_entropy: None,
+        redact: false,
+        git_repo_timeout: 1800,
+        no_dedup: false,
+        view_report: false,
+        baseline_file: None,
+        manage_baseline: false,
+        skip_regex: Vec::new(),
+        skip_word: Vec::new(),
+        skip_aws_account: Vec::new(),
+        skip_aws_account_file: None,
+        output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
+        no_base64: false,
+        no_inline_ignore: false,
+        no_ignore_if_contains: false,
+        validation_timeout: 10,
+        validation_retries: 1,
+    }
+}
+
+/// Write validation results to stdout, as JSON when `format` is `"json"` and as text otherwise.
+///
+/// JSON output is a single object for exactly one result and an array for any other count.
+/// Text output prints one block per result, separated by a blank line; `use_color` switches the
+/// verdict to an ANSI-colored form that also carries a check or cross mark.
+pub fn print_results(results: &[DirectValidationResult], format: &str, use_color: bool) {
+    if format == "json" {
+        // A lone result is emitted as a bare object rather than a one-element array.
+        let rendered = match results {
+            [only] => serde_json::to_string_pretty(only).unwrap(),
+            _ => serde_json::to_string_pretty(results).unwrap(),
+        };
+        println!("{}", rendered);
+        return;
+    }
+
+    // Any other `format` value falls back to the text layout.
+    for (index, entry) in results.iter().enumerate() {
+        if index > 0 {
+            println!(); // Separator between results
+        }
+
+        // Colored verdicts carry the mark; plain verdicts are the bare word.
+        let verdict = match (entry.is_valid, use_color) {
+            (true, true) => "\x1b[32m✓ VALID\x1b[0m",
+            (true, false) => "VALID",
+            (false, true) => "\x1b[31m✗ INVALID\x1b[0m",
+            (false, false) => "INVALID",
+        };
+
+        println!("Rule:     {} ({})", entry.rule_name, entry.rule_id);
+        println!("Result:   {}", verdict);
+        if let Some(code) = entry.status_code {
+            println!("Status:   {}", code);
+        }
+        if !entry.message.is_empty() {
+            println!("Response: {}", entry.message);
+        }
+    }
+}
+
+/// Report whether at least one entry in `results` has `is_valid` set (`false` for an empty slice).
+pub fn any_valid(results: &[DirectValidationResult]) -> bool {
+    results.iter().map(|outcome| outcome.is_valid).any(|valid| valid)
+}
diff --git a/src/entropy.rs b/src/entropy.rs
index 0c5e146..41162db 100644
--- a/src/entropy.rs
+++ b/src/entropy.rs
@@ -1,51 +1,5 @@
-pub fn calculate_shannon_entropy(bytes: &[u8]) -> f32 {
-    if bytes.is_empty() {
-        return 0.0;
-    }
-    // Fixed array for counting occurrences of each byte (0-255)
-    let mut counts = [0u32; 256];
-    for &byte in bytes {
-        counts[byte as usize] += 1;
-    }
-    let total_bytes = bytes.len() as f32;
-    // Sum entropy contribution for each byte that appears at least once.
-    counts.iter().filter(|&&count| count > 0).fold(0.0, |entropy, &count| {
-        let probability = count as f32 / total_bytes;
-        entropy - probability * probability.log2()
-    })
-}
-#[cfg(test)]
-mod tests {
-    use super::*;
+//! Shannon entropy calculation.
+//!
+//! This module re-exports from [`kingfisher_core::entropy`].
 
-    #[test]
-    fn test_entropy_calculation() {
-        // Empty input should return 0.0
-        let entropy = calculate_shannon_entropy(&[]);
-        assert_eq!(entropy, 0.0);
-        assert!(entropy.is_finite());
-
-        // Single repeated byte should return 0.0
-        let entropy = calculate_shannon_entropy(&[65, 65, 65, 65]);
-        assert_eq!(entropy, 0.0);
-        assert!(entropy.is_finite());
-
-        // Even distribution of two bytes should be exactly 1.0
-        let input = &[1, 2, 1, 2];
-        let entropy = calculate_shannon_entropy(input);
-        assert!((entropy - 1.0).abs() < 0.0001);
-        assert!(entropy.is_finite());
-
-        // Real password example should have mid-range entropy
-        let password = "Password123!".as_bytes();
-        let entropy = calculate_shannon_entropy(password);
-        assert!(entropy > 2.5);
-        assert!(entropy.is_finite());
-
-        // Random string should have high entropy
-        let random = "j2k#9K$mL*p&vN3".as_bytes();
-        let entropy = calculate_shannon_entropy(random);
-        assert!(entropy > 3.5);
-        assert!(entropy.is_finite());
-    }
-}
+pub use kingfisher_core::entropy::calculate_shannon_entropy;
diff --git a/src/git_commit_metadata.rs b/src/git_commit_metadata.rs
index db519cb..c4a046c 100644
--- a/src/git_commit_metadata.rs
+++ b/src/git_commit_metadata.rs
@@ -1,139 +1,5 @@
-use gix::{date::Time, ObjectId};
-use schemars::JsonSchema;
-use serde::{Deserialize, Serialize};
+//! Git commit metadata types.
+//!
+//! This module re-exports from [`kingfisher_core::git_commit_metadata`].
 
-#[repr(transparent)]
-#[derive(Serialize, Deserialize, Copy, Clone)]
-#[serde(remote = "Time")]
-struct TextTime(
-    #[serde(
-        getter = "text_time::getter",
-        serialize_with = "text_time::serialize",
-        deserialize_with = "text_time::deserialize"
-    )]
-    Time,
-);
-impl From<TextTime> for Time {
-    fn from(v: TextTime) -> Self {
-        v.0
-    }
-}
-impl From<Time> for TextTime {
-    fn from(v: Time) -> Self {
-        Self(v)
-    }
-}
-mod text_time {
-    use super::*;
-
-    #[inline]
-    pub fn getter(v: &Time) -> &Time {
-        v
-    }
-
-    #[inline]
-    pub fn serialize<S: serde::Serializer>(v: &Time, serializer: S) -> Result<S::Ok, S::Error> {
-        serializer.collect_str(v)
-    }
-
-    pub fn deserialize<'de, D: serde::Deserializer<'de>>(d: D) -> Result<Time, D::Error> {
-        struct Vis;
-        impl<'a> serde::de::Visitor<'a> for Vis {
-            type Value = Time;
-            fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
-                formatter.write_str("a string representing a Git timestamp")
-            }
-            fn visit_str<E: serde::de::Error>(self, v: &str) -> Result<Self::Value, E> {
-                gix::date::parse(v, None).map_err(E::custom)
-            }
-        }
-        d.deserialize_str(Vis)
-    }
-}
-impl JsonSchema for TextTime {
-    fn schema_name() -> String {
-        "Time".into()
-    }
-
-    fn json_schema(gen: &mut schemars::gen::SchemaGenerator) -> schemars::schema::Schema {
-        String::json_schema(gen)
-    }
-}
-
-#[repr(transparent)]
-#[derive(Serialize, Deserialize, Copy, Clone)]
-#[serde(remote = "ObjectId")]
-struct HexObjectId(
-    #[serde(
-        getter = "hex_object_id::getter",
-        serialize_with = "hex_object_id::serialize",
-        deserialize_with = "hex_object_id::deserialize"
-    )]
-    ObjectId,
-);
-impl From<ObjectId> for HexObjectId {
-    fn from(v: ObjectId) -> Self {
-        HexObjectId(v)
-    }
-}
-impl From<HexObjectId> for ObjectId {
-    fn from(v: HexObjectId) -> Self {
-        v.0
-    }
-}
-mod hex_object_id {
-    use super::*;
-
-    #[inline]
-    pub fn getter(v: &ObjectId) -> &ObjectId {
-        v
-    }
-
-    // Use `collect_str` to avoid intermediate string allocations:
-    #[inline]
-    pub fn serialize<S: serde::Serializer>(v: &ObjectId, serializer: S) -> Result<S::Ok, S::Error> {
-        serializer.collect_str(&v.to_hex())
-    }
-
-    pub fn deserialize<'de, D: serde::Deserializer<'de>>(d: D) -> Result<ObjectId, D::Error> {
-        struct Vis;
-        impl<'a> serde::de::Visitor<'a> for Vis {
-            type Value = ObjectId;
-            fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
-                formatter.write_str("a 40-character hex string representing a Git object ID")
-            }
-            fn visit_str<E: serde::de::Error>(self, v: &str) -> Result<Self::Value, E> {
-                ObjectId::from_hex(v.as_bytes()).map_err(E::custom)
-            }
-        }
-        d.deserialize_str(Vis)
-    }
-}
-impl JsonSchema for HexObjectId {
-    fn schema_name() -> String {
-        "ObjectId".into()
-    }
-
-    fn json_schema(gen: &mut schemars::gen::SchemaGenerator) -> schemars::schema::Schema {
-        let s = String::json_schema(gen);
-        let mut o = s.into_object();
-        o.string().pattern = Some("[0-9a-f]{40}".into());
-        let md = o.metadata();
-        md.description = Some("A hex-encoded object ID as computed by Git".into());
-        schemars::schema::Schema::Object(o)
-    }
-}
-
-/// Metadata about a Git commit.
-#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize, JsonSchema)]
-pub struct CommitMetadata {
-    #[serde(with = "HexObjectId")]
-    pub commit_id: ObjectId,
-
-    pub committer_name: String,
-
-    pub committer_email: String,
-
-    #[serde(with = "TextTime")]
-    pub committer_timestamp: Time,
-}
+pub use kingfisher_core::git_commit_metadata::CommitMetadata;
diff --git a/src/lib.rs b/src/lib.rs
index a7b3d39..21e8424 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,3 +1,8 @@
+// Re-export library crates for easier access
+pub use kingfisher_core;
+pub use kingfisher_rules;
+pub use kingfisher_scanner;
+
 pub mod access_map;
 pub mod azure;
 pub mod baseline;
@@ -11,6 +16,7 @@ pub mod confluence;
 pub mod content_type;
 pub mod decompress;
 pub mod defaults;
+pub mod direct_validate;
 pub mod entropy;
 pub mod finding_data;
 pub mod findings_store;
diff --git a/src/liquid_filters.rs b/src/liquid_filters.rs
index d334360..ff79a3f 100644
--- a/src/liquid_filters.rs
+++ b/src/liquid_filters.rs
@@ -1,1156 +1,5 @@
-//! Collection of small Liquid filters that make HTTP validations & API-signing templates easy
+//! Liquid template filters for validation templates.
+//!
+//! This module re-exports from [`kingfisher_rules::liquid_filters`].
 
-use base64::{engine::general_purpose, Engine};
-use crc32fast::Hasher;
-use hmac::{Hmac, Mac};
-use liquid_core::{
-    Display_filter, Error as LiquidError, Expression, Filter, FilterParameters, FilterReflection,
-    FromFilterParameters, ParseFilter, Result, Runtime, Value, ValueView,
-};
-
-use percent_encoding::{utf8_percent_encode, NON_ALPHANUMERIC};
-use rand::{distr::Alphanumeric, Rng};
-use sha1::Sha1;
-use sha2::{Digest, Sha256, Sha384};
-use time::{format_description::well_known::Iso8601, OffsetDateTime};
-use uuid::Uuid;
-
-// -----------------------------------------------------------------------------
-// Helper macro – keeps most filters <10 lines long
-// -----------------------------------------------------------------------------
-// -- filters.rs (or wherever the macro lives) -------------------------------
-macro_rules! static_filter {
-    // ── original, zero-arg variant ────────────────────────────────
-    (
-        $(#[$outer:meta])*
-        $name:ident, $display:literal, $body:expr
-    ) => {
-        $(#[$outer])*
-        #[derive(Debug, Clone, FilterReflection, ParseFilter, Default)]
-        #[filter(name = $display, description = $display, parsed($name))]
-        pub struct $name;
-
-        impl std::fmt::Display for $name {
-            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-                write!(f, $display)
-            }
-        }
-        impl Filter for $name {
-            fn evaluate(
-                &self,
-                input: &dyn ValueView,
-                _runtime: &dyn Runtime,
-            ) -> Result<Value, LiquidError> {
-                Ok(Value::scalar($body(input)))
-            }
-        }
-    };
-
-    // -- NEW, second arm of the macro (add Default) ----------------------------
-(
-    $(#[$outer:meta])*
-    $name:ident { $( $(#[$f_meta:meta])* $field:ident : $ty:ty ),+ $(,)? },
-    $display:literal,
-    $body:expr
-) => {
-    $(#[$outer])*
-    #[derive(Debug, Clone, Default, FilterReflection, ParseFilter)]   // ← added Default
-    #[filter(name = $display, description = $display, parsed($name))]
-    pub struct $name { $( $(#[$f_meta])* pub $field : $ty ),+ }
-
-    impl std::fmt::Display for $name {
-        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-            write!(f, $display)
-        }
-    }
-    impl Filter for $name {
-        fn evaluate(
-            &self,
-            input: &dyn ValueView,
-            _runtime: &dyn Runtime,
-        ) -> Result<Value, LiquidError> {
-            Ok(Value::scalar($body(self, input)))
-        }
-    }
-};
-}
-
-#[derive(Debug, FilterParameters)]
-struct ReplaceArgs {
-    #[parameter(description = "The substring to search for.", arg_type = "str")]
-    from: Expression,
-    #[parameter(description = "The string to replace it with.", arg_type = "str")]
-    to: Expression,
-}
-
-#[derive(Clone, ParseFilter, FilterReflection, Default)]
-#[filter(
-    name = "replace",
-    description = "Replaces every occurrence of a substring with another.",
-    parameters(ReplaceArgs),
-    parsed(ReplaceFilter)
-)]
-pub struct Replace;
-
-#[derive(Debug, FromFilterParameters, Display_filter)]
-#[name = "replace"]
-struct ReplaceFilter {
-    #[parameters]
-    args: ReplaceArgs,
-}
-
-impl Filter for ReplaceFilter {
-    fn evaluate(&self, input: &dyn ValueView, runtime: &dyn Runtime) -> Result<Value> {
-        let args = self.args.evaluate(runtime)?;
-        let from = args.from.to_kstr();
-        let to = args.to.to_kstr();
-        let input_str = input.to_kstr();
-        Ok(Value::scalar(input_str.replace(from.as_str(), to.as_str())))
-    }
-}
-
-#[derive(Debug, FilterParameters)]
-struct LstripCharsArgs {
-    #[parameter(
-        description = "Characters to remove from the start of the input.",
-        arg_type = "str"
-    )]
-    chars: Expression,
-}
-
-#[derive(Clone, ParseFilter, FilterReflection, Default)]
-#[filter(
-    name = "lstrip_chars",
-    description = "Removes the provided characters from the beginning of the string.",
-    parameters(LstripCharsArgs),
-    parsed(LstripCharsFilter)
-)]
-pub struct LstripChars;
-
-#[derive(Debug, FromFilterParameters, Display_filter)]
-#[name = "lstrip_chars"]
-struct LstripCharsFilter {
-    #[parameters]
-    args: LstripCharsArgs,
-}
-
-impl Filter for LstripCharsFilter {
-    fn evaluate(&self, input: &dyn ValueView, runtime: &dyn Runtime) -> Result<Value> {
-        let args = self.args.evaluate(runtime)?;
-        let chars = args.chars.to_string();
-        let input_str = input.to_kstr();
-        let trimmed = input_str.trim_start_matches(|c| chars.contains(c)).to_string();
-        Ok(Value::scalar(trimmed))
-    }
-}
-
-// ── HMAC args ─────────────────────────────────────
-#[derive(Debug, FilterParameters)]
-struct HmacArgs {
-    #[parameter(description = "HMAC key", arg_type = "str")]
-    key: Expression,
-}
-
-#[derive(Clone, ParseFilter, FilterReflection, Default)]
-#[filter(
-    name = "hmac_sha256",
-    description = "HMAC-SHA256 – returns Base64.",
-    parameters(HmacArgs),
-    parsed(HmacSha256Filter)
-)]
-pub struct HmacSha256;
-
-#[derive(Debug, FromFilterParameters, Display_filter)]
-#[name = "hmac_sha256"]
-struct HmacSha256Filter {
-    #[parameters]
-    args: HmacArgs,
-}
-
-impl Filter for HmacSha256Filter {
-    fn evaluate(&self, input: &dyn ValueView, runtime: &dyn Runtime) -> Result<Value> {
-        // Evaluate the arguments first…
-        let args = self.args.evaluate(runtime)?;
-        let key = args.key.to_kstr(); // evaluated to literal/variable value
-
-        // …then do the cryptography.
-        let mut mac = Hmac::<Sha256>::new_from_slice(key.as_bytes()).unwrap();
-        mac.update(input.to_kstr().as_bytes());
-        Ok(Value::scalar(
-            base64::engine::general_purpose::STANDARD.encode(mac.finalize().into_bytes()),
-        ))
-    }
-}
-
-// ── HMAC-SHA1 ─────────────────────────────────────────────
-#[derive(Debug, FilterParameters)]
-struct HmacSha1Args {
-    #[parameter(description = "HMAC key", arg_type = "str")]
-    key: Expression,
-}
-
-#[derive(Clone, ParseFilter, FilterReflection, Default)]
-#[filter(
-    name = "hmac_sha1",
-    description = "HMAC-SHA1 – returns Base64.",
-    parameters(HmacSha1Args),
-    parsed(HmacSha1Filter)
-)]
-pub struct HmacSha1;
-
-#[derive(Debug, FromFilterParameters, Display_filter)]
-#[name = "hmac_sha1"]
-struct HmacSha1Filter {
-    #[parameters]
-    args: HmacSha1Args,
-}
-
-impl Filter for HmacSha1Filter {
-    fn evaluate(&self, input: &dyn ValueView, runtime: &dyn Runtime) -> Result<Value> {
-        // Evaluate the arguments first…
-        let args = self.args.evaluate(runtime)?;
-        let key = args.key.to_kstr();
-
-        // …then do the cryptography.
-        let mut mac = Hmac::<Sha1>::new_from_slice(key.as_bytes()).unwrap();
-        mac.update(input.to_kstr().as_bytes());
-        Ok(Value::scalar(
-            base64::engine::general_purpose::STANDARD.encode(mac.finalize().into_bytes()),
-        ))
-    }
-}
-
-// ── HMAC-SHA384 ─────────────────────────────────────────────
-#[derive(Debug, FilterParameters)]
-struct Hmac384Args {
-    #[parameter(description = "HMAC key", arg_type = "str")]
-    key: Expression,
-}
-
-#[derive(Clone, ParseFilter, FilterReflection, Default)]
-#[filter(
-    name = "hmac_sha384",
-    description = "HMAC-SHA384 – returns Base64.",
-    parameters(Hmac384Args),
-    parsed(HmacSha384Filter)
-)]
-pub struct HmacSha384;
-
-#[derive(Debug, FromFilterParameters, Display_filter)]
-#[name = "hmac_sha384"]
-struct HmacSha384Filter {
-    #[parameters]
-    args: Hmac384Args,
-}
-
-impl Filter for HmacSha384Filter {
-    fn evaluate(&self, input: &dyn ValueView, runtime: &dyn Runtime) -> Result<Value> {
-        // Evaluate the arguments first…
-        let args = self.args.evaluate(runtime)?;
-        let key = args.key.to_kstr(); // evaluated to literal/variable value
-
-        // …then do the cryptography.
-        let mut mac = Hmac::<Sha384>::new_from_slice(key.as_bytes()).unwrap();
-        mac.update(input.to_kstr().as_bytes());
-        Ok(Value::scalar(
-            base64::engine::general_purpose::STANDARD.encode(mac.finalize().into_bytes()),
-        ))
-    }
-}
-
-// ── random_string ────────────────────────────────
-#[derive(Debug, FilterParameters)]
-struct RandomStringArgs {
-    #[parameter(description = "Desired output length", arg_type = "integer")]
-    len: Option<Expression>,
-}
-
-#[derive(Clone, ParseFilter, FilterReflection, Default)]
-#[filter(
-    name = "random_string",
-    description = "Random alphanumeric string (default 32 chars).",
-    parameters(RandomStringArgs),
-    parsed(RandomString)
-)]
-pub struct RandomStringFilter;
-
-#[derive(Debug, FromFilterParameters, Display_filter)]
-#[name = "random_string"]
-struct RandomString {
-    #[parameters]
-    args: RandomStringArgs,
-}
-
-impl Filter for RandomString {
-    fn evaluate(&self, input: &dyn ValueView, runtime: &dyn Runtime) -> Result<Value> {
-        let args = self.args.evaluate(runtime)?;
-        let n = args
-            .len
-            .and_then(|value| {
-                let scalar = Value::scalar(value);
-                value_to_usize(&scalar)
-            })
-            .or_else(|| input.to_kstr().parse().ok())
-            .unwrap_or(32);
-
-        let value: String =
-            rand::rng().sample_iter(&Alphanumeric).take(n).map(char::from).collect();
-
-        Ok(Value::scalar(value))
-    }
-}
-
-#[derive(Debug, FilterParameters)]
-struct SuffixArgs {
-    #[parameter(description = "Number of trailing characters to keep", arg_type = "integer")]
-    len: Option<Expression>,
-}
-
-#[derive(Clone, ParseFilter, FilterReflection, Default)]
-#[filter(
-    name = "suffix",
-    description = "Return the suffix (last N characters) of the provided string.",
-    parameters(SuffixArgs),
-    parsed(Suffix)
-)]
-pub struct SuffixFilter;
-
-#[derive(Debug, FromFilterParameters, Display_filter)]
-#[name = "suffix"]
-struct Suffix {
-    #[parameters]
-    args: SuffixArgs,
-}
-
-impl Filter for Suffix {
-    fn evaluate(&self, input: &dyn ValueView, runtime: &dyn Runtime) -> Result<Value> {
-        let args = self.args.evaluate(runtime)?;
-        let text = input.to_kstr();
-        let requested = args
-            .len
-            .and_then(|value| {
-                let scalar = Value::scalar(value);
-                value_to_usize(&scalar)
-            })
-            .unwrap_or_else(|| text.len());
-        if requested == 0 {
-            return Ok(Value::scalar(String::new()));
-        }
-
-        let mut chars: Vec<char> = text.chars().collect();
-        let keep = requested.min(chars.len());
-        chars.drain(0..chars.len().saturating_sub(keep));
-        Ok(Value::scalar(chars.into_iter().collect::<String>()))
-    }
-}
-
-#[derive(Debug, FilterParameters)]
-struct PrefixArgs {
-    #[parameter(description = "Number of leading characters to keep", arg_type = "integer")]
-    len: Option<Expression>,
-}
-
-#[derive(Clone, ParseFilter, FilterReflection, Default)]
-#[filter(
-    name = "prefix",
-    description = "Return the prefix (first N characters) of the provided string.",
-    parameters(PrefixArgs),
-    parsed(Prefix)
-)]
-pub struct PrefixFilter;
-
-#[derive(Debug, FromFilterParameters, Display_filter)]
-#[name = "prefix"]
-struct Prefix {
-    #[parameters]
-    args: PrefixArgs,
-}
-
-impl Filter for Prefix {
-    fn evaluate(&self, input: &dyn ValueView, runtime: &dyn Runtime) -> Result<Value> {
-        let args = self.args.evaluate(runtime)?;
-        let text = input.to_kstr();
-        let requested = args
-            .len
-            .and_then(|value| {
-                let scalar = Value::scalar(value);
-                value_to_usize(&scalar)
-            })
-            .unwrap_or_else(|| text.len());
-        if requested == 0 {
-            return Ok(Value::scalar(String::new()));
-        }
-
-        let mut chars: Vec<char> = text.chars().collect();
-        chars.truncate(requested.min(chars.len()));
-        Ok(Value::scalar(chars.into_iter().collect::<String>()))
-    }
-}
-
-#[derive(Debug, Clone, Default, FilterReflection, ParseFilter)]
-#[filter(
-    name = "b64enc",
-    description = "Encodes the input string using Base64 encoding",
-    parsed(B64EncFilter)
-)]
-// pub struct B64EncFilterParser;
-
-pub struct B64EncFilter;
-
-impl std::fmt::Display for B64EncFilter {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "b64enc")
-    }
-}
-
-impl Filter for B64EncFilter {
-    fn evaluate(
-        &self,
-        input: &dyn ValueView,
-        _runtime: &dyn Runtime,
-    ) -> Result<Value, LiquidError> {
-        let input_str = input.to_kstr().into_owned();
-        let encoded = general_purpose::STANDARD.encode(input_str.as_bytes());
-        Ok(Value::scalar(encoded))
-    }
-}
-
-#[derive(Debug, Clone, Default, FilterReflection, ParseFilter)]
-#[filter(name = "b64dec", description = "Decodes a Base64 string", parsed(B64DecFilter))]
-pub struct B64DecFilter;
-
-impl std::fmt::Display for B64DecFilter {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "b64dec")
-    }
-}
-
-impl Filter for B64DecFilter {
-    fn evaluate(
-        &self,
-        input: &dyn ValueView,
-        _runtime: &dyn Runtime,
-    ) -> Result<Value, LiquidError> {
-        let input_str = input.to_kstr();
-        match general_purpose::STANDARD.decode(input_str.as_bytes()) {
-            Ok(bytes) => Ok(Value::scalar(String::from_utf8_lossy(&bytes).to_string())),
-            Err(e) => Err(LiquidError::with_msg(e.to_string())),
-        }
-    }
-}
-
-// -----------------------------------------------------------------------------
-// Authentication & Security
-// -----------------------------------------------------------------------------
-
-// {{ value | sha256 }} -- hex digest
-static_filter!(
-    /// SHA-256 hex digest.
-    Sha256Filter, "sha256",
-    |input: &dyn ValueView| -> String {
-        let mut h = Sha256::new();
-        h.update(input.to_kstr().as_bytes());
-        format!("{:x}", h.finalize())
-    }
-);
-
-static_filter!(
-    /// Compute the CRC32 of the input and return it as a decimal number.
-    Crc32Filter,
-    "crc32",
-    |input: &dyn ValueView| -> i64 {
-        let mut hasher = Hasher::new();
-        hasher.update(input.to_kstr().as_bytes());
-        i64::from(hasher.finalize())
-    }
-);
-
-#[derive(Debug, FilterParameters)]
-struct Crc32DecArgs {
-    #[parameter(
-        description = "Number of trailing decimal digits to return (zero padded)",
-        arg_type = "integer"
-    )]
-    digits: Option<Expression>,
-}
-
-#[derive(Clone, ParseFilter, FilterReflection, Default)]
-#[filter(
-    name = "crc32_dec",
-    description = "Compute the CRC32 and optionally return the last N decimal digits.",
-    parameters(Crc32DecArgs),
-    parsed(Crc32Dec)
-)]
-pub struct Crc32DecFilter;
-
-#[derive(Debug, FromFilterParameters, Display_filter)]
-#[name = "crc32_dec"]
-struct Crc32Dec {
-    #[parameters]
-    args: Crc32DecArgs,
-}
-
-impl Filter for Crc32Dec {
-    fn evaluate(&self, input: &dyn ValueView, runtime: &dyn Runtime) -> Result<Value> {
-        let args = self.args.evaluate(runtime)?;
-        let mut hasher = Hasher::new();
-        hasher.update(input.to_kstr().as_bytes());
-        let checksum = u128::from(hasher.finalize());
-
-        let digits = args
-            .digits
-            .and_then(|value| {
-                let scalar = Value::scalar(value);
-                value_to_usize(&scalar)
-            })
-            .unwrap_or(0);
-
-        if digits == 0 {
-            return Ok(Value::scalar(checksum.to_string()));
-        }
-
-        let clamped_digits = digits.min(38); // 10^38 fits within u128
-        let modulus = 10u128.pow(clamped_digits as u32);
-        let truncated = checksum % modulus;
-        let mut value = truncated.to_string();
-        if clamped_digits > value.len() {
-            let mut padded = String::with_capacity(clamped_digits);
-            for _ in 0..(clamped_digits - value.len()) {
-                padded.push('0');
-            }
-            padded.push_str(&value);
-            value = padded;
-        }
-
-        Ok(Value::scalar(value))
-    }
-}
-
-#[derive(Debug, FilterParameters)]
-struct Crc32HexArgs {
-    #[parameter(
-        description = "Number of trailing hexadecimal digits to return (zero padded)",
-        arg_type = "integer"
-    )]
-    digits: Option<Expression>,
-}
-
-#[derive(Clone, ParseFilter, FilterReflection, Default)]
-#[filter(
-    name = "crc32_hex",
-    description = "Compute the CRC32 and optionally return the last N hexadecimal digits.",
-    parameters(Crc32HexArgs),
-    parsed(Crc32Hex)
-)]
-pub struct Crc32HexFilter;
-
-#[derive(Debug, FromFilterParameters, Display_filter)]
-#[name = "crc32_hex"]
-struct Crc32Hex {
-    #[parameters]
-    args: Crc32HexArgs,
-}
-
-impl Filter for Crc32Hex {
-    fn evaluate(&self, input: &dyn ValueView, runtime: &dyn Runtime) -> Result<Value> {
-        let args = self.args.evaluate(runtime)?;
-        let mut hasher = Hasher::new();
-        hasher.update(input.to_kstr().as_bytes());
-        let checksum = hasher.finalize();
-        let mut hex = format!("{checksum:08x}");
-
-        let digits = args
-            .digits
-            .and_then(|value| {
-                let scalar = Value::scalar(value);
-                value_to_usize(&scalar)
-            })
-            .unwrap_or(0);
-
-        if digits == 0 {
-            return Ok(Value::scalar(hex));
-        }
-
-        let clamped = digits.min(32);
-        if clamped > hex.len() {
-            let mut padded = String::with_capacity(clamped);
-            for _ in 0..(clamped - hex.len()) {
-                padded.push('0');
-            }
-            padded.push_str(&hex);
-            hex = padded;
-        } else {
-            let start = hex.len() - clamped;
-            hex = hex[start..].to_string();
-        }
-
-        Ok(Value::scalar(hex))
-    }
-}
-
-#[derive(Debug, FilterParameters)]
-struct Crc32LeB64Args {
-    #[parameter(
-        description = "Number of leading characters from the Base64 string to keep",
-        arg_type = "integer"
-    )]
-    len: Option<Expression>,
-}
-
-#[derive(Clone, ParseFilter, FilterReflection, Default)]
-#[filter(
-    name = "crc32_le_b64",
-    description = "Compute the CRC32, encode little-endian bytes as Base64, optionally truncating.",
-    parameters(Crc32LeB64Args),
-    parsed(Crc32LeB64)
-)]
-pub struct Crc32LeB64Filter;
-
-#[derive(Debug, FromFilterParameters, Display_filter)]
-#[name = "crc32_le_b64"]
-struct Crc32LeB64 {
-    #[parameters]
-    args: Crc32LeB64Args,
-}
-
-impl Filter for Crc32LeB64 {
-    fn evaluate(&self, input: &dyn ValueView, runtime: &dyn Runtime) -> Result<Value> {
-        let args = self.args.evaluate(runtime)?;
-        let mut hasher = Hasher::new();
-        hasher.update(input.to_kstr().as_bytes());
-        let checksum = hasher.finalize();
-        let encoded = general_purpose::STANDARD.encode(checksum.to_le_bytes());
-
-        let output = if let Some(len) = args.len.and_then(|value| {
-            let scalar = Value::scalar(value);
-            value_to_usize(&scalar)
-        }) {
-            encoded.chars().take(len).collect::<String>()
-        } else {
-            encoded
-        };
-
-        Ok(Value::scalar(output))
-    }
-}
-
-#[derive(Debug, FilterParameters)]
-struct Base62Args {
-    #[parameter(
-        description = "Pad the encoded value to at least this width",
-        arg_type = "integer"
-    )]
-    width: Option<Expression>,
-}
-
-#[derive(Clone, ParseFilter, FilterReflection, Default)]
-#[filter(
-    name = "base62",
-    description = "Encode the provided integer value using Base62.",
-    parameters(Base62Args),
-    parsed(Base62)
-)]
-pub struct Base62Filter;
-
-#[derive(Debug, FromFilterParameters, Display_filter)]
-#[name = "base62"]
-struct Base62 {
-    #[parameters]
-    args: Base62Args,
-}
-
-impl Filter for Base62 {
-    fn evaluate(&self, input: &dyn ValueView, runtime: &dyn Runtime) -> Result<Value> {
-        let args = self.args.evaluate(runtime)?;
-        let value = input
-            .as_scalar()
-            .and_then(|scalar| {
-                if let Some(int) = scalar.to_integer() {
-                    Some(if int < 0 { 0 } else { int as u64 })
-                } else if let Some(float) = scalar.to_float() {
-                    Some(if float.is_sign_negative() { 0 } else { float.floor() as u64 })
-                } else if let Some(boolean) = scalar.to_bool() {
-                    Some(u64::from(boolean))
-                } else {
-                    scalar.to_kstr().to_string().parse::<u64>().ok()
-                }
-            })
-            .or_else(|| input.to_kstr().to_string().parse::<u64>().ok())
-            .unwrap_or(0);
-
-        let mut encoded = encode_base62(value);
-        if let Some(width) = args.width.and_then(|value| {
-            let scalar = Value::scalar(value);
-            value_to_usize(&scalar)
-        }) {
-            if encoded.len() < width {
-                let mut padded = String::with_capacity(width);
-                for _ in 0..(width - encoded.len()) {
-                    padded.push('0');
-                }
-                padded.push_str(&encoded);
-                encoded = padded;
-            }
-        }
-
-        Ok(Value::scalar(encoded))
-    }
-}
-
-fn encode_base62(mut value: u64) -> String {
-    const ALPHABET: &[u8; 62] = b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
-    if value == 0 {
-        return "0".to_string();
-    }
-    let mut buf = Vec::new();
-    while value > 0 {
-        let rem = (value % 62) as usize;
-        buf.push(ALPHABET[rem] as char);
-        value /= 62;
-    }
-    buf.iter().rev().collect()
-}
-
-fn value_to_usize(value: &Value) -> Option<usize> {
-    let view = value.as_view();
-    view.as_scalar()
-        .and_then(|scalar| {
-            if let Some(int) = scalar.to_integer() {
-                Some(if int < 0 { 0 } else { int as usize })
-            } else if let Some(float) = scalar.to_float() {
-                Some(if float.is_sign_negative() { 0 } else { float.floor() as usize })
-            } else if let Some(boolean) = scalar.to_bool() {
-                Some(if boolean { 1 } else { 0 })
-            } else {
-                scalar.to_kstr().parse::<usize>().ok()
-            }
-        })
-        .or_else(|| view.to_kstr().parse::<usize>().ok())
-}
-
-#[derive(Debug, FilterParameters)]
-struct Base36Args {
-    #[parameter(
-        description = "Pad the encoded value to at least this width",
-        arg_type = "integer"
-    )]
-    width: Option<Expression>,
-}
-
-#[derive(Clone, ParseFilter, FilterReflection, Default)]
-#[filter(
-    name = "base36",
-    description = "Encode the provided integer value using Base36.",
-    parameters(Base36Args),
-    parsed(Base36)
-)]
-pub struct Base36Filter;
-
-#[derive(Debug, FromFilterParameters, Display_filter)]
-#[name = "base36"]
-struct Base36 {
-    #[parameters]
-    args: Base36Args,
-}
-
-impl Filter for Base36 {
-    fn evaluate(&self, input: &dyn ValueView, runtime: &dyn Runtime) -> Result<Value> {
-        let args = self.args.evaluate(runtime)?;
-        let value = input
-            .as_scalar()
-            .and_then(|scalar| {
-                if let Some(int) = scalar.to_integer() {
-                    Some(if int < 0 { 0 } else { int as u64 })
-                } else if let Some(float) = scalar.to_float() {
-                    Some(if float.is_sign_negative() { 0 } else { float.floor() as u64 })
-                } else if let Some(boolean) = scalar.to_bool() {
-                    Some(u64::from(boolean))
-                } else {
-                    scalar.to_kstr().to_string().parse::<u64>().ok()
-                }
-            })
-            .or_else(|| input.to_kstr().to_string().parse::<u64>().ok())
-            .unwrap_or(0);
-
-        let mut encoded = encode_base36(value);
-        if let Some(width) = args.width.and_then(|value| {
-            let scalar = Value::scalar(value);
-            value_to_usize(&scalar)
-        }) {
-            if encoded.len() < width {
-                let mut padded = String::with_capacity(width);
-                for _ in 0..(width - encoded.len()) {
-                    padded.push('0');
-                }
-                padded.push_str(&encoded);
-                encoded = padded;
-            }
-        }
-
-        Ok(Value::scalar(encoded))
-    }
-}
-
-fn encode_base36(mut value: u64) -> String {
-    const ALPHABET: &[u8; 36] = b"0123456789abcdefghijklmnopqrstuvwxyz";
-    if value == 0 {
-        return "0".to_string();
-    }
-    let mut buf = Vec::new();
-    while value > 0 {
-        let rem = (value % 36) as usize;
-        buf.push(ALPHABET[rem] as char);
-        value /= 36;
-    }
-    buf.iter().rev().collect()
-}
-
-// {{ value | b64url_enc }} – URL-safe base64 w/o padding
-static_filter!(
-    /// Base64 URL-safe (no ‘=’ padding).
-    B64UrlEncFilter, "b64url_enc",
-    |input: &dyn ValueView| -> String {
-        general_purpose::URL_SAFE_NO_PAD.encode(input.to_kstr().as_bytes())
-    }
-);
-
-// {{ algo | jwt_header }} – e.g. “HS256” -- Base64URL-encoded header
-static_filter!(
-    /// Generate a minimal JWT header for the given alg.
-    JwtHeaderFilter, "jwt_header",
-    |input: &dyn ValueView| -> String {
-        let alg = input.to_kstr();
-        let json = serde_json::json!({ "typ": "JWT", "alg": alg });
-        general_purpose::URL_SAFE_NO_PAD.encode(json.to_string())
-    }
-);
-
-// -----------------------------------------------------------------------------
-// Data Formatting
-// -----------------------------------------------------------------------------
-
-// {{ value | url_encode }}
-static_filter!(
-    /// Percent-encode for a URL.
-    UrlEncodeFilter, "url_encode",
-    |input: &dyn ValueView| -> String {
-        utf8_percent_encode(&input.to_kstr(), NON_ALPHANUMERIC).to_string()
-    }
-);
-
-// {{ value | json_escape }}
-static_filter!(
-    /// Escape string for JSON contexts.
-    JsonEscapeFilter, "json_escape",
-    |input: &dyn ValueView| -> String {
-        serde_json::to_string(&input.to_kstr().to_string()).unwrap_or_default()
-    }
-);
-
-// {{ "" | unix_timestamp }}
-static_filter!(
-    /// Current Unix epoch seconds.
-    UnixTimestampFilter, "unix_timestamp",
-    |_input: &dyn ValueView| -> i64 {
-        OffsetDateTime::now_utc().unix_timestamp()
-    }
-);
-
-// {{ "" | iso_timestamp_no_frac }}
-static_filter!(
-    /// Current ISO-8601 timestamp (UTC) with no fractional seconds.
-    IsoTimestampNoFracFilter, "iso_timestamp_no_frac",
-    |_input: &dyn ValueView| -> String {
-        let full = OffsetDateTime::now_utc()
-            .format(&Iso8601::DEFAULT)
-            .unwrap_or_else(|_| "1970-01-01T00:00:00Z".into());
-
-        // If there’s a fractional-second part, remove it but keep the trailing ‘Z’.
-        match full.split_once('.') {
-            Some((prefix, _)) => {
-                format!("{prefix}Z")
-            }
-            None => full,
-        }
-    }
-);
-
-// {{ "" | iso_timestamp }}
-static_filter!(
-    /// Current ISO-8601 timestamp (UTC).
-    IsoTimestampFilter, "iso_timestamp",
-    |_input: &dyn ValueView| -> String {
-        OffsetDateTime::now_utc()
-            .format(&Iso8601::DEFAULT)
-            .unwrap_or_else(|_| "1970-01-01T00:00:00Z".into())
-    }
-);
-
-// -----------------------------------------------------------------------------
-// Request Uniqueness
-// -----------------------------------------------------------------------------
-
-// {{ "" | uuid }}
-static_filter!(
-    /// Generate random UUID-v4.
-    UuidFilter, "uuid",
-    |_input: &dyn ValueView| -> String { Uuid::new_v4().to_string() }
-);
-
-pub fn register_all(builder: liquid::ParserBuilder) -> liquid::ParserBuilder {
-    builder
-        // zero-arg helpers
-        .filter(Replace::default())
-        .filter(B64UrlEncFilter::default())
-        .filter(Sha256Filter::default())
-        .filter(UrlEncodeFilter::default())
-        .filter(JsonEscapeFilter::default())
-        .filter(UnixTimestampFilter::default())
-        .filter(IsoTimestampFilter::default())
-        .filter(IsoTimestampNoFracFilter::default())
-        .filter(UuidFilter::default())
-        .filter(JwtHeaderFilter::default())
-        .filter(B64EncFilter::default())
-        .filter(B64DecFilter::default())
-        .filter(RandomStringFilter::default())
-        .filter(SuffixFilter::default())
-        .filter(PrefixFilter::default())
-        .filter(LstripChars::default())
-        .filter(Crc32Filter::default())
-        .filter(Crc32DecFilter::default())
-        .filter(Crc32HexFilter::default())
-        .filter(Crc32LeB64Filter::default())
-        .filter(Base62Filter::default())
-        .filter(Base36Filter::default())
-        .filter(HmacSha256::default())
-        .filter(HmacSha1::default())
-        .filter(HmacSha384::default())
-}
-
-#[cfg(test)]
-mod tests {
-    use base64::{engine::general_purpose, Engine as _};
-    use hmac::{Hmac, Mac};
-    use liquid::{object, ParserBuilder};
-    use percent_encoding::{utf8_percent_encode, NON_ALPHANUMERIC};
-    use regex::Regex;
-    use sha1::Sha1;
-    use sha2::{Digest, Sha256, Sha384};
-    use time::OffsetDateTime;
-
-    use super::*;
-
-    fn parser() -> liquid::Parser {
-        // Build a Liquid parser with stdlib + all custom filters
-        register_all(ParserBuilder::with_stdlib()).build().unwrap()
-    }
-
-    fn render(src: &str) -> String {
-        parser().parse(src).unwrap().render(&object!({})).unwrap()
-    }
-
-    // -------------------------------------------------------------------------
-    // Simple one-liner helpers
-    // -------------------------------------------------------------------------
-    #[test]
-    fn b64enc_filter() {
-        assert_eq!(render(r#"{{ "hello" | b64enc }}"#), "aGVsbG8=");
-    }
-
-    #[test]
-    fn b64dec_filter() {
-        assert_eq!(render(r#"{{ "aGVsbG8=" | b64dec }}"#), "hello");
-    }
-
-    #[test]
-    fn sha256_filter() {
-        let expect = format!("{:x}", Sha256::digest(b"hello"));
-        assert_eq!(render(r#"{{ "hello" | sha256 }}"#), expect);
-    }
-
-    #[test]
-    fn suffix_filter() {
-        assert_eq!(render(r#"{{ "abcdef" | suffix: 3 }}"#), "def");
-        assert_eq!(render(r#"{{ "short" | suffix: 10 }}"#), "short");
-        assert_eq!(render(r#"{{ "value" | suffix: 0 }}"#), "");
-    }
-
-    #[test]
-    fn prefix_filter() {
-        assert_eq!(render(r#"{{ "abcdef" | prefix: 3 }}"#), "abc");
-        assert_eq!(render(r#"{{ "short" | prefix: 10 }}"#), "short");
-        assert_eq!(render(r#"{{ "value" | prefix: 0 }}"#), "");
-    }
-
-    #[test]
-    fn crc32_and_base62_filters() {
-        assert_eq!(render(r#"{{ "hello" | crc32 }}"#), "907060870");
-        assert_eq!(render(r#"{{ "hello" | crc32 | base62 }}"#), "zNvy2");
-        assert_eq!(render(r#"{{ "hello" | crc32 | base62: 6 }}"#), "0zNvy2");
-    }
-
-    #[test]
-    fn base36_filter() {
-        assert_eq!(render(r#"{{ 123456 | base36 }}"#), "2n9c");
-        assert_eq!(render(r#"{{ 123456 | base36: 6 }}"#), "002n9c");
-    }
-
-    #[test]
-    fn crc32_dec_filter() {
-        assert_eq!(render(r#"{{ "hello" | crc32_dec }}"#), "907060870");
-        assert_eq!(render(r#"{{ "hello" | crc32_dec: 6 }}"#), "060870");
-    }
-
-    #[test]
-    fn crc32_hex_filter() {
-        assert_eq!(render(r#"{{ "hello" | crc32_hex }}"#), "3610a686");
-        assert_eq!(render(r#"{{ "hello" | crc32_hex: 4 }}"#), "a686");
-        assert_eq!(render(r#"{{ "hello" | crc32_hex: 10 }}"#), "003610a686");
-    }
-
-    #[test]
-    fn crc32_le_b64_filter() {
-        assert_eq!(render(r#"{{ "hello" | crc32_le_b64 }}"#), "hqYQNg==");
-        assert_eq!(render(r#"{{ "hello" | crc32_le_b64: 6 }}"#), "hqYQNg");
-    }
-
-    #[test]
-    fn hmac_sha1_filter() {
-        let key = b"key1";
-        let data = b"data";
-        let mut mac = Hmac::<Sha1>::new_from_slice(key).unwrap();
-        mac.update(data);
-        let expect = general_purpose::STANDARD.encode(mac.finalize().into_bytes());
-
-        assert_eq!(render(r#"{{ "data" | hmac_sha1: "key1" }}"#), expect);
-    }
-
-    #[test]
-    fn b64url_enc_filter() {
-        assert_eq!(
-            render(r#"{{ "++??" | b64url_enc }}"#),
-            general_purpose::URL_SAFE_NO_PAD.encode("++??")
-        );
-    }
-
-    #[test]
-    fn url_encode_filter() {
-        assert_eq!(
-            render(r#"{{ "hello world!" | url_encode }}"#),
-            utf8_percent_encode("hello world!", NON_ALPHANUMERIC).to_string()
-        );
-    }
-
-    #[test]
-    fn json_escape_filter() {
-        assert_eq!(render(r#"{{ '"hi"' | json_escape }}"#), r#""\"hi\"""#);
-    }
-
-    // -------------------------------------------------------------------------
-    // JWT header
-    // -------------------------------------------------------------------------
-    #[test]
-    fn jwt_header_filter() {
-        let expect = general_purpose::URL_SAFE_NO_PAD.encode(r#"{"typ":"JWT","alg":"HS256"}"#);
-        assert_eq!(render(r#"{{ "HS256" | jwt_header }}"#), expect);
-    }
-
-    // -------------------------------------------------------------------------
-    // HMAC helpers
-    // -------------------------------------------------------------------------
-    #[test]
-    fn hmac_sha256_filter() {
-        let key = b"secret";
-        let data = b"hi!";
-        // expected value
-        let mut mac = Hmac::<Sha256>::new_from_slice(key).unwrap();
-        mac.update(data);
-        let expect = general_purpose::STANDARD.encode(mac.finalize().into_bytes());
-
-        assert_eq!(render(r#"{{ "hi!" | hmac_sha256: "secret" }}"#), expect);
-    }
-
-    #[test]
-    fn hmac_sha384_filter() {
-        let key = b"topsecret";
-        let data = b"payload";
-        let mut mac = Hmac::<Sha384>::new_from_slice(key).unwrap();
-        mac.update(data);
-        let expect = general_purpose::STANDARD.encode(mac.finalize().into_bytes());
-
-        assert_eq!(render(r#"{{ "payload" | hmac_sha384: "topsecret" }}"#), expect);
-    }
-
-    // -------------------------------------------------------------------------
-    // Random string
-    // -------------------------------------------------------------------------
-    #[test]
-    fn random_string_filter_default_len() {
-        let out = render(r#"{{ "" | random_string }}"#);
-        assert_eq!(out.len(), 32);
-        assert!(out.chars().all(|c| c.is_ascii_alphanumeric()));
-    }
-
-    #[test]
-    fn random_string_filter_custom_len() {
-        let out = render(r#"{{ 10 | random_string }}"#);
-        assert_eq!(out.len(), 10);
-    }
-
-    // -------------------------------------------------------------------------
-    // Time helpers
-    // -------------------------------------------------------------------------
-    #[test]
-    fn unix_timestamp_filter_is_nowish() {
-        let tmpl_val: i64 = render(r#"{{ "" | unix_timestamp }}"#).parse().unwrap();
-        let now = OffsetDateTime::now_utc().unix_timestamp();
-        assert!((now - tmpl_val).abs() < 5, "timestamp differs by >5 s");
-    }
-
-    #[test]
-    fn iso_timestamp_filter_parses() {
-        let out = render(r#"{{ "" | iso_timestamp }}"#);
-        // Parse to make sure it’s valid ISO-8601
-        assert!(OffsetDateTime::parse(&out, &Iso8601::DEFAULT).is_ok());
-    }
-
-    // -------------------------------------------------------------------------
-    // UUID
-    // -------------------------------------------------------------------------
-    #[test]
-    fn uuid_filter_format() {
-        let uuid_re =
-            Regex::new(r"^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$")
-                .unwrap();
-        let v = render(r#"{{ "" | uuid }}"#);
-        assert!(uuid_re.is_match(&v));
-    }
-    // -------------------------------------------------------------------------
-    // Replace filter
-    // -------------------------------------------------------------------------
-    #[test]
-    fn replace_filter() {
-        assert_eq!(render(r#"{{ "hello world" | replace: "world", "mars" }}"#), "hello mars");
-    }
-
-    #[test]
-    fn lstrip_chars_single() {
-        assert_eq!(render(r#"{{ "000abc" | lstrip_chars: "0" }}"#), "abc");
-    }
-
-    #[test]
-    fn lstrip_chars_multiple_chars() {
-        assert_eq!(render(r#"{{ "-=--token" | lstrip_chars: "-=" }}"#), "token");
-    }
-
-    // -------------------------------------------------------------------------
-    // iso_timestamp_no_frac filter
-    // -------------------------------------------------------------------------
-    #[test]
-    fn iso_timestamp_no_frac_filter() {
-        let ts = render(r#"{{ "" | iso_timestamp_no_frac }}"#);
-        assert!(!ts.contains('.'), "timestamp should not include fractional seconds: {ts}");
-        // Verify it’s still valid ISO-8601
-        assert!(OffsetDateTime::parse(&ts, &Iso8601::DEFAULT).is_ok());
-    }
-}
+pub use kingfisher_rules::liquid_filters::*;
diff --git a/src/location.rs b/src/location.rs
index 69245df..a3d62fe 100644
--- a/src/location.rs
+++ b/src/location.rs
@@ -1,252 +1,7 @@
-use core::ops::Range;
-use std::cell::RefCell;
+//! Source location types for tracking positions in content.
+//!
+//! This module re-exports types from [`kingfisher_core::location`].
 
-use schemars::JsonSchema;
-use serde::{Deserialize, Serialize};
-
-/// A point defined by a byte offset.
-#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize, Copy, Clone)]
-pub struct OffsetPoint(pub usize);
-
-impl OffsetPoint {
-    #[inline]
-    pub fn new(idx: usize) -> Self {
-        OffsetPoint(idx)
-    }
-}
-
-/// A non‑empty span defined by two byte offsets (half‑open interval `[start, end)`).
-#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize, JsonSchema)]
-pub struct OffsetSpan {
-    pub start: usize,
-    pub end: usize,
-}
-
-impl std::fmt::Display for OffsetSpan {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "{}-{}", self.start, self.end)
-    }
-}
-
-impl OffsetSpan {
-    #[inline]
-    pub fn from_offsets(start: OffsetPoint, end: OffsetPoint) -> Self {
-        OffsetSpan { start: start.0, end: end.0 }
-    }
-
-    #[inline]
-    pub fn from_range(range: Range<usize>) -> Self {
-        OffsetSpan { start: range.start, end: range.end }
-    }
-
-    /// Length in bytes.
-    #[inline]
-    #[must_use]
-    pub fn len(&self) -> usize {
-        self.end.saturating_sub(self.start)
-    }
-
-    /// True if empty or inverted.
-    #[inline]
-    #[must_use]
-    pub fn is_empty(&self) -> bool {
-        self.start >= self.end
-    }
-
-    /// True if `other` lies entirely within `self`.
-    #[inline]
-    #[must_use]
-    pub fn fully_contains(&self, other: &Self) -> bool {
-        self.start <= other.start && other.end <= self.end
-    }
-}
-
-/// A point in the source file (line 1‑indexed, column 0‑indexed).
-#[derive(Debug, PartialEq, Eq, Hash, Copy, Clone, Serialize, Deserialize, JsonSchema)]
-pub struct SourcePoint {
-    pub line: usize,
-    pub column: usize,
-}
-
-impl std::fmt::Display for SourcePoint {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "{}:{}", self.line, self.column)
-    }
-}
-
-/// A closed interval between two source points.
-#[derive(Debug, PartialEq, Eq, Hash, Clone, Serialize, Deserialize, JsonSchema)]
-pub struct SourceSpan {
-    pub start: SourcePoint,
-    pub end: SourcePoint,
-}
-
-impl std::fmt::Display for SourceSpan {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "{}-{}", self.start, self.end)
-    }
-}
-
-/// Records newline byte‑offsets to map offsets -- (line, column).
-pub struct LocationMapping<'a> {
-    bytes: &'a [u8],
-    newline_offsets: RefCell<Vec<usize>>,
-}
-
-impl<'a> LocationMapping<'a> {
-    /// Create a new mapping without pre-scanning the entire input.
-    pub fn new(input: &'a [u8]) -> Self {
-        LocationMapping { bytes: input, newline_offsets: RefCell::new(Vec::new()) }
-    }
-
-    fn ensure_offsets_up_to(&self, offset: usize) {
-        let mut offsets = self.newline_offsets.borrow_mut();
-        let start = offsets.last().map_or(0, |&last| last + 1);
-        if offset < start {
-            return;
-        }
-        let end = offset.min(self.bytes.len());
-        for nl in memchr::memchr_iter(b'\n', &self.bytes[start..end]) {
-            offsets.push(start + nl);
-        }
-    }
-
-    fn source_point_from_offsets(offsets: &[usize], offset: usize) -> SourcePoint {
-        let line = match offsets.binary_search(&offset) {
-            Ok(idx) => idx + 2,
-            Err(idx) => idx + 1,
-        };
-        let column = if let Some(&last) = offsets.get(line.saturating_sub(2)) {
-            offset.saturating_sub(last + 1)
-        } else {
-            offset
-        };
-        SourcePoint { line, column }
-    }
-
-    /// Map a byte offset to a `SourcePoint`.
-    pub fn get_source_point(&self, offset: usize) -> SourcePoint {
-        self.ensure_offsets_up_to(offset);
-        let offsets = self.newline_offsets.borrow();
-        Self::source_point_from_offsets(&offsets, offset)
-    }
-
-    /// Map an `OffsetSpan` -- `SourceSpan` (closed interval).
-    pub fn get_source_span(&self, span: &OffsetSpan) -> SourceSpan {
-        self.ensure_offsets_up_to(span.end.saturating_sub(1));
-        let offsets = self.newline_offsets.borrow();
-        let start = Self::source_point_from_offsets(&offsets, span.start);
-        let end = Self::source_point_from_offsets(&offsets, span.end.saturating_sub(1));
-        SourceSpan { start, end }
-    }
-}
-
-/// Compact representation of a source span to reduce per-match footprint while
-/// still being able to materialize full line/column data on demand.
-#[derive(Debug, Clone, Copy, Deserialize, Serialize, JsonSchema)]
-pub struct CompactSourceSpan {
-    pub start_line: u32,
-    pub start_column: u32,
-    pub end_line: u32,
-    pub end_column: u32,
-}
-
-impl CompactSourceSpan {
-    #[inline]
-    fn zero() -> Self {
-        Self { start_line: 0, start_column: 0, end_line: 0, end_column: 0 }
-    }
-
-    #[inline]
-    fn from_source_span(span: &SourceSpan) -> Self {
-        Self {
-            start_line: span.start.line.try_into().unwrap_or(0),
-            start_column: span.start.column.try_into().unwrap_or(0),
-            end_line: span.end.line.try_into().unwrap_or(0),
-            end_column: span.end.column.try_into().unwrap_or(0),
-        }
-    }
-
-    #[inline]
-    fn to_source_span(self) -> SourceSpan {
-        SourceSpan {
-            start: SourcePoint {
-                line: usize::try_from(self.start_line).unwrap_or(0),
-                column: usize::try_from(self.start_column).unwrap_or(0),
-            },
-            end: SourcePoint {
-                line: usize::try_from(self.end_line).unwrap_or(0),
-                column: usize::try_from(self.end_column).unwrap_or(0),
-            },
-        }
-    }
-}
-
-/// Combined byte‑ and source‑span.
-#[derive(Debug, Clone, Deserialize, JsonSchema)]
-pub struct Location {
-    pub offset_span: OffsetSpan,
-    #[serde(
-        default,
-        serialize_with = "serialize_compact_source_span",
-        deserialize_with = "deserialize_compact_source_span"
-    )]
-    #[schemars(with = "SourceSpan")]
-    pub source_span: Option<CompactSourceSpan>,
-}
-
-impl serde::Serialize for Location {
-    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
-    where
-        S: serde::Serializer,
-    {
-        use serde::ser::SerializeStruct;
-
-        let mut state = serializer.serialize_struct("Location", 2)?;
-        state.serialize_field("offset_span", &self.offset_span)?;
-        let source_span = self.source_span().unwrap_or_else(CompactSourceSpan::zero);
-        state.serialize_field("source_span", &source_span.to_source_span())?;
-        state.end()
-    }
-}
-
-impl Location {
-    #[inline]
-    pub fn with_source_span(offset_span: OffsetSpan, source_span: Option<SourceSpan>) -> Self {
-        Self {
-            offset_span,
-            source_span: source_span.as_ref().map(CompactSourceSpan::from_source_span),
-        }
-    }
-
-    #[inline]
-    pub fn source_span(&self) -> Option<CompactSourceSpan> {
-        self.source_span
-    }
-
-    #[inline]
-    pub fn resolved_source_span(&self) -> SourceSpan {
-        self.source_span.unwrap_or_else(CompactSourceSpan::zero).to_source_span()
-    }
-}
-
-fn serialize_compact_source_span<S>(
-    span: &Option<CompactSourceSpan>,
-    serializer: S,
-) -> Result<S::Ok, S::Error>
-where
-    S: serde::Serializer,
-{
-    let source_span = span.unwrap_or_else(CompactSourceSpan::zero).to_source_span();
-    source_span.serialize(serializer)
-}
-
-fn deserialize_compact_source_span<'de, D>(
-    deserializer: D,
-) -> Result<Option<CompactSourceSpan>, D::Error>
-where
-    D: serde::Deserializer<'de>,
-{
-    let span = SourceSpan::deserialize(deserializer)?;
-    Ok(Some(CompactSourceSpan::from_source_span(&span)))
-}
+pub use kingfisher_core::location::{
+    CompactSourceSpan, Location, LocationMapping, OffsetPoint, OffsetSpan, SourcePoint, SourceSpan,
+};
diff --git a/src/main.rs b/src/main.rs
index 9384e7e..2f09520 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -48,7 +48,7 @@ use kingfisher::{
         global::Command,
         CommandLineArgs, GlobalArgs,
     },
-    findings_store,
+    direct_validate, findings_store,
     findings_store::FindingsStore,
     gitea, github, huggingface,
     reporter::{styles::Styles, DetailsReporter},
@@ -92,6 +92,7 @@ fn main() -> anyhow::Result<()> {
         Command::Scan(scan_args) => scan_args.scan_args.num_jobs,
         Command::SelfUpdate => 1, // Self-update doesn't need a thread pool
         Command::Rules(_) => num_cpus::get(), // Default for Rules commands
+        Command::Validate(_) => 1, // Single validation request
         Command::AccessMap(_) => 1,
         Command::View(_) => 1,
     };
@@ -197,6 +198,18 @@ async fn async_main(args: CommandLineArgs) -> Result<()> {
         }
         Command::View(view_args) => view::run(view_args).await,
         Command::AccessMap(identity_args) => access_map::run(identity_args).await,
+        Command::Validate(validate_args) => {
+            let results =
+                direct_validate::run_direct_validation(&validate_args, &global_args).await?;
+            let use_color = global_args.use_color(std::io::stdout());
+            direct_validate::print_results(&results, &validate_args.format, use_color);
+            // Exit code: 0 when at least one result is valid, 1 otherwise.
+            if direct_validate::any_valid(&results) {
+                Ok(())
+            } else {
+                std::process::exit(1);
+            }
+        }
         command => {
             let update_status = check_for_update_async(&global_args, None).await;
             match command {
@@ -380,6 +393,9 @@ async fn async_main(args: CommandLineArgs) -> Result<()> {
                 Command::AccessMap(_) => {
                     anyhow::bail!("AccessMap command should not reach this branch")
                 }
+                Command::Validate(_) => {
+                    anyhow::bail!("Validate command should not reach this branch")
+                }
                 Command::SelfUpdate => {
                     anyhow::bail!("SelfUpdate command should not reach this branch")
                 }
diff --git a/src/matcher.rs b/src/matcher.rs
index 40444cf..d7d29bb 100644
--- a/src/matcher.rs
+++ b/src/matcher.rs
@@ -407,8 +407,8 @@ impl<'a> Matcher<'a> {
             self.user_data.raw_matches_scratch.iter().rev()
         {
             let rule_id_usize: usize = rule_id as usize;
-            let rule = Arc::clone(&rules_db.rules[rule_id_usize]);
-            let re = &rules_db.anchored_regexes[rule_id_usize];
+            let rule = Arc::clone(&rules_db.rules()[rule_id_usize]);
+            let re = &rules_db.anchored_regexes()[rule_id_usize];
             let start_idx_usize = start_idx as usize;
             let end_idx_usize = end_idx as usize;
             let current_span = OffsetSpan::from_range(start_idx_usize..end_idx_usize);
@@ -439,8 +439,8 @@ impl<'a> Matcher<'a> {
         if let Some(ref ts_results) = owned_ts_results {
             for (ts_range, ts_match, is_base64_decoded, _original_base64) in ts_results.iter() {
                 if *is_base64_decoded {
-                    for (rule_id_usize, rule) in rules_db.rules.iter().enumerate() {
-                        let re = &rules_db.anchored_regexes[rule_id_usize];
+                    for (rule_id_usize, rule) in rules_db.rules().iter().enumerate() {
+                        let re = &rules_db.anchored_regexes()[rule_id_usize];
                         filter_match(
                             blob,
                             rule.clone(),
@@ -471,8 +471,8 @@ impl<'a> Matcher<'a> {
             let mut b64_stack: Vec<(DecodedData, usize)> =
                 b64_items.drain(..).map(|d| (d, 0)).collect();
             while let Some((item, depth)) = b64_stack.pop() {
-                for (rule_id_usize, rule) in rules_db.rules.iter().enumerate() {
-                    let re = &rules_db.anchored_regexes[rule_id_usize];
+                for (rule_id_usize, rule) in rules_db.rules().iter().enumerate() {
+                    let re = &rules_db.anchored_regexes()[rule_id_usize];
                     filter_match(
                         blob,
                         rule.clone(),
@@ -1221,7 +1221,7 @@ mod test {
 
             let rules_db  = RulesDatabase::from_rules(vec![rule]).unwrap();
             let seen      = BlobIdMap::new();
-            let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vsdb.clone())));
+            let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vectorscan_db().clone())));
             let mut m     = Matcher::new(
                 &rules_db,
                 scanner_pool,
@@ -1297,7 +1297,7 @@ mod test {
         let enable_rule_profiling = true;
         // let mut matcher = Matcher::new(&rules_db, &seen_blobs, None,
         // enable_rule_profiling)?;
-        let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vsdb.clone())));
+        let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vectorscan_db().clone())));
         let mut matcher = Matcher::new(
             &rules_db,
             scanner_pool,
@@ -1345,7 +1345,7 @@ mod test {
         let rules_db = RulesDatabase::from_rules(rules)?;
         let input = b"prefixgood prefixtest";
         let seen_blobs: BlobIdMap<bool> = BlobIdMap::new();
-        let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vsdb.clone())));
+        let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vectorscan_db().clone())));
         let mut matcher = Matcher::new(
             &rules_db,
             scanner_pool,
@@ -1408,7 +1408,7 @@ mod test {
         let rules_db = RulesDatabase::from_rules(rules)?;
         let input = b"prefixgood prefixtest";
         let seen_blobs: BlobIdMap<bool> = BlobIdMap::new();
-        let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vsdb.clone())));
+        let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vectorscan_db().clone())));
         let mut matcher = Matcher::new(
             &rules_db,
             scanner_pool,
@@ -1522,7 +1522,7 @@ mod test {
 
         let rules_db = RulesDatabase::from_rules(vec![rule])?;
         let seen = BlobIdMap::new();
-        let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vsdb.clone())));
+        let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vectorscan_db().clone())));
         let mut m =
             Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, &[], false, true)?;
 
@@ -1560,7 +1560,7 @@ mod test {
         });
         let rules_db = RulesDatabase::from_rules(vec![rule])?;
         let seen = BlobIdMap::new();
-        let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vsdb.clone())));
+        let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vectorscan_db().clone())));
         let mut matcher =
             Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, &[], false, true)?;
 
@@ -1593,7 +1593,7 @@ mod test {
         });
         let rules_db = RulesDatabase::from_rules(vec![rule])?;
         let seen = BlobIdMap::new();
-        let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vsdb.clone())));
+        let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vectorscan_db().clone())));
         let mut matcher =
             Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, &[], false, true)?;
 
@@ -1638,7 +1638,7 @@ line2
         let origin = OriginSet::from(Origin::from_file(PathBuf::from("compat.txt")));
 
         let seen = BlobIdMap::new();
-        let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vsdb.clone())));
+        let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vectorscan_db().clone())));
         let mut matcher =
             Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, &[], false, true)?;
         let matches_without_compat =
@@ -1649,7 +1649,7 @@ line2
         assert_eq!(matches_without_compat, 1, "directive should be ignored without compat flag");
 
         let seen = BlobIdMap::new();
-        let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vsdb.clone())));
+        let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vectorscan_db().clone())));
         let extra = vec![String::from("gitleaks:allow")];
         let mut matcher =
             Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, &extra, false, true)?;
diff --git a/src/origin.rs b/src/origin.rs
index 0dcd207..b898f81 100644
--- a/src/origin.rs
+++ b/src/origin.rs
@@ -1,303 +1,7 @@
-use std::{
-    path::{Path, PathBuf},
-    sync::Arc,
+//! Provenance tracking for scanned content.
+//!
+//! This module re-exports types from [`kingfisher_core::origin`].
+
+pub use kingfisher_core::origin::{
+    get_repo_url, CommitOrigin, ExtendedOrigin, FileOrigin, GitRepoOrigin, Origin, OriginSet,
 };
-
-use anyhow::{anyhow, Result};
-use bstr::ByteSlice;
-use dashmap::DashMap;
-use once_cell::sync::Lazy;
-use rustc_hash::FxHashSet;
-use schemars::JsonSchema;
-use serde::{ser::SerializeSeq, Deserialize, Serialize};
-use smallvec::SmallVec;
-
-use crate::git_commit_metadata::CommitMetadata;
-static URL_CACHE: Lazy<DashMap<PathBuf, Arc<str>>> = Lazy::new(DashMap::default);
-
-fn compute_url(repo_path: &Path) -> Result<String> {
-    let repo = gix::open(repo_path)?;
-    let config = repo.config_snapshot();
-
-    let url_bytes =
-        config.string("remote.origin.url").ok_or_else(|| anyhow!("No remote URL found"))?;
-
-    if url_bytes.starts_with(b"http://") || url_bytes.starts_with(b"https://") {
-        Ok(String::from_utf8_lossy(url_bytes.as_bytes()).into_owned())
-    } else if url_bytes.starts_with(b"git@") {
-        let url_str = String::from_utf8_lossy(url_bytes.as_bytes());
-        if let Some(stripped) = url_str.strip_prefix("git@") {
-            if let Some((domain, path)) = stripped.split_once(':') {
-                Ok(format!("https://{}/{}", domain, path))
-            } else {
-                Err(anyhow!("Invalid SSH URL format"))
-            }
-        } else {
-            Err(anyhow!("Invalid SSH URL format"))
-        }
-    } else {
-        Err(anyhow!(
-            "Unsupported remote URL format: {}",
-            String::from_utf8_lossy(url_bytes.as_bytes())
-        ))
-    }
-}
-
-pub fn get_repo_url(repo_path: &Path) -> Result<Arc<str>> {
-    // Fast path: cache hit
-    if let Some(u) = URL_CACHE.get(repo_path) {
-        return Ok(u.clone());
-    }
-
-    // Slow path: compute, intern, cache
-    let url_arc: Arc<str> = compute_url(repo_path)?.into();
-    URL_CACHE.insert(repo_path.to_path_buf(), url_arc.clone());
-    Ok(url_arc)
-}
-
-impl FileOrigin {
-    pub fn new<P: Into<PathBuf>>(p: P) -> Self {
-        Self { path: Arc::new(p.into()) }
-    }
-}
-// -------------------------------------------------------------------------------------------------
-// Origin
-// -------------------------------------------------------------------------------------------------
-#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize, JsonSchema)]
-#[serde(rename_all = "snake_case", tag = "kind")]
-#[allow(clippy::large_enum_variant)]
-pub enum Origin {
-    File(FileOrigin),
-    GitRepo(GitRepoOrigin),
-    Extended(ExtendedOrigin),
-}
-
-impl Origin {
-    /// Create an `Origin` entry for a plain file.
-    pub fn from_file(path: PathBuf) -> Self {
-        Origin::File(FileOrigin::new(path))
-    }
-
-    /// Create an `Origin` entry for a blob found within a Git repo's history,
-    /// without any extra commit origin.
-    ///
-    /// See also `from_git_repo_with_first_commit`.
-    pub fn from_git_repo(repo_path: Arc<PathBuf>) -> Self {
-        Origin::GitRepo(GitRepoOrigin { repo_path, first_commit: None })
-    }
-
-    /// Create an `Origin` entry for a blob found within a Git repo's history,
-    /// with commit origin.
-    ///
-    /// See also `from_git_repo`.
-    pub fn from_git_repo_with_first_commit(
-        repo_path: Arc<PathBuf>,
-        commit_metadata: Arc<CommitMetadata>,
-        blob_path: String,
-    ) -> Self {
-        let first_commit = Some(CommitOrigin { commit_metadata, blob_path });
-        Origin::GitRepo(GitRepoOrigin { repo_path, first_commit })
-    }
-
-    /// Create an `Origin` entry from an arbitrary JSON value.
-    pub fn from_extended(value: serde_json::Value) -> Self {
-        Origin::Extended(ExtendedOrigin(value))
-    }
-
-    /// Get the path for the blob from this `Origin` entry, if one is specified.
-    pub fn blob_path(&self) -> Option<&Path> {
-        match self {
-            Self::File(e) => Some(&e.path),
-            Self::GitRepo(e) => e.first_commit.as_ref().map(|c| Path::new(&c.blob_path)),
-            Self::Extended(e) => e.path(),
-        }
-    }
-
-    pub fn full_path(&self) -> Option<PathBuf> {
-        match self {
-            Self::File(e) => Some((*e.path).clone()),
-            Self::GitRepo(e) => e.first_commit.as_ref().map(|c| e.repo_path.join(&c.blob_path)),
-            Self::Extended(e) => e.path().map(PathBuf::from),
-        }
-    }
-}
-impl std::fmt::Display for Origin {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        match self {
-            Origin::File(e) => write!(f, "file {}", e.path.display()),
-            Origin::GitRepo(e) => match &e.first_commit {
-                Some(md) => write!(
-                    f,
-                    "git repo {}: first seen in commit {} as {}",
-                    e.repo_path.display(),
-                    md.commit_metadata.commit_id,
-                    &md.blob_path,
-                ),
-                None => write!(f, "git repo {}", e.repo_path.display()),
-            },
-            Origin::Extended(e) => write!(f, "extended {}", e),
-        }
-    }
-}
-// -------------------------------------------------------------------------------------------------
-// FileOrigin
-// -------------------------------------------------------------------------------------------------
-/// Indicates that a blob was seen at a particular file path
-#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema, Hash)]
-pub struct FileOrigin {
-    pub path: Arc<PathBuf>,
-}
-// -------------------------------------------------------------------------------------------------
-// GitRepoOrigin
-// -------------------------------------------------------------------------------------------------
-/// Indicates that a blob was seen in a Git repo, optionally with particular
-/// commit origin info
-#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema, Hash)]
-pub struct GitRepoOrigin {
-    pub repo_path: Arc<PathBuf>,
-    pub first_commit: Option<CommitOrigin>,
-}
-// -------------------------------------------------------------------------------------------------
-// CommitOrigin
-// -------------------------------------------------------------------------------------------------
-/// How was a particular Git commit encountered?
-#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema, Hash)]
-pub struct CommitOrigin {
-    pub commit_metadata: Arc<CommitMetadata>,
-
-    pub blob_path: String,
-}
-// -------------------------------------------------------------------------------------------------
-// ExtendedOrigin
-// -------------------------------------------------------------------------------------------------
-/// An extended origin entry.
-///
-/// This is an arbitrary JSON value.
-/// If the value is an object containing certain fields, they will be
-/// interpreted specially by Kingfisher:
-///
-/// - A `path` field containing a string
-// - XXX A `url` string field that is a syntactically-valid URL
-// - XXX A `time` string field
-// - XXX A `display` string field
-//
-// - XXX A `parent_blob` string field with a hex-encoded blob ID that the associated blob was
-//   derived from
-// - XXX A `parent_transform` string field identifying the transform method used to derive the
-//   associated blob
-// - XXX A `parent_start_byte` integer field
-// - XXX A `parent_end_byte` integer field
-#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema, Hash)]
-pub struct ExtendedOrigin(pub serde_json::Value);
-impl std::fmt::Display for ExtendedOrigin {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        std::fmt::Display::fmt(&self.0, f)
-    }
-}
-impl ExtendedOrigin {
-    pub fn path(&self) -> Option<&Path> {
-        let p = self.0.get("path")?.as_str()?;
-        Some(Path::new(p))
-    }
-}
-/// A non-empty set of `Origin` entries.
-#[derive(Debug, Clone)]
-pub struct OriginSet {
-    origin: Origin,
-    more_provenance: SmallVec<[Origin; 1]>,
-}
-/// Serialize `OriginSet` as a flat sequence
-impl serde::Serialize for OriginSet {
-    fn serialize<S: serde::Serializer>(&self, s: S) -> Result<S::Ok, S::Error> {
-        let mut seq = s.serialize_seq(Some(self.len()))?;
-        for p in self.iter() {
-            seq.serialize_element(p)?;
-        }
-        seq.end()
-    }
-}
-impl JsonSchema for OriginSet {
-    fn schema_name() -> String {
-        "OriginSet".into()
-    }
-
-    fn json_schema(gen: &mut schemars::gen::SchemaGenerator) -> schemars::schema::Schema {
-        let s = <Vec<Origin>>::json_schema(gen);
-        let mut o = s.into_object();
-        o.array().min_items = Some(1);
-        let md = o.metadata();
-        md.description = Some("A non-empty set of `Origin` entries".into());
-        schemars::schema::Schema::Object(o)
-    }
-}
-impl OriginSet {
-    /// Create a new `OriginSet` from the given items, filtering out redundant
-    /// less-specific `Origin` records.
-    #[inline]
-    pub fn single(origin: Origin) -> Self {
-        Self { origin, more_provenance: SmallVec::new() }
-    }
-
-    pub fn new(origin: Origin, more_origin: Vec<Origin>) -> Self {
-        let mut git_repos_with_detailed: FxHashSet<Arc<PathBuf>> = FxHashSet::default();
-        for p in std::iter::once(&origin).chain(&more_origin) {
-            if let Origin::GitRepo(e) = p {
-                if e.first_commit.is_some() {
-                    git_repos_with_detailed.insert(e.repo_path.clone());
-                }
-            }
-        }
-        let mut filtered = std::iter::once(origin).chain(more_origin).filter(|p| match p {
-            Origin::GitRepo(e) => {
-                e.first_commit.is_some() || !git_repos_with_detailed.contains(&e.repo_path)
-            }
-            Origin::File(_) => true,
-            Origin::Extended(_) => true,
-        });
-        Self { origin: filtered.next().unwrap(), more_provenance: filtered.collect() }
-    }
-
-    #[inline]
-    pub fn try_from_iter<I>(it: I) -> Option<Self>
-    where
-        I: IntoIterator<Item = Origin>,
-    {
-        let mut it = it.into_iter();
-        let provenance = it.next()?;
-        let more_provenance = it.collect();
-        Some(Self::new(provenance, more_provenance))
-    }
-
-    #[inline]
-    pub fn first(&self) -> &Origin {
-        &self.origin
-    }
-
-    #[allow(clippy::len_without_is_empty)]
-    #[inline]
-    pub fn len(&self) -> usize {
-        1 + self.more_provenance.len()
-    }
-
-    #[inline]
-    pub fn iter(&self) -> impl Iterator<Item = &Origin> {
-        std::iter::once(&self.origin).chain(&self.more_provenance)
-    }
-}
-impl IntoIterator for OriginSet {
-    type IntoIter =
-        std::iter::Chain<std::iter::Once<Origin>, <Vec<Origin> as IntoIterator>::IntoIter>;
-    type Item = Origin;
-
-    #[inline]
-    fn into_iter(self) -> Self::IntoIter {
-        std::iter::once(self.origin)
-            // turn the SmallVec into a Vec, then into_iter()
-            .chain(self.more_provenance.into_vec().into_iter())
-    }
-}
-impl From<Origin> for OriginSet {
-    fn from(p: Origin) -> Self {
-        Self::single(p)
-    }
-}
diff --git a/src/rule_loader.rs b/src/rule_loader.rs
index 447b643..43375af 100644
--- a/src/rule_loader.rs
+++ b/src/rule_loader.rs
@@ -115,6 +115,12 @@ impl LoadedRules {
         self.id_to_rule.values()
     }
 
+    /// Get a reference to the underlying rule map (rule ID -> Rule).
+    #[inline]
+    pub fn id_to_rule(&self) -> &BTreeMap<String, Rule> {
+        &self.id_to_rule
+    }
+
     pub fn resolve_enabled_rules(&self) -> Result<Vec<&Rule>> {
         let resolved_rules = match &self.enabled_rule_ids {
             // No selectors ⇒ every rule is enabled
diff --git a/src/rules.rs b/src/rules.rs
index d0484e7..68514dc 100644
--- a/src/rules.rs
+++ b/src/rules.rs
@@ -1,244 +1,17 @@
-use anyhow::{bail, Context, Result};
-use ignore::{types::TypesBuilder, WalkBuilder};
-use serde::Deserialize;
-use thiserror::Error;
-use tracing::{debug, debug_span, error};
+//! Rule definitions for secret detection.
+//!
+//! This module re-exports types from [`kingfisher_rules`].
 
-pub mod rule;
-use std::{collections::BTreeMap, fs::File, io::BufReader, path::Path};
-
-use rule::{Confidence, RuleSyntax, Validation};
-use serde::de::DeserializeOwned;
-
-#[derive(Debug, Error)]
-pub enum RulesError {
-    #[error("Failed to parse YAML file at path: {0}")]
-    YamlParseError(String),
-
-    #[error("Invalid input: {0} is neither a file nor a directory")]
-    InvalidInputError(String),
-
-    #[error("File system error: {0}")]
-    FileSystemError(#[from] std::io::Error),
-
-    #[error("Error building YAML types: {0}")]
-    YamlTypesBuildError(String),
-
-    #[error("Invalid ResponseMatcher variant in file: {0}, at line: {1}, column: {2}")]
-    InvalidResponseMatcherVariant(String, usize, usize),
-
-    #[error("HTTP validation for rule `{rule_id}` in file {path} missing response_matcher")]
-    MissingResponseMatcher { path: String, rule_id: String },
+// Re-export the rule module
+pub mod rule {
+    pub use kingfisher_rules::rule::*;
 }
 
-#[derive(Clone, Default)]
-pub struct Rules {
-    pub rules: BTreeMap<String, RuleSyntax>,
-}
-
-#[derive(Deserialize)]
-struct RawRules {
-    rules: Vec<RuleSyntax>,
-}
-
-impl Rules {
-    pub fn new() -> Self {
-        Self { rules: BTreeMap::new() }
-    }
-
-    pub fn update(&mut self, other: Rules) {
-        self.rules.extend(other.rules);
-    }
-
-    pub fn from_paths_and_contents<'a, I: IntoIterator<Item = (&'a Path, &'a [u8])>>(
-        iterable: I,
-        confidence: Confidence,
-    ) -> Result<Self> {
-        let mut rules = Self::new();
-        for (path, contents) in iterable {
-            match serde_yaml::from_slice::<RawRules>(contents) {
-                Ok(rs) => {
-                    for rule_syntax in rs.rules {
-                        if !rule_syntax.confidence.is_at_least(&confidence) {
-                            continue;
-                        }
-                        if let Some(Validation::Http(http_val)) = &rule_syntax.validation {
-                            if http_val
-                                .request
-                                .response_matcher
-                                .as_ref()
-                                .map_or(true, |m| m.is_empty())
-                            {
-                                bail!(RulesError::MissingResponseMatcher {
-                                    path: path.display().to_string(),
-                                    rule_id: rule_syntax.id.clone(),
-                                });
-                            }
-                        }
-                        rules.rules.insert(rule_syntax.id.clone(), rule_syntax);
-                    }
-                }
-                Err(e) => {
-                    if let Some(location) = e.location() {
-                        error!(
-                            "Failed to parse rules YAML from {}: {}, at line: {}, column: {}",
-                            path.display(),
-                            e,
-                            location.line(),
-                            location.column()
-                        );
-                        bail!(RulesError::InvalidResponseMatcherVariant(
-                            path.display().to_string(),
-                            location.line(),
-                            location.column(),
-                        ));
-                    } else {
-                        error!("Failed to parse rules YAML from {}: {}", path.display(), e);
-                        bail!(RulesError::YamlParseError(format!(
-                            "Failed to load rules YAML from {}: {}",
-                            path.display(),
-                            e
-                        )));
-                    }
-                }
-            }
-        }
-        Ok(rules)
-    }
-
-    pub fn from_paths<P: AsRef<Path>, I: IntoIterator<Item = P>>(
-        paths: I,
-        confidence: Confidence,
-    ) -> Result<Self> {
-        let mut num_paths = 0;
-        let mut rules = Rules::new();
-        for input in paths {
-            num_paths += 1;
-            let input = input.as_ref();
-            if input.is_file() {
-                rules.update(Rules::from_yaml_file(input, confidence)?);
-            } else if input.is_dir() {
-                rules.update(Rules::from_directory(input, confidence)?);
-            } else {
-                error!("Invalid input type: {} is neither a file nor a directory", input.display());
-                bail!(RulesError::InvalidInputError(input.display().to_string()));
-            }
-        }
-        debug!("Loaded {} rules from {} paths", rules.num_rules(), num_paths);
-        Ok(rules)
-    }
-
-    pub fn from_yaml_file<P: AsRef<Path>>(path: P, confidence: Confidence) -> Result<Self> {
-        let path = path.as_ref();
-        let _span = debug_span!("Rules::from_yaml_file", "{}", path.display()).entered();
-        match load_yaml_file::<RawRules, _>(path) {
-            Ok(rs) => {
-                let mut rules = Rules::new();
-                for rule_syntax in rs.rules {
-                    if !rule_syntax.confidence.is_at_least(&confidence) {
-                        continue;
-                    }
-                    if let Some(Validation::Http(http_val)) = &rule_syntax.validation {
-                        if http_val.request.response_matcher.as_ref().map_or(true, |m| m.is_empty())
-                        {
-                            bail!(RulesError::MissingResponseMatcher {
-                                path: path.display().to_string(),
-                                rule_id: rule_syntax.id.clone(),
-                            });
-                        }
-                    }
-                    rules.rules.insert(rule_syntax.id.clone(), rule_syntax);
-                }
-                debug!("Loaded {} rules from {}", rules.num_rules(), path.display());
-                Ok(rules)
-            }
-            Err(e) => {
-                error!("Failed to load rules YAML from {}: {}", path.display(), e);
-                bail!(RulesError::YamlParseError(format!(
-                    "Failed to load rules YAML from {}: {}",
-                    path.display(),
-                    e
-                )))
-            }
-        }
-    }
-
-    pub fn from_yaml_files<P: AsRef<Path>, I: IntoIterator<Item = P>>(
-        paths: I,
-        confidence: Confidence,
-    ) -> Result<Self> {
-        let mut num_paths = 0;
-        let mut rules = Rules::new();
-        for path in paths {
-            num_paths += 1;
-            rules.update(Rules::from_yaml_file(path.as_ref(), confidence)?);
-        }
-        debug!("Loaded {} rules from {} YAML files", rules.num_rules(), num_paths);
-        Ok(rules)
-    }
-
-    pub fn from_directory<P: AsRef<Path>>(path: P, confidence: Confidence) -> Result<Self> {
-        let path = path.as_ref();
-        let _span = debug_span!("Rules::from_directory", "{}", path.display()).entered();
-        let yaml_types =
-            TypesBuilder::new().add_defaults().select("yaml").build().map_err(|e| {
-                error!("Failed to build YAML types: {}", e);
-                RulesError::YamlTypesBuildError(e.to_string())
-            })?;
-        let walker = WalkBuilder::new(path)
-            .types(yaml_types)
-            .follow_links(true)
-            .standard_filters(false)
-            .build();
-        let mut yaml_files = Vec::new();
-        for entry in walker {
-            match entry {
-                Ok(entry) => {
-                    if entry.file_type().map_or(false, |t| !t.is_dir()) {
-                        yaml_files.push(entry.into_path());
-                    }
-                }
-                Err(e) => {
-                    debug!("Failed to read directory entry: {}", e);
-                }
-            }
-        }
-        yaml_files.sort();
-        debug!("Found {} YAML files in {}", yaml_files.len(), path.display());
-        Self::from_yaml_files(&yaml_files, confidence)
-    }
-
-    #[inline]
-    pub fn num_rules(&self) -> usize {
-        self.rules.len()
-    }
-
-    #[inline]
-    pub fn is_empty(&self) -> bool {
-        self.rules.is_empty()
-    }
-
-    #[inline]
-    pub fn iter_rules(&self) -> std::collections::btree_map::Values<'_, String, RuleSyntax> {
-        self.rules.values()
-    }
-}
-
-impl IntoIterator for Rules {
-    type Item = RuleSyntax;
-    type IntoIter = std::collections::btree_map::IntoValues<String, RuleSyntax>;
-
-    fn into_iter(self) -> Self::IntoIter {
-        self.rules.into_values()
-    }
-}
-
-pub fn load_yaml_file<T: DeserializeOwned, P: AsRef<Path>>(path: P) -> Result<T> {
-    let path = path.as_ref();
-    let file = File::open(path)
-        .with_context(|| format!("Failed to open YAML file: {}", path.display()))?;
-    let reader = BufReader::new(file);
-    let data = serde_yaml::from_reader(reader)
-        .with_context(|| format!("Failed to parse YAML from file: {}", path.display()))?;
-    Ok(data)
-}
+// Re-export the rule-set types and the commonly used rule-definition types from `kingfisher_rules`
+pub use kingfisher_rules::rules::{Rules, RulesError};
+pub use kingfisher_rules::{
+    ChecksumActual, ChecksumRequirement, Confidence, DependsOnRule, HttpRequest, HttpValidation,
+    MultipartConfig, MultipartPart, PatternRequirementContext, PatternRequirements,
+    PatternValidationResult, ReportResponseData, ResponseMatcher, Rule, RuleSyntax, Validation,
+    RULE_COMMENTS_PATTERN,
+};
diff --git a/src/rules/lib.rs b/src/rules/lib.rs
deleted file mode 100644
index 302717f..0000000
--- a/src/rules/lib.rs
+++ /dev/null
@@ -1,33 +0,0 @@
-//! This module re-exports the public API from submodules for use by external crates.
-//! It also contains tests to verify behavior and demonstrate property-based testing.
-
-pub mod rule;
-mod rules;
-pub use rule::Confidence;
-mod util;
-pub use rule::{
-    DependsOnRule, HttpRequest, HttpValidation, ResponseMatcher, Rule, RuleSyntax, Validation,
-};
-pub use rules::Rules;
-
-#[cfg(test)]
-mod tests {
-    use pretty_assertions::assert_eq;
-    use proptest::prelude::*;
-
-    // Property-based test that generates strings matching the secret key pattern.
-    // This ensures that the regex for detecting keys generates valid secret strings.
-    proptest! {
-        #[test]
-        fn test_regex_generation(s in r"((?:A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA)[A-Z0-9]{16})") {
-            println!("{}", s);
-        }
-    }
-
-    // A simple test that is expected to fail.
-    #[test]
-    #[should_panic(expected = "assertion failed")]
-    fn test_failure() {
-        assert_eq!(5, 42);
-    }
-}
diff --git a/src/rules/util.rs b/src/rules/util.rs
deleted file mode 100644
index 6b4e61b..0000000
--- a/src/rules/util.rs
+++ /dev/null
@@ -1,15 +0,0 @@
-use std::{fs::File, io::BufReader, path::Path};
-
-use anyhow::{Context, Result};
-use serde::de::DeserializeOwned;
-
-/// Loads and deserializes a YAML file into a value of type `T`.
-pub fn load_yaml_file<T: DeserializeOwned, P: AsRef<Path>>(path: P) -> Result<T> {
-    let path = path.as_ref();
-    let file = File::open(path)
-        .with_context(|| format!("Failed to open YAML file: {}", path.display()))?;
-    let reader = BufReader::new(file);
-    let data = serde_yaml::from_reader(reader)
-        .with_context(|| format!("Failed to parse YAML from file: {}", path.display()))?;
-    Ok(data)
-}
diff --git a/src/rules_database.rs b/src/rules_database.rs
index 81cfd1c..e2f7122 100644
--- a/src/rules_database.rs
+++ b/src/rules_database.rs
@@ -1,240 +1,5 @@
-use std::{sync::Arc, time::Instant};
+//! Compiled rules database for pattern matching.
+//!
+//! This module re-exports types from [`kingfisher_rules::rules_database`].
 
-use anyhow::{anyhow, bail, Result};
-use regex::bytes::Regex;
-use tracing::{debug, debug_span, error};
-use vectorscan_rs::{BlockDatabase, Flag, Pattern};
-
-use crate::rules::rule::{Rule, RULE_COMMENTS_PATTERN};
-
-pub struct RulesDatabase {
-    // pub(crate) rules: Vec<Rule,>,
-    pub(crate) rules: Vec<Arc<Rule>>,
-    pub(crate) anchored_regexes: Vec<Regex>,
-    pub(crate) vsdb: BlockDatabase,
-}
-
-pub fn format_regex_pattern(pattern: &str) -> String {
-    // Remove comments and whitespace while preserving the regex pattern
-    let no_comment_pattern = RULE_COMMENTS_PATTERN.replace_all(pattern, "");
-    // flattens multi-line regex into a single line
-    no_comment_pattern
-        .lines()
-        .map(|line| line.trim())
-        .filter(|line| !line.is_empty())
-        .collect::<Vec<&str>>()
-        .join("")
-}
-
-impl RulesDatabase {
-    pub fn get_regex_by_rule_id(&self, rule_id: &str) -> Option<&Regex> {
-        self.rules
-            .iter()
-            .position(|r| r.syntax().id == rule_id)
-            .and_then(|index| self.anchored_regexes.get(index))
-    }
-
-    pub fn get_rule_by_finding_fingerprint(&self, finding_fingerprint: &str) -> Option<Arc<Rule>> {
-        self.rules.iter().find(|r| r.finding_sha1_fingerprint() == finding_fingerprint).cloned()
-    }
-
-    pub fn get_rule_by_text_id(&self, text_id: &str) -> Option<Arc<Rule>> {
-        self.rules.iter().find(|r| r.id() == text_id).cloned()
-    }
-
-    pub fn get_rule_by_name(&self, name: &str) -> Option<Arc<Rule>> {
-        self.rules.iter().find(|r| r.name() == name).cloned()
-    }
-
-    pub fn from_rules(rules: Vec<Rule>) -> Result<Self> {
-        let rules: Vec<Arc<Rule>> = rules.into_iter().map(Arc::new).collect();
-        let _span = debug_span!("RulesDatabase::from_rules").entered();
-        if rules.is_empty() {
-            bail!("No rules to compile");
-        }
-        let patterns: Vec<Pattern> = rules
-            .iter()
-            .enumerate()
-            .map(|(id, rule)| {
-                Pattern::new(
-                    rule.syntax().pattern.clone().into_bytes(),
-                    Flag::default(),
-                    Some(id.try_into().unwrap()),
-                )
-            })
-            .collect();
-        let t1 = Instant::now();
-        match BlockDatabase::new(patterns) {
-            Ok(vsdb) => {
-                let d1 = t1.elapsed().as_secs_f64();
-                let (anchored_regexes, d2) = Self::compile_regexes(&rules)?;
-                debug!("Compiled {} rules: vectorscan {}s; regex {}s", rules.len(), d1, d2);
-                Ok(RulesDatabase { rules, vsdb, anchored_regexes })
-            }
-            Err(e) => {
-                error!(
-                    "Failed to create BlockDatabase: {}. Attempting to compile rules individually.",
-                    e
-                );
-                Self::compile_rules_individually(rules)
-                    .map_err(|err| anyhow!("Failed to compile rules: {}\n{}", e, err))
-            }
-        }
-    }
-
-    fn compile_rules_individually(rules: Vec<Arc<Rule>>) -> Result<Self> {
-        // NOTE: This function only used when attempting to determine which rule failed
-        // to compile
-        let mut compiled_rules = Vec::new();
-        let mut compiled_patterns = Vec::new();
-        let mut compiled_regexes = Vec::new();
-        let mut error_messages = Vec::new();
-        for (id, rule) in rules.into_iter().enumerate() {
-            let pattern = Pattern::new(
-                rule.syntax().pattern.clone().into_bytes(),
-                Flag::default(),
-                Some(id.try_into().unwrap()),
-            );
-            match BlockDatabase::new(vec![pattern]) {
-                Ok(_) => {
-                    // Recreate the pattern for the final compilation
-                    let final_pattern = Pattern::new(
-                        rule.syntax().pattern.clone().into_bytes(),
-                        Flag::default(),
-                        Some(id.try_into().unwrap()),
-                    );
-                    compiled_patterns.push(final_pattern);
-                    match rule.syntax().as_regex() {
-                        Ok(regex) => {
-                            compiled_regexes.push(regex);
-                            compiled_rules.push(rule);
-                        }
-                        Err(e) => {
-                            error_messages.push(format!(
-                                "Failed to compile Regex for rule '{}' (ID: {}): {}",
-                                rule.name(),
-                                rule.id(),
-                                e
-                            ));
-                        }
-                    }
-                }
-                Err(e) => {
-                    error_messages.push(format!(
-                        "Failed to compile vectorscan pattern for rule '{}' (ID: {}): {}",
-                        rule.name(),
-                        rule.id(),
-                        e
-                    ));
-                }
-            }
-        }
-        if !error_messages.is_empty() {
-            error!(
-                "Errors occurred while compiling rules individually:\n{}",
-                error_messages.join("\n")
-            );
-            bail!("Failed to compile the following rules:\n{}", error_messages.join("\n"));
-        }
-        let vsdb = BlockDatabase::new(compiled_patterns)?;
-        Ok(RulesDatabase { rules: compiled_rules, vsdb, anchored_regexes: compiled_regexes })
-    }
-
-    fn compile_regexes(rules: &[Arc<Rule>]) -> Result<(Vec<Regex>, f64)> {
-        // fn compile_regexes(rules: &[Rule],) -> Result<(Vec<Regex,>, f64,),> {
-        let t2 = Instant::now();
-        let mut anchored_regexes = Vec::with_capacity(rules.len());
-        for rule in rules {
-            match rule.syntax().as_regex() {
-                Ok(regex) => anchored_regexes.push(regex),
-                Err(e) => {
-                    error!(
-                        "Failed to compile Regex for rule '{}' (ID: {}): {}",
-                        rule.name(),
-                        rule.id(),
-                        e
-                    );
-                    return Err(anyhow!(
-                        "Failed to compile Regex for rule '{}' (ID: {}): {}",
-                        rule.name(),
-                        rule.id(),
-                        e
-                    ));
-                }
-            }
-        }
-        let d2 = t2.elapsed().as_secs_f64();
-        Ok((anchored_regexes, d2))
-    }
-
-    #[inline]
-    pub fn num_rules(&self) -> usize {
-        self.rules.len()
-    }
-
-    #[inline]
-    pub fn get_rule(&self, index: usize) -> Option<Arc<Rule>> {
-        self.rules.get(index).cloned()
-    }
-
-    pub fn rules(&self) -> &[Arc<Rule>] {
-        &self.rules
-    }
-}
-#[cfg(test)]
-mod test_vectorscan {
-    use pretty_assertions::assert_eq;
-
-    use super::*;
-    #[test]
-    pub fn test_vectorscan_sanity() -> Result<()> {
-        use vectorscan_rs::{BlockDatabase, BlockScanner, Pattern, Scan};
-        let input = b"some test data for vectorscan";
-        let pattern = Pattern::new(b"test".to_vec(), Flag::CASELESS | Flag::SOM_LEFTMOST, None);
-        let db: BlockDatabase = BlockDatabase::new(vec![pattern])?;
-        let mut scanner = BlockScanner::new(&db)?;
-        let mut matches: Vec<(u64, u64)> = vec![];
-        scanner.scan(input, |id: u32, from: u64, to: u64, _flags: u32| {
-            println!("found pattern #{} @ [{}, {})", id, from, to);
-            matches.push((from, to));
-            Scan::Continue
-        })?;
-        assert_eq!(matches, vec![(5, 9)]);
-        Ok(())
-    }
-}
-#[cfg(test)]
-#[cfg(test)]
-mod test_regex_cleaning {
-    use super::*;
-    #[test]
-    fn test_format_regex_pattern() {
-        let input = r#"(?x)
-            (?i)
-            (?:
-              \\b
-              (?:AWS|AMAZON|AMZN|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA)
-              (?:\\.|[\\n\\r]){0,32}?  (?# THIS IS A COMMENTCOMMENTCOMMENTCOMMENTCOMMENTCOMMENTCOMMENT)
-              (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) # THIS IS A COMMENT THAT SHOULD NOT BE USED BUT MIGHT BE
-              (?:\\.|[\\n\\r]){0,32}?
-              \\b
-              (
-                [A-Za-z0-9/+=]{40}
-              )
-              \\b
-            |
-              \\b
-              (?:SECRET|PRIVATE|ACCESS)
-              (?:\\.|[\\n\\r]){0,16}?
-              (?:KEY|TOKEN)
-              (?:\\.|[\\n\\r]){0,32}?
-              \\b
-              (
-                [A-Za-z0-9/+=]{40}
-              )
-              \\b
-            )"#;
-        let data = format_regex_pattern(input);
-        println!("{}", data);
-    }
-}
+pub use kingfisher_rules::rules_database::{format_regex_pattern, RulesDatabase};
diff --git a/src/scanner/enumerate.rs b/src/scanner/enumerate.rs
index bb9b1e0..9a8ebff 100644
--- a/src/scanner/enumerate.rs
+++ b/src/scanner/enumerate.rs
@@ -162,7 +162,7 @@ pub fn enumerate_filesystem_inputs(
     let t1 = Instant::now();
     let num_blob_processors = Mutex::new(0u64);
     let seen_blobs = BlobIdMap::new();
-    let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vsdb.clone())));
+    let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vectorscan_db().clone())));
 
     let matcher = Matcher::new(
         &rules_db,
diff --git a/src/scanner/repos.rs b/src/scanner/repos.rs
index d8f1f1f..9076375 100644
--- a/src/scanner/repos.rs
+++ b/src/scanner/repos.rs
@@ -823,7 +823,7 @@ pub async fn fetch_s3_objects(
     let role_arn = args.input_specifier_args.role_arn.as_deref();
     let profile = args.input_specifier_args.aws_local_profile.as_deref();
 
-    let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vsdb.clone())));
+    let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vectorscan_db().clone())));
     let seen_blobs = BlobIdMap::new();
     let matcher = Matcher::new(
         rules_db,
@@ -905,7 +905,7 @@ pub async fn fetch_gcs_objects(
     let prefix = args.input_specifier_args.gcs_prefix.as_deref();
     let service_account = args.input_specifier_args.gcs_service_account.as_deref();
 
-    let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vsdb.clone())));
+    let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vectorscan_db().clone())));
     let seen_blobs = BlobIdMap::new();
     let matcher = Matcher::new(
         rules_db,
diff --git a/src/validation.rs b/src/validation.rs
index ec34fc5..2e431ff 100644
--- a/src/validation.rs
+++ b/src/validation.rs
@@ -25,16 +25,16 @@ use crate::{
     validation_body::{self, ValidationResponseBody},
 };
 
-mod aws;
-mod azure;
-mod coinbase;
+pub mod aws;
+pub mod azure;
+pub mod coinbase;
 pub mod gcp;
-mod httpvalidation;
-mod jdbc;
-mod jwt;
-mod mongodb;
-mod mysql;
-mod postgres;
+pub mod httpvalidation;
+pub mod jdbc;
+pub mod jwt;
+pub mod mongodb;
+pub mod mysql;
+pub mod postgres;
 pub use mysql::validate_mysql;
 pub use postgres::validate_postgres;
 pub mod utils;