From 0ab21ace9906ec4253c7b753ff6a4885cf4f61e0 Mon Sep 17 00:00:00 2001
From: Mick Grove <mick.grove@mongodb.com>
Date: Sat, 12 Jul 2025 22:44:34 -0700
Subject: [PATCH 1/8] improved azure storage rule. Added rule to detect
 TravisCI encrypted values

---
 CHANGELOG.md                  |  3 +++
 Cargo.toml                    |  2 +-
 data/rules/azurestorage.yml   | 16 +++++++++++-----
 data/rules/travisci.yml       | 21 +++++++++++++++++++++
 src/reporter/pretty_format.rs |  4 +++-
 5 files changed, 39 insertions(+), 7 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2b14fae..ebfc95f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,9 @@
 
 All notable changes to this project will be documented in this file.
 
+## [1.21.0]
+- Improved Azure Storage rule
+- Added rule to detect TravisCI encrypted values
 
 ## [1.20.0]
 - Removed confirmation prompt when user provides --self-update flag
diff --git a/Cargo.toml b/Cargo.toml
index 19c6f95..179f285 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -10,7 +10,7 @@ publish = false
 
 [package]
 name = "kingfisher"
-version = "1.20.0"
+version = "1.21.0"
 edition.workspace = true
 rust-version.workspace = true
 license.workspace = true
diff --git a/data/rules/azurestorage.yml b/data/rules/azurestorage.yml
index 8f2921f..01dad46 100644
--- a/data/rules/azurestorage.yml
+++ b/data/rules/azurestorage.yml
@@ -1,9 +1,12 @@
 rules:
   - name: Azure Storage Account Name
-    id: kingfisher.azurestorage.name.1
+    id: kingfisher.azurestorage.1
     pattern: |
       (?xi)
       (?:
+        \b
+        azure
+        (?:.|[\n\r]){0,32}?
         (?i:
           (?:Account|Storage)
           (?:[._-]Account)?
@@ -19,13 +22,16 @@ rules:
     visible: false
     confidence: medium
     examples:
-      - storage_name=mystorageaccount123
+      - azure_storage_name=mystorageaccount123
       - mystorageaccount.blob.core.windows.net
 
   - name: Azure Storage Account Key
-    id: kingfisher.azurestorage.key.1
+    id: kingfisher.azurestorage.2
     pattern: |
       (?xi)
+      \b
+      azure
+      (?:.|[\n\r]){0,32}?
       (?i:(?:Access|Account|Storage)[_.-]?Key)
       (?:.|[\n\r]){0,25}?
       (
@@ -34,9 +40,9 @@ rules:
     min_entropy: 4.0
     confidence: medium
     examples:
-      - AccountKey=Xy9aB8cD7eF6gH5iJ4kL3mN2oP1qR0sT9uV8wX7yZ6aB5cD4eF3gH2iJ1kL0mN9oP8qR7sT6uV5wX4yZ3aB2cD1eF0gH9iJ8kL7mN6oP5q==\
+      - Azure AccountKey=Xy9aB8cD7eF6gH5iJ4kL3mN2oP1qR0sT9uV8wX7yZ6aB5cD4eF3gH2iJ1kL0mN9oP8qR7sT6uV5wX4yZ3aB2cD1eF0gH9iJ8kL7mN6oP5q==\
     validation:
       type: AzureStorage
     depends_on_rule:
-      - rule_id: kingfisher.azurestorage.name.1
+      - rule_id: kingfisher.azurestorage.1
         variable: AZURENAME
diff --git a/data/rules/travisci.yml b/data/rules/travisci.yml
index 3461fbb..5a61c0a 100644
--- a/data/rules/travisci.yml
+++ b/data/rules/travisci.yml
@@ -31,3 +31,24 @@ rules:
               - report_response: true
               - type: StatusMatch
                 status: [200]
+  - name: Travis CI Encrypted Variable
+    id: kingfisher.travisci.1
+    pattern: |
+      (?xis)
+      \b
+      (?:language|env|deploy|script):[\r\n]
+      (?:.|[\n\r]){0,256}?
+      \b
+      (
+        secure:\s*"?[A-Za-z0-9+/=\\]+"?\s*
+      )
+      \b
+    min_entropy: 3.0
+    confidence: medium
+    examples:
+      - | 
+        env:
+          global:
+            # This sets FOO=super-secret, but the plaintext never appears here.
+            - secure: "VJh0l9gOb+6AVNDk6cziZSs1AqVM8CqtZU6ot9ZQeJ+KfL1pxnGQ4qQF8Cz9\M1q85c3l1N1+qkQ0uV12QG6O6ylq6Qq1l3VjAJM3h2pY3jdmrA8kX2ZIxRjC/\8+Xj1wVtKQ0R+owM/6i5Y6cyx4hRb3VvSeYlC0lD1iTzQ2vgMyE="
+
diff --git a/src/reporter/pretty_format.rs b/src/reporter/pretty_format.rs
index 740f82a..0e78477 100644
--- a/src/reporter/pretty_format.rs
+++ b/src/reporter/pretty_format.rs
@@ -163,7 +163,9 @@ impl<'a> Display for PrettyFinding<'a> {
         writeln!(f, " |Fingerprint...: {}", rm.m.finding_fingerprint)?;
         writeln!(f, " |Confidence....: {}", rm.match_confidence.to_string())?;
         writeln!(f, " |Entropy.......: {:.2}", rm.m.calculated_entropy)?;
-        let validation_status = if rm.validation_response_status == StatusCode::CONTINUE.as_u16() {
+        let validation_status = if rm.validation_response_status == StatusCode::CONTINUE.as_u16()
+            || rm.validation_response_status == StatusCode::PRECONDITION_REQUIRED.as_u16()
+        {
             "Not Attempted".to_string()
         } else if rm.validation_success {
             "Active Credential".to_string()

From 3520c5fba53b0c404b3c262d04143812cb5ec2bb Mon Sep 17 00:00:00 2001
From: Mick Grove <mick.grove@mongodb.com>
Date: Mon, 14 Jul 2025 13:18:24 -0700
Subject: [PATCH 2/8] Added baseline feature with --baseline-file and
 --manage-baseline flags. Introduced --exclude option for skipping paths

---
 CHANGELOG.md                  |   4 ++
 Cargo.toml                    |   1 +
 README.md                     |  44 +++++++++++++
 docs/BASELINE.md              |  55 ++++++++++++++++
 src/baseline.rs               | 115 ++++++++++++++++++++++++++++++++++
 src/cli/commands/inputs.rs    |  10 ++-
 src/cli/commands/scan.rs      |   8 +++
 src/lib.rs                    |  25 ++++++++
 src/main.rs                   |  19 +++++-
 src/reporter/json_format.rs   |   5 +-
 src/reporter/pretty_format.rs |   4 +-
 src/scanner/enumerate.rs      |   8 +--
 src/scanner/runner.rs         |  10 +++
 tests/int_dedup.rs            |   4 +-
 tests/int_github.rs           |   4 +-
 tests/int_gitlab.rs           |   4 +-
 tests/int_validation_cache.rs |   4 +-
 tests/int_vulnerable_files.rs |   8 ++-
 tests/smoke_baseline.rs       |  56 +++++++++++++++++
 tests/smoke_exclude.rs        |  34 ++++++++++
 20 files changed, 401 insertions(+), 21 deletions(-)
 create mode 100644 docs/BASELINE.md
 create mode 100644 src/baseline.rs
 create mode 100644 tests/smoke_baseline.rs
 create mode 100644 tests/smoke_exclude.rs

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ebfc95f..2e1179b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,10 @@ All notable changes to this project will be documented in this file.
 ## [1.21.0]
 - Improved Azure Storage rule
 - Added rule to detect TravisCI encrypted values
+- Added baseline feature with `--baseline-file` and `--manage-baseline` flags
+- Introduced `--exclude` option for skipping paths
+- Added tests covering baseline and exclude workflow
+
 
 ## [1.20.0]
 - Removed confirmation prompt when user provides --self-update flag
diff --git a/Cargo.toml b/Cargo.toml
index 179f285..dc2db13 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -161,6 +161,7 @@ trust-dns-resolver   = { version = "0.23.2", default-features = false, features
 atty = "0.2.14"
 self_update = { version = "0.42.0", default-features = false, features = ["rustls", "archive-tar", "archive-zip", "compression-flate2"] }
 semver = "1.0.26"
+globset = "0.4.16"
 
 [dependencies.tikv-jemallocator]
 version = "0.6"
diff --git a/README.md b/README.md
index 40bbce6..d7ae8b6 100644
--- a/README.md
+++ b/README.md
@@ -145,6 +145,24 @@ kingfisher scan /path/to/repo --rule-stats
 kingfisher scan ./my-project --ignore-tests
 ```
 
+### Exclude specific paths
+```bash
+# Skip all Python files and any directory named tests
+kingfisher scan ./my-project \
+  --exclude '*.py' \
+  --exclude tests
+```
+
+To see which files are being skipped, enable verbose debugging (`-v`) when scanning; this reports any files skipped by the baseline file or via `--exclude`:
+
+```bash
+# Skip all Python files and any directory named tests, and report to stderr any skipped files
+kingfisher scan ./my-project \
+  --exclude '*.py' \
+  --exclude tests \
+  -v
+```
+
 ---
 
 ## Scanning GitHub
@@ -287,6 +305,32 @@ kingfisher github repos list --organization my-org
 - `--extraction-depth <N>`: Specifies how deep nested archives should be extracted and scanned (default: 2)
 - `--redact`: Replaces discovered secrets with a one-way hash for secure output
 - `--ignore-tests`: Skip files or directories whose path component contains _test_, _spec_, _fixture_, _example_, or _sample_ (case-insensitive)
+- `--exclude <PATTERN>`: Skip any file or directory whose path matches this glob pattern (repeatable, uses gitignore-style syntax)
+- `--baseline-file <FILE>`: Ignore matches listed in a baseline YAML file
+- `--manage-baseline`: Create or update the baseline file with current findings
+
+## Build a Baseline / Detect New Secrets
+
+There are situations where a repository already contains checked‑in secrets, but you want to ensure no **new** secrets are introduced. A baseline file lets you document the known findings so future scans only report anything that is not already in that list.
+
+The easiest way to create a baseline is to run a normal scan with the `--manage-baseline` flag (typically at a low confidence level to capture all potential matches):
+
+```bash
+kingfisher scan /path/to/code \
+  --confidence low \
+  --manage-baseline \
+  --baseline-file ./baseline-file.yml
+```
+
+Use the same YAML file with the `--baseline-file` option on future scans to hide all recorded findings:
+
+```bash
+kingfisher scan /path/to/code \
+  --baseline-file /path/to/baseline-file.yml
+```
+
+See [docs/BASELINE.md](docs/BASELINE.md) for full details.
+
 
 ## Finding Fingerprint
 
diff --git a/docs/BASELINE.md b/docs/BASELINE.md
new file mode 100644
index 0000000..aef0274
--- /dev/null
+++ b/docs/BASELINE.md
@@ -0,0 +1,55 @@
+
+# Build a Baseline / Detect Only New Secrets
+
+There are situations where a repository already contains checked‑in secrets, but you want to ensure no **new** secrets are introduced. A baseline file lets you document the known findings so future scans only report anything that is not already in that list.
+
+The easiest way to create a baseline is to run a normal scan with the `--manage-baseline` flag (typically at a low confidence level to capture all potential matches):
+
+```bash
+kingfisher scan /path/to/code \
+  --confidence low \
+  --manage-baseline \
+  --baseline-file ./baseline-file.yml
+```
+
+This generates a YAML file named `baseline-file.yml` in the current directory. The file tracks each finding under an `ExactFindings` section:
+
+```yaml
+ExactFindings:
+  matches:
+  - filepath: ruby_vulnerable.rb/
+    fingerprint: 056876f00ffd0622
+    linenum: 52
+    lastupdated: Mon, 14 Jul 2025 10:17:56 -0700
+  - filepath: ruby_vulnerable.rb/
+    fingerprint: ce41d19b83b2b1b0
+    linenum: 53
+    lastupdated: Mon, 14 Jul 2025 10:17:56 -0700
+  - filepath: ruby_vulnerable.rb/
+    fingerprint: e8644d91fa6654f5
+    linenum: 40
+    lastupdated: Mon, 14 Jul 2025 10:17:56 -0700
+```
+
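+`fingerprint` reuses Kingfisher's 64-bit *finding fingerprint* algorithm with offsets set to zero. It hashes the secret value together with the normalized filepath, so moving a secret to a different position within the same file does not create a new entry. Because the filepath is part of the hash, the same secret in a different file is recorded separately.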
+
+Use the same YAML file with the `--baseline-file` option on future scans to hide all recorded findings:
+
+```bash
+kingfisher scan /path/to/code \
+  --baseline-file /path/to/baseline-file.yml
+```
+
+If you intentionally add a new secret that should be ignored later, rerun the scan with both `--manage-baseline` and `--baseline-file` to append new matches to the file without removing existing entries:
+
+```bash
+kingfisher scan /path/to/code \
+  --manage-baseline \
+  --baseline-file /path/to/baseline-file.yml
+```
+
+To see which files are being skipped, enable verbose debugging (`-v`) when scanning; this reports any files skipped by the baseline file or via `--exclude`:
+
+```bash
+kingfisher scan /path/to/project -v
+```
\ No newline at end of file
diff --git a/src/baseline.rs b/src/baseline.rs
new file mode 100644
index 0000000..c2588f2
--- /dev/null
+++ b/src/baseline.rs
@@ -0,0 +1,115 @@
+use std::{
+    collections::HashSet,
+    fs,
+    path::{Path, PathBuf},
+    sync::Arc,
+};
+
+use anyhow::{Context, Result};
+use chrono::Local;
+use serde::{Deserialize, Serialize};
+use tracing::debug;
+
+use crate::{findings_store::FindingsStore, matcher::compute_finding_fingerprint};
+
+#[derive(Debug, Default, Serialize, Deserialize)]
+pub struct BaselineFile {
+    #[serde(rename = "ExactFindings", default)]
+    pub exact_findings: ExactFindings,
+}
+
+#[derive(Debug, Default, Serialize, Deserialize)]
+pub struct ExactFindings {
+    #[serde(default)]
+    pub matches: Vec<BaselineFinding>,
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct BaselineFinding {
+    pub filepath: String,
+    pub fingerprint: String,
+    pub linenum: usize,
+    pub lastupdated: String,
+}
+
+pub fn load_baseline(path: &Path) -> Result<BaselineFile> {
+    let data = fs::read_to_string(path).context("read baseline file")?;
+    Ok(serde_yaml::from_str(&data).context("parse baseline yaml")?)
+}
+
+pub fn save_baseline(path: &Path, baseline: &BaselineFile) -> Result<()> {
+    let data = serde_yaml::to_string(baseline).context("serialize baseline")?;
+    fs::write(path, data).context("write baseline file")
+}
+
+fn normalize_path(p: &Path, roots: &[PathBuf]) -> String {
+    for root in roots {
+        if let Ok(stripped) = p.strip_prefix(root) {
+            if let Some(name) = root.file_name() {
+                return PathBuf::from(name).join(stripped).to_string_lossy().into();
+            }
+        }
+    }
+    p.to_string_lossy().into()
+}
+
+fn compute_hash(secret: &str, path: &str) -> String {
+    let fp = compute_finding_fingerprint(secret, path, 0, 0);
+    format!("{:016x}", fp)
+}
+
+fn extract_secret(m: &crate::matcher::Match) -> String {
+    m.groups
+        .captures
+        .get(1)
+        .or_else(|| m.groups.captures.get(0))
+        .map(|c| c.value.to_string())
+        .unwrap_or_default()
+}
+
+pub fn apply_baseline(
+    store: &mut FindingsStore,
+    baseline_path: &Path,
+    manage: bool,
+    roots: &[PathBuf],
+) -> Result<()> {
+    let mut baseline = if baseline_path.exists() {
+        load_baseline(baseline_path)?
+    } else {
+        BaselineFile::default()
+    };
+
+    let mut known: HashSet<String> =
+        baseline.exact_findings.matches.iter().map(|m| m.fingerprint.clone()).collect();
+
+    let mut new_entries = Vec::new();
+    for arc_msg in store.get_matches_mut() {
+        let (origin, _blob, m) = Arc::make_mut(arc_msg);
+        let file_path = origin.iter().filter_map(|o| o.full_path()).next();
+        if let Some(fp) = file_path {
+            let normalized = normalize_path(&fp, roots);
+            let secret = extract_secret(m);
+            let hash = compute_hash(&secret, &normalized);
+            if known.contains(&hash) {
+                debug!("Skipping {} due to baseline (hash {})", normalized, hash);
+                m.visible = false;
+            } else if manage {
+                known.insert(hash.clone());
+                let entry = BaselineFinding {
+                    filepath: normalized,
+                    fingerprint: hash,
+                    linenum: m.location.source_span.start.line,
+                    lastupdated: Local::now().to_rfc2822(),
+                };
+                new_entries.push(entry);
+            }
+        }
+    }
+
+    if manage && !new_entries.is_empty() {
+        baseline.exact_findings.matches.extend(new_entries);
+        save_baseline(baseline_path, &baseline)?;
+    }
+
+    Ok(())
+}
diff --git a/src/cli/commands/inputs.rs b/src/cli/commands/inputs.rs
index acfc220..20d3dde 100644
--- a/src/cli/commands/inputs.rs
+++ b/src/cli/commands/inputs.rs
@@ -111,9 +111,13 @@ pub struct ContentFilteringArgs {
     #[arg(long("max-file-size"), default_value_t = 25.0)]
     pub max_file_size_mb: f64,
 
-    /// Use custom path-based ignore rules from the given file(s)
-    #[arg(long, short, value_hint = ValueHint::FilePath)]
-    pub ignore: Vec<PathBuf>,
+    // /// Use custom path-based ignore rules from the given file(s)
+    // #[arg(long, short, value_hint = ValueHint::FilePath)]
+    // pub ignore: Vec<PathBuf>,
+    /// Skip any file or directory whose path matches this glob pattern. Multiple
+    /// patterns may be provided by repeating the flag.
+    #[arg(long, value_name = "PATTERN")]
+    pub exclude: Vec<String>,
 
     /// If true, do NOT extract archive files
     #[arg(long("no-extract-archives"), default_value_t = false)]
diff --git a/src/cli/commands/scan.rs b/src/cli/commands/scan.rs
index 36726e0..406c896 100644
--- a/src/cli/commands/scan.rs
+++ b/src/cli/commands/scan.rs
@@ -106,6 +106,14 @@ pub struct ScanArgs {
     /// Bytes of context before and after each match
     #[arg(long, default_value_t = 256, value_name = "BYTES")]
     pub snippet_length: usize,
+
+    /// Baseline file to filter known secrets
+    #[arg(long, value_name = "FILE")]
+    pub baseline_file: Option<std::path::PathBuf>,
+
+    /// Create or update the baseline file with current findings
+    #[arg(long, default_value_t = false)]
+    pub manage_baseline: bool,
 }
 
 /// Confidence levels for findings
diff --git a/src/lib.rs b/src/lib.rs
index 4395b89..9f04d46 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,3 +1,4 @@
+pub mod baseline;
 pub mod binary;
 pub mod blob;
 pub mod bstring_escape;
@@ -43,6 +44,7 @@ use crossbeam_channel::Sender;
 pub use git_repo_enumerator::{GitRepoEnumerator, GitRepoResult, GitRepoWithMetadataEnumerator};
 pub use gix::{self, Repository, ThreadSafeRepository};
 use gix::{open::Options, open_opts};
+use globset::{Glob, GlobSet, GlobSetBuilder};
 pub use ignore::gitignore::{Gitignore, GitignoreBuilder};
 use ignore::{DirEntry, WalkBuilder, WalkState};
 use tokio::time::Duration;
@@ -204,6 +206,7 @@ pub struct FilesystemEnumerator {
     extract_archives: bool,
     extraction_depth: usize,
     no_dedup: bool,
+    exclude_globset: Option<std::sync::Arc<GlobSet>>,
 }
 
 impl FilesystemEnumerator {
@@ -234,6 +237,7 @@ impl FilesystemEnumerator {
             extract_archives: !args.content_filtering_args.no_extract_archives,
             extraction_depth: args.content_filtering_args.extraction_depth as usize,
             no_dedup: args.no_dedup,
+            exclude_globset: None,
         })
     }
 
@@ -287,6 +291,27 @@ impl FilesystemEnumerator {
         self
     }
 
+    pub fn set_exclude_patterns(&mut self, patterns: &[String]) -> Result<&mut Self> {
+        if patterns.is_empty() {
+            return Ok(self);
+        }
+        let mut builder = GlobSetBuilder::new();
+        for pat in patterns {
+            builder.add(Glob::new(pat)?);
+        }
+        let globset = std::sync::Arc::new(builder.build()?);
+        self.exclude_globset = Some(globset.clone());
+        self.filter_entry(move |entry| {
+            let path = entry.path();
+            let matches = globset.is_match(path);
+            if matches {
+                debug!("Skipping {} due to --exclude", path.display());
+            }
+            !matches
+        });
+        Ok(self)
+    }
+
     pub fn gitignore(&self) -> Result<Gitignore> {
         Ok(self.gitignore_builder.build()?)
     }
diff --git a/src/main.rs b/src/main.rs
index 280422c..2567187 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -132,7 +132,18 @@ pub fn determine_exit_code(datastore: &Arc<Mutex<findings_store::FindingsStore>>
     // exit with code 0 if there are NO findings discovered
     let ds = datastore.lock().unwrap();
     // Get all matches
-    let all_matches = ds.get_matches();
+    // let all_matches = ds.get_matches();
+
+    // Only consider visible matches when determining the exit code
+    let all_matches = ds
+        .get_matches()
+        .iter()
+        .filter(|msg| {
+            let (_, _, match_item) = &***msg;
+            match_item.visible
+        })
+        .collect::<Vec<_>>();
+
     if all_matches.is_empty() {
         // No findings discovered
         0
@@ -141,7 +152,7 @@ pub fn determine_exit_code(datastore: &Arc<Mutex<findings_store::FindingsStore>>
         let validated_matches = all_matches
             .iter()
             .filter(|msg| {
-                let (_, _, match_item) = &***msg;
+                let (_, _, match_item) = &****msg;
                 match_item.validation_success
             })
             .count();
@@ -278,7 +289,7 @@ fn create_default_scan_args() -> cli::commands::scan::ScanArgs {
             max_file_size_mb: 25.0,
             no_extract_archives: true,
             extraction_depth: 2,
-            ignore: Vec::new(),
+            exclude: Vec::new(), // Exclude patterns
             no_binary: true,
         },
         confidence: ConfidenceLevel::Medium,
@@ -291,6 +302,8 @@ fn create_default_scan_args() -> cli::commands::scan::ScanArgs {
         no_dedup: false,
         ignore_tests: false,
         snippet_length: 256,
+        baseline_file: None,
+        manage_baseline: false,
         output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
     }
 }
diff --git a/src/reporter/json_format.rs b/src/reporter/json_format.rs
index ec45e83..c1185b2 100644
--- a/src/reporter/json_format.rs
+++ b/src/reporter/json_format.rs
@@ -383,6 +383,7 @@ mod tests {
         rules::rule::Confidence,
         util::intern,
     };
+
     fn create_default_args() -> cli::commands::scan::ScanArgs {
         use crate::cli::commands::gitlab::GitLabRepoType; // bring enum into scope
 
@@ -424,7 +425,7 @@ mod tests {
                 max_file_size_mb: 25.0,
                 no_extract_archives: false,
                 extraction_depth: 2,
-                ignore: Vec::new(),
+                exclude: Vec::new(), // Exclude patterns
                 no_binary: true,
             },
             confidence: ConfidenceLevel::Medium,
@@ -436,6 +437,8 @@ mod tests {
             git_repo_timeout: 1800, // 30 minutes
             output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
             snippet_length: 256,
+            baseline_file: None,
+            manage_baseline: false,
         }
     }
 
diff --git a/src/reporter/pretty_format.rs b/src/reporter/pretty_format.rs
index 0e78477..7a6212c 100644
--- a/src/reporter/pretty_format.rs
+++ b/src/reporter/pretty_format.rs
@@ -351,7 +351,7 @@ fn test_pretty_format_with_nan_entropy_panics() {
             max_file_size_mb: 25.0,
             no_extract_archives: false,
             extraction_depth: 2,
-            ignore: Vec::new(),
+            exclude: Vec::new(), // Exclude patterns
             no_binary: true,
         },
 
@@ -369,6 +369,8 @@ fn test_pretty_format_with_nan_entropy_panics() {
 
         // display
         snippet_length: 256,
+        baseline_file: None,
+        manage_baseline: false,
     };
 
     // This will panic if the entropy isn't checked for NaN
diff --git a/src/scanner/enumerate.rs b/src/scanner/enumerate.rs
index d9ca0f8..62dbd56 100644
--- a/src/scanner/enumerate.rs
+++ b/src/scanner/enumerate.rs
@@ -246,13 +246,7 @@ fn make_fs_enumerator(
         // Pass no_dedup when enumerating git history
         ie.no_dedup(args.no_dedup);
 
-        // Load any specified ignore files
-        for ignore_path in args.content_filtering_args.ignore.iter() {
-            debug!("Using ignore rules from {}", ignore_path.display());
-            ie.add_ignore(ignore_path).with_context(|| {
-                format!("Failed to load ignore rules from {}", ignore_path.display())
-            })?;
-        }
+        ie.set_exclude_patterns(&args.content_filtering_args.exclude)?;
         // Determine whether to collect git metadata or not
         let collect_git_metadata = false;
         ie.collect_git_metadata(collect_git_metadata);
diff --git a/src/scanner/runner.rs b/src/scanner/runner.rs
index 112a04e..7c2f964 100644
--- a/src/scanner/runner.rs
+++ b/src/scanner/runner.rs
@@ -100,6 +100,16 @@ pub async fn run_async_scan(
         ds.replace_matches(deduped_arcs);
     }
 
+    // Apply the baseline when --baseline-file is given or --manage-baseline is set
+    if args.baseline_file.is_some() || args.manage_baseline {
+        let path = args
+            .baseline_file
+            .clone()
+            .unwrap_or_else(|| std::path::PathBuf::from("baseline-file.yaml"));
+        let mut ds = datastore.lock().unwrap();
+        crate::baseline::apply_baseline(&mut ds, &path, args.manage_baseline, &input_roots)?;
+    }
+
     // If validation is enabled, run it as a second phase
     if !args.no_validate {
         info!("Starting secret validation phase...");
diff --git a/tests/int_dedup.rs b/tests/int_dedup.rs
index 94206d0..3c41afd 100644
--- a/tests/int_dedup.rs
+++ b/tests/int_dedup.rs
@@ -90,7 +90,7 @@ rules:
             extraction_depth: 1,
             no_binary: true,
             no_extract_archives: false,
-            ignore: Vec::new(),
+            exclude: Vec::new(), // Exclude patterns
         },
         confidence: ConfidenceLevel::Low,
         no_validate: true,
@@ -103,6 +103,8 @@ rules:
         no_dedup,
         ignore_tests: false,
         snippet_length: 64,
+        baseline_file: None,
+        manage_baseline: false,
     };
 
     let global_args = GlobalArgs {
diff --git a/tests/int_github.rs b/tests/int_github.rs
index e541812..c66fdb8 100644
--- a/tests/int_github.rs
+++ b/tests/int_github.rs
@@ -77,7 +77,7 @@ fn test_github_remote_scan() -> Result<()> {
             no_extract_archives: false,
             extraction_depth: 2,
             no_binary: true,
-            ignore: Vec::new(),
+            exclude: Vec::new(), // Exclude patterns
         },
         confidence: ConfidenceLevel::Medium,
         no_validate: false,
@@ -90,6 +90,8 @@ fn test_github_remote_scan() -> Result<()> {
         no_dedup: true,
         ignore_tests: false,
         snippet_length: 256,
+        baseline_file: None,
+        manage_baseline: false,
     };
     // Create global arguments
     let global_args = GlobalArgs {
diff --git a/tests/int_gitlab.rs b/tests/int_gitlab.rs
index ea0acaf..308f6a0 100644
--- a/tests/int_gitlab.rs
+++ b/tests/int_gitlab.rs
@@ -74,7 +74,7 @@ fn test_gitlab_remote_scan() -> Result<()> {
             no_extract_archives: false,
             extraction_depth: 2,
             no_binary: true,
-            ignore: Vec::new(),
+            exclude: Vec::new(), // Exclude patterns
         },
         confidence: ConfidenceLevel::Medium,
         no_validate: false,
@@ -87,6 +87,8 @@ fn test_gitlab_remote_scan() -> Result<()> {
         no_dedup: true,
         ignore_tests: false,
         snippet_length: 256,
+        baseline_file: None,
+        manage_baseline: false,
     };
 
     let global_args = GlobalArgs {
diff --git a/tests/int_validation_cache.rs b/tests/int_validation_cache.rs
index 7f9ca80..06bc55a 100644
--- a/tests/int_validation_cache.rs
+++ b/tests/int_validation_cache.rs
@@ -133,7 +133,7 @@ async fn test_validation_cache_and_depvars() -> Result<()> {
             extraction_depth: 2,
             no_binary: true,
             no_extract_archives: false,
-            ignore: Vec::new(),
+            exclude: Vec::new(), // Exclude patterns
         },
         confidence: ConfidenceLevel::Low,
         no_validate: false,
@@ -146,6 +146,8 @@ async fn test_validation_cache_and_depvars() -> Result<()> {
         no_dedup: true, // keep duplicates so the cache is stressed
         ignore_tests: false,
         snippet_length: 128,
+        baseline_file: None,
+        manage_baseline: false,
     };
 
     /* --------------------------------------------------------- *
diff --git a/tests/int_vulnerable_files.rs b/tests/int_vulnerable_files.rs
index 20ad438..032a51b 100644
--- a/tests/int_vulnerable_files.rs
+++ b/tests/int_vulnerable_files.rs
@@ -76,7 +76,7 @@ impl TestContext {
                 extraction_depth: 2,
                 no_binary: true,
                 no_extract_archives: false,
-                ignore: Vec::new(),
+                exclude: Vec::new(), // Exclude patterns
             },
             confidence: ConfidenceLevel::Low,
             no_validate: true,
@@ -89,6 +89,8 @@ impl TestContext {
             no_dedup: true,
             ignore_tests: false,
             snippet_length: 256,
+            baseline_file: None,
+            manage_baseline: false,
         };
 
         let loaded = RuleLoader::from_rule_specifiers(&scan_args.rules)
@@ -140,7 +142,7 @@ impl TestContext {
                 extraction_depth: 2,
                 no_binary: true,
                 no_extract_archives: false,
-                ignore: Vec::new(),
+                exclude: Vec::new(), // Exclude patterns
             },
             confidence: ConfidenceLevel::Low,
             no_validate: true,
@@ -153,6 +155,8 @@ impl TestContext {
             no_dedup: true,
             ignore_tests: false,
             snippet_length: 256,
+            baseline_file: None,
+            manage_baseline: false,
         };
 
         let global_args = GlobalArgs {
diff --git a/tests/smoke_baseline.rs b/tests/smoke_baseline.rs
new file mode 100644
index 0000000..d5d1962
--- /dev/null
+++ b/tests/smoke_baseline.rs
@@ -0,0 +1,56 @@
+use std::fs;
+
+use assert_cmd::Command;
+use predicates::prelude::*;
+use tempfile::tempdir;
+
+const GH_PAT: &str = "ghp_1wuHFikBKQtCcH3EB2FBUkyn8krXhP2qLqPa";
+
+#[test]
+fn baseline_create_and_filter() -> anyhow::Result<()> {
+    let dir = tempdir()?;
+    let file = dir.path().join("leak.txt");
+    fs::write(&file, format!("token = \"{}\"\n", GH_PAT))?;
+    let baseline = dir.path().join("baseline.yaml");
+
+    // Create baseline with manage flag
+    Command::cargo_bin("kingfisher")?
+        .args([
+            "scan",
+            dir.path().to_str().unwrap(),
+            "--no-binary",
+            "--confidence=low",
+            "--no-validate",
+            "--format",
+            "json",
+            "--manage-baseline",
+            "--baseline-file",
+            baseline.to_str().unwrap(),
+            "--no-update-check",
+        ])
+        .assert()
+        .code(200)
+        .stdout(predicate::str::contains(GH_PAT));
+
+    assert!(baseline.exists(), "baseline file created");
+
+    // Scan again using the baseline
+    Command::cargo_bin("kingfisher")?
+        .args([
+            "scan",
+            dir.path().to_str().unwrap(),
+            "--no-binary",
+            "--confidence=low",
+            "--no-validate",
+            "--format",
+            "json",
+            "--baseline-file",
+            baseline.to_str().unwrap(),
+            "--no-update-check",
+        ])
+        .assert()
+        .code(0)
+        .stdout(predicate::str::contains(GH_PAT).not());
+
+    Ok(())
+}
diff --git a/tests/smoke_exclude.rs b/tests/smoke_exclude.rs
new file mode 100644
index 0000000..76e647d
--- /dev/null
+++ b/tests/smoke_exclude.rs
@@ -0,0 +1,34 @@
+use std::fs;
+
+use assert_cmd::Command;
+use predicates::prelude::*;
+use tempfile::tempdir;
+
+const SECRET: &str = "ghp_1wuHFikBKQtCcH3EB2FBUkyn8krXhP2qLqPa";
+
+#[test]
+fn exclude_pattern_hides_matches() -> anyhow::Result<()> {
+    let dir = tempdir()?;
+    let py = dir.path().join("foo.py");
+    let txt = dir.path().join("bar.txt");
+    fs::write(&py, format!("token = \"{}\"\n", SECRET))?;
+    fs::write(&txt, format!("token = \"{}\"\n", SECRET))?;
+
+    Command::cargo_bin("kingfisher")?
+        .args([
+            "scan",
+            dir.path().to_str().unwrap(),
+            "--confidence=low",
+            "--no-binary",
+            "--no-validate",
+            "--format",
+            "json",
+            "--exclude=*.py",
+            "--no-update-check",
+        ])
+        .assert()
+        .code(200)
+        .stdout(predicate::str::contains("bar.txt").and(predicate::str::contains("foo.py").not()));
+
+    Ok(())
+}

From 93f1e3b1da14ee91680f3e82de31450d9d5ce94e Mon Sep 17 00:00:00 2001
From: Mick Grove <mick.grove@mongodb.com>
Date: Mon, 14 Jul 2025 15:31:44 -0700
Subject: [PATCH 3/8] JWT validation performs OpenID Connect discovery using
 the iss claim and verifies signatures via JWKS

---
 CHANGELOG.md          |   2 +
 Cargo.toml            |   2 +
 data/rules/jwt.yml    |   4 +-
 src/rules/rule.rs     |   1 +
 src/validation.rs     |  59 ++++++------
 src/validation/jwt.rs | 207 ++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 245 insertions(+), 30 deletions(-)
 create mode 100644 src/validation/jwt.rs

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2e1179b..032379c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,8 @@ All notable changes to this project will be documented in this file.
 - Added baseline feature with `--baseline-file` and `--manage-baseline` flags
 - Introduced `--exclude` option for skipping paths
 - Added tests covering baseline and exclude workflow
+- Added validation for JWT tokens that checks `exp` and `nbf` claims
+- JWT validation performs OpenID Connect discovery using the `iss` claim and verifies signatures via JWKS
 
 
 ## [1.20.0]
diff --git a/Cargo.toml b/Cargo.toml
index dc2db13..e220d39 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -162,6 +162,8 @@ atty = "0.2.14"
 self_update = { version = "0.42.0", default-features = false, features = ["rustls", "archive-tar", "archive-zip", "compression-flate2"] }
 semver = "1.0.26"
 globset = "0.4.16"
+jsonwebtoken = "9.3.1"
+ipnet = "2.11.0"
 
 [dependencies.tikv-jemallocator]
 version = "0.6"
diff --git a/data/rules/jwt.yml b/data/rules/jwt.yml
index cd3f78d..e596027 100644
--- a/data/rules/jwt.yml
+++ b/data/rules/jwt.yml
@@ -22,4 +22,6 @@ rules:
       - https://datatracker.ietf.org/doc/html/rfc7519
       - https://en.wikipedia.org/wiki/Base64#URL_applications
       - https://datatracker.ietf.org/doc/html/rfc4648
-      - https://developer.okta.com/blog/2018/06/20/what-happens-if-your-jwt-is-stolen
\ No newline at end of file
+      - https://developer.okta.com/blog/2018/06/20/what-happens-if-your-jwt-is-stolen
+    validation:
+      type: JWT
\ No newline at end of file
diff --git a/src/rules/rule.rs b/src/rules/rule.rs
index bf923f3..a301a09 100644
--- a/src/rules/rule.rs
+++ b/src/rules/rule.rs
@@ -38,6 +38,7 @@ pub enum Validation {
     GCP,
     MongoDB,
     Postgres,
+    JWT,
     Raw(String),
     Http(HttpValidation),
 }
diff --git a/src/validation.rs b/src/validation.rs
index 44a7a6f..172888d 100644
--- a/src/validation.rs
+++ b/src/validation.rs
@@ -27,6 +27,7 @@ mod aws;
 mod azure;
 mod gcp;
 mod httpvalidation;
+mod jwt;
 mod mongodb;
 mod postgres;
 mod utils;
@@ -58,35 +59,6 @@ pub fn init_validation_caches() {
     IN_FLIGHT.set(DashMap::new()).ok();
 }
 
-// #[derive(Clone, FilterReflection, ParseFilter)]
-// #[filter(
-//     name = "b64enc",
-//     description = "Encodes the input string using Base64 encoding",
-//     parsed(B64EncFilter)
-// )]
-// pub struct B64EncFilterParser;
-
-// #[derive(Debug, Default, Clone)]
-// pub struct B64EncFilter;
-
-// impl std::fmt::Display for B64EncFilter {
-//     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-//         write!(f, "b64enc")
-//     }
-// }
-
-// impl Filter for B64EncFilter {
-//     fn evaluate(
-//         &self,
-//         input: &dyn ValueView,
-//         _runtime: &dyn Runtime,
-//     ) -> Result<Value, LiquidError> {
-//         let input_str = input.to_kstr().into_owned();
-//         let encoded = general_purpose::STANDARD.encode(input_str.as_bytes());
-//         Ok(Value::scalar(encoded))
-//     }
-// }
-
 #[derive(Clone)]
 pub struct CachedResponse {
     pub body: String,
@@ -700,7 +672,36 @@ async fn timed_validate_single_match<'a>(
                 },
             );
         }
+        // ---------------------------------------------------- JWT validator
+        Some(Validation::JWT) => {
+            let token = captured_values
+                .iter()
+                .find(|(n, ..)| n == "TOKEN")
+                .map(|(_, v, ..)| v.clone())
+                .unwrap_or_default();
 
+            if token.is_empty() {
+                m.validation_success = false;
+                m.validation_response_body = "JWT token not found.".to_string();
+                m.validation_response_status = StatusCode::BAD_REQUEST;
+                commit_and_return(m);
+                return;
+            }
+
+            match jwt::validate_jwt(&token, client).await {
+                Ok((ok, msg)) => {
+                    m.validation_success = ok;
+                    m.validation_response_body = msg;
+                    m.validation_response_status =
+                        if ok { StatusCode::OK } else { StatusCode::UNAUTHORIZED };
+                }
+                Err(e) => {
+                    m.validation_success = false;
+                    m.validation_response_body = format!("JWT validation error: {}", e);
+                    m.validation_response_status = StatusCode::BAD_REQUEST;
+                }
+            }
+        }
         // ---------------------------------------------------- AWS validator
         Some(Validation::AWS) => {
             let secret = captured_values
diff --git a/src/validation/jwt.rs b/src/validation/jwt.rs
new file mode 100644
index 0000000..ed2bd3c
--- /dev/null
+++ b/src/validation/jwt.rs
@@ -0,0 +1,207 @@
+use anyhow::{anyhow, Result};
+use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine as _};
+use chrono::Utc;
+use ipnet::IpNet;
+use jsonwebtoken::{decode, decode_header, jwk::JwkSet, DecodingKey, Validation as JwtValidation};
+use reqwest::{redirect::Policy, Client, Url};
+use serde::Deserialize;
+use tokio::net::lookup_host;
+
+use super::utils::check_url_resolvable;
+
+/// RFC 1918 + loopback + link-local nets we refuse to contact
+const BLOCKED_NETS: &[&str] = &[
+    "10.0.0.0/8",
+    "172.16.0.0/12",
+    "192.168.0.0/16", // private
+    "127.0.0.0/8",
+    "169.254.0.0/16", // loopback / link-local
+];
+
+// `aud` may be a single string or an array of strings; the untagged enum accepts either form.
+#[derive(Debug, Deserialize)]
+#[serde(untagged)]
+enum Aud {
+    Str(String),
+    Arr(Vec<String>),
+}
+
+#[derive(Debug, Deserialize)]
+struct Claims {
+    exp: Option<i64>,
+    nbf: Option<i64>,
+    iss: Option<String>,
+    aud: Option<Aud>,
+}
+
+pub async fn validate_jwt(token: &str, client: &Client) -> Result<(bool, String)> {
+    // --- insecure payload decode -------------------------------------------------
+    let claims: Claims = {
+        let payload_b64 = token.split('.').nth(1).ok_or_else(|| anyhow!("invalid JWT format"))?;
+        let payload_json = URL_SAFE_NO_PAD
+            .decode(payload_b64)
+            .map_err(|e| anyhow!("invalid base64 in payload: {e}"))?;
+        serde_json::from_slice(&payload_json).map_err(|e| anyhow!("invalid JSON claims: {e}"))?
+    };
+
+    // temporal checks on the still-unverified claims; a failed check is a verdict, not an error
+    let now = Utc::now().timestamp();
+    if let Some(nbf) = claims.nbf {
+        if now < nbf {
+            return Ok((false, format!("Token not valid before {nbf}")));
+        }
+    }
+    if let Some(exp) = claims.exp {
+        if now > exp {
+            return Ok((false, format!("Token expired at {exp}")));
+        }
+    }
+
+    // With an `iss` claim: discovery + JWKS signature check; without one: expiry-only verdict.
+    let issuer = claims.iss.clone().unwrap_or_default();
+
+    if let Some(iss) = claims.iss.clone() {
+        // parse header now (kid, alg)
+        let header = decode_header(token).map_err(|e| anyhow!("decode header: {e}"))?;
+
+        // `iss` comes from the unverified payload, so vet its host BEFORE any request is
+        // sent to it: refuse issuers that resolve to private / loopback / link-local IPs.
+        let iss_host = Url::parse(&iss)
+            .map_err(|e| anyhow!("invalid iss: {e}"))?
+            .host_str()
+            .unwrap_or_default()
+            .to_ascii_lowercase();
+        for addr in lookup_host((iss_host.as_str(), 443)).await? {
+            if is_blocked_ip(addr.ip()) {
+                return Ok((false, "issuer resolves to private or link-local IP".to_string()));
+            }
+        }
+
+        // build discovery URL and fetch it (redirects disabled)
+        let config_url = format!("{}/.well-known/openid-configuration", iss.trim_end_matches('/'));
+        let no_redirect_client = Client::builder()
+            .redirect(Policy::none())
+            .build()
+            .map_err(|e| anyhow!("client build: {e}"))?;
+
+        let cfg_resp = no_redirect_client
+            .get(&config_url)
+            .send()
+            .await
+            .map_err(|e| anyhow!("issuer discovery failed: {e}"))?;
+
+        if !cfg_resp.status().is_success() {
+            return Ok((false, format!("issuer discovery failed: {}", cfg_resp.status())));
+        }
+
+        let cfg_json: serde_json::Value =
+            cfg_resp.json().await.map_err(|e| anyhow!("invalid discovery JSON: {e}"))?;
+
+        // extract jwks_uri
+        let jwks_uri = cfg_json
+            .get("jwks_uri")
+            .and_then(|v| v.as_str())
+            .ok_or_else(|| anyhow!("jwks_uri missing"))?;
+
+        // must be HTTPS
+        let url = Url::parse(jwks_uri).map_err(|e| anyhow!("invalid jwks_uri: {e}"))?;
+        if url.scheme() != "https" {
+            return Ok((false, "jwks_uri must use https".to_string()));
+        }
+
+        // jwks_uri must stay on the issuer host vetted above, so no hop to an unvetted host
+        let jwks_host = url.host_str().unwrap_or_default().to_ascii_lowercase();
+        if jwks_host != iss_host {
+            return Ok((
+                false,
+                format!("jwks_uri host ({jwks_host}) must match issuer host ({iss_host})"),
+            ));
+        }
+
+        // reachability check (existing helper)
+        check_url_resolvable(&url).await.map_err(|e| anyhow!("jwks uri unresolvable: {e}"))?;
+
+        // fetch JWKS with redirect-free client
+        let jwks_resp = no_redirect_client
+            .get(url)
+            .send()
+            .await
+            .map_err(|e| anyhow!("jwks fetch failed: {e}"))?;
+        if !jwks_resp.status().is_success() {
+            return Ok((false, format!("jwks fetch failed: {}", jwks_resp.status())));
+        }
+
+        let jwk_set: JwkSet =
+            jwks_resp.json().await.map_err(|e| anyhow!("invalid jwks json: {e}"))?;
+
+        // select key by kid
+        let kid = header.kid.ok_or_else(|| anyhow!("no kid in header"))?;
+        let jwk = jwk_set
+            .keys
+            .iter()
+            .find(|k| k.common.key_id.as_deref() == Some(&kid))
+            .ok_or_else(|| anyhow!("kid not found in jwks"))?;
+
+        // verify signature
+        let decoding_key = DecodingKey::from_jwk(jwk).map_err(|e| anyhow!("invalid jwk: {e}"))?;
+        let mut validation = JwtValidation::new(header.alg);
+        validation.set_audience(&extract_aud_strings(&claims));
+        validation.validate_exp = false;
+        validation.validate_nbf = false;
+
+        decode::<Claims>(token, &decoding_key, &validation)
+            .map_err(|e| anyhow!("signature verification failed: {e}"))?;
+
+        return Ok((
+            true,
+            format!("JWT valid (iss: {issuer}, aud: {:?})", extract_aud_strings(&claims)),
+        ));
+    }
+
+    Ok((true, format!("JWT not expired (iss: {issuer}, aud: {:?})", extract_aud_strings(&claims))))
+}
+
+/// Collects the optional `aud` claim, in string or array form, into an owned `Vec<String>`.
+fn extract_aud_strings(claims: &Claims) -> Vec<String> {
+    match claims.aud.as_ref() {
+        None => Vec::new(),
+        Some(Aud::Arr(list)) => list.clone(),
+        Some(Aud::Str(single)) => vec![single.clone()],
+    }
+}
+/// True if `ip` is inside a `BLOCKED_NETS` range. NOTE(review): all are IPv4; IPv6 never matches.
+fn is_blocked_ip(ip: std::net::IpAddr) -> bool {
+    BLOCKED_NETS.iter().any(|cidr| cidr.parse::<IpNet>().is_ok_and(|net| net.contains(&ip)))
+}
+
+#[cfg(test)]
+mod tests {
+    use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine as _};
+    use chrono::{Duration as ChronoDuration, Utc};
+    use reqwest::Client;
+
+    use super::validate_jwt;
+
+    fn build_token(exp_offset: i64) -> String {
+        let header = URL_SAFE_NO_PAD.encode(r#"{"alg":"none"}"#);
+        let exp = (Utc::now() + ChronoDuration::seconds(exp_offset)).timestamp();
+        let payload = URL_SAFE_NO_PAD.encode(format!("{{\"exp\":{exp}}}"));
+        format!("{header}.{payload}.")
+    }
+
+    #[tokio::test]
+    async fn valid_token() {
+        let token = build_token(60);
+        let client = Client::new();
+        let res = validate_jwt(&token, &client).await.unwrap();
+        assert!(res.0);
+    }
+
+    #[tokio::test]
+    async fn expired_token() {
+        let token = build_token(-60);
+        let client = Client::new();
+        let res = validate_jwt(&token, &client).await.unwrap();
+        assert!(!res.0);
+    }
+}

From 45e735ec8974ab0a3a4a74c123f362f44d0e9ad2 Mon Sep 17 00:00:00 2001
From: Mick Grove <mick.grove@mongodb.com>
Date: Mon, 14 Jul 2025 16:55:19 -0700
Subject: [PATCH 4/8] Removed --ignore-tests argument, because the --exclude
 flag provides more granular functionality

---
 CHANGELOG.md                  |  1 +
 README.md                     |  8 ++++++--
 src/cli/commands/scan.rs      |  4 ----
 src/git_repo_enumerator.rs    | 26 ++++++++++++++++++-------
 src/lib.rs                    |  6 +++++-
 src/main.rs                   |  1 -
 src/reporter/pretty_format.rs |  1 -
 src/scanner/enumerate.rs      | 36 ++++++++++++++++-------------------
 tests/int_dedup.rs            |  1 -
 tests/int_github.rs           |  1 -
 tests/int_gitlab.rs           |  1 -
 tests/int_validation_cache.rs |  1 -
 tests/int_vulnerable_files.rs |  2 --
 13 files changed, 47 insertions(+), 42 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 032379c..fb1ede4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,6 +10,7 @@ All notable changes to this project will be documented in this file.
 - Added tests covering baseline and exclude workflow
 - Added validation for JWT tokens that checks `exp` and `nbf` claims
 - JWT validation performs OpenID Connect discovery using the `iss` claim and verifies signatures via JWKS
+- Removed `--ignore-tests` argument, because the `--exclude` flag provides more granular functionality
 
 
 ## [1.20.0]
diff --git a/README.md b/README.md
index d7ae8b6..0797344 100644
--- a/README.md
+++ b/README.md
@@ -142,7 +142,12 @@ kingfisher scan /path/to/repo --rule-stats
 
 ```bash
 # Scan source but skip likely unit / integration tests
-kingfisher scan ./my-project --ignore-tests
+kingfisher scan ./my-project \
+  --exclude='test' \
+  --exclude='spec' \
+  --exclude='fixture' \
+  --exclude='example' \
+  --exclude='sample'
 ```
 
 ### Exclude specific paths
@@ -304,7 +309,6 @@ kingfisher github repos list --organization my-org
 - `--no-extract-archives`: Do not scan inside archives
 - `--extraction-depth <N>`: Specifies how deep nested archives should be extracted and scanned (default: 2)
 - `--redact`: Replaces discovered secrets with a one-way hash for secure output
-- `--ignore-tests`: Skip files or directories whose path component contains _test_, _spec_, _fixture_, _example_, or _sample_ (case-insensitive)
 - `--exclude <PATTERN>`: Skip any file or directory whose path matches this glob pattern (repeatable, uses gitignore-style syntax)
 - `--baseline-file <FILE>`: Ignore matches listed in a baseline YAML file
 - `--manage-baseline`: Create or update the baseline file with current findings
diff --git a/src/cli/commands/scan.rs b/src/cli/commands/scan.rs
index 406c896..fe4d5d6 100644
--- a/src/cli/commands/scan.rs
+++ b/src/cli/commands/scan.rs
@@ -88,10 +88,6 @@ pub struct ScanArgs {
     #[arg(long, default_value_t = false)]
     pub no_dedup: bool,
 
-    /// Ignore matches that appear to come from test files
-    #[arg(long, default_value_t = false)]
-    pub ignore_tests: bool,
-
     /// Redact findings values using a secure hash
     #[arg(long, short = 'r', default_value_t = false)]
     pub redact: bool,
diff --git a/src/git_repo_enumerator.rs b/src/git_repo_enumerator.rs
index 77e506e..9b3151e 100644
--- a/src/git_repo_enumerator.rs
+++ b/src/git_repo_enumerator.rs
@@ -73,11 +73,16 @@ pub struct GitBlobMetadata {
 pub struct GitRepoWithMetadataEnumerator<'a> {
     path: &'a Path,
     repo: Repository,
+    exclude_globset: Option<std::sync::Arc<globset::GlobSet>>,
 }
 
 impl<'a> GitRepoWithMetadataEnumerator<'a> {
-    pub fn new(path: &'a Path, repo: Repository) -> Self {
-        Self { path, repo }
+    pub fn new(
+        path: &'a Path,
+        repo: Repository,
+        exclude_globset: Option<std::sync::Arc<globset::GlobSet>>,
+    ) -> Self {
+        Self { path, repo, exclude_globset }
     }
 
     pub fn run(self) -> Result<GitRepoResult> {
@@ -173,12 +178,19 @@ impl<'a> GitRepoWithMetadataEnumerator<'a> {
                         }
                         let filtered = appearances
                             .into_iter()
-                            .filter(|entry| {
-                                // Apply your actual ignore-logic here:
-                                match entry.path.to_path() {
-                                    Ok(_path) => true,
-                                    Err(_) => true,
+                            .filter(|entry| match entry.path.to_path() {
+                                Ok(p) => {
+                                    if let Some(gs) = &self.exclude_globset {
+                                        let m = gs.is_match(p);
+                                        if m {
+                                            debug!("Skipping {} due to --exclude", p.display());
+                                        }
+                                        !m
+                                    } else {
+                                        true
+                                    }
                                 }
+                                Err(_) => true,
                             })
                             .collect::<SmallVec<_>>();
                         if filtered.is_empty() {
diff --git a/src/lib.rs b/src/lib.rs
index 9f04d46..26703fc 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -82,7 +82,7 @@ struct EnumeratorConfig {
     enumerate_git_history: bool,
     collect_git_metadata: bool,
     repo_scan_timeout: Duration,
-    // gitignore: Gitignore,
+    exclude_globset: Option<std::sync::Arc<GlobSet>>,
 }
 
 pub enum FoundInput {
@@ -312,6 +312,10 @@ impl FilesystemEnumerator {
         Ok(self)
     }
 
+    pub fn exclude_globset(&self) -> Option<std::sync::Arc<GlobSet>> {
+        self.exclude_globset.clone()
+    }
+
     pub fn gitignore(&self) -> Result<Gitignore> {
         Ok(self.gitignore_builder.build()?)
     }
diff --git a/src/main.rs b/src/main.rs
index 2567187..44a3ec3 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -300,7 +300,6 @@ fn create_default_scan_args() -> cli::commands::scan::ScanArgs {
         redact: false,
         git_repo_timeout: 1800,
         no_dedup: false,
-        ignore_tests: false,
         snippet_length: 256,
         baseline_file: None,
         manage_baseline: false,
diff --git a/src/reporter/pretty_format.rs b/src/reporter/pretty_format.rs
index 7a6212c..b9c868c 100644
--- a/src/reporter/pretty_format.rs
+++ b/src/reporter/pretty_format.rs
@@ -314,7 +314,6 @@ fn test_pretty_format_with_nan_entropy_panics() {
         // core execution / performance
         num_jobs: 1,
         no_dedup: false,
-        ignore_tests: false,
 
         // rule selection
         rules: RuleSpecifierArgs {
diff --git a/src/scanner/enumerate.rs b/src/scanner/enumerate.rs
index 62dbd56..8cfa9ff 100644
--- a/src/scanner/enumerate.rs
+++ b/src/scanner/enumerate.rs
@@ -82,9 +82,10 @@ pub fn enumerate_filesystem_inputs(
     }()
     .context("Failed to initialize filesystem enumerator")?;
 
-    let (enum_thread, input_recv) = {
+    let (enum_thread, input_recv, exclude_globset) = {
         let fs_enumerator = make_fs_enumerator(args, input_roots.into())
             .context("Failed to initialize filesystem enumerator")?;
+        let exclude_globset = fs_enumerator.as_ref().and_then(|ie| ie.exclude_globset());
         let channel_size = std::cmp::max(args.num_jobs * 128, 1024);
 
         let (input_send, input_recv) = crossbeam_channel::bounded(channel_size);
@@ -97,7 +98,7 @@ pub fn enumerate_filesystem_inputs(
                 Ok(())
             })
             .context("Failed to enumerate filesystem inputs")?;
-        (input_enumerator_thread, input_recv)
+        (input_enumerator_thread, input_recv, exclude_globset)
     };
 
     let enum_cfg = EnumeratorConfig {
@@ -107,6 +108,7 @@ pub fn enumerate_filesystem_inputs(
         },
         collect_git_metadata: args.input_specifier_args.commit_metadata,
         repo_scan_timeout,
+        exclude_globset,
     };
     let (send_ds, recv_ds) = create_datastore_channel(args.num_jobs);
     let datastore_writer_thread =
@@ -189,23 +191,11 @@ pub fn enumerate_filesystem_inputs(
                     Ok(Some((origin_set, blob_metadata, vec_of_matches))) => {
                         for (_, single_match) in vec_of_matches {
                             // Send each match
-                            let is_test = if args.ignore_tests {
-                                origin_set
-                                    .iter()
-                                    .filter_map(|o| o.full_path())
-                                    .any(|p| is_test_like_path(&p))
-                            } else {
-                                false
-                            };
-
-                            if !is_test {
-                                // Send each match
-                                send_ds.send((
-                                    Arc::new(origin_set.clone()),
-                                    Arc::new(blob_metadata.clone()),
-                                    single_match,
-                                ))?;
-                            }
+                            send_ds.send((
+                                Arc::new(origin_set.clone()),
+                                Arc::new(blob_metadata.clone()),
+                                single_match,
+                            ))?;
                         }
                     }
                     Err(e) => {
@@ -604,9 +594,15 @@ impl<'cfg> ParallelBlobIterator for (&'cfg EnumeratorConfig, FoundInput) {
                 // Spawn an enumerator thread so we can time-out cleanly
                 let path_clone = path.to_path_buf();
                 let (tx, rx) = std::sync::mpsc::channel();
+                let exclude_globset = cfg.exclude_globset.clone();
                 let handle = std::thread::spawn(move || {
                     let res = if collect_git_metadata {
-                        GitRepoWithMetadataEnumerator::new(&path_clone, repository).run()
+                        GitRepoWithMetadataEnumerator::new(
+                            &path_clone,
+                            repository,
+                            exclude_globset.clone(),
+                        )
+                        .run()
                     } else {
                         GitRepoEnumerator::new(&path_clone, repository).run()
                     };
diff --git a/tests/int_dedup.rs b/tests/int_dedup.rs
index 3c41afd..64c2c92 100644
--- a/tests/int_dedup.rs
+++ b/tests/int_dedup.rs
@@ -101,7 +101,6 @@ rules:
         git_repo_timeout: 1800, // 30 minutes
         output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
         no_dedup,
-        ignore_tests: false,
         snippet_length: 64,
         baseline_file: None,
         manage_baseline: false,
diff --git a/tests/int_github.rs b/tests/int_github.rs
index c66fdb8..330299b 100644
--- a/tests/int_github.rs
+++ b/tests/int_github.rs
@@ -88,7 +88,6 @@ fn test_github_remote_scan() -> Result<()> {
         git_repo_timeout: 1800, // 30 minutes
         output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
         no_dedup: true,
-        ignore_tests: false,
         snippet_length: 256,
         baseline_file: None,
         manage_baseline: false,
diff --git a/tests/int_gitlab.rs b/tests/int_gitlab.rs
index 308f6a0..66a7f37 100644
--- a/tests/int_gitlab.rs
+++ b/tests/int_gitlab.rs
@@ -85,7 +85,6 @@ fn test_gitlab_remote_scan() -> Result<()> {
         git_repo_timeout: 1800, // 30 minutes
         output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
         no_dedup: true,
-        ignore_tests: false,
         snippet_length: 256,
         baseline_file: None,
         manage_baseline: false,
diff --git a/tests/int_validation_cache.rs b/tests/int_validation_cache.rs
index 06bc55a..7e422e1 100644
--- a/tests/int_validation_cache.rs
+++ b/tests/int_validation_cache.rs
@@ -144,7 +144,6 @@ async fn test_validation_cache_and_depvars() -> Result<()> {
         git_repo_timeout: 1800, // 30 minutes
         output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
         no_dedup: true, // keep duplicates so the cache is stressed
-        ignore_tests: false,
         snippet_length: 128,
         baseline_file: None,
         manage_baseline: false,
diff --git a/tests/int_vulnerable_files.rs b/tests/int_vulnerable_files.rs
index 032a51b..bb1d064 100644
--- a/tests/int_vulnerable_files.rs
+++ b/tests/int_vulnerable_files.rs
@@ -87,7 +87,6 @@ impl TestContext {
             git_repo_timeout: 1800, // 30 minutes
             output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
             no_dedup: true,
-            ignore_tests: false,
             snippet_length: 256,
             baseline_file: None,
             manage_baseline: false,
@@ -153,7 +152,6 @@ impl TestContext {
             git_repo_timeout: 1800, // 30 minutes
             output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
             no_dedup: true,
-            ignore_tests: false,
             snippet_length: 256,
             baseline_file: None,
             manage_baseline: false,

From 77c8940ba2d87b6bbdd6ca4f876b2e297235af09 Mon Sep 17 00:00:00 2001
From: Mick Grove <mick.grove@mongodb.com>
Date: Mon, 14 Jul 2025 17:09:04 -0700
Subject: [PATCH 5/8] Removed --ignore-tests argument, because the --exclude
 flag provides more granular functionality

---
 CHANGELOG.md |  3 ++-
 README.md    | 10 ++++++----
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index fb1ede4..d9bce79 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,7 +11,8 @@ All notable changes to this project will be documented in this file.
 - Added validation for JWT tokens that checks `exp` and `nbf` claims
 - JWT validation performs OpenID Connect discovery using the `iss` claim and verifies signatures via JWKS
 - Removed `--ignore-tests` argument, because the `--exclude` flag provides more granular functionality
-
+- DigitalOcean rule update
+- Adafruit rule update
 
 ## [1.20.0]
 - Removed confirmation prompt when user provides --self-update flag
diff --git a/README.md b/README.md
index 0797344..f94fc36 100644
--- a/README.md
+++ b/README.md
@@ -140,12 +140,14 @@ kingfisher scan /path/to/repo --rule-stats
 
 ### Scan while ignoring likely test files
 
+`--exclude` skips any file or directory whose path matches the given glob pattern (repeatable, gitignore-style syntax, case-sensitive).
+
 ```bash
 # Scan source but skip likely unit / integration tests
 kingfisher scan ./my-project \
-  --exclude='test' \
+  --exclude='[Tt]est' \
   --exclude='spec' \
-  --exclude='fixture' \
+  --exclude='[Ff]ixture' \
   --exclude='example' \
   --exclude='sample'
 ```
@@ -155,7 +157,7 @@ kingfisher scan ./my-project \
 # Skip all Python files and any directory named tests
 kingfisher scan ./my-project \
   --exclude '*.py' \
-  --exclude tests
+  --exclude '[Tt]ests'
 ```
 
 If you want to know which files are being skipped, enable verbose debugging (-v) when scanning, which will report any files being skipped by the baseline file (or via --exclude):
@@ -309,7 +311,7 @@ kingfisher github repos list --organization my-org
 - `--no-extract-archives`: Do not scan inside archives
 - `--extraction-depth <N>`: Specifies how deep nested archives should be extracted and scanned (default: 2)
 - `--redact`: Replaces discovered secrets with a one-way hash for secure output
-- `--exclude <PATTERN>`: Skip any file or directory whose path matches this glob pattern (repeatable, uses gitignore-style syntax)
+- `--exclude <PATTERN>`: Skip any file or directory whose path matches this glob pattern (repeatable, uses gitignore-style syntax, case sensitive)
 - `--baseline-file <FILE>`: Ignore matches listed in a baseline YAML file
 - `--manage-baseline`: Create or update the baseline file with current findings
 

From d72452159bb30e631010ee7d17229fb88204e6dd Mon Sep 17 00:00:00 2001
From: Mick Grove <mick.grove@mongodb.com>
Date: Mon, 14 Jul 2025 17:10:53 -0700
Subject: [PATCH 6/8] Removed --ignore-tests argument, because the --exclude
 flag provides more granular functionality

---
 src/git_metadata_graph.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/git_metadata_graph.rs b/src/git_metadata_graph.rs
index 90c28fd..9144068 100644
--- a/src/git_metadata_graph.rs
+++ b/src/git_metadata_graph.rs
@@ -147,9 +147,9 @@ impl RepositoryIndex {
             .with_ordering(Ordering::PackLexicographicalThenLooseLexicographical)
         {
             let oid = unwrap_ok_or_continue!(oid, |e| debug!("Failed to read object id: {e}"));
-            if count % 100000 == 0 {
-                debug!("Indexed {} objects in RepositoryIndex::new", count);
-            }
+            // if count % 100000 == 0 {
+            //     debug!("Indexed {} objects in RepositoryIndex::new", count);
+            // }
             let hdr = unwrap_ok_or_continue!(odb.header(oid), |e| {
                 debug!("Failed to read object header for {oid}: {e}")
             });

From ee6332a78d9ec7070bfd594708efbdc038811ef4 Mon Sep 17 00:00:00 2001
From: Mick Grove <mick.grove@mongodb.com>
Date: Mon, 14 Jul 2025 17:22:37 -0700
Subject: [PATCH 7/8] change that hoists the redirect-free reqwest::Client into
 a single, lazily-initialized static so every call to validate_jwt re-uses the
 same handle (and therefore the same connection-pool, DNS cache, TLS session
 cache, etc)

---
 src/reporter/json_format.rs |  1 -
 src/scanner/enumerate.rs    |  1 -
 src/validation/jwt.rs       | 21 ++++++++++++++-------
 tests/smoke_update.rs       |  5 -----
 4 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/src/reporter/json_format.rs b/src/reporter/json_format.rs
index c1185b2..a4e8730 100644
--- a/src/reporter/json_format.rs
+++ b/src/reporter/json_format.rs
@@ -390,7 +390,6 @@ mod tests {
         cli::commands::scan::ScanArgs {
             num_jobs: 1,
             no_dedup: false,
-            ignore_tests: false,
             rules: RuleSpecifierArgs {
                 rules_path: Vec::new(),
                 rule: vec!["all".into()],
diff --git a/src/scanner/enumerate.rs b/src/scanner/enumerate.rs
index 8cfa9ff..0e0c795 100644
--- a/src/scanner/enumerate.rs
+++ b/src/scanner/enumerate.rs
@@ -37,7 +37,6 @@ use crate::{
         util::is_compressed_file,
     },
     scanner_pool::ScannerPool,
-    util::is_test_like_path,
     EnumeratorConfig, EnumeratorFileResult, FileResult, FilesystemEnumerator, FoundInput,
     GitRepoEnumerator, GitRepoResult, GitRepoWithMetadataEnumerator, PathBuf,
 };
diff --git a/src/validation/jwt.rs b/src/validation/jwt.rs
index ed2bd3c..ca0ac43 100644
--- a/src/validation/jwt.rs
+++ b/src/validation/jwt.rs
@@ -3,12 +3,24 @@ use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine as _};
 use chrono::Utc;
 use ipnet::IpNet;
 use jsonwebtoken::{decode, decode_header, jwk::JwkSet, DecodingKey, Validation as JwtValidation};
+use once_cell::sync::Lazy;
 use reqwest::{redirect::Policy, Client, Url};
 use serde::Deserialize;
 use tokio::net::lookup_host;
 
 use super::utils::check_url_resolvable;
 
+/// One global, redirect-free client.  Building a `Client` is comparatively
+/// expensive; re-using it lets reqwest share its internal connection pool
+/// and TLS sessions across JWT validations.  `Lazy` ensures thread-safe,
+/// one-time initialisation.
+static NO_REDIRECT_CLIENT: Lazy<Client> = Lazy::new(|| {
+    Client::builder()
+        .redirect(Policy::none()) // disable all redirects
+        .build()
+        .expect("failed to build no-redirect Client")
+});
+
 /// RFC 1918 + loopback + link-local nets we refuse to contact
 const BLOCKED_NETS: &[&str] = &[
     "10.0.0.0/8",
@@ -66,12 +78,7 @@ pub async fn validate_jwt(token: &str, client: &Client) -> Result<(bool, String)
 
         // build discovery URL and fetch it (redirects disabled)
         let config_url = format!("{}/.well-known/openid-configuration", iss.trim_end_matches('/'));
-        let no_redirect_client = Client::builder()
-            .redirect(Policy::none())
-            .build()
-            .map_err(|e| anyhow!("client build: {e}"))?;
-
-        let cfg_resp = no_redirect_client
+        let cfg_resp = NO_REDIRECT_CLIENT
             .get(&config_url)
             .send()
             .await
@@ -122,7 +129,7 @@ pub async fn validate_jwt(token: &str, client: &Client) -> Result<(bool, String)
         check_url_resolvable(&url).await.map_err(|e| anyhow!("jwks uri unresolvable: {e}"))?;
 
         // fetch JWKS with redirect-free client
-        let jwks_resp = no_redirect_client
+        let jwks_resp = NO_REDIRECT_CLIENT
             .get(url)
             .send()
             .await
diff --git a/tests/smoke_update.rs b/tests/smoke_update.rs
index 45573fa..8224d42 100644
--- a/tests/smoke_update.rs
+++ b/tests/smoke_update.rs
@@ -1,9 +1,4 @@
-use std::fs::{self, File};
-
-use flate2::{write::GzEncoder, Compression};
 use kingfisher::{cli::global::GlobalArgs, update::check_for_update};
-use tar::Builder;
-use tempfile::tempdir;
 use tokio;
 use wiremock::{
     matchers::{method, path},

From 352d8ff65959bbce5475941f5d33dbcc46a6da72 Mon Sep 17 00:00:00 2001
From: Mick Grove <mick.grove@mongodb.com>
Date: Mon, 14 Jul 2025 17:22:51 -0700
Subject: [PATCH 8/8] Hoist the redirect-free reqwest::Client into a single,
 lazily-initialized static so every call to validate_jwt re-uses the same
 handle (and therefore the same connection pool, DNS cache, TLS session
 cache, etc.)

---
 src/validation.rs     | 2 +-
 src/validation/jwt.rs | 8 +++-----
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/src/validation.rs b/src/validation.rs
index 172888d..6cb3711 100644
--- a/src/validation.rs
+++ b/src/validation.rs
@@ -688,7 +688,7 @@ async fn timed_validate_single_match<'a>(
                 return;
             }
 
-            match jwt::validate_jwt(&token, client).await {
+            match jwt::validate_jwt(&token).await {
                 Ok((ok, msg)) => {
                     m.validation_success = ok;
                     m.validation_response_body = msg;
diff --git a/src/validation/jwt.rs b/src/validation/jwt.rs
index ca0ac43..6449fd2 100644
--- a/src/validation/jwt.rs
+++ b/src/validation/jwt.rs
@@ -46,7 +46,7 @@ struct Claims {
     aud: Option<Aud>,
 }
 
-pub async fn validate_jwt(token: &str, client: &Client) -> Result<(bool, String)> {
+pub async fn validate_jwt(token: &str) -> Result<(bool, String)> {
     // --- insecure payload decode -------------------------------------------------
     let claims: Claims = {
         let payload_b64 = token.split('.').nth(1).ok_or_else(|| anyhow!("invalid JWT format"))?;
@@ -199,16 +199,14 @@ mod tests {
     #[tokio::test]
     async fn valid_token() {
         let token = build_token(60);
-        let client = Client::new();
-        let res = validate_jwt(&token, &client).await.unwrap();
+        let res = validate_jwt(&token).await.unwrap();
         assert!(res.0);
     }
 
     #[tokio::test]
     async fn expired_token() {
         let token = build_token(-60);
-        let client = Client::new();
-        let res = validate_jwt(&token, &client).await.unwrap();
+        let res = validate_jwt(&token).await.unwrap();
         assert!(!res.0);
     }
 }