Merge pull request #36 from mongodb/development

v1.21.0
2025-07-14 18:07:58 -07:00 · 2025-07-14 18:07:58 -07:00 · 9bee9e6b5d
commit 9bee9e6b5d
parent eb4407784d 352d8ff659
29 changed files with 743 additions and 110 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -2,6 +2,17 @@

 All notable changes to this project will be documented in this file.

+## [1.21.0]
+- Improved Azure Storage rule
+- Added rule to detect TravisCI encrypted values
+- Added baseline feature with `--baseline-file` and `--manage-baseline` flags
+- Introduced `--exclude` option for skipping paths
+- Added tests covering baseline and exclude workflow
+- Added validation for JWT tokens that checks `exp` and `nbf` claims
+- JWT validation performs OpenID Connect discovery using the `iss` claim and verifies signatures via JWKS
+- Removed `--ignore-tests` argument, because the `--exclude` flag provides more granular functionality
+- DigitalOcean rule update
+- Adafruit rule update

 ## [1.20.0]
 - Removed confirmation prompt when user provides --self-update flag
--- a/Cargo.toml
+++ b/Cargo.toml
@ -10,7 +10,7 @@ publish = false

 [package]
 name = "kingfisher"
-version = "1.20.0"
+version = "1.21.0"
 edition.workspace = true
 rust-version.workspace = true
 license.workspace = true
@ -161,6 +161,9 @@ trust-dns-resolver   = { version = "0.23.2", default-features = false, features
 atty = "0.2.14"
 self_update = { version = "0.42.0", default-features = false, features = ["rustls", "archive-tar", "archive-zip", "compression-flate2"] }
 semver = "1.0.26"
+globset = "0.4.16"
+jsonwebtoken = "9.3.1"
+ipnet = "2.11.0"

 [dependencies.tikv-jemallocator]
 version = "0.6"
--- a/README.md
+++ b/README.md
@ -140,9 +140,34 @@ kingfisher scan /path/to/repo --rule-stats

 ### Scan while ignoring likely test files

+`--exclude` skips any file or directory whose path matches the given glob pattern. The flag is repeatable, uses gitignore-style syntax, and is case-sensitive.
+
 ```bash
 # Scan source but skip likely unit / integration tests
-kingfisher scan ./my-project --ignore-tests
+kingfisher scan ./my-project \
+  --exclude='[Tt]est' \
+  --exclude='spec' \
+  --exclude='[Ff]ixture' \
+  --exclude='example' \
+  --exclude='sample'
+```
+
+### Exclude specific paths
+```bash
+# Skip all Python files and any directory named tests
+kingfisher scan ./my-project \
+  --exclude '*.py' \
+  --exclude '[Tt]ests'
+```
+
+If you want to know which files are being skipped, enable verbose debugging (-v) when scanning, which will report any files being skipped by the baseline file (or via --exclude):
+
+```bash
+# Skip all Python files and any directory named tests, and report to stderr any skipped files
+kingfisher scan ./my-project \
+  --exclude '*.py' \
+  --exclude tests \
+  -v
 ```

 ---
@ -286,7 +311,32 @@ kingfisher github repos list --organization my-org
 - `--no-extract-archives`: Do not scan inside archives
 - `--extraction-depth <N>`: Specifies how deep nested archives should be extracted and scanned (default: 2)
 - `--redact`: Replaces discovered secrets with a one-way hash for secure output
- `--ignore-tests`: Skip files or directories whose path component contains _test_, _spec_, _fixture_, _example_, or _sample_ (case-insensitive)
+- `--exclude <PATTERN>`: Skip any file or directory whose path matches this glob pattern (repeatable, uses gitignore-style syntax, case sensitive)
+- `--baseline-file <FILE>`: Ignore matches listed in a baseline YAML file
+- `--manage-baseline`: Create or update the baseline file with current findings
+
+## Build a Baseline / Detect New Secrets
+
+There are situations where a repository already contains checked‑in secrets, but you want to ensure no **new** secrets are introduced. A baseline file lets you document the known findings so future scans only report anything that is not already in that list.
+
+The easiest way to create a baseline is to run a normal scan with the `--manage-baseline` flag (typically at a low confidence level to capture all potential matches):
+
+```bash
+kingfisher scan /path/to/code \
+  --confidence low \
+  --manage-baseline \
+  --baseline-file ./baseline-file.yml
+```
+
+Use the same YAML file with the `--baseline-file` option on future scans to hide all recorded findings:
+
+```bash
+kingfisher scan /path/to/code \
+  --baseline-file /path/to/baseline-file.yml
+```
+
+See [docs/BASELINE.md](docs/BASELINE.md) for full details.
+

 ## Finding Fingerprint

--- a/data/rules/azurestorage.yml
+++ b/data/rules/azurestorage.yml
@ -1,9 +1,12 @@
 rules:
  - name: Azure Storage Account Name
-    id: kingfisher.azurestorage.name.1
+    id: kingfisher.azurestorage.1
    pattern: |
      (?xi)
      (?:
+        \b
+        azure
+        (?:.|[\n\r]){0,32}?
        (?i:
          (?:Account|Storage)
          (?:[._-]Account)?
@ -19,13 +22,16 @@ rules:
    visible: false
    confidence: medium
    examples:
-      - storage_name=mystorageaccount123
+      - azure_storage_name=mystorageaccount123
      - mystorageaccount.blob.core.windows.net

  - name: Azure Storage Account Key
-    id: kingfisher.azurestorage.key.1
+    id: kingfisher.azurestorage.2
    pattern: |
      (?xi)
+      \b
+      azure
+      (?:.|[\n\r]){0,32}?
      (?i:(?:Access|Account|Storage)[_.-]?Key)
      (?:.|[\n\r]){0,25}?
      (
@ -34,9 +40,9 @@ rules:
    min_entropy: 4.0
    confidence: medium
    examples:
-      - AccountKey=Xy9aB8cD7eF6gH5iJ4kL3mN2oP1qR0sT9uV8wX7yZ6aB5cD4eF3gH2iJ1kL0mN9oP8qR7sT6uV5wX4yZ3aB2cD1eF0gH9iJ8kL7mN6oP5q==\
+      - Azure AccountKey=Xy9aB8cD7eF6gH5iJ4kL3mN2oP1qR0sT9uV8wX7yZ6aB5cD4eF3gH2iJ1kL0mN9oP8qR7sT6uV5wX4yZ3aB2cD1eF0gH9iJ8kL7mN6oP5q==\
    validation:
      type: AzureStorage
    depends_on_rule:
-      - rule_id: kingfisher.azurestorage.name.1
+      - rule_id: kingfisher.azurestorage.1
        variable: AZURENAME
--- a/data/rules/jwt.yml
+++ b/data/rules/jwt.yml
@ -22,4 +22,6 @@ rules:
      - https://datatracker.ietf.org/doc/html/rfc7519
      - https://en.wikipedia.org/wiki/Base64#URL_applications
      - https://datatracker.ietf.org/doc/html/rfc4648
-      - https://developer.okta.com/blog/2018/06/20/what-happens-if-your-jwt-is-stolen
+      - https://developer.okta.com/blog/2018/06/20/what-happens-if-your-jwt-is-stolen
+    validation:
+      type: JWT
--- a/data/rules/travisci.yml
+++ b/data/rules/travisci.yml
@ -31,3 +31,24 @@ rules:
              - report_response: true
              - type: StatusMatch
                status: [200]
+  # Matches a `secure: "<base64>"` value that appears within 256 characters after a
+  # `language:`, `env:`, `deploy:` or `script:` key at the end of a line.
+  - name: Travis CI Encrypted Variable
+    id: kingfisher.travisci.1
+    pattern: |
+      (?xis)
+      \b
+      (?:language|env|deploy|script):[\r\n]
+      (?:.|[\n\r]){0,256}?
+      \b
+      (
+        secure:\s*"?[A-Za-z0-9+/=\\]+"?\s*
+      )
+      \b
+    min_entropy: 3.0
+    confidence: medium
+    examples:
+      - |
+        env:
+          global:
+            # This sets FOO=super-secret, but the plaintext never appears here.
+            - secure: "VJh0l9gOb+6AVNDk6cziZSs1AqVM8CqtZU6ot9ZQeJ+KfL1pxnGQ4qQF8Cz9\M1q85c3l1N1+qkQ0uV12QG6O6ylq6Qq1l3VjAJM3h2pY3jdmrA8kX2ZIxRjC/\8+Xj1wVtKQ0R+owM/6i5Y6cyx4hRb3VvSeYlC0lD1iTzQ2vgMyE="
+
--- a/docs/BASELINE.md
+++ b/docs/BASELINE.md
@ -0,0 +1,55 @@
+
+# Build a Baseline / Detect Only New Secrets
+
+There are situations where a repository already contains checked‑in secrets, but you want to ensure no **new** secrets are introduced. A baseline file lets you document the known findings so future scans only report anything that is not already in that list.
+
+The easiest way to create a baseline is to run a normal scan with the `--manage-baseline` flag (typically at a low confidence level to capture all potential matches):
+
+```bash
+kingfisher scan /path/to/code \
+  --confidence low \
+  --manage-baseline \
+  --baseline-file ./baseline-file.yml
+```
+
+This generates a YAML file named `baseline-file.yml` in the current directory. The file tracks each finding under an `ExactFindings` section:
+
+```yaml
+ExactFindings:
+  matches:
+  - filepath: ruby_vulnerable.rb/
+    fingerprint: 056876f00ffd0622
+    linenum: 52
+    lastupdated: Mon, 14 Jul 2025 10:17:56 -0700
+  - filepath: ruby_vulnerable.rb/
+    fingerprint: ce41d19b83b2b1b0
+    linenum: 53
+    lastupdated: Mon, 14 Jul 2025 10:17:56 -0700
+  - filepath: ruby_vulnerable.rb/
+    fingerprint: e8644d91fa6654f5
+    linenum: 40
+    lastupdated: Mon, 14 Jul 2025 10:17:56 -0700
+```
+
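+`fingerprint` reuses Kingfisher's 64-bit *finding fingerprint* algorithm with offsets set to zero. It hashes the secret value together with the normalized filepath, so moving a secret to a different line within the same file does not create a new entry, whereas the same secret appearing in a different file is recorded as a separate entry.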
+
+Use the same YAML file with the `--baseline-file` option on future scans to hide all recorded findings:
+
+```bash
+kingfisher scan /path/to/code \
+  --baseline-file /path/to/baseline-file.yml
+```
+
+If you intentionally add a new secret that should be ignored later, rerun the scan with both `--manage-baseline` and `--baseline-file` to append new matches to the file without removing existing entries:
+
+```bash
+kingfisher scan /path/to/code \
+  --manage-baseline \
+  --baseline-file /path/to/baseline-file.yml
+```
+
+If you want to know which files are being skipped, enable verbose debugging (-v) when scanning, which will report any files being skipped by the baseline file (or via `--exclude`):
+
+```bash
+kingfisher scan /path/to/project -v
+```
--- a/src/baseline.rs
+++ b/src/baseline.rs
@ -0,0 +1,115 @@
+use std::{
+    collections::HashSet,
+    fs,
+    path::{Path, PathBuf},
+    sync::Arc,
+};
+
+use anyhow::{Context, Result};
+use chrono::Local;
+use serde::{Deserialize, Serialize};
+use tracing::debug;
+
+use crate::{findings_store::FindingsStore, matcher::compute_finding_fingerprint};
+
+/// Top-level document stored in a baseline YAML file.
+#[derive(Debug, Default, Serialize, Deserialize)]
+pub struct BaselineFile {
+    /// Findings section, serialized under the `ExactFindings` key. A missing key
+    /// deserializes to the default (empty) section.
+    #[serde(rename = "ExactFindings", default)]
+    pub exact_findings: ExactFindings,
+}
+
+/// The `ExactFindings` section of a baseline file.
+#[derive(Debug, Default, Serialize, Deserialize)]
+pub struct ExactFindings {
+    /// Recorded findings; a missing `matches` key deserializes to an empty list.
+    #[serde(default)]
+    pub matches: Vec<BaselineFinding>,
+}
+
+/// One recorded finding inside a baseline file.
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct BaselineFinding {
+    /// Path of the file containing the finding, as produced by `normalize_path`.
+    pub filepath: String,
+    /// Fingerprint of the finding, formatted as 16 lowercase hex digits by `compute_hash`.
+    pub fingerprint: String,
+    /// Starting line of the match's source span at the time the entry was recorded.
+    pub linenum: usize,
+    /// Local time at which the entry was recorded, in RFC 2822 format.
+    pub lastupdated: String,
+}
+
+/// Reads the baseline YAML file at `path` and deserializes it into a [`BaselineFile`].
+///
+/// # Errors
+///
+/// Returns an error if the file cannot be read or if its contents are not valid
+/// baseline YAML.
+pub fn load_baseline(path: &Path) -> Result<BaselineFile> {
+    let data = fs::read_to_string(path).context("read baseline file")?;
+    // `context` already yields the function's `Result`, so no `Ok(..?)` round-trip is needed.
+    serde_yaml::from_str(&data).context("parse baseline yaml")
+}
+
+/// Serializes `baseline` to YAML and writes it to `path`, replacing any existing file.
+///
+/// # Errors
+///
+/// Returns an error if serialization fails or the file cannot be written.
+pub fn save_baseline(path: &Path, baseline: &BaselineFile) -> Result<()> {
+    serde_yaml::to_string(baseline)
+        .context("serialize baseline")
+        .and_then(|yaml| fs::write(path, yaml).context("write baseline file"))
+}
+
+/// Rewrites `p` relative to the first matching scan root, keeping the root's own
+/// final component as a prefix (e.g. `/tmp/repo/src/a.rs` under root `/tmp/repo`
+/// becomes `repo/src/a.rs`).
+///
+/// Roots that are not a prefix of `p`, or that have no final component, are
+/// skipped. When no root applies, `p` is returned unchanged (lossily converted).
+fn normalize_path(p: &Path, roots: &[PathBuf]) -> String {
+    let relative = roots.iter().find_map(|root| {
+        let remainder = p.strip_prefix(root).ok()?;
+        let root_name = root.file_name()?;
+        // When `p` equals the root, `remainder` is empty and `join` leaves a
+        // trailing separator; this is kept because the string feeds the fingerprint.
+        Some(PathBuf::from(root_name).join(remainder).to_string_lossy().into_owned())
+    });
+    match relative {
+        Some(normalized) => normalized,
+        None => p.to_string_lossy().into_owned(),
+    }
+}
+
+/// Returns the baseline fingerprint for `secret` found at `path`: the finding
+/// fingerprint computed with both offsets set to zero, rendered as 16
+/// zero-padded lowercase hex digits.
+fn compute_hash(secret: &str, path: &str) -> String {
+    format!("{:016x}", compute_finding_fingerprint(secret, path, 0, 0))
+}
+
+/// Returns the secret text of a match: capture index 1 when present, otherwise
+/// capture index 0, otherwise an empty string.
+fn extract_secret(m: &crate::matcher::Match) -> String {
+    let captures = &m.groups.captures;
+    match captures.get(1).or_else(|| captures.get(0)) {
+        Some(capture) => capture.value.to_string(),
+        None => String::new(),
+    }
+}
+
+/// Hides findings that are already recorded in the baseline file and, when
+/// `manage` is set, appends the remaining findings to it.
+///
+/// * `store` - findings to filter; matches found in the baseline get `visible = false`.
+/// * `baseline_path` - baseline YAML file; a missing file is treated as an empty baseline.
+/// * `manage` - when `true`, findings not yet in the baseline are added and the file is rewritten.
+/// * `roots` - scan roots used to normalize file paths before fingerprinting.
+///
+/// Matches whose origins provide no full path are left untouched. The baseline
+/// file is only written when `manage` is set and at least one new entry was collected.
+///
+/// # Errors
+///
+/// Returns an error if the baseline file cannot be loaded or saved.
+pub fn apply_baseline(
+    store: &mut FindingsStore,
+    baseline_path: &Path,
+    manage: bool,
+    roots: &[PathBuf],
+) -> Result<()> {
+    let mut baseline = if baseline_path.exists() {
+        load_baseline(baseline_path)?
+    } else {
+        BaselineFile::default()
+    };
+
+    // Fingerprints already present in the file, plus any added during this run.
+    let mut seen: HashSet<String> = HashSet::new();
+    for existing in &baseline.exact_findings.matches {
+        seen.insert(existing.fingerprint.clone());
+    }
+
+    let mut additions = Vec::new();
+    for shared in store.get_matches_mut() {
+        let (origin, _blob, finding) = Arc::make_mut(shared);
+        let full_path = match origin.iter().find_map(|o| o.full_path()) {
+            Some(found) => found,
+            None => continue,
+        };
+
+        let normalized = normalize_path(&full_path, roots);
+        let digest = compute_hash(&extract_secret(finding), &normalized);
+
+        if seen.contains(&digest) {
+            debug!("Skipping {} due to baseline (hash {})", normalized, digest);
+            finding.visible = false;
+            continue;
+        }
+        if !manage {
+            continue;
+        }
+
+        // Remember the fingerprint so duplicates within this run are hidden rather than re-added.
+        seen.insert(digest.clone());
+        additions.push(BaselineFinding {
+            filepath: normalized,
+            fingerprint: digest,
+            linenum: finding.location.source_span.start.line,
+            lastupdated: Local::now().to_rfc2822(),
+        });
+    }
+
+    if !manage || additions.is_empty() {
+        return Ok(());
+    }
+
+    baseline.exact_findings.matches.extend(additions);
+    save_baseline(baseline_path, &baseline)
+}
--- a/src/cli/commands/inputs.rs
+++ b/src/cli/commands/inputs.rs
@ -111,9 +111,13 @@ pub struct ContentFilteringArgs {
    #[arg(long("max-file-size"), default_value_t = 25.0)]
    pub max_file_size_mb: f64,

-    /// Use custom path-based ignore rules from the given file(s)
-    #[arg(long, short, value_hint = ValueHint::FilePath)]
-    pub ignore: Vec<PathBuf>,
+    // /// Use custom path-based ignore rules from the given file(s)
+    // #[arg(long, short, value_hint = ValueHint::FilePath)]
+    // pub ignore: Vec<PathBuf>,
+    /// Skip any file or directory whose path matches this glob pattern. Multiple
+    /// patterns may be provided by repeating the flag.
+    #[arg(long, value_name = "PATTERN")]
+    pub exclude: Vec<String>,

    /// If true, do NOT extract archive files
    #[arg(long("no-extract-archives"), default_value_t = false)]
--- a/src/cli/commands/scan.rs
+++ b/src/cli/commands/scan.rs
@ -88,10 +88,6 @@ pub struct ScanArgs {
    #[arg(long, default_value_t = false)]
    pub no_dedup: bool,

-    /// Ignore matches that appear to come from test files
-    #[arg(long, default_value_t = false)]
-    pub ignore_tests: bool,
-
    /// Redact findings values using a secure hash
    #[arg(long, short = 'r', default_value_t = false)]
    pub redact: bool,
@ -106,6 +102,14 @@ pub struct ScanArgs {
    /// Bytes of context before and after each match
    #[arg(long, default_value_t = 256, value_name = "BYTES")]
    pub snippet_length: usize,
+
+    /// Baseline file to filter known secrets
+    #[arg(long, value_name = "FILE")]
+    pub baseline_file: Option<std::path::PathBuf>,
+
+    /// Create or update the baseline file with current findings
+    #[arg(long, default_value_t = false)]
+    pub manage_baseline: bool,
 }

 /// Confidence levels for findings
--- a/src/git_metadata_graph.rs
+++ b/src/git_metadata_graph.rs
@ -147,9 +147,9 @@ impl RepositoryIndex {
            .with_ordering(Ordering::PackLexicographicalThenLooseLexicographical)
        {
            let oid = unwrap_ok_or_continue!(oid, |e| debug!("Failed to read object id: {e}"));
-            if count % 100000 == 0 {
-                debug!("Indexed {} objects in RepositoryIndex::new", count);
-            }
+            // if count % 100000 == 0 {
+            //     debug!("Indexed {} objects in RepositoryIndex::new", count);
+            // }
            let hdr = unwrap_ok_or_continue!(odb.header(oid), |e| {
                debug!("Failed to read object header for {oid}: {e}")
            });
--- a/src/git_repo_enumerator.rs
+++ b/src/git_repo_enumerator.rs
@ -73,11 +73,16 @@ pub struct GitBlobMetadata {
 pub struct GitRepoWithMetadataEnumerator<'a> {
    path: &'a Path,
    repo: Repository,
+    exclude_globset: Option<std::sync::Arc<globset::GlobSet>>,
 }

 impl<'a> GitRepoWithMetadataEnumerator<'a> {
-    pub fn new(path: &'a Path, repo: Repository) -> Self {
-        Self { path, repo }
+    /// Creates an enumerator for `repo` located at `path`.
+    ///
+    /// `exclude_globset`, when present, is used to drop blob appearances whose
+    /// path matches one of the globs.
+    pub fn new(
+        path: &'a Path,
+        repo: Repository,
+        exclude_globset: Option<std::sync::Arc<globset::GlobSet>>,
+    ) -> Self {
+        Self { path, repo, exclude_globset }
+    }

    pub fn run(self) -> Result<GitRepoResult> {
@ -173,12 +178,19 @@ impl<'a> GitRepoWithMetadataEnumerator<'a> {
                        }
                        let filtered = appearances
                            .into_iter()
-                            .filter(|entry| {
-                                // Apply your actual ignore-logic here:
-                                match entry.path.to_path() {
-                                    Ok(_path) => true,
-                                    Err(_) => true,
+                            .filter(|entry| match entry.path.to_path() {
+                                Ok(p) => {
+                                    if let Some(gs) = &self.exclude_globset {
+                                        let m = gs.is_match(p);
+                                        if m {
+                                            debug!("Skipping {} due to --exclude", p.display());
+                                        }
+                                        !m
+                                    } else {
+                                        true
+                                    }
                                }
+                                Err(_) => true,
                            })
                            .collect::<SmallVec<_>>();
                        if filtered.is_empty() {
--- a/src/lib.rs
+++ b/src/lib.rs
@ -1,3 +1,4 @@
+pub mod baseline;
 pub mod binary;
 pub mod blob;
 pub mod bstring_escape;
@ -43,6 +44,7 @@ use crossbeam_channel::Sender;
 pub use git_repo_enumerator::{GitRepoEnumerator, GitRepoResult, GitRepoWithMetadataEnumerator};
 pub use gix::{self, Repository, ThreadSafeRepository};
 use gix::{open::Options, open_opts};
+use globset::{Glob, GlobSet, GlobSetBuilder};
 pub use ignore::gitignore::{Gitignore, GitignoreBuilder};
 use ignore::{DirEntry, WalkBuilder, WalkState};
 use tokio::time::Duration;
@ -80,7 +82,7 @@ struct EnumeratorConfig {
    enumerate_git_history: bool,
    collect_git_metadata: bool,
    repo_scan_timeout: Duration,
-    // gitignore: Gitignore,
+    exclude_globset: Option<std::sync::Arc<GlobSet>>,
 }

 pub enum FoundInput {
@ -204,6 +206,7 @@ pub struct FilesystemEnumerator {
    extract_archives: bool,
    extraction_depth: usize,
    no_dedup: bool,
+    exclude_globset: Option<std::sync::Arc<GlobSet>>,
 }

 impl FilesystemEnumerator {
@ -234,6 +237,7 @@ impl FilesystemEnumerator {
            extract_archives: !args.content_filtering_args.no_extract_archives,
            extraction_depth: args.content_filtering_args.extraction_depth as usize,
            no_dedup: args.no_dedup,
+            exclude_globset: None,
        })
    }

@ -287,6 +291,31 @@ impl FilesystemEnumerator {
        self
    }

+    /// Compiles `patterns` into a glob set, stores it for later retrieval, and
+    /// installs an entry filter that skips every path the set matches.
+    ///
+    /// An empty `patterns` slice leaves the enumerator unchanged.
+    ///
+    /// NOTE(review): the glob set is tested against the entry's path exactly as the
+    /// walker reports it; confirm that bare names such as `tests` match nested
+    /// directories the way the README examples describe.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if a pattern is not a valid glob or the set cannot be built.
+    pub fn set_exclude_patterns(&mut self, patterns: &[String]) -> Result<&mut Self> {
+        if !patterns.is_empty() {
+            let mut builder = GlobSetBuilder::new();
+            for pattern in patterns {
+                let glob = Glob::new(pattern)?;
+                builder.add(glob);
+            }
+            let compiled = std::sync::Arc::new(builder.build()?);
+            self.exclude_globset = Some(std::sync::Arc::clone(&compiled));
+            self.filter_entry(move |entry| {
+                let path = entry.path();
+                if compiled.is_match(path) {
+                    debug!("Skipping {} due to --exclude", path.display());
+                    false
+                } else {
+                    true
+                }
+            });
+        }
+        Ok(self)
+    }
+
+    /// Returns a shared handle to the compiled `--exclude` glob set, or `None`
+    /// when no exclude patterns have been set.
+    pub fn exclude_globset(&self) -> Option<std::sync::Arc<GlobSet>> {
+        self.exclude_globset.as_ref().map(std::sync::Arc::clone)
+    }
+
    pub fn gitignore(&self) -> Result<Gitignore> {
        Ok(self.gitignore_builder.build()?)
    }
--- a/src/main.rs
+++ b/src/main.rs
@ -132,7 +132,18 @@ pub fn determine_exit_code(datastore: &Arc<Mutex<findings_store::FindingsStore>>
    // exit with code 0 if there are NO findings discovered
    let ds = datastore.lock().unwrap();
    // Get all matches
-    let all_matches = ds.get_matches();
+    // let all_matches = ds.get_matches();
+
+    // Only consider visible matches when determining the exit code
+    let all_matches = ds
+        .get_matches()
+        .iter()
+        .filter(|msg| {
+            let (_, _, match_item) = &***msg;
+            match_item.visible
+        })
+        .collect::<Vec<_>>();
+
    if all_matches.is_empty() {
        // No findings discovered
        0
@ -141,7 +152,7 @@ pub fn determine_exit_code(datastore: &Arc<Mutex<findings_store::FindingsStore>>
        let validated_matches = all_matches
            .iter()
            .filter(|msg| {
-                let (_, _, match_item) = &***msg;
+                let (_, _, match_item) = &****msg;
                match_item.validation_success
            })
            .count();
@ -278,7 +289,7 @@ fn create_default_scan_args() -> cli::commands::scan::ScanArgs {
            max_file_size_mb: 25.0,
            no_extract_archives: true,
            extraction_depth: 2,
-            ignore: Vec::new(),
+            exclude: Vec::new(), // Exclude patterns
            no_binary: true,
        },
        confidence: ConfidenceLevel::Medium,
@ -289,8 +300,9 @@ fn create_default_scan_args() -> cli::commands::scan::ScanArgs {
        redact: false,
        git_repo_timeout: 1800,
        no_dedup: false,
-        ignore_tests: false,
        snippet_length: 256,
+        baseline_file: None,
+        manage_baseline: false,
        output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
    }
 }
--- a/src/reporter/json_format.rs
+++ b/src/reporter/json_format.rs
@ -383,13 +383,13 @@ mod tests {
        rules::rule::Confidence,
        util::intern,
    };
+
    fn create_default_args() -> cli::commands::scan::ScanArgs {
        use crate::cli::commands::gitlab::GitLabRepoType; // bring enum into scope

        cli::commands::scan::ScanArgs {
            num_jobs: 1,
            no_dedup: false,
-            ignore_tests: false,
            rules: RuleSpecifierArgs {
                rules_path: Vec::new(),
                rule: vec!["all".into()],
@ -424,7 +424,7 @@ mod tests {
                max_file_size_mb: 25.0,
                no_extract_archives: false,
                extraction_depth: 2,
-                ignore: Vec::new(),
+                exclude: Vec::new(), // Exclude patterns
                no_binary: true,
            },
            confidence: ConfidenceLevel::Medium,
@ -436,6 +436,8 @@ mod tests {
            git_repo_timeout: 1800, // 30 minutes
            output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
            snippet_length: 256,
+            baseline_file: None,
+            manage_baseline: false,
        }
    }

--- a/src/reporter/pretty_format.rs
+++ b/src/reporter/pretty_format.rs
@ -163,7 +163,9 @@ impl<'a> Display for PrettyFinding<'a> {
        writeln!(f, " |Fingerprint...: {}", rm.m.finding_fingerprint)?;
        writeln!(f, " |Confidence....: {}", rm.match_confidence.to_string())?;
        writeln!(f, " |Entropy.......: {:.2}", rm.m.calculated_entropy)?;
-        let validation_status = if rm.validation_response_status == StatusCode::CONTINUE.as_u16() {
+        let validation_status = if rm.validation_response_status == StatusCode::CONTINUE.as_u16()
+            || rm.validation_response_status == StatusCode::PRECONDITION_REQUIRED.as_u16()
+        {
            "Not Attempted".to_string()
        } else if rm.validation_success {
            "Active Credential".to_string()
@ -312,7 +314,6 @@ fn test_pretty_format_with_nan_entropy_panics() {
        // core execution / performance
        num_jobs: 1,
        no_dedup: false,
-        ignore_tests: false,

        // rule selection
        rules: RuleSpecifierArgs {
@ -349,7 +350,7 @@ fn test_pretty_format_with_nan_entropy_panics() {
            max_file_size_mb: 25.0,
            no_extract_archives: false,
            extraction_depth: 2,
-            ignore: Vec::new(),
+            exclude: Vec::new(), // Exclude patterns
            no_binary: true,
        },

@ -367,6 +368,8 @@ fn test_pretty_format_with_nan_entropy_panics() {

        // display
        snippet_length: 256,
+        baseline_file: None,
+        manage_baseline: false,
    };

    // This will panic if the entropy isn't checked for NaN
--- a/src/rules/rule.rs
+++ b/src/rules/rule.rs
@ -38,6 +38,7 @@ pub enum Validation {
    GCP,
    MongoDB,
    Postgres,
+    JWT,
    Raw(String),
    Http(HttpValidation),
 }
--- a/src/scanner/enumerate.rs
+++ b/src/scanner/enumerate.rs
@ -37,7 +37,6 @@ use crate::{
        util::is_compressed_file,
    },
    scanner_pool::ScannerPool,
-    util::is_test_like_path,
    EnumeratorConfig, EnumeratorFileResult, FileResult, FilesystemEnumerator, FoundInput,
    GitRepoEnumerator, GitRepoResult, GitRepoWithMetadataEnumerator, PathBuf,
 };
@ -82,9 +81,10 @@ pub fn enumerate_filesystem_inputs(
    }()
    .context("Failed to initialize filesystem enumerator")?;

-    let (enum_thread, input_recv) = {
+    let (enum_thread, input_recv, exclude_globset) = {
        let fs_enumerator = make_fs_enumerator(args, input_roots.into())
            .context("Failed to initialize filesystem enumerator")?;
+        let exclude_globset = fs_enumerator.as_ref().and_then(|ie| ie.exclude_globset());
        let channel_size = std::cmp::max(args.num_jobs * 128, 1024);

        let (input_send, input_recv) = crossbeam_channel::bounded(channel_size);
@ -97,7 +97,7 @@ pub fn enumerate_filesystem_inputs(
                Ok(())
            })
            .context("Failed to enumerate filesystem inputs")?;
-        (input_enumerator_thread, input_recv)
+        (input_enumerator_thread, input_recv, exclude_globset)
    };

    let enum_cfg = EnumeratorConfig {
@ -107,6 +107,7 @@ pub fn enumerate_filesystem_inputs(
        },
        collect_git_metadata: args.input_specifier_args.commit_metadata,
        repo_scan_timeout,
+        exclude_globset,
    };
    let (send_ds, recv_ds) = create_datastore_channel(args.num_jobs);
    let datastore_writer_thread =
@ -189,23 +190,11 @@ pub fn enumerate_filesystem_inputs(
                    Ok(Some((origin_set, blob_metadata, vec_of_matches))) => {
                        for (_, single_match) in vec_of_matches {
                            // Send each match
-                            let is_test = if args.ignore_tests {
-                                origin_set
-                                    .iter()
-                                    .filter_map(|o| o.full_path())
-                                    .any(|p| is_test_like_path(&p))
-                            } else {
-                                false
-                            };
-
-                            if !is_test {
-                                // Send each match
-                                send_ds.send((
-                                    Arc::new(origin_set.clone()),
-                                    Arc::new(blob_metadata.clone()),
-                                    single_match,
-                                ))?;
-                            }
+                            send_ds.send((
+                                Arc::new(origin_set.clone()),
+                                Arc::new(blob_metadata.clone()),
+                                single_match,
+                            ))?;
                        }
                    }
                    Err(e) => {
@ -246,13 +235,7 @@ fn make_fs_enumerator(
        // Pass no_dedup when enumerating git history
        ie.no_dedup(args.no_dedup);

-        // Load any specified ignore files
-        for ignore_path in args.content_filtering_args.ignore.iter() {
-            debug!("Using ignore rules from {}", ignore_path.display());
-            ie.add_ignore(ignore_path).with_context(|| {
-                format!("Failed to load ignore rules from {}", ignore_path.display())
-            })?;
-        }
+        ie.set_exclude_patterns(&args.content_filtering_args.exclude)?;
        // Determine whether to collect git metadata or not
        let collect_git_metadata = false;
        ie.collect_git_metadata(collect_git_metadata);
@ -610,9 +593,15 @@ impl<'cfg> ParallelBlobIterator for (&'cfg EnumeratorConfig, FoundInput) {
                // Spawn an enumerator thread so we can time-out cleanly
                let path_clone = path.to_path_buf();
                let (tx, rx) = std::sync::mpsc::channel();
+                let exclude_globset = cfg.exclude_globset.clone();
                let handle = std::thread::spawn(move || {
                    let res = if collect_git_metadata {
-                        GitRepoWithMetadataEnumerator::new(&path_clone, repository).run()
+                        GitRepoWithMetadataEnumerator::new(
+                            &path_clone,
+                            repository,
+                            exclude_globset.clone(),
+                        )
+                        .run()
                    } else {
                        GitRepoEnumerator::new(&path_clone, repository).run()
                    };
--- a/src/scanner/runner.rs
+++ b/src/scanner/runner.rs
@ -100,6 +100,16 @@ pub async fn run_async_scan(
        ds.replace_matches(deduped_arcs);
    }

+    // If baseline management is enabled, apply the baseline
+    if args.baseline_file.is_some() || args.manage_baseline {
+        let path = args
+            .baseline_file
+            .clone()
+            .unwrap_or_else(|| std::path::PathBuf::from("baseline-file.yaml"));
+        let mut ds = datastore.lock().unwrap();
+        crate::baseline::apply_baseline(&mut ds, &path, args.manage_baseline, &input_roots)?;
+    }
+
    // If validation is enabled, run it as a second phase
    if !args.no_validate {
        info!("Starting secret validation phase...");
--- a/src/validation.rs
+++ b/src/validation.rs
@ -27,6 +27,7 @@ mod aws;
 mod azure;
 mod gcp;
 mod httpvalidation;
+mod jwt;
 mod mongodb;
 mod postgres;
 mod utils;
@ -58,35 +59,6 @@ pub fn init_validation_caches() {
    IN_FLIGHT.set(DashMap::new()).ok();
 }

-// #[derive(Clone, FilterReflection, ParseFilter)]
-// #[filter(
-//     name = "b64enc",
-//     description = "Encodes the input string using Base64 encoding",
-//     parsed(B64EncFilter)
-// )]
-// pub struct B64EncFilterParser;
-
-// #[derive(Debug, Default, Clone)]
-// pub struct B64EncFilter;
-
-// impl std::fmt::Display for B64EncFilter {
-//     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-//         write!(f, "b64enc")
-//     }
-// }
-
-// impl Filter for B64EncFilter {
-//     fn evaluate(
-//         &self,
-//         input: &dyn ValueView,
-//         _runtime: &dyn Runtime,
-//     ) -> Result<Value, LiquidError> {
-//         let input_str = input.to_kstr().into_owned();
-//         let encoded = general_purpose::STANDARD.encode(input_str.as_bytes());
-//         Ok(Value::scalar(encoded))
-//     }
-// }
-
 #[derive(Clone)]
 pub struct CachedResponse {
    pub body: String,
@ -700,7 +672,36 @@ async fn timed_validate_single_match<'a>(
                },
            );
        }
+        // ---------------------------------------------------- JWT validator
+        Some(Validation::JWT) => {
+            let token = captured_values
+                .iter()
+                .find(|(n, ..)| n == "TOKEN")
+                .map(|(_, v, ..)| v.clone())
+                .unwrap_or_default();

+            if token.is_empty() {
+                m.validation_success = false;
+                m.validation_response_body = "JWT token not found.".to_string();
+                m.validation_response_status = StatusCode::BAD_REQUEST;
+                commit_and_return(m);
+                return;
+            }
+
+            match jwt::validate_jwt(&token).await {
+                Ok((ok, msg)) => {
+                    m.validation_success = ok;
+                    m.validation_response_body = msg;
+                    m.validation_response_status =
+                        if ok { StatusCode::OK } else { StatusCode::UNAUTHORIZED };
+                }
+                Err(e) => {
+                    m.validation_success = false;
+                    m.validation_response_body = format!("JWT validation error: {}", e);
+                    m.validation_response_status = StatusCode::BAD_REQUEST;
+                }
+            }
+        }
        // ---------------------------------------------------- AWS validator
        Some(Validation::AWS) => {
            let secret = captured_values
--- a/src/validation/jwt.rs
+++ b/src/validation/jwt.rs
@ -0,0 +1,212 @@
+use anyhow::{anyhow, Result};
+use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine as _};
+use chrono::Utc;
+use ipnet::IpNet;
+use jsonwebtoken::{decode, decode_header, jwk::JwkSet, DecodingKey, Validation as JwtValidation};
+use once_cell::sync::Lazy;
+use reqwest::{redirect::Policy, Client, Url};
+use serde::Deserialize;
+use tokio::net::lookup_host;
+
+use super::utils::check_url_resolvable;
+
+/// One global, redirect-free client with a bounded request time.
+///
+/// Building a `Client` is comparatively expensive; re-using it lets reqwest
+/// share its internal connection pool and TLS sessions across JWT validations.
+/// `Lazy` ensures thread-safe, one-time initialisation.
+///
+/// Redirects are disabled so a response cannot steer a request to a different
+/// host. The timeout matters because the issuer URL is taken from the scanned
+/// token: without a deadline, an endpoint that never answers would stall
+/// validation indefinitely.
+static NO_REDIRECT_CLIENT: Lazy<Client> = Lazy::new(|| {
+    Client::builder()
+        .redirect(Policy::none()) // disable all redirects
+        .timeout(std::time::Duration::from_secs(10)) // deadline for the whole request
+        .build()
+        .expect("failed to build no-redirect Client")
+});
+
+/// Networks we refuse to contact: "this network", private, carrier-grade NAT,
+/// loopback and link-local ranges, for both IPv4 and IPv6.
+const BLOCKED_NETS: &[&str] = &[
+    "0.0.0.0/8",      // "this network"
+    "10.0.0.0/8",     // private (RFC 1918)
+    "100.64.0.0/10",  // shared address space / carrier-grade NAT (RFC 6598)
+    "127.0.0.0/8",    // loopback
+    "169.254.0.0/16", // link-local
+    "172.16.0.0/12",  // private (RFC 1918)
+    "192.168.0.0/16", // private (RFC 1918)
+    "::/128",         // IPv6 unspecified
+    "::1/128",        // IPv6 loopback
+    "fc00::/7",       // IPv6 unique local (RFC 4193)
+    "fe80::/10",      // IPv6 link-local
+];
+
+/// The `aud` claim may be either a single string or an array of strings; the
+/// untagged representation lets Serde accept whichever shape is present.
+#[derive(Debug, Deserialize)]
+#[serde(untagged)]
+enum Aud {
+    /// `aud` given as one string.
+    Str(String),
+    /// `aud` given as an array of strings.
+    Arr(Vec<String>),
+}
+
+/// The subset of JWT claims read by the validator. Every field is optional, so
+/// a payload that omits any of them still deserializes.
+#[derive(Debug, Deserialize)]
+struct Claims {
+    /// Expiry, compared against the current Unix timestamp.
+    exp: Option<i64>,
+    /// "Not before" time, compared against the current Unix timestamp.
+    nbf: Option<i64>,
+    /// Issuer; when present it is used as the base URL for OpenID discovery.
+    iss: Option<String>,
+    /// Audience, as a single string or a list.
+    aud: Option<Aud>,
+}
+
+/// Validates a JWT's time window and, when it names an issuer, its signature.
+///
+/// The payload is first decoded without verification so `nbf` and `exp` can be
+/// compared with the current time. If an `iss` claim is present, the issuer's
+/// OpenID Connect discovery document is fetched, the JWKS it points to is
+/// downloaded, and the signature is verified with the key whose `kid` matches
+/// the token header. Without `iss`, only the time window is checked.
+///
+/// The issuer comes from the scanned token and is therefore untrusted, so its
+/// host is resolved and checked against the blocked networks before any
+/// request is sent to it.
+///
+/// # Returns
+/// `Ok((valid, message))`, where `message` describes the outcome.
+///
+/// # Errors
+/// Returns an error when the token or issuer is malformed, a DNS lookup or
+/// network request fails, a fetched document cannot be parsed, no key matches
+/// the token's `kid`, or signature verification fails.
+pub async fn validate_jwt(token: &str) -> Result<(bool, String)> {
+    // --- insecure payload decode -------------------------------------------------
+    let claims: Claims = {
+        let payload_b64 = token.split('.').nth(1).ok_or_else(|| anyhow!("invalid JWT format"))?;
+        let payload_json = URL_SAFE_NO_PAD
+            .decode(payload_b64)
+            .map_err(|e| anyhow!("invalid base64 in payload: {e}"))?;
+        serde_json::from_slice(&payload_json).map_err(|e| anyhow!("invalid JSON claims: {e}"))?
+    };
+
+    // temporal checks
+    let now = Utc::now().timestamp();
+    if let Some(nbf) = claims.nbf {
+        if now < nbf {
+            return Ok((false, format!("Token not valid before {nbf}")));
+        }
+    }
+    if let Some(exp) = claims.exp {
+        if now > exp {
+            return Ok((false, format!("Token expired at {exp}")));
+        }
+    }
+
+    // ---------------------------------------------------------------------------
+    let issuer = claims.iss.as_deref().unwrap_or_default();
+    let aud = extract_aud_strings(&claims);
+
+    if let Some(iss) = claims.iss.as_deref() {
+        // parse header now (kid, alg)
+        let header = decode_header(token).map_err(|e| anyhow!("decode header: {e}"))?;
+
+        // The issuer is attacker-controlled: refuse internal targets before the
+        // discovery request goes out, not only before the JWKS fetch.
+        let iss_host = Url::parse(iss)
+            .map_err(|e| anyhow!("invalid iss: {e}"))?
+            .host_str()
+            .unwrap_or_default()
+            .to_ascii_lowercase();
+        if iss_host.is_empty() {
+            return Err(anyhow!("invalid iss: missing host"));
+        }
+        let issuer_blocked = resolves_to_blocked_ip(&iss_host)
+            .await
+            .map_err(|e| anyhow!("issuer discovery failed: {e}"))?;
+        if issuer_blocked {
+            return Ok((false, "issuer resolves to private or link-local IP".to_string()));
+        }
+
+        // build discovery URL and fetch it (redirects disabled)
+        let config_url = format!("{}/.well-known/openid-configuration", iss.trim_end_matches('/'));
+        let cfg_resp = NO_REDIRECT_CLIENT
+            .get(&config_url)
+            .send()
+            .await
+            .map_err(|e| anyhow!("issuer discovery failed: {e}"))?;
+
+        if !cfg_resp.status().is_success() {
+            return Ok((false, format!("issuer discovery failed: {}", cfg_resp.status())));
+        }
+
+        let cfg_json: serde_json::Value =
+            cfg_resp.json().await.map_err(|e| anyhow!("invalid discovery JSON: {e}"))?;
+
+        // extract jwks_uri
+        let jwks_uri = cfg_json
+            .get("jwks_uri")
+            .and_then(|v| v.as_str())
+            .ok_or_else(|| anyhow!("jwks_uri missing"))?;
+
+        // must be HTTPS
+        let url = Url::parse(jwks_uri).map_err(|e| anyhow!("invalid jwks_uri: {e}"))?;
+        if url.scheme() != "https" {
+            return Ok((false, "jwks_uri must use https".to_string()));
+        }
+
+        // host must match issuer host  —  prevents open redirects / SSRF-on-other-host
+        let jwks_host = url.host_str().unwrap_or_default().to_ascii_lowercase();
+        if jwks_host != iss_host {
+            return Ok((
+                false,
+                format!("jwks_uri host ({jwks_host}) must match issuer host ({iss_host})"),
+            ));
+        }
+
+        // -----------------------------------------------------------------------
+        // DNS resolution + private-range block (re-checked in case the answer
+        // changed since the issuer lookup)
+        if resolves_to_blocked_ip(&jwks_host).await? {
+            return Ok((false, "jwks_uri resolves to private or link-local IP".to_string()));
+        }
+
+        // reachability check (existing helper)
+        check_url_resolvable(&url).await.map_err(|e| anyhow!("jwks uri unresolvable: {e}"))?;
+
+        // fetch JWKS with redirect-free client
+        let jwks_resp = NO_REDIRECT_CLIENT
+            .get(url)
+            .send()
+            .await
+            .map_err(|e| anyhow!("jwks fetch failed: {e}"))?;
+        if !jwks_resp.status().is_success() {
+            return Ok((false, format!("jwks fetch failed: {}", jwks_resp.status())));
+        }
+
+        let jwk_set: JwkSet =
+            jwks_resp.json().await.map_err(|e| anyhow!("invalid jwks json: {e}"))?;
+
+        // select key by kid
+        let kid = header.kid.ok_or_else(|| anyhow!("no kid in header"))?;
+        let jwk = jwk_set
+            .keys
+            .iter()
+            .find(|k| k.common.key_id.as_deref() == Some(kid.as_str()))
+            .ok_or_else(|| anyhow!("kid not found in jwks"))?;
+
+        // verify signature
+        let decoding_key = DecodingKey::from_jwk(jwk).map_err(|e| anyhow!("invalid jwk: {e}"))?;
+        let mut validation = JwtValidation::new(header.alg);
+        validation.set_audience(&aud);
+        // `exp` and `nbf` were checked above and are both optional here. The
+        // library requires an `exp` claim by default, which would reject a
+        // correctly signed token that simply has none.
+        validation.required_spec_claims.clear();
+        validation.validate_exp = false;
+        validation.validate_nbf = false;
+
+        decode::<Claims>(token, &decoding_key, &validation)
+            .map_err(|e| anyhow!("signature verification failed: {e}"))?;
+
+        return Ok((true, format!("JWT valid (iss: {issuer}, aud: {aud:?})")));
+    }
+
+    Ok((true, format!("JWT not expired (iss: {issuer}, aud: {aud:?})")))
+}
+
+/// Resolves `host` and reports whether any returned address is in a blocked network.
+async fn resolves_to_blocked_ip(host: &str) -> std::io::Result<bool> {
+    // The port only completes the socket address; it does not affect resolution.
+    Ok(lookup_host((host, 443)).await?.any(|addr| is_blocked_ip(addr.ip())))
+}
+
+/// Returns the audience values as a list: one element for a string `aud`, the
+/// array's elements for an array `aud`, and an empty list when `aud` is absent.
+fn extract_aud_strings(claims: &Claims) -> Vec<String> {
+    claims.aud.as_ref().map_or_else(Vec::new, |audience| match audience {
+        Aud::Str(single) => vec![single.clone()],
+        Aud::Arr(many) => many.clone(),
+    })
+}
+
+/// `BLOCKED_NETS` parsed once, on first use.
+///
+/// A malformed entry is a programming error. Panicking is deliberate:
+/// silently skipping the entry would leave that range reachable.
+static BLOCKED_IP_NETS: Lazy<Vec<IpNet>> = Lazy::new(|| {
+    BLOCKED_NETS
+        .iter()
+        .map(|cidr| cidr.parse::<IpNet>().expect("BLOCKED_NETS entries must be valid CIDR blocks"))
+        .collect()
+});
+
+/// Returns true if `ip` is in a blocked network.
+///
+/// An IPv4-mapped IPv6 address (`::ffff:a.b.c.d`) is checked as the IPv4
+/// address it embeds, so it cannot be used to sidestep an IPv4 entry.
+fn is_blocked_ip(ip: std::net::IpAddr) -> bool {
+    let ip = match ip {
+        std::net::IpAddr::V6(v6) => match v6.to_ipv4_mapped() {
+            Some(v4) => std::net::IpAddr::V4(v4),
+            None => ip,
+        },
+        std::net::IpAddr::V4(_) => ip,
+    };
+    BLOCKED_IP_NETS.iter().any(|net| net.contains(&ip))
+}
+
+#[cfg(test)]
+mod tests {
+    use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine as _};
+    use chrono::{Duration as ChronoDuration, Utc};
+
+    use super::validate_jwt;
+
+    /// Wraps `payload_json` in an unsigned token. None of these payloads carry
+    /// an `iss` claim, so validation never touches the network.
+    fn token_with_payload(payload_json: &str) -> String {
+        let header = URL_SAFE_NO_PAD.encode(r#"{"alg":"none"}"#);
+        let payload = URL_SAFE_NO_PAD.encode(payload_json);
+        format!("{header}.{payload}.")
+    }
+
+    /// Builds a token whose `exp` is `exp_offset` seconds from now.
+    fn build_token(exp_offset: i64) -> String {
+        let exp = (Utc::now() + ChronoDuration::seconds(exp_offset)).timestamp();
+        token_with_payload(&format!("{{\"exp\":{exp}}}"))
+    }
+
+    #[tokio::test]
+    async fn valid_token() {
+        let token = build_token(60);
+        let res = validate_jwt(&token).await.unwrap();
+        assert!(res.0);
+    }
+
+    #[tokio::test]
+    async fn expired_token() {
+        let token = build_token(-60);
+        let res = validate_jwt(&token).await.unwrap();
+        assert!(!res.0);
+    }
+
+    #[tokio::test]
+    async fn not_yet_valid_token() {
+        let nbf = (Utc::now() + ChronoDuration::seconds(60)).timestamp();
+        let token = token_with_payload(&format!("{{\"nbf\":{nbf}}}"));
+        let res = validate_jwt(&token).await.unwrap();
+        assert!(!res.0);
+    }
+
+    #[tokio::test]
+    async fn malformed_token_is_an_error() {
+        // No '.' separator, so there is no payload segment to decode.
+        assert!(validate_jwt("not-a-jwt").await.is_err());
+    }
+}
--- a/tests/int_dedup.rs
+++ b/tests/int_dedup.rs
@ -90,7 +90,7 @@ rules:
            extraction_depth: 1,
            no_binary: true,
            no_extract_archives: false,
-            ignore: Vec::new(),
+            exclude: Vec::new(), // Exclude patterns
        },
        confidence: ConfidenceLevel::Low,
        no_validate: true,
@ -101,8 +101,9 @@ rules:
        git_repo_timeout: 1800, // 30 minutes
        output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
        no_dedup,
-        ignore_tests: false,
        snippet_length: 64,
+        baseline_file: None,
+        manage_baseline: false,
    };

    let global_args = GlobalArgs {
--- a/tests/int_github.rs
+++ b/tests/int_github.rs
@ -77,7 +77,7 @@ fn test_github_remote_scan() -> Result<()> {
            no_extract_archives: false,
            extraction_depth: 2,
            no_binary: true,
-            ignore: Vec::new(),
+            exclude: Vec::new(), // Exclude patterns
        },
        confidence: ConfidenceLevel::Medium,
        no_validate: false,
@ -88,8 +88,9 @@ fn test_github_remote_scan() -> Result<()> {
        git_repo_timeout: 1800, // 30 minutes
        output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
        no_dedup: true,
-        ignore_tests: false,
        snippet_length: 256,
+        baseline_file: None,
+        manage_baseline: false,
    };
    // Create global arguments
    let global_args = GlobalArgs {
--- a/tests/int_gitlab.rs
+++ b/tests/int_gitlab.rs
@ -74,7 +74,7 @@ fn test_gitlab_remote_scan() -> Result<()> {
            no_extract_archives: false,
            extraction_depth: 2,
            no_binary: true,
-            ignore: Vec::new(),
+            exclude: Vec::new(), // Exclude patterns
        },
        confidence: ConfidenceLevel::Medium,
        no_validate: false,
@ -85,8 +85,9 @@ fn test_gitlab_remote_scan() -> Result<()> {
        git_repo_timeout: 1800, // 30 minutes
        output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
        no_dedup: true,
-        ignore_tests: false,
        snippet_length: 256,
+        baseline_file: None,
+        manage_baseline: false,
    };

    let global_args = GlobalArgs {
--- a/tests/int_validation_cache.rs
+++ b/tests/int_validation_cache.rs
@ -133,7 +133,7 @@ async fn test_validation_cache_and_depvars() -> Result<()> {
            extraction_depth: 2,
            no_binary: true,
            no_extract_archives: false,
-            ignore: Vec::new(),
+            exclude: Vec::new(), // Exclude patterns
        },
        confidence: ConfidenceLevel::Low,
        no_validate: false,
@ -144,8 +144,9 @@ async fn test_validation_cache_and_depvars() -> Result<()> {
        git_repo_timeout: 1800, // 30 minutes
        output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
        no_dedup: true, // keep duplicates so the cache is stressed
-        ignore_tests: false,
        snippet_length: 128,
+        baseline_file: None,
+        manage_baseline: false,
    };

    /* --------------------------------------------------------- *
--- a/tests/int_vulnerable_files.rs
+++ b/tests/int_vulnerable_files.rs
@ -76,7 +76,7 @@ impl TestContext {
                extraction_depth: 2,
                no_binary: true,
                no_extract_archives: false,
-                ignore: Vec::new(),
+                exclude: Vec::new(), // Exclude patterns
            },
            confidence: ConfidenceLevel::Low,
            no_validate: true,
@ -87,8 +87,9 @@ impl TestContext {
            git_repo_timeout: 1800, // 30 minutes
            output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
            no_dedup: true,
-            ignore_tests: false,
            snippet_length: 256,
+            baseline_file: None,
+            manage_baseline: false,
        };

        let loaded = RuleLoader::from_rule_specifiers(&scan_args.rules)
@ -140,7 +141,7 @@ impl TestContext {
                extraction_depth: 2,
                no_binary: true,
                no_extract_archives: false,
-                ignore: Vec::new(),
+                exclude: Vec::new(), // Exclude patterns
            },
            confidence: ConfidenceLevel::Low,
            no_validate: true,
@ -151,8 +152,9 @@ impl TestContext {
            git_repo_timeout: 1800, // 30 minutes
            output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
            no_dedup: true,
-            ignore_tests: false,
            snippet_length: 256,
+            baseline_file: None,
+            manage_baseline: false,
        };

        let global_args = GlobalArgs {
--- a/tests/smoke_baseline.rs
+++ b/tests/smoke_baseline.rs
@ -0,0 +1,56 @@
+use std::fs;
+
+use assert_cmd::Command;
+use predicates::prelude::*;
+use tempfile::tempdir;
+
+const GH_PAT: &str = "ghp_1wuHFikBKQtCcH3EB2FBUkyn8krXhP2qLqPa";
+
+/// Creates a baseline from a scan that reports one token, then checks that a
+/// second scan using that baseline no longer reports it.
+#[test]
+fn baseline_create_and_filter() -> anyhow::Result<()> {
+    let workspace = tempdir()?;
+    fs::write(workspace.path().join("leak.txt"), format!("token = \"{}\"\n", GH_PAT))?;
+    let baseline = workspace.path().join("baseline.yaml");
+
+    let scan_root = workspace.path().to_str().unwrap();
+    let baseline_arg = baseline.to_str().unwrap();
+
+    // First pass: the finding is reported and recorded in the baseline.
+    Command::cargo_bin("kingfisher")?
+        .arg("scan")
+        .arg(scan_root)
+        .args(["--no-binary", "--confidence=low", "--no-validate"])
+        .args(["--format", "json"])
+        .arg("--manage-baseline")
+        .args(["--baseline-file", baseline_arg])
+        .arg("--no-update-check")
+        .assert()
+        .code(200)
+        .stdout(predicate::str::contains(GH_PAT));
+
+    assert!(baseline.exists(), "baseline file created");
+
+    // Second pass: same scan, read-only baseline, finding suppressed.
+    Command::cargo_bin("kingfisher")?
+        .arg("scan")
+        .arg(scan_root)
+        .args(["--no-binary", "--confidence=low", "--no-validate"])
+        .args(["--format", "json"])
+        .args(["--baseline-file", baseline_arg])
+        .arg("--no-update-check")
+        .assert()
+        .code(0)
+        .stdout(predicate::str::contains(GH_PAT).not());
+
+    Ok(())
+}
--- a/tests/smoke_exclude.rs
+++ b/tests/smoke_exclude.rs
@ -0,0 +1,34 @@
+use std::fs;
+
+use assert_cmd::Command;
+use predicates::prelude::*;
+use tempfile::tempdir;
+
+const SECRET: &str = "ghp_1wuHFikBKQtCcH3EB2FBUkyn8krXhP2qLqPa";
+
+/// Writes the same token into a `.py` and a `.txt` file and checks that
+/// `--exclude=*.py` leaves only the `.txt` file in the report.
+#[test]
+fn exclude_pattern_hides_matches() -> anyhow::Result<()> {
+    let workspace = tempdir()?;
+    let contents = format!("token = \"{}\"\n", SECRET);
+    for name in ["foo.py", "bar.txt"] {
+        fs::write(workspace.path().join(name), &contents)?;
+    }
+
+    let only_txt_reported =
+        predicate::str::contains("bar.txt").and(predicate::str::contains("foo.py").not());
+
+    Command::cargo_bin("kingfisher")?
+        .arg("scan")
+        .arg(workspace.path().to_str().unwrap())
+        .args(["--confidence=low", "--no-binary", "--no-validate"])
+        .args(["--format", "json"])
+        .arg("--exclude=*.py")
+        .arg("--no-update-check")
+        .assert()
+        .code(200)
+        .stdout(only_txt_reported);
+
+    Ok(())
+}
--- a/tests/smoke_update.rs
+++ b/tests/smoke_update.rs
@ -1,9 +1,4 @@
-use std::fs::{self, File};
-
-use flate2::{write::GzEncoder, Compression};
 use kingfisher::{cli::global::GlobalArgs, update::check_for_update};
-use tar::Builder;
-use tempfile::tempdir;
 use tokio;
 use wiremock::{
    matchers::{method, path},