Removed --ignore-tests argument, because the --exclude flag provides more granular functionality

This commit is contained in:
Mick Grove 2025-07-14 16:55:19 -07:00
commit 533fc49c54
13 changed files with 47 additions and 42 deletions

View file

@ -10,6 +10,7 @@ All notable changes to this project will be documented in this file.
- Added tests covering baseline and exclude workflow
- Added validation for JWT tokens that checks `exp` and `nbf` claims
- JWT validation performs OpenID Connect discovery using the `iss` claim and verifies signatures via JWKS
- Removed `--ignore-tests` argument, because the `--exclude` flag provides more granular functionality
## [1.20.0]

View file

@ -142,7 +142,12 @@ kingfisher scan /path/to/repo --rule-stats
```bash
# Scan source but skip likely unit / integration tests
kingfisher scan ./my-project --ignore-tests
kingfisher scan ./my-project \
--exclude='test' \
--exclude='spec' \
--exclude='fixture' \
--exclude='example' \
--exclude='sample'
```
### Exclude specific paths
@ -304,7 +309,6 @@ kingfisher github repos list --organization my-org
- `--no-extract-archives`: Do not scan inside archives
- `--extraction-depth <N>`: Specifies how deep nested archives should be extracted and scanned (default: 2)
- `--redact`: Replaces discovered secrets with a one-way hash for secure output
- `--ignore-tests`: Skip files or directories whose path component contains _test_, _spec_, _fixture_, _example_, or _sample_ (case-insensitive)
- `--exclude <PATTERN>`: Skip any file or directory whose path matches this glob pattern (repeatable, uses gitignore-style syntax)
- `--baseline-file <FILE>`: Ignore matches listed in a baseline YAML file
- `--manage-baseline`: Create or update the baseline file with current findings

View file

@ -88,10 +88,6 @@ pub struct ScanArgs {
#[arg(long, default_value_t = false)]
pub no_dedup: bool,
/// Ignore matches that appear to come from test files
#[arg(long, default_value_t = false)]
pub ignore_tests: bool,
/// Redact findings values using a secure hash
#[arg(long, short = 'r', default_value_t = false)]
pub redact: bool,

View file

@ -73,11 +73,16 @@ pub struct GitBlobMetadata {
/// Enumerator over a Git repository; `run` consumes it and yields a `GitRepoResult`.
pub struct GitRepoWithMetadataEnumerator<'a> {
/// Path associated with `repo` (presumably the repository location on disk; confirm against callers).
path: &'a Path,
/// Repository handle that gets enumerated.
repo: Repository,
/// Optional glob set; when present, entries whose path matches it are dropped during enumeration.
exclude_globset: Option<std::sync::Arc<globset::GlobSet>>,
}
impl<'a> GitRepoWithMetadataEnumerator<'a> {
pub fn new(path: &'a Path, repo: Repository) -> Self {
Self { path, repo }
pub fn new(
path: &'a Path,
repo: Repository,
exclude_globset: Option<std::sync::Arc<globset::GlobSet>>,
) -> Self {
Self { path, repo, exclude_globset }
}
pub fn run(self) -> Result<GitRepoResult> {
@ -173,12 +178,19 @@ impl<'a> GitRepoWithMetadataEnumerator<'a> {
}
let filtered = appearances
.into_iter()
.filter(|entry| {
// Apply your actual ignore-logic here:
match entry.path.to_path() {
Ok(_path) => true,
Err(_) => true,
.filter(|entry| match entry.path.to_path() {
Ok(p) => {
if let Some(gs) = &self.exclude_globset {
let m = gs.is_match(p);
if m {
debug!("Skipping {} due to --exclude", p.display());
}
!m
} else {
true
}
}
Err(_) => true,
})
.collect::<SmallVec<_>>();
if filtered.is_empty() {

View file

@ -82,7 +82,7 @@ struct EnumeratorConfig {
enumerate_git_history: bool,
collect_git_metadata: bool,
repo_scan_timeout: Duration,
// gitignore: Gitignore,
exclude_globset: Option<std::sync::Arc<GlobSet>>,
}
pub enum FoundInput {
@ -312,6 +312,10 @@ impl FilesystemEnumerator {
Ok(self)
}
/// Returns a clone of the stored exclusion glob set handle, or `None` when none is set.
///
/// The value is held behind an `Arc`, so the clone shares the same `GlobSet`.
pub fn exclude_globset(&self) -> Option<std::sync::Arc<GlobSet>> {
self.exclude_globset.clone()
}
/// Builds a `Gitignore` from `gitignore_builder`.
///
/// # Errors
/// Propagates any error reported by the builder's `build` call.
pub fn gitignore(&self) -> Result<Gitignore> {
Ok(self.gitignore_builder.build()?)
}

View file

@ -300,7 +300,6 @@ fn create_default_scan_args() -> cli::commands::scan::ScanArgs {
redact: false,
git_repo_timeout: 1800,
no_dedup: false,
ignore_tests: false,
snippet_length: 256,
baseline_file: None,
manage_baseline: false,

View file

@ -314,7 +314,6 @@ fn test_pretty_format_with_nan_entropy_panics() {
// core execution / performance
num_jobs: 1,
no_dedup: false,
ignore_tests: false,
// rule selection
rules: RuleSpecifierArgs {

View file

@ -82,9 +82,10 @@ pub fn enumerate_filesystem_inputs(
}()
.context("Failed to initialize filesystem enumerator")?;
let (enum_thread, input_recv) = {
let (enum_thread, input_recv, exclude_globset) = {
let fs_enumerator = make_fs_enumerator(args, input_roots.into())
.context("Failed to initialize filesystem enumerator")?;
let exclude_globset = fs_enumerator.as_ref().and_then(|ie| ie.exclude_globset());
let channel_size = std::cmp::max(args.num_jobs * 128, 1024);
let (input_send, input_recv) = crossbeam_channel::bounded(channel_size);
@ -97,7 +98,7 @@ pub fn enumerate_filesystem_inputs(
Ok(())
})
.context("Failed to enumerate filesystem inputs")?;
(input_enumerator_thread, input_recv)
(input_enumerator_thread, input_recv, exclude_globset)
};
let enum_cfg = EnumeratorConfig {
@ -107,6 +108,7 @@ pub fn enumerate_filesystem_inputs(
},
collect_git_metadata: args.input_specifier_args.commit_metadata,
repo_scan_timeout,
exclude_globset,
};
let (send_ds, recv_ds) = create_datastore_channel(args.num_jobs);
let datastore_writer_thread =
@ -189,23 +191,11 @@ pub fn enumerate_filesystem_inputs(
Ok(Some((origin_set, blob_metadata, vec_of_matches))) => {
for (_, single_match) in vec_of_matches {
// Send each match
let is_test = if args.ignore_tests {
origin_set
.iter()
.filter_map(|o| o.full_path())
.any(|p| is_test_like_path(&p))
} else {
false
};
if !is_test {
// Send each match
send_ds.send((
Arc::new(origin_set.clone()),
Arc::new(blob_metadata.clone()),
single_match,
))?;
}
send_ds.send((
Arc::new(origin_set.clone()),
Arc::new(blob_metadata.clone()),
single_match,
))?;
}
}
Err(e) => {
@ -604,9 +594,15 @@ impl<'cfg> ParallelBlobIterator for (&'cfg EnumeratorConfig, FoundInput) {
// Spawn an enumerator thread so we can time-out cleanly
let path_clone = path.to_path_buf();
let (tx, rx) = std::sync::mpsc::channel();
let exclude_globset = cfg.exclude_globset.clone();
let handle = std::thread::spawn(move || {
let res = if collect_git_metadata {
GitRepoWithMetadataEnumerator::new(&path_clone, repository).run()
GitRepoWithMetadataEnumerator::new(
&path_clone,
repository,
exclude_globset.clone(),
)
.run()
} else {
GitRepoEnumerator::new(&path_clone, repository).run()
};

View file

@ -101,7 +101,6 @@ rules:
git_repo_timeout: 1800, // 30 minutes
output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
no_dedup,
ignore_tests: false,
snippet_length: 64,
baseline_file: None,
manage_baseline: false,

View file

@ -88,7 +88,6 @@ fn test_github_remote_scan() -> Result<()> {
git_repo_timeout: 1800, // 30 minutes
output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
no_dedup: true,
ignore_tests: false,
snippet_length: 256,
baseline_file: None,
manage_baseline: false,

View file

@ -85,7 +85,6 @@ fn test_gitlab_remote_scan() -> Result<()> {
git_repo_timeout: 1800, // 30 minutes
output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
no_dedup: true,
ignore_tests: false,
snippet_length: 256,
baseline_file: None,
manage_baseline: false,

View file

@ -144,7 +144,6 @@ async fn test_validation_cache_and_depvars() -> Result<()> {
git_repo_timeout: 1800, // 30 minutes
output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
no_dedup: true, // keep duplicates so the cache is stressed
ignore_tests: false,
snippet_length: 128,
baseline_file: None,
manage_baseline: false,

View file

@ -87,7 +87,6 @@ impl TestContext {
git_repo_timeout: 1800, // 30 minutes
output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
no_dedup: true,
ignore_tests: false,
snippet_length: 256,
baseline_file: None,
manage_baseline: false,
@ -153,7 +152,6 @@ impl TestContext {
git_repo_timeout: 1800, // 30 minutes
output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
no_dedup: true,
ignore_tests: false,
snippet_length: 256,
baseline_file: None,
manage_baseline: false,