forked from mirrors/kingfisher
Removed --ignore-tests argument, because the --exclude flag provides more granular functionality
This commit is contained in:
parent
601ca05fc8
commit
533fc49c54
13 changed files with 47 additions and 42 deletions
|
|
@ -10,6 +10,7 @@ All notable changes to this project will be documented in this file.
|
|||
- Added tests covering the baseline and exclude workflows
|
||||
- Added validation for JWT tokens that checks `exp` and `nbf` claims
|
||||
- JWT validation performs OpenID Connect discovery using the `iss` claim and verifies signatures via JWKS
|
||||
- Removed `--ignore-tests` argument, because the `--exclude` flag provides more granular functionality
|
||||
|
||||
|
||||
## [1.20.0]
|
||||
|
|
|
|||
|
|
@ -142,7 +142,12 @@ kingfisher scan /path/to/repo --rule-stats
|
|||
|
||||
```bash
|
||||
# Scan source but skip likely unit / integration tests
|
||||
kingfisher scan ./my-project --ignore-tests
|
||||
kingfisher scan ./my-project \
|
||||
--exclude='test' \
|
||||
--exclude='spec' \
|
||||
--exclude='fixture' \
|
||||
--exclude='example' \
|
||||
--exclude='sample'
|
||||
```
|
||||
|
||||
### Exclude specific paths
|
||||
|
|
@ -304,7 +309,6 @@ kingfisher github repos list --organization my-org
|
|||
- `--no-extract-archives`: Do not scan inside archives
|
||||
- `--extraction-depth <N>`: Specifies how deep nested archives should be extracted and scanned (default: 2)
|
||||
- `--redact`: Replaces discovered secrets with a one-way hash for secure output
|
||||
- `--ignore-tests`: Skip files or directories whose path component contains _test_, _spec_, _fixture_, _example_, or _sample_ (case-insensitive)
|
||||
- `--exclude <PATTERN>`: Skip any file or directory whose path matches this glob pattern (repeatable, uses gitignore-style syntax)
|
||||
- `--baseline-file <FILE>`: Ignore matches listed in a baseline YAML file
|
||||
- `--manage-baseline`: Create or update the baseline file with current findings
|
||||
|
|
|
|||
|
|
@ -88,10 +88,6 @@ pub struct ScanArgs {
|
|||
#[arg(long, default_value_t = false)]
|
||||
pub no_dedup: bool,
|
||||
|
||||
/// Ignore matches that appear to come from test files
|
||||
#[arg(long, default_value_t = false)]
|
||||
pub ignore_tests: bool,
|
||||
|
||||
/// Redact findings values using a secure hash
|
||||
#[arg(long, short = 'r', default_value_t = false)]
|
||||
pub redact: bool,
|
||||
|
|
|
|||
|
|
@ -73,11 +73,16 @@ pub struct GitBlobMetadata {
|
|||
// Enumerator over a Git repository that carries an optional exclude glob set
// alongside the repository handle.
pub struct GitRepoWithMetadataEnumerator<'a> {
|
||||
// NOTE(review): presumably the on-disk location of `repo` — confirm against callers.
path: &'a Path,
|
||||
// Repository handle this enumerator operates on.
repo: Repository,
|
||||
// Optional glob set; `None` means no exclude patterns are applied.
// Held behind an `Arc` so it can be shared rather than copied.
exclude_globset: Option<std::sync::Arc<globset::GlobSet>>,
|
||||
}
|
||||
|
||||
impl<'a> GitRepoWithMetadataEnumerator<'a> {
|
||||
pub fn new(path: &'a Path, repo: Repository) -> Self {
|
||||
Self { path, repo }
|
||||
pub fn new(
|
||||
path: &'a Path,
|
||||
repo: Repository,
|
||||
exclude_globset: Option<std::sync::Arc<globset::GlobSet>>,
|
||||
) -> Self {
|
||||
Self { path, repo, exclude_globset }
|
||||
}
|
||||
|
||||
pub fn run(self) -> Result<GitRepoResult> {
|
||||
|
|
@ -173,12 +178,19 @@ impl<'a> GitRepoWithMetadataEnumerator<'a> {
|
|||
}
|
||||
let filtered = appearances
|
||||
.into_iter()
|
||||
.filter(|entry| {
|
||||
// Apply your actual ignore-logic here:
|
||||
match entry.path.to_path() {
|
||||
Ok(_path) => true,
|
||||
Err(_) => true,
|
||||
.filter(|entry| match entry.path.to_path() {
|
||||
Ok(p) => {
|
||||
if let Some(gs) = &self.exclude_globset {
|
||||
let m = gs.is_match(p);
|
||||
if m {
|
||||
debug!("Skipping {} due to --exclude", p.display());
|
||||
}
|
||||
!m
|
||||
} else {
|
||||
true
|
||||
}
|
||||
}
|
||||
Err(_) => true,
|
||||
})
|
||||
.collect::<SmallVec<_>>();
|
||||
if filtered.is_empty() {
|
||||
|
|
|
|||
|
|
@ -82,7 +82,7 @@ struct EnumeratorConfig {
|
|||
enumerate_git_history: bool,
|
||||
collect_git_metadata: bool,
|
||||
repo_scan_timeout: Duration,
|
||||
// gitignore: Gitignore,
|
||||
exclude_globset: Option<std::sync::Arc<GlobSet>>,
|
||||
}
|
||||
|
||||
pub enum FoundInput {
|
||||
|
|
@ -312,6 +312,10 @@ impl FilesystemEnumerator {
|
|||
Ok(self)
|
||||
}
|
||||
|
||||
/// Returns this enumerator's optional exclude glob set.
///
/// The stored `Option<Arc<GlobSet>>` is cloned, so a `Some` result shares the
/// same underlying `GlobSet` instead of copying it. `None` is returned when no
/// glob set is stored.
pub fn exclude_globset(&self) -> Option<std::sync::Arc<GlobSet>> {
|
||||
self.exclude_globset.clone()
|
||||
}
|
||||
|
||||
/// Builds a `Gitignore` from `self.gitignore_builder`.
///
/// # Errors
///
/// Propagates the error returned by `gitignore_builder.build()` if it fails.
pub fn gitignore(&self) -> Result<Gitignore> {
|
||||
Ok(self.gitignore_builder.build()?)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -300,7 +300,6 @@ fn create_default_scan_args() -> cli::commands::scan::ScanArgs {
|
|||
redact: false,
|
||||
git_repo_timeout: 1800,
|
||||
no_dedup: false,
|
||||
ignore_tests: false,
|
||||
snippet_length: 256,
|
||||
baseline_file: None,
|
||||
manage_baseline: false,
|
||||
|
|
|
|||
|
|
@ -314,7 +314,6 @@ fn test_pretty_format_with_nan_entropy_panics() {
|
|||
// core execution / performance
|
||||
num_jobs: 1,
|
||||
no_dedup: false,
|
||||
ignore_tests: false,
|
||||
|
||||
// rule selection
|
||||
rules: RuleSpecifierArgs {
|
||||
|
|
|
|||
|
|
@ -82,9 +82,10 @@ pub fn enumerate_filesystem_inputs(
|
|||
}()
|
||||
.context("Failed to initialize filesystem enumerator")?;
|
||||
|
||||
let (enum_thread, input_recv) = {
|
||||
let (enum_thread, input_recv, exclude_globset) = {
|
||||
let fs_enumerator = make_fs_enumerator(args, input_roots.into())
|
||||
.context("Failed to initialize filesystem enumerator")?;
|
||||
let exclude_globset = fs_enumerator.as_ref().and_then(|ie| ie.exclude_globset());
|
||||
let channel_size = std::cmp::max(args.num_jobs * 128, 1024);
|
||||
|
||||
let (input_send, input_recv) = crossbeam_channel::bounded(channel_size);
|
||||
|
|
@ -97,7 +98,7 @@ pub fn enumerate_filesystem_inputs(
|
|||
Ok(())
|
||||
})
|
||||
.context("Failed to enumerate filesystem inputs")?;
|
||||
(input_enumerator_thread, input_recv)
|
||||
(input_enumerator_thread, input_recv, exclude_globset)
|
||||
};
|
||||
|
||||
let enum_cfg = EnumeratorConfig {
|
||||
|
|
@ -107,6 +108,7 @@ pub fn enumerate_filesystem_inputs(
|
|||
},
|
||||
collect_git_metadata: args.input_specifier_args.commit_metadata,
|
||||
repo_scan_timeout,
|
||||
exclude_globset,
|
||||
};
|
||||
let (send_ds, recv_ds) = create_datastore_channel(args.num_jobs);
|
||||
let datastore_writer_thread =
|
||||
|
|
@ -189,23 +191,11 @@ pub fn enumerate_filesystem_inputs(
|
|||
Ok(Some((origin_set, blob_metadata, vec_of_matches))) => {
|
||||
for (_, single_match) in vec_of_matches {
|
||||
// Send each match
|
||||
let is_test = if args.ignore_tests {
|
||||
origin_set
|
||||
.iter()
|
||||
.filter_map(|o| o.full_path())
|
||||
.any(|p| is_test_like_path(&p))
|
||||
} else {
|
||||
false
|
||||
};
|
||||
|
||||
if !is_test {
|
||||
// Send each match
|
||||
send_ds.send((
|
||||
Arc::new(origin_set.clone()),
|
||||
Arc::new(blob_metadata.clone()),
|
||||
single_match,
|
||||
))?;
|
||||
}
|
||||
send_ds.send((
|
||||
Arc::new(origin_set.clone()),
|
||||
Arc::new(blob_metadata.clone()),
|
||||
single_match,
|
||||
))?;
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
|
|
@ -604,9 +594,15 @@ impl<'cfg> ParallelBlobIterator for (&'cfg EnumeratorConfig, FoundInput) {
|
|||
// Spawn an enumerator thread so we can time-out cleanly
|
||||
let path_clone = path.to_path_buf();
|
||||
let (tx, rx) = std::sync::mpsc::channel();
|
||||
let exclude_globset = cfg.exclude_globset.clone();
|
||||
let handle = std::thread::spawn(move || {
|
||||
let res = if collect_git_metadata {
|
||||
GitRepoWithMetadataEnumerator::new(&path_clone, repository).run()
|
||||
GitRepoWithMetadataEnumerator::new(
|
||||
&path_clone,
|
||||
repository,
|
||||
exclude_globset.clone(),
|
||||
)
|
||||
.run()
|
||||
} else {
|
||||
GitRepoEnumerator::new(&path_clone, repository).run()
|
||||
};
|
||||
|
|
|
|||
|
|
@ -101,7 +101,6 @@ rules:
|
|||
git_repo_timeout: 1800, // 30 minutes
|
||||
output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
|
||||
no_dedup,
|
||||
ignore_tests: false,
|
||||
snippet_length: 64,
|
||||
baseline_file: None,
|
||||
manage_baseline: false,
|
||||
|
|
|
|||
|
|
@ -88,7 +88,6 @@ fn test_github_remote_scan() -> Result<()> {
|
|||
git_repo_timeout: 1800, // 30 minutes
|
||||
output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
|
||||
no_dedup: true,
|
||||
ignore_tests: false,
|
||||
snippet_length: 256,
|
||||
baseline_file: None,
|
||||
manage_baseline: false,
|
||||
|
|
|
|||
|
|
@ -85,7 +85,6 @@ fn test_gitlab_remote_scan() -> Result<()> {
|
|||
git_repo_timeout: 1800, // 30 minutes
|
||||
output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
|
||||
no_dedup: true,
|
||||
ignore_tests: false,
|
||||
snippet_length: 256,
|
||||
baseline_file: None,
|
||||
manage_baseline: false,
|
||||
|
|
|
|||
|
|
@ -144,7 +144,6 @@ async fn test_validation_cache_and_depvars() -> Result<()> {
|
|||
git_repo_timeout: 1800, // 30 minutes
|
||||
output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
|
||||
no_dedup: true, // keep duplicates so the cache is stressed
|
||||
ignore_tests: false,
|
||||
snippet_length: 128,
|
||||
baseline_file: None,
|
||||
manage_baseline: false,
|
||||
|
|
|
|||
|
|
@ -87,7 +87,6 @@ impl TestContext {
|
|||
git_repo_timeout: 1800, // 30 minutes
|
||||
output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
|
||||
no_dedup: true,
|
||||
ignore_tests: false,
|
||||
snippet_length: 256,
|
||||
baseline_file: None,
|
||||
manage_baseline: false,
|
||||
|
|
@ -153,7 +152,6 @@ impl TestContext {
|
|||
git_repo_timeout: 1800, // 30 minutes
|
||||
output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
|
||||
no_dedup: true,
|
||||
ignore_tests: false,
|
||||
snippet_length: 256,
|
||||
baseline_file: None,
|
||||
manage_baseline: false,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue