pub mod binary; pub mod blob; pub mod bstring_escape; pub mod bstring_table; pub mod cli; pub mod content_type; pub mod decompress; pub mod defaults; pub mod entropy; pub mod finding_data; pub mod findings_store; pub mod git_binary; pub mod git_commit_metadata; pub mod git_metadata_graph; mod git_repo_enumerator; pub mod git_url; pub mod github; pub mod gitlab; pub mod guesser; pub mod liquid_filters; pub mod location; pub mod matcher; pub mod origin; pub mod parser; pub mod reporter; pub mod rule_loader; pub mod rule_profiling; pub mod rules; pub mod rules_database; pub mod safe_list; pub mod scanner; pub mod scanner_pool; pub mod serde_utils; pub mod snippet; pub mod update; pub mod util; pub mod validation; use std::path::{Path, PathBuf}; use anyhow::{bail, Result}; use crossbeam_channel::Sender; pub use git_repo_enumerator::{GitRepoEnumerator, GitRepoResult, GitRepoWithMetadataEnumerator}; pub use gix::{self, Repository, ThreadSafeRepository}; use gix::{open::Options, open_opts}; pub use ignore::gitignore::{Gitignore, GitignoreBuilder}; use ignore::{DirEntry, WalkBuilder, WalkState}; use tokio::time::Duration; use tracing::debug; macro_rules! unwrap_some_or_continue { ($arg:expr, $on_error:expr $(,)?) => { match $arg { Some(v) => v, None => { #[allow(clippy::redundant_closure_call)] $on_error(); continue; } } }; } pub(crate) use unwrap_some_or_continue; macro_rules! unwrap_ok_or_continue { ($arg:expr, $on_error:expr $(,)?) => { match $arg { Ok(v) => v, Err(e) => { #[allow(clippy::redundant_closure_call)] $on_error(e); continue; } } }; } pub(crate) use unwrap_ok_or_continue; struct EnumeratorConfig { enumerate_git_history: bool, collect_git_metadata: bool, repo_scan_timeout: Duration, // gitignore: Gitignore, } pub enum FoundInput { File(FileResult), Directory(DirectoryResult), EnumeratorFile(EnumeratorFileResult), } pub struct FileResult { pub path: PathBuf, pub num_bytes: u64, pub extract_archives: bool, pub extraction_depth: usize, } pub struct EnumeratorFileResult { pub path: PathBuf, } pub struct DirectoryResult { pub path: PathBuf, } pub type Output = Sender; struct VisitorBuilder<'t> { max_file_size: Option, extract_archives: bool, extraction_depth: usize, output: &'t Output, } impl<'s, 't> ignore::ParallelVisitorBuilder<'s> for VisitorBuilder<'t> where 't: 's, { fn build(&mut self) -> Box { Box::new(Visitor { max_file_size: self.max_file_size, extract_archives: self.extract_archives, extraction_depth: self.extraction_depth, output: self.output, }) } } struct Visitor<'t> { max_file_size: Option, extract_archives: bool, extraction_depth: usize, output: &'t Output, } impl<'t> Visitor<'t> { #[inline] fn file_too_big(&self, size: u64) -> bool { match self.max_file_size { Some(max_size) => size > max_size, None => false, } } fn found_file(&self, r: FileResult) { let _ = self.output.send(FoundInput::File(r)); } fn found_directory(&self, r: DirectoryResult) { let _ = self.output.send(FoundInput::Directory(r)); } } impl<'t> ignore::ParallelVisitor for Visitor<'t> { fn visit(&mut self, result: Result) -> ignore::WalkState { let entry = match result { Ok(e) => e, Err(e) => { debug!("Skipping entry: {e}"); return WalkState::Continue; } }; let path = entry.path(); let metadata = match entry.metadata() { Ok(md) => md, Err(e) => { debug!("Skipping {}: {e}", path.display()); return WalkState::Continue; } }; if metadata.is_file() { let num_bytes = metadata.len(); if self.file_too_big(num_bytes) { debug!("Skipping {}: size {num_bytes} too big", path.display()); } else { self.found_file(FileResult { path: path.to_owned(), num_bytes, extract_archives: self.extract_archives, extraction_depth: self.extraction_depth, }); } } else if metadata.is_dir() { self.found_directory(DirectoryResult { path: path.to_owned() }); } else if metadata.is_symlink() { // Ignored; if follow_symlinks was set, we'd see the pointed-to entry instead. } else { debug!("Unhandled type for {}", path.display()); } WalkState::Continue } } pub struct FilesystemEnumerator { walk_builder: WalkBuilder, gitignore_builder: GitignoreBuilder, max_file_size: Option, collect_git_metadata: bool, enumerate_git_history: bool, extract_archives: bool, extraction_depth: usize, no_dedup: bool, } impl FilesystemEnumerator { pub const DEFAULT_ENUMERATE_GIT_HISTORY: bool = true; pub const DEFAULT_FOLLOW_LINKS: bool = false; pub const DEFAULT_MAX_FILESIZE: u64 = 100 * 1024 * 1024; pub fn new>(inputs: &[T], args: &cli::commands::scan::ScanArgs) -> Result { if inputs.is_empty() { bail!("No inputs provided"); } let mut builder = WalkBuilder::new(&inputs[0]); for input in &inputs[1..] { builder.add(input); } let max_file_size = args.content_filtering_args.max_file_size_bytes(); builder.follow_links(Self::DEFAULT_FOLLOW_LINKS); builder.max_filesize(max_file_size); builder.standard_filters(false); Ok(Self { walk_builder: builder, gitignore_builder: GitignoreBuilder::new(""), max_file_size, collect_git_metadata: args.input_specifier_args.commit_metadata, enumerate_git_history: Self::DEFAULT_ENUMERATE_GIT_HISTORY, extract_archives: !args.content_filtering_args.no_extract_archives, extraction_depth: args.content_filtering_args.extraction_depth as usize, no_dedup: args.no_dedup, }) } pub fn no_dedup(&mut self, no_dedup: bool) -> &mut Self { self.no_dedup = no_dedup; self } pub fn threads(&mut self, threads: usize) -> &mut Self { self.walk_builder.threads(threads); self } pub fn add_ignore>(&mut self, path: T) -> Result<&mut Self> { let path = path.as_ref(); if let Some(e) = self.gitignore_builder.add(path) { Err(e)?; } if let Some(e) = self.walk_builder.add_ignore(path) { Err(e)?; } Ok(self) } pub fn follow_links(&mut self, follow_links: bool) -> &mut Self { self.walk_builder.follow_links(follow_links); self } pub fn max_filesize(&mut self, max_filesize: Option) -> &mut Self { self.walk_builder.max_filesize(max_filesize); self.max_file_size = max_filesize; self } pub fn collect_git_metadata(&mut self, collect: bool) -> &mut Self { self.collect_git_metadata = collect; self } pub fn enumerate_git_history(&mut self, enumerate: bool) -> &mut Self { self.enumerate_git_history = enumerate; self } pub fn filter_entry

(&mut self, filter: P) -> &mut Self where P: Fn(&DirEntry) -> bool + Send + Sync + 'static, { self.walk_builder.filter_entry(filter); self } pub fn gitignore(&self) -> Result { Ok(self.gitignore_builder.build()?) } pub fn run(&self, output: Output) -> Result<()> { let mut visitor_builder = VisitorBuilder { max_file_size: self.max_file_size, extract_archives: self.extract_archives, extraction_depth: self.extraction_depth, output: &output, }; self.walk_builder.build_parallel().visit(&mut visitor_builder); Ok(()) } } /// Opens the given Git repository if it exists, returning None if not. pub fn open_git_repo(path: &Path) -> Result> { let opts = Options::isolated().open_path_as_is(true); // <- OK now match open_opts(path, opts) { Err(gix::open::Error::NotARepository { .. }) => Ok(None), Err(err) => Err(err.into()), Ok(repo) => Ok(Some(repo)), } }