forked from mirrors/kingfisher
commit
9bee9e6b5d
29 changed files with 743 additions and 110 deletions
11
CHANGELOG.md
11
CHANGELOG.md
|
|
@ -2,6 +2,17 @@
|
|||
|
||||
All notable changes to this project will be documented in this file.
|
||||
|
||||
## [1.21.0]
|
||||
- Improved Azure Storage rule
|
||||
- Added rule to detect TravisCI encrypted values
|
||||
- Added baseline feature with `--baseline-file` and `--manage-baseline` flags
|
||||
- Introduced `--exclude` option for skipping paths
|
||||
- Added tests covering baseline and exclude workflow
|
||||
- Added validation for JWT tokens that checks `exp` and `nbf` claims
|
||||
- JWT validation performs OpenID Connect discovery using the `iss` claim and verifies signatures via JWKS
|
||||
- Removed `--ignore-tests` argument, because the `--exclude` flag provides more granular functionality
|
||||
- DigitalOcean rule update
|
||||
- Adafruit rule update
|
||||
|
||||
## [1.20.0]
|
||||
- Removed confirmation prompt when user provides --self-update flag
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ publish = false
|
|||
|
||||
[package]
|
||||
name = "kingfisher"
|
||||
version = "1.20.0"
|
||||
version = "1.21.0"
|
||||
edition.workspace = true
|
||||
rust-version.workspace = true
|
||||
license.workspace = true
|
||||
|
|
@ -161,6 +161,9 @@ trust-dns-resolver = { version = "0.23.2", default-features = false, features
|
|||
atty = "0.2.14"
|
||||
self_update = { version = "0.42.0", default-features = false, features = ["rustls", "archive-tar", "archive-zip", "compression-flate2"] }
|
||||
semver = "1.0.26"
|
||||
globset = "0.4.16"
|
||||
jsonwebtoken = "9.3.1"
|
||||
ipnet = "2.11.0"
|
||||
|
||||
[dependencies.tikv-jemallocator]
|
||||
version = "0.6"
|
||||
|
|
|
|||
54
README.md
54
README.md
|
|
@ -140,9 +140,34 @@ kingfisher scan /path/to/repo --rule-stats
|
|||
|
||||
### Scan while ignoring likely test files
|
||||
|
||||
`--exclude` skips any file or directory whose path matches the given glob pattern. The flag is repeatable, uses gitignore-style syntax, and is case sensitive.
|
||||
|
||||
```bash
|
||||
# Scan source but skip likely unit / integration tests
|
||||
kingfisher scan ./my-project --ignore-tests
|
||||
kingfisher scan ./my-project \
|
||||
--exclude='[Tt]est' \
|
||||
--exclude='spec' \
|
||||
--exclude='[Ff]ixture' \
|
||||
--exclude='example' \
|
||||
--exclude='sample'
|
||||
```
|
||||
|
||||
### Exclude specific paths
|
||||
```bash
|
||||
# Skip all Python files and any directory named tests
|
||||
kingfisher scan ./my-project \
|
||||
--exclude '*.py' \
|
||||
--exclude '[Tt]ests'
|
||||
```
|
||||
|
||||
To see what is being skipped, enable verbose debug output (`-v`) when scanning. Kingfisher then logs every path skipped via `--exclude` and every finding suppressed by the baseline file:
|
||||
|
||||
```bash
|
||||
# Skip all Python files and any directory named tests, and report to stderr any skipped files
|
||||
kingfisher scan ./my-project \
|
||||
--exclude '*.py' \
|
||||
--exclude tests \
|
||||
-v
|
||||
```
|
||||
|
||||
---
|
||||
|
|
@ -286,7 +311,32 @@ kingfisher github repos list --organization my-org
|
|||
- `--no-extract-archives`: Do not scan inside archives
|
||||
- `--extraction-depth <N>`: Specifies how deep nested archives should be extracted and scanned (default: 2)
|
||||
- `--redact`: Replaces discovered secrets with a one-way hash for secure output
|
||||
- `--ignore-tests`: Skip files or directories whose path component contains _test_, _spec_, _fixture_, _example_, or _sample_ (case-insensitive)
|
||||
- `--exclude <PATTERN>`: Skip any file or directory whose path matches this glob pattern (repeatable, uses gitignore-style syntax, case sensitive)
|
||||
- `--baseline-file <FILE>`: Ignore matches listed in a baseline YAML file
|
||||
- `--manage-baseline`: Create or update the baseline file with current findings
|
||||
|
||||
## Build a Baseline / Detect New Secrets
|
||||
|
||||
There are situations where a repository already contains checked‑in secrets, but you want to ensure no **new** secrets are introduced. A baseline file lets you document the known findings so future scans only report anything that is not already in that list.
|
||||
|
||||
The easiest way to create a baseline is to run a normal scan with the `--manage-baseline` flag (typically at a low confidence level to capture all potential matches):
|
||||
|
||||
```bash
|
||||
kingfisher scan /path/to/code \
|
||||
--confidence low \
|
||||
--manage-baseline \
|
||||
--baseline-file ./baseline-file.yml
|
||||
```
|
||||
|
||||
Use the same YAML file with the `--baseline-file` option on future scans to hide all recorded findings:
|
||||
|
||||
```bash
|
||||
kingfisher scan /path/to/code \
|
||||
--baseline-file ./baseline-file.yml
|
||||
```
|
||||
|
||||
See [docs/BASELINE.md](docs/BASELINE.md) for full details.
|
||||
|
||||
|
||||
## Finding Fingerprint
|
||||
|
||||
|
|
|
|||
|
|
@ -1,9 +1,12 @@
|
|||
rules:
|
||||
- name: Azure Storage Account Name
|
||||
id: kingfisher.azurestorage.name.1
|
||||
id: kingfisher.azurestorage.1
|
||||
pattern: |
|
||||
(?xi)
|
||||
(?:
|
||||
\b
|
||||
azure
|
||||
(?:.|[\n\r]){0,32}?
|
||||
(?i:
|
||||
(?:Account|Storage)
|
||||
(?:[._-]Account)?
|
||||
|
|
@ -19,13 +22,16 @@ rules:
|
|||
visible: false
|
||||
confidence: medium
|
||||
examples:
|
||||
- storage_name=mystorageaccount123
|
||||
- azure_storage_name=mystorageaccount123
|
||||
- mystorageaccount.blob.core.windows.net
|
||||
|
||||
- name: Azure Storage Account Key
|
||||
id: kingfisher.azurestorage.key.1
|
||||
id: kingfisher.azurestorage.2
|
||||
pattern: |
|
||||
(?xi)
|
||||
\b
|
||||
azure
|
||||
(?:.|[\n\r]){0,32}?
|
||||
(?i:(?:Access|Account|Storage)[_.-]?Key)
|
||||
(?:.|[\n\r]){0,25}?
|
||||
(
|
||||
|
|
@ -34,9 +40,9 @@ rules:
|
|||
min_entropy: 4.0
|
||||
confidence: medium
|
||||
examples:
|
||||
- AccountKey=Xy9aB8cD7eF6gH5iJ4kL3mN2oP1qR0sT9uV8wX7yZ6aB5cD4eF3gH2iJ1kL0mN9oP8qR7sT6uV5wX4yZ3aB2cD1eF0gH9iJ8kL7mN6oP5q==\
|
||||
- Azure AccountKey=Xy9aB8cD7eF6gH5iJ4kL3mN2oP1qR0sT9uV8wX7yZ6aB5cD4eF3gH2iJ1kL0mN9oP8qR7sT6uV5wX4yZ3aB2cD1eF0gH9iJ8kL7mN6oP5q==\
|
||||
validation:
|
||||
type: AzureStorage
|
||||
depends_on_rule:
|
||||
- rule_id: kingfisher.azurestorage.name.1
|
||||
- rule_id: kingfisher.azurestorage.1
|
||||
variable: AZURENAME
|
||||
|
|
|
|||
|
|
@ -22,4 +22,6 @@ rules:
|
|||
- https://datatracker.ietf.org/doc/html/rfc7519
|
||||
- https://en.wikipedia.org/wiki/Base64#URL_applications
|
||||
- https://datatracker.ietf.org/doc/html/rfc4648
|
||||
- https://developer.okta.com/blog/2018/06/20/what-happens-if-your-jwt-is-stolen
|
||||
- https://developer.okta.com/blog/2018/06/20/what-happens-if-your-jwt-is-stolen
|
||||
validation:
|
||||
type: JWT
|
||||
|
|
@ -31,3 +31,24 @@ rules:
|
|||
- report_response: true
|
||||
- type: StatusMatch
|
||||
status: [200]
|
||||
- name: Travis CI Encrypted Variable
|
||||
id: kingfisher.travisci.1
|
||||
pattern: |
|
||||
(?xis)
|
||||
\b
|
||||
(?:language|env|deploy|script):[\r\n]
|
||||
(?:.|[\\n\r]){0,256}?
|
||||
\b
|
||||
(
|
||||
secure:\s*"?[A-Za-z0-9+/=\\]+"?\s*
|
||||
)
|
||||
\b
|
||||
min_entropy: 3.0
|
||||
confidence: medium
|
||||
examples:
|
||||
- |
|
||||
env:
|
||||
global:
|
||||
# This sets FOO=super-secret, but the plaintext never appears here.
|
||||
- secure: "VJh0l9gOb+6AVNDk6cziZSs1AqVM8CqtZU6ot9ZQeJ+KfL1pxnGQ4qQF8Cz9\M1q85c3l1N1+qkQ0uV12QG6O6ylq6Qq1l3VjAJM3h2pY3jdmrA8kX2ZIxRjC/\8+Xj1wVtKQ0R+owM/6i5Y6cyx4hRb3VvSeYlC0lD1iTzQ2vgMyE="
|
||||
|
||||
|
|
|
|||
55
docs/BASELINE.md
Normal file
55
docs/BASELINE.md
Normal file
|
|
@ -0,0 +1,55 @@
|
|||
|
||||
# Build a Baseline / Detect Only New Secrets
|
||||
|
||||
There are situations where a repository already contains checked‑in secrets, but you want to ensure no **new** secrets are introduced. A baseline file lets you document the known findings so future scans only report anything that is not already in that list.
|
||||
|
||||
The easiest way to create a baseline is to run a normal scan with the `--manage-baseline` flag (typically at a low confidence level to capture all potential matches):
|
||||
|
||||
```bash
|
||||
kingfisher scan /path/to/code \
|
||||
--confidence low \
|
||||
--manage-baseline \
|
||||
--baseline-file ./baseline-file.yml
|
||||
```
|
||||
|
||||
This generates a YAML file named `baseline-file.yml` in the current directory. The file tracks each finding under an `ExactFindings` section:
|
||||
|
||||
```yaml
|
||||
ExactFindings:
|
||||
matches:
|
||||
- filepath: ruby_vulnerable.rb/
|
||||
fingerprint: 056876f00ffd0622
|
||||
linenum: 52
|
||||
lastupdated: Mon, 14 Jul 2025 10:17:56 -0700
|
||||
- filepath: ruby_vulnerable.rb/
|
||||
fingerprint: ce41d19b83b2b1b0
|
||||
linenum: 53
|
||||
lastupdated: Mon, 14 Jul 2025 10:17:56 -0700
|
||||
- filepath: ruby_vulnerable.rb/
|
||||
fingerprint: e8644d91fa6654f5
|
||||
linenum: 40
|
||||
lastupdated: Mon, 14 Jul 2025 10:17:56 -0700
|
||||
```
|
||||
|
||||
`fingerprint` reuses Kingfisher's 64-bit *finding fingerprint* algorithm with the offsets set to zero. It hashes the secret value together with the normalized filepath, so moving a secret to a different position within the same file does not create a new entry (moving it to a different file does).
|
||||
|
||||
Use the same YAML file with the `--baseline-file` option on future scans to hide all recorded findings:
|
||||
|
||||
```bash
|
||||
kingfisher scan /path/to/code \
|
||||
--baseline-file ./baseline-file.yml
|
||||
```
|
||||
|
||||
If you intentionally add a new secret that should be ignored later, rerun the scan with both `--manage-baseline` and `--baseline-file` to append new matches to the file without removing existing entries:
|
||||
|
||||
```bash
|
||||
kingfisher scan /path/to/code \
|
||||
--manage-baseline \
|
||||
--baseline-file /path/to/baseline-file.yml
|
||||
```
|
||||
|
||||
To see what is being skipped, enable verbose debug output (`-v`) when scanning. Kingfisher then logs every finding suppressed by the baseline file and every path skipped via `--exclude`:
|
||||
|
||||
```bash
|
||||
kingfisher scan /path/to/project -v
|
||||
```
|
||||
115
src/baseline.rs
Normal file
115
src/baseline.rs
Normal file
|
|
@ -0,0 +1,115 @@
|
|||
use std::{
|
||||
collections::HashSet,
|
||||
fs,
|
||||
path::{Path, PathBuf},
|
||||
sync::Arc,
|
||||
};
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use chrono::Local;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use tracing::debug;
|
||||
|
||||
use crate::{findings_store::FindingsStore, matcher::compute_finding_fingerprint};
|
||||
|
||||
#[derive(Debug, Default, Serialize, Deserialize)]
|
||||
pub struct BaselineFile {
|
||||
#[serde(rename = "ExactFindings", default)]
|
||||
pub exact_findings: ExactFindings,
|
||||
}
|
||||
|
||||
#[derive(Debug, Default, Serialize, Deserialize)]
|
||||
pub struct ExactFindings {
|
||||
#[serde(default)]
|
||||
pub matches: Vec<BaselineFinding>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Clone)]
|
||||
pub struct BaselineFinding {
|
||||
pub filepath: String,
|
||||
pub fingerprint: String,
|
||||
pub linenum: usize,
|
||||
pub lastupdated: String,
|
||||
}
|
||||
|
||||
pub fn load_baseline(path: &Path) -> Result<BaselineFile> {
|
||||
let data = fs::read_to_string(path).context("read baseline file")?;
|
||||
Ok(serde_yaml::from_str(&data).context("parse baseline yaml")?)
|
||||
}
|
||||
|
||||
pub fn save_baseline(path: &Path, baseline: &BaselineFile) -> Result<()> {
|
||||
let data = serde_yaml::to_string(baseline).context("serialize baseline")?;
|
||||
fs::write(path, data).context("write baseline file")
|
||||
}
|
||||
|
||||
/// Rewrites `p` so that it starts at the final component of the scan root
/// that contains it.
///
/// The first entry of `roots` that is both a prefix of `p` and has a final
/// component wins; the result is `<root name>/<path below root>`. Roots with
/// no final component (for example `/`) are passed over. When no root
/// qualifies, `p` is returned unchanged (lossily converted to UTF-8).
///
/// When `p` equals the root itself the remainder is empty, and joining an
/// empty path leaves a trailing separator (e.g. `file.rb/`).
fn normalize_path(p: &Path, roots: &[PathBuf]) -> String {
    roots
        .iter()
        .find_map(|root| {
            let below_root = p.strip_prefix(root).ok()?;
            let root_name = root.file_name()?;
            Some(PathBuf::from(root_name).join(below_root).to_string_lossy().into_owned())
        })
        .unwrap_or_else(|| p.to_string_lossy().into_owned())
}
|
||||
|
||||
fn compute_hash(secret: &str, path: &str) -> String {
|
||||
let fp = compute_finding_fingerprint(secret, path, 0, 0);
|
||||
format!("{:016x}", fp)
|
||||
}
|
||||
|
||||
fn extract_secret(m: &crate::matcher::Match) -> String {
|
||||
m.groups
|
||||
.captures
|
||||
.get(1)
|
||||
.or_else(|| m.groups.captures.get(0))
|
||||
.map(|c| c.value.to_string())
|
||||
.unwrap_or_default()
|
||||
}
|
||||
|
||||
pub fn apply_baseline(
|
||||
store: &mut FindingsStore,
|
||||
baseline_path: &Path,
|
||||
manage: bool,
|
||||
roots: &[PathBuf],
|
||||
) -> Result<()> {
|
||||
let mut baseline = if baseline_path.exists() {
|
||||
load_baseline(baseline_path)?
|
||||
} else {
|
||||
BaselineFile::default()
|
||||
};
|
||||
|
||||
let mut known: HashSet<String> =
|
||||
baseline.exact_findings.matches.iter().map(|m| m.fingerprint.clone()).collect();
|
||||
|
||||
let mut new_entries = Vec::new();
|
||||
for arc_msg in store.get_matches_mut() {
|
||||
let (origin, _blob, m) = Arc::make_mut(arc_msg);
|
||||
let file_path = origin.iter().filter_map(|o| o.full_path()).next();
|
||||
if let Some(fp) = file_path {
|
||||
let normalized = normalize_path(&fp, roots);
|
||||
let secret = extract_secret(m);
|
||||
let hash = compute_hash(&secret, &normalized);
|
||||
if known.contains(&hash) {
|
||||
debug!("Skipping {} due to baseline (hash {})", normalized, hash);
|
||||
m.visible = false;
|
||||
} else if manage {
|
||||
known.insert(hash.clone());
|
||||
let entry = BaselineFinding {
|
||||
filepath: normalized,
|
||||
fingerprint: hash,
|
||||
linenum: m.location.source_span.start.line,
|
||||
lastupdated: Local::now().to_rfc2822(),
|
||||
};
|
||||
new_entries.push(entry);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if manage && !new_entries.is_empty() {
|
||||
baseline.exact_findings.matches.extend(new_entries);
|
||||
save_baseline(baseline_path, &baseline)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
@ -111,9 +111,13 @@ pub struct ContentFilteringArgs {
|
|||
#[arg(long("max-file-size"), default_value_t = 25.0)]
|
||||
pub max_file_size_mb: f64,
|
||||
|
||||
/// Use custom path-based ignore rules from the given file(s)
|
||||
#[arg(long, short, value_hint = ValueHint::FilePath)]
|
||||
pub ignore: Vec<PathBuf>,
|
||||
// /// Use custom path-based ignore rules from the given file(s)
|
||||
// #[arg(long, short, value_hint = ValueHint::FilePath)]
|
||||
// pub ignore: Vec<PathBuf>,
|
||||
/// Skip any file or directory whose path matches this glob pattern. Multiple
|
||||
/// patterns may be provided by repeating the flag.
|
||||
#[arg(long, value_name = "PATTERN")]
|
||||
pub exclude: Vec<String>,
|
||||
|
||||
/// If true, do NOT extract archive files
|
||||
#[arg(long("no-extract-archives"), default_value_t = false)]
|
||||
|
|
|
|||
|
|
@ -88,10 +88,6 @@ pub struct ScanArgs {
|
|||
#[arg(long, default_value_t = false)]
|
||||
pub no_dedup: bool,
|
||||
|
||||
/// Ignore matches that appear to come from test files
|
||||
#[arg(long, default_value_t = false)]
|
||||
pub ignore_tests: bool,
|
||||
|
||||
/// Redact findings values using a secure hash
|
||||
#[arg(long, short = 'r', default_value_t = false)]
|
||||
pub redact: bool,
|
||||
|
|
@ -106,6 +102,14 @@ pub struct ScanArgs {
|
|||
/// Bytes of context before and after each match
|
||||
#[arg(long, default_value_t = 256, value_name = "BYTES")]
|
||||
pub snippet_length: usize,
|
||||
|
||||
/// Baseline file to filter known secrets
|
||||
#[arg(long, value_name = "FILE")]
|
||||
pub baseline_file: Option<std::path::PathBuf>,
|
||||
|
||||
/// Create or update the baseline file with current findings
|
||||
#[arg(long, default_value_t = false)]
|
||||
pub manage_baseline: bool,
|
||||
}
|
||||
|
||||
/// Confidence levels for findings
|
||||
|
|
|
|||
|
|
@ -147,9 +147,9 @@ impl RepositoryIndex {
|
|||
.with_ordering(Ordering::PackLexicographicalThenLooseLexicographical)
|
||||
{
|
||||
let oid = unwrap_ok_or_continue!(oid, |e| debug!("Failed to read object id: {e}"));
|
||||
if count % 100000 == 0 {
|
||||
debug!("Indexed {} objects in RepositoryIndex::new", count);
|
||||
}
|
||||
// if count % 100000 == 0 {
|
||||
// debug!("Indexed {} objects in RepositoryIndex::new", count);
|
||||
// }
|
||||
let hdr = unwrap_ok_or_continue!(odb.header(oid), |e| {
|
||||
debug!("Failed to read object header for {oid}: {e}")
|
||||
});
|
||||
|
|
|
|||
|
|
@ -73,11 +73,16 @@ pub struct GitBlobMetadata {
|
|||
pub struct GitRepoWithMetadataEnumerator<'a> {
|
||||
path: &'a Path,
|
||||
repo: Repository,
|
||||
exclude_globset: Option<std::sync::Arc<globset::GlobSet>>,
|
||||
}
|
||||
|
||||
impl<'a> GitRepoWithMetadataEnumerator<'a> {
|
||||
pub fn new(path: &'a Path, repo: Repository) -> Self {
|
||||
Self { path, repo }
|
||||
pub fn new(
|
||||
path: &'a Path,
|
||||
repo: Repository,
|
||||
exclude_globset: Option<std::sync::Arc<globset::GlobSet>>,
|
||||
) -> Self {
|
||||
Self { path, repo, exclude_globset }
|
||||
}
|
||||
|
||||
pub fn run(self) -> Result<GitRepoResult> {
|
||||
|
|
@ -173,12 +178,19 @@ impl<'a> GitRepoWithMetadataEnumerator<'a> {
|
|||
}
|
||||
let filtered = appearances
|
||||
.into_iter()
|
||||
.filter(|entry| {
|
||||
// Apply your actual ignore-logic here:
|
||||
match entry.path.to_path() {
|
||||
Ok(_path) => true,
|
||||
Err(_) => true,
|
||||
.filter(|entry| match entry.path.to_path() {
|
||||
Ok(p) => {
|
||||
if let Some(gs) = &self.exclude_globset {
|
||||
let m = gs.is_match(p);
|
||||
if m {
|
||||
debug!("Skipping {} due to --exclude", p.display());
|
||||
}
|
||||
!m
|
||||
} else {
|
||||
true
|
||||
}
|
||||
}
|
||||
Err(_) => true,
|
||||
})
|
||||
.collect::<SmallVec<_>>();
|
||||
if filtered.is_empty() {
|
||||
|
|
|
|||
31
src/lib.rs
31
src/lib.rs
|
|
@ -1,3 +1,4 @@
|
|||
pub mod baseline;
|
||||
pub mod binary;
|
||||
pub mod blob;
|
||||
pub mod bstring_escape;
|
||||
|
|
@ -43,6 +44,7 @@ use crossbeam_channel::Sender;
|
|||
pub use git_repo_enumerator::{GitRepoEnumerator, GitRepoResult, GitRepoWithMetadataEnumerator};
|
||||
pub use gix::{self, Repository, ThreadSafeRepository};
|
||||
use gix::{open::Options, open_opts};
|
||||
use globset::{Glob, GlobSet, GlobSetBuilder};
|
||||
pub use ignore::gitignore::{Gitignore, GitignoreBuilder};
|
||||
use ignore::{DirEntry, WalkBuilder, WalkState};
|
||||
use tokio::time::Duration;
|
||||
|
|
@ -80,7 +82,7 @@ struct EnumeratorConfig {
|
|||
enumerate_git_history: bool,
|
||||
collect_git_metadata: bool,
|
||||
repo_scan_timeout: Duration,
|
||||
// gitignore: Gitignore,
|
||||
exclude_globset: Option<std::sync::Arc<GlobSet>>,
|
||||
}
|
||||
|
||||
pub enum FoundInput {
|
||||
|
|
@ -204,6 +206,7 @@ pub struct FilesystemEnumerator {
|
|||
extract_archives: bool,
|
||||
extraction_depth: usize,
|
||||
no_dedup: bool,
|
||||
exclude_globset: Option<std::sync::Arc<GlobSet>>,
|
||||
}
|
||||
|
||||
impl FilesystemEnumerator {
|
||||
|
|
@ -234,6 +237,7 @@ impl FilesystemEnumerator {
|
|||
extract_archives: !args.content_filtering_args.no_extract_archives,
|
||||
extraction_depth: args.content_filtering_args.extraction_depth as usize,
|
||||
no_dedup: args.no_dedup,
|
||||
exclude_globset: None,
|
||||
})
|
||||
}
|
||||
|
||||
|
|
@ -287,6 +291,31 @@ impl FilesystemEnumerator {
|
|||
self
|
||||
}
|
||||
|
||||
pub fn set_exclude_patterns(&mut self, patterns: &[String]) -> Result<&mut Self> {
|
||||
if patterns.is_empty() {
|
||||
return Ok(self);
|
||||
}
|
||||
let mut builder = GlobSetBuilder::new();
|
||||
for pat in patterns {
|
||||
builder.add(Glob::new(pat)?);
|
||||
}
|
||||
let globset = std::sync::Arc::new(builder.build()?);
|
||||
self.exclude_globset = Some(globset.clone());
|
||||
self.filter_entry(move |entry| {
|
||||
let path = entry.path();
|
||||
let matches = globset.is_match(path);
|
||||
if matches {
|
||||
debug!("Skipping {} due to --exclude", path.display());
|
||||
}
|
||||
!matches
|
||||
});
|
||||
Ok(self)
|
||||
}
|
||||
|
||||
/// Returns a shared handle to the compiled `--exclude` glob set, or `None`
/// when no exclude patterns have been installed.
pub fn exclude_globset(&self) -> Option<std::sync::Arc<GlobSet>> {
    self.exclude_globset.as_ref().map(std::sync::Arc::clone)
}
|
||||
|
||||
/// Builds a `Gitignore` matcher from the rules accumulated in this
/// enumerator's gitignore builder.
///
/// # Errors
///
/// Propagates any error reported by the builder.
pub fn gitignore(&self) -> Result<Gitignore> {
    let matcher = self.gitignore_builder.build()?;
    Ok(matcher)
}
|
||||
|
|
|
|||
20
src/main.rs
20
src/main.rs
|
|
@ -132,7 +132,18 @@ pub fn determine_exit_code(datastore: &Arc<Mutex<findings_store::FindingsStore>>
|
|||
// exit with code 0 if there are NO findings discovered
|
||||
let ds = datastore.lock().unwrap();
|
||||
// Get all matches
|
||||
let all_matches = ds.get_matches();
|
||||
// let all_matches = ds.get_matches();
|
||||
|
||||
// Only consider visible matches when determining the exit code
|
||||
let all_matches = ds
|
||||
.get_matches()
|
||||
.iter()
|
||||
.filter(|msg| {
|
||||
let (_, _, match_item) = &***msg;
|
||||
match_item.visible
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
if all_matches.is_empty() {
|
||||
// No findings discovered
|
||||
0
|
||||
|
|
@ -141,7 +152,7 @@ pub fn determine_exit_code(datastore: &Arc<Mutex<findings_store::FindingsStore>>
|
|||
let validated_matches = all_matches
|
||||
.iter()
|
||||
.filter(|msg| {
|
||||
let (_, _, match_item) = &***msg;
|
||||
let (_, _, match_item) = &****msg;
|
||||
match_item.validation_success
|
||||
})
|
||||
.count();
|
||||
|
|
@ -278,7 +289,7 @@ fn create_default_scan_args() -> cli::commands::scan::ScanArgs {
|
|||
max_file_size_mb: 25.0,
|
||||
no_extract_archives: true,
|
||||
extraction_depth: 2,
|
||||
ignore: Vec::new(),
|
||||
exclude: Vec::new(), // Exclude patterns
|
||||
no_binary: true,
|
||||
},
|
||||
confidence: ConfidenceLevel::Medium,
|
||||
|
|
@ -289,8 +300,9 @@ fn create_default_scan_args() -> cli::commands::scan::ScanArgs {
|
|||
redact: false,
|
||||
git_repo_timeout: 1800,
|
||||
no_dedup: false,
|
||||
ignore_tests: false,
|
||||
snippet_length: 256,
|
||||
baseline_file: None,
|
||||
manage_baseline: false,
|
||||
output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -383,13 +383,13 @@ mod tests {
|
|||
rules::rule::Confidence,
|
||||
util::intern,
|
||||
};
|
||||
|
||||
fn create_default_args() -> cli::commands::scan::ScanArgs {
|
||||
use crate::cli::commands::gitlab::GitLabRepoType; // bring enum into scope
|
||||
|
||||
cli::commands::scan::ScanArgs {
|
||||
num_jobs: 1,
|
||||
no_dedup: false,
|
||||
ignore_tests: false,
|
||||
rules: RuleSpecifierArgs {
|
||||
rules_path: Vec::new(),
|
||||
rule: vec!["all".into()],
|
||||
|
|
@ -424,7 +424,7 @@ mod tests {
|
|||
max_file_size_mb: 25.0,
|
||||
no_extract_archives: false,
|
||||
extraction_depth: 2,
|
||||
ignore: Vec::new(),
|
||||
exclude: Vec::new(), // Exclude patterns
|
||||
no_binary: true,
|
||||
},
|
||||
confidence: ConfidenceLevel::Medium,
|
||||
|
|
@ -436,6 +436,8 @@ mod tests {
|
|||
git_repo_timeout: 1800, // 30 minutes
|
||||
output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
|
||||
snippet_length: 256,
|
||||
baseline_file: None,
|
||||
manage_baseline: false,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -163,7 +163,9 @@ impl<'a> Display for PrettyFinding<'a> {
|
|||
writeln!(f, " |Fingerprint...: {}", rm.m.finding_fingerprint)?;
|
||||
writeln!(f, " |Confidence....: {}", rm.match_confidence.to_string())?;
|
||||
writeln!(f, " |Entropy.......: {:.2}", rm.m.calculated_entropy)?;
|
||||
let validation_status = if rm.validation_response_status == StatusCode::CONTINUE.as_u16() {
|
||||
let validation_status = if rm.validation_response_status == StatusCode::CONTINUE.as_u16()
|
||||
|| rm.validation_response_status == StatusCode::PRECONDITION_REQUIRED.as_u16()
|
||||
{
|
||||
"Not Attempted".to_string()
|
||||
} else if rm.validation_success {
|
||||
"Active Credential".to_string()
|
||||
|
|
@ -312,7 +314,6 @@ fn test_pretty_format_with_nan_entropy_panics() {
|
|||
// core execution / performance
|
||||
num_jobs: 1,
|
||||
no_dedup: false,
|
||||
ignore_tests: false,
|
||||
|
||||
// rule selection
|
||||
rules: RuleSpecifierArgs {
|
||||
|
|
@ -349,7 +350,7 @@ fn test_pretty_format_with_nan_entropy_panics() {
|
|||
max_file_size_mb: 25.0,
|
||||
no_extract_archives: false,
|
||||
extraction_depth: 2,
|
||||
ignore: Vec::new(),
|
||||
exclude: Vec::new(), // Exclude patterns
|
||||
no_binary: true,
|
||||
},
|
||||
|
||||
|
|
@ -367,6 +368,8 @@ fn test_pretty_format_with_nan_entropy_panics() {
|
|||
|
||||
// display
|
||||
snippet_length: 256,
|
||||
baseline_file: None,
|
||||
manage_baseline: false,
|
||||
};
|
||||
|
||||
// This will panic if the entropy isn't checked for NaN
|
||||
|
|
|
|||
|
|
@ -38,6 +38,7 @@ pub enum Validation {
|
|||
GCP,
|
||||
MongoDB,
|
||||
Postgres,
|
||||
JWT,
|
||||
Raw(String),
|
||||
Http(HttpValidation),
|
||||
}
|
||||
|
|
|
|||
|
|
@ -37,7 +37,6 @@ use crate::{
|
|||
util::is_compressed_file,
|
||||
},
|
||||
scanner_pool::ScannerPool,
|
||||
util::is_test_like_path,
|
||||
EnumeratorConfig, EnumeratorFileResult, FileResult, FilesystemEnumerator, FoundInput,
|
||||
GitRepoEnumerator, GitRepoResult, GitRepoWithMetadataEnumerator, PathBuf,
|
||||
};
|
||||
|
|
@ -82,9 +81,10 @@ pub fn enumerate_filesystem_inputs(
|
|||
}()
|
||||
.context("Failed to initialize filesystem enumerator")?;
|
||||
|
||||
let (enum_thread, input_recv) = {
|
||||
let (enum_thread, input_recv, exclude_globset) = {
|
||||
let fs_enumerator = make_fs_enumerator(args, input_roots.into())
|
||||
.context("Failed to initialize filesystem enumerator")?;
|
||||
let exclude_globset = fs_enumerator.as_ref().and_then(|ie| ie.exclude_globset());
|
||||
let channel_size = std::cmp::max(args.num_jobs * 128, 1024);
|
||||
|
||||
let (input_send, input_recv) = crossbeam_channel::bounded(channel_size);
|
||||
|
|
@ -97,7 +97,7 @@ pub fn enumerate_filesystem_inputs(
|
|||
Ok(())
|
||||
})
|
||||
.context("Failed to enumerate filesystem inputs")?;
|
||||
(input_enumerator_thread, input_recv)
|
||||
(input_enumerator_thread, input_recv, exclude_globset)
|
||||
};
|
||||
|
||||
let enum_cfg = EnumeratorConfig {
|
||||
|
|
@ -107,6 +107,7 @@ pub fn enumerate_filesystem_inputs(
|
|||
},
|
||||
collect_git_metadata: args.input_specifier_args.commit_metadata,
|
||||
repo_scan_timeout,
|
||||
exclude_globset,
|
||||
};
|
||||
let (send_ds, recv_ds) = create_datastore_channel(args.num_jobs);
|
||||
let datastore_writer_thread =
|
||||
|
|
@ -189,23 +190,11 @@ pub fn enumerate_filesystem_inputs(
|
|||
Ok(Some((origin_set, blob_metadata, vec_of_matches))) => {
|
||||
for (_, single_match) in vec_of_matches {
|
||||
// Send each match
|
||||
let is_test = if args.ignore_tests {
|
||||
origin_set
|
||||
.iter()
|
||||
.filter_map(|o| o.full_path())
|
||||
.any(|p| is_test_like_path(&p))
|
||||
} else {
|
||||
false
|
||||
};
|
||||
|
||||
if !is_test {
|
||||
// Send each match
|
||||
send_ds.send((
|
||||
Arc::new(origin_set.clone()),
|
||||
Arc::new(blob_metadata.clone()),
|
||||
single_match,
|
||||
))?;
|
||||
}
|
||||
send_ds.send((
|
||||
Arc::new(origin_set.clone()),
|
||||
Arc::new(blob_metadata.clone()),
|
||||
single_match,
|
||||
))?;
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
|
|
@ -246,13 +235,7 @@ fn make_fs_enumerator(
|
|||
// Pass no_dedup when enumerating git history
|
||||
ie.no_dedup(args.no_dedup);
|
||||
|
||||
// Load any specified ignore files
|
||||
for ignore_path in args.content_filtering_args.ignore.iter() {
|
||||
debug!("Using ignore rules from {}", ignore_path.display());
|
||||
ie.add_ignore(ignore_path).with_context(|| {
|
||||
format!("Failed to load ignore rules from {}", ignore_path.display())
|
||||
})?;
|
||||
}
|
||||
ie.set_exclude_patterns(&args.content_filtering_args.exclude)?;
|
||||
// Determine whether to collect git metadata or not
|
||||
let collect_git_metadata = false;
|
||||
ie.collect_git_metadata(collect_git_metadata);
|
||||
|
|
@ -610,9 +593,15 @@ impl<'cfg> ParallelBlobIterator for (&'cfg EnumeratorConfig, FoundInput) {
|
|||
// Spawn an enumerator thread so we can time-out cleanly
|
||||
let path_clone = path.to_path_buf();
|
||||
let (tx, rx) = std::sync::mpsc::channel();
|
||||
let exclude_globset = cfg.exclude_globset.clone();
|
||||
let handle = std::thread::spawn(move || {
|
||||
let res = if collect_git_metadata {
|
||||
GitRepoWithMetadataEnumerator::new(&path_clone, repository).run()
|
||||
GitRepoWithMetadataEnumerator::new(
|
||||
&path_clone,
|
||||
repository,
|
||||
exclude_globset.clone(),
|
||||
)
|
||||
.run()
|
||||
} else {
|
||||
GitRepoEnumerator::new(&path_clone, repository).run()
|
||||
};
|
||||
|
|
|
|||
|
|
@ -100,6 +100,16 @@ pub async fn run_async_scan(
|
|||
ds.replace_matches(deduped_arcs);
|
||||
}
|
||||
|
||||
// If baseline management is enabled, apply the baseline
|
||||
if args.baseline_file.is_some() || args.manage_baseline {
|
||||
let path = args
|
||||
.baseline_file
|
||||
.clone()
|
||||
.unwrap_or_else(|| std::path::PathBuf::from("baseline-file.yaml"));
|
||||
let mut ds = datastore.lock().unwrap();
|
||||
crate::baseline::apply_baseline(&mut ds, &path, args.manage_baseline, &input_roots)?;
|
||||
}
|
||||
|
||||
// If validation is enabled, run it as a second phase
|
||||
if !args.no_validate {
|
||||
info!("Starting secret validation phase...");
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ mod aws;
|
|||
mod azure;
|
||||
mod gcp;
|
||||
mod httpvalidation;
|
||||
mod jwt;
|
||||
mod mongodb;
|
||||
mod postgres;
|
||||
mod utils;
|
||||
|
|
@ -58,35 +59,6 @@ pub fn init_validation_caches() {
|
|||
IN_FLIGHT.set(DashMap::new()).ok();
|
||||
}
|
||||
|
||||
// #[derive(Clone, FilterReflection, ParseFilter)]
|
||||
// #[filter(
|
||||
// name = "b64enc",
|
||||
// description = "Encodes the input string using Base64 encoding",
|
||||
// parsed(B64EncFilter)
|
||||
// )]
|
||||
// pub struct B64EncFilterParser;
|
||||
|
||||
// #[derive(Debug, Default, Clone)]
|
||||
// pub struct B64EncFilter;
|
||||
|
||||
// impl std::fmt::Display for B64EncFilter {
|
||||
// fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
// write!(f, "b64enc")
|
||||
// }
|
||||
// }
|
||||
|
||||
// impl Filter for B64EncFilter {
|
||||
// fn evaluate(
|
||||
// &self,
|
||||
// input: &dyn ValueView,
|
||||
// _runtime: &dyn Runtime,
|
||||
// ) -> Result<Value, LiquidError> {
|
||||
// let input_str = input.to_kstr().into_owned();
|
||||
// let encoded = general_purpose::STANDARD.encode(input_str.as_bytes());
|
||||
// Ok(Value::scalar(encoded))
|
||||
// }
|
||||
// }
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct CachedResponse {
|
||||
pub body: String,
|
||||
|
|
@ -700,7 +672,36 @@ async fn timed_validate_single_match<'a>(
|
|||
},
|
||||
);
|
||||
}
|
||||
// ---------------------------------------------------- JWT validator
|
||||
Some(Validation::JWT) => {
|
||||
let token = captured_values
|
||||
.iter()
|
||||
.find(|(n, ..)| n == "TOKEN")
|
||||
.map(|(_, v, ..)| v.clone())
|
||||
.unwrap_or_default();
|
||||
|
||||
if token.is_empty() {
|
||||
m.validation_success = false;
|
||||
m.validation_response_body = "JWT token not found.".to_string();
|
||||
m.validation_response_status = StatusCode::BAD_REQUEST;
|
||||
commit_and_return(m);
|
||||
return;
|
||||
}
|
||||
|
||||
match jwt::validate_jwt(&token).await {
|
||||
Ok((ok, msg)) => {
|
||||
m.validation_success = ok;
|
||||
m.validation_response_body = msg;
|
||||
m.validation_response_status =
|
||||
if ok { StatusCode::OK } else { StatusCode::UNAUTHORIZED };
|
||||
}
|
||||
Err(e) => {
|
||||
m.validation_success = false;
|
||||
m.validation_response_body = format!("JWT validation error: {}", e);
|
||||
m.validation_response_status = StatusCode::BAD_REQUEST;
|
||||
}
|
||||
}
|
||||
}
|
||||
// ---------------------------------------------------- AWS validator
|
||||
Some(Validation::AWS) => {
|
||||
let secret = captured_values
|
||||
|
|
|
|||
212
src/validation/jwt.rs
Normal file
212
src/validation/jwt.rs
Normal file
|
|
@ -0,0 +1,212 @@
|
|||
use anyhow::{anyhow, Result};
|
||||
use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine as _};
|
||||
use chrono::Utc;
|
||||
use ipnet::IpNet;
|
||||
use jsonwebtoken::{decode, decode_header, jwk::JwkSet, DecodingKey, Validation as JwtValidation};
|
||||
use once_cell::sync::Lazy;
|
||||
use reqwest::{redirect::Policy, Client, Url};
|
||||
use serde::Deserialize;
|
||||
use tokio::net::lookup_host;
|
||||
|
||||
use super::utils::check_url_resolvable;
|
||||
|
||||
/// One global, redirect-free client. Building a `Client` is comparatively
|
||||
/// expensive; re-using it lets reqwest share its internal connection pool
|
||||
/// and TLS sessions across JWT validations. `Lazy` ensures thread-safe,
|
||||
/// one-time initialisation.
|
||||
static NO_REDIRECT_CLIENT: Lazy<Client> = Lazy::new(|| {
|
||||
Client::builder()
|
||||
.redirect(Policy::none()) // disable all redirects
|
||||
.build()
|
||||
.expect("failed to build no-redirect Client")
|
||||
});
|
||||
|
||||
/// Networks (in CIDR notation) that JWT validation refuses to contact.
///
/// Covers the unspecified, loopback, private and link-local ranges for both
/// IPv4 and IPv6. The IPv6 entries matter because a hostname taken from an
/// untrusted token may resolve to an IPv6 address, which would never match an
/// IPv4-only list.
const BLOCKED_NETS: &[&str] = &[
    // IPv4
    "0.0.0.0/8",      // "this network" / unspecified
    "10.0.0.0/8",     // RFC 1918 private
    "172.16.0.0/12",  // RFC 1918 private
    "192.168.0.0/16", // RFC 1918 private
    "127.0.0.0/8",    // loopback
    "169.254.0.0/16", // link-local
    // IPv6
    "::/128",    // unspecified
    "::1/128",   // loopback
    "fc00::/7",  // unique local (private)
    "fe80::/10", // link-local
];
|
||||
|
||||
// aud is allowed to be either a string or an array, so let Serde flatten it.
|
||||
#[derive(Debug, Deserialize)]
|
||||
#[serde(untagged)]
|
||||
enum Aud {
|
||||
Str(String),
|
||||
Arr(Vec<String>),
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
struct Claims {
|
||||
exp: Option<i64>,
|
||||
nbf: Option<i64>,
|
||||
iss: Option<String>,
|
||||
aud: Option<Aud>,
|
||||
}
|
||||
|
||||
pub async fn validate_jwt(token: &str) -> Result<(bool, String)> {
|
||||
// --- insecure payload decode -------------------------------------------------
|
||||
let claims: Claims = {
|
||||
let payload_b64 = token.split('.').nth(1).ok_or_else(|| anyhow!("invalid JWT format"))?;
|
||||
let payload_json = URL_SAFE_NO_PAD
|
||||
.decode(payload_b64)
|
||||
.map_err(|e| anyhow!("invalid base64 in payload: {e}"))?;
|
||||
serde_json::from_slice(&payload_json).map_err(|e| anyhow!("invalid JSON claims: {e}"))?
|
||||
};
|
||||
|
||||
// temporal checks
|
||||
let now = Utc::now().timestamp();
|
||||
if let Some(nbf) = claims.nbf {
|
||||
if now < nbf {
|
||||
return Ok((false, format!("Token not valid before {nbf}")));
|
||||
}
|
||||
}
|
||||
if let Some(exp) = claims.exp {
|
||||
if now > exp {
|
||||
return Ok((false, format!("Token expired at {exp}")));
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
let issuer = claims.iss.clone().unwrap_or_default();
|
||||
|
||||
if let Some(iss) = claims.iss.clone() {
|
||||
// parse header now (kid, alg)
|
||||
let header = decode_header(token).map_err(|e| anyhow!("decode header: {e}"))?;
|
||||
|
||||
// build discovery URL and fetch it (redirects disabled)
|
||||
let config_url = format!("{}/.well-known/openid-configuration", iss.trim_end_matches('/'));
|
||||
let cfg_resp = NO_REDIRECT_CLIENT
|
||||
.get(&config_url)
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| anyhow!("issuer discovery failed: {e}"))?;
|
||||
|
||||
if !cfg_resp.status().is_success() {
|
||||
return Ok((false, format!("issuer discovery failed: {}", cfg_resp.status())));
|
||||
}
|
||||
|
||||
let cfg_json: serde_json::Value =
|
||||
cfg_resp.json().await.map_err(|e| anyhow!("invalid discovery JSON: {e}"))?;
|
||||
|
||||
// extract jwks_uri
|
||||
let jwks_uri = cfg_json
|
||||
.get("jwks_uri")
|
||||
.and_then(|v| v.as_str())
|
||||
.ok_or_else(|| anyhow!("jwks_uri missing"))?;
|
||||
|
||||
// must be HTTPS
|
||||
let url = Url::parse(jwks_uri).map_err(|e| anyhow!("invalid jwks_uri: {e}"))?;
|
||||
if url.scheme() != "https" {
|
||||
return Ok((false, "jwks_uri must use https".to_string()));
|
||||
}
|
||||
|
||||
// host must match issuer host — prevents open redirects / SSRF-on-other-host
|
||||
let iss_host = Url::parse(&iss)
|
||||
.map_err(|e| anyhow!("invalid iss: {e}"))?
|
||||
.host_str()
|
||||
.unwrap_or_default()
|
||||
.to_ascii_lowercase();
|
||||
let jwks_host = url.host_str().unwrap_or_default().to_ascii_lowercase();
|
||||
if jwks_host != iss_host {
|
||||
return Ok((
|
||||
false,
|
||||
format!("jwks_uri host ({jwks_host}) must match issuer host ({iss_host})"),
|
||||
));
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// DNS resolution + private-range block
|
||||
for addr in lookup_host((jwks_host.as_str(), 443)).await? {
|
||||
if is_blocked_ip(addr.ip()) {
|
||||
return Ok((false, "jwks_uri resolves to private or link-local IP".to_string()));
|
||||
}
|
||||
}
|
||||
|
||||
// reachability check (existing helper)
|
||||
check_url_resolvable(&url).await.map_err(|e| anyhow!("jwks uri unresolvable: {e}"))?;
|
||||
|
||||
// fetch JWKS with redirect-free client
|
||||
let jwks_resp = NO_REDIRECT_CLIENT
|
||||
.get(url)
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| anyhow!("jwks fetch failed: {e}"))?;
|
||||
if !jwks_resp.status().is_success() {
|
||||
return Ok((false, format!("jwks fetch failed: {}", jwks_resp.status())));
|
||||
}
|
||||
|
||||
let jwk_set: JwkSet =
|
||||
jwks_resp.json().await.map_err(|e| anyhow!("invalid jwks json: {e}"))?;
|
||||
|
||||
// select key by kid
|
||||
let kid = header.kid.ok_or_else(|| anyhow!("no kid in header"))?;
|
||||
let jwk = jwk_set
|
||||
.keys
|
||||
.iter()
|
||||
.find(|k| k.common.key_id.as_deref() == Some(&kid))
|
||||
.ok_or_else(|| anyhow!("kid not found in jwks"))?;
|
||||
|
||||
// verify signature
|
||||
let decoding_key = DecodingKey::from_jwk(jwk).map_err(|e| anyhow!("invalid jwk: {e}"))?;
|
||||
let mut validation = JwtValidation::new(header.alg);
|
||||
validation.set_audience(&extract_aud_strings(&claims));
|
||||
validation.validate_exp = false;
|
||||
validation.validate_nbf = false;
|
||||
|
||||
decode::<Claims>(token, &decoding_key, &validation)
|
||||
.map_err(|e| anyhow!("signature verification failed: {e}"))?;
|
||||
|
||||
return Ok((
|
||||
true,
|
||||
format!("JWT valid (iss: {issuer}, aud: {:?})", extract_aud_strings(&claims)),
|
||||
));
|
||||
}
|
||||
|
||||
Ok((true, format!("JWT not expired (iss: {issuer}, aud: {:?})", extract_aud_strings(&claims))))
|
||||
}
|
||||
|
||||
/// Helper: normalize aud into a flat Vec<String>
|
||||
fn extract_aud_strings(claims: &Claims) -> Vec<String> {
|
||||
match &claims.aud {
|
||||
Some(Aud::Str(s)) => vec![s.clone()],
|
||||
Some(Aud::Arr(v)) => v.clone(),
|
||||
None => vec![],
|
||||
}
|
||||
}
|
||||
/// returns true if IP is in a blocked network
|
||||
fn is_blocked_ip(ip: std::net::IpAddr) -> bool {
|
||||
BLOCKED_NETS.iter().filter_map(|cidr| cidr.parse::<IpNet>().ok()).any(|net| net.contains(&ip))
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine as _};
    use chrono::{Duration as ChronoDuration, Utc};

    use super::validate_jwt;

    /// Wraps `payload_json` in an unsigned (`alg: none`) token with an empty signature.
    fn token_with_payload(payload_json: &str) -> String {
        let header = URL_SAFE_NO_PAD.encode(r#"{"alg":"none"}"#);
        let payload = URL_SAFE_NO_PAD.encode(payload_json);
        format!("{header}.{payload}.")
    }

    /// Builds a token whose only claim is an `exp` set `exp_offset` seconds from now.
    fn build_token(exp_offset: i64) -> String {
        let exp = (Utc::now() + ChronoDuration::seconds(exp_offset)).timestamp();
        token_with_payload(&format!("{{\"exp\":{exp}}}"))
    }

    // None of these tokens carry an `iss`, so validation never touches the network.

    #[tokio::test]
    async fn valid_token() {
        let token = build_token(60);
        let res = validate_jwt(&token).await.unwrap();
        assert!(res.0);
    }

    #[tokio::test]
    async fn expired_token() {
        let token = build_token(-60);
        let res = validate_jwt(&token).await.unwrap();
        assert!(!res.0);
    }

    #[tokio::test]
    async fn not_yet_valid_token() {
        let nbf = (Utc::now() + ChronoDuration::seconds(60)).timestamp();
        let token = token_with_payload(&format!("{{\"nbf\":{nbf}}}"));
        let res = validate_jwt(&token).await.unwrap();
        assert!(!res.0);
    }

    #[tokio::test]
    async fn malformed_token_is_error() {
        // No '.' separator, so there is no payload segment to decode.
        assert!(validate_jwt("not-a-jwt").await.is_err());
    }
}
|
||||
|
|
@ -90,7 +90,7 @@ rules:
|
|||
extraction_depth: 1,
|
||||
no_binary: true,
|
||||
no_extract_archives: false,
|
||||
ignore: Vec::new(),
|
||||
exclude: Vec::new(), // Exclude patterns
|
||||
},
|
||||
confidence: ConfidenceLevel::Low,
|
||||
no_validate: true,
|
||||
|
|
@ -101,8 +101,9 @@ rules:
|
|||
git_repo_timeout: 1800, // 30 minutes
|
||||
output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
|
||||
no_dedup,
|
||||
ignore_tests: false,
|
||||
snippet_length: 64,
|
||||
baseline_file: None,
|
||||
manage_baseline: false,
|
||||
};
|
||||
|
||||
let global_args = GlobalArgs {
|
||||
|
|
|
|||
|
|
@ -77,7 +77,7 @@ fn test_github_remote_scan() -> Result<()> {
|
|||
no_extract_archives: false,
|
||||
extraction_depth: 2,
|
||||
no_binary: true,
|
||||
ignore: Vec::new(),
|
||||
exclude: Vec::new(), // Exclude patterns
|
||||
},
|
||||
confidence: ConfidenceLevel::Medium,
|
||||
no_validate: false,
|
||||
|
|
@ -88,8 +88,9 @@ fn test_github_remote_scan() -> Result<()> {
|
|||
git_repo_timeout: 1800, // 30 minutes
|
||||
output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
|
||||
no_dedup: true,
|
||||
ignore_tests: false,
|
||||
snippet_length: 256,
|
||||
baseline_file: None,
|
||||
manage_baseline: false,
|
||||
};
|
||||
// Create global arguments
|
||||
let global_args = GlobalArgs {
|
||||
|
|
|
|||
|
|
@ -74,7 +74,7 @@ fn test_gitlab_remote_scan() -> Result<()> {
|
|||
no_extract_archives: false,
|
||||
extraction_depth: 2,
|
||||
no_binary: true,
|
||||
ignore: Vec::new(),
|
||||
exclude: Vec::new(), // Exclude patterns
|
||||
},
|
||||
confidence: ConfidenceLevel::Medium,
|
||||
no_validate: false,
|
||||
|
|
@ -85,8 +85,9 @@ fn test_gitlab_remote_scan() -> Result<()> {
|
|||
git_repo_timeout: 1800, // 30 minutes
|
||||
output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
|
||||
no_dedup: true,
|
||||
ignore_tests: false,
|
||||
snippet_length: 256,
|
||||
baseline_file: None,
|
||||
manage_baseline: false,
|
||||
};
|
||||
|
||||
let global_args = GlobalArgs {
|
||||
|
|
|
|||
|
|
@ -133,7 +133,7 @@ async fn test_validation_cache_and_depvars() -> Result<()> {
|
|||
extraction_depth: 2,
|
||||
no_binary: true,
|
||||
no_extract_archives: false,
|
||||
ignore: Vec::new(),
|
||||
exclude: Vec::new(), // Exclude patterns
|
||||
},
|
||||
confidence: ConfidenceLevel::Low,
|
||||
no_validate: false,
|
||||
|
|
@ -144,8 +144,9 @@ async fn test_validation_cache_and_depvars() -> Result<()> {
|
|||
git_repo_timeout: 1800, // 30 minutes
|
||||
output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
|
||||
no_dedup: true, // keep duplicates so the cache is stressed
|
||||
ignore_tests: false,
|
||||
snippet_length: 128,
|
||||
baseline_file: None,
|
||||
manage_baseline: false,
|
||||
};
|
||||
|
||||
/* --------------------------------------------------------- *
|
||||
|
|
|
|||
|
|
@ -76,7 +76,7 @@ impl TestContext {
|
|||
extraction_depth: 2,
|
||||
no_binary: true,
|
||||
no_extract_archives: false,
|
||||
ignore: Vec::new(),
|
||||
exclude: Vec::new(), // Exclude patterns
|
||||
},
|
||||
confidence: ConfidenceLevel::Low,
|
||||
no_validate: true,
|
||||
|
|
@ -87,8 +87,9 @@ impl TestContext {
|
|||
git_repo_timeout: 1800, // 30 minutes
|
||||
output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
|
||||
no_dedup: true,
|
||||
ignore_tests: false,
|
||||
snippet_length: 256,
|
||||
baseline_file: None,
|
||||
manage_baseline: false,
|
||||
};
|
||||
|
||||
let loaded = RuleLoader::from_rule_specifiers(&scan_args.rules)
|
||||
|
|
@ -140,7 +141,7 @@ impl TestContext {
|
|||
extraction_depth: 2,
|
||||
no_binary: true,
|
||||
no_extract_archives: false,
|
||||
ignore: Vec::new(),
|
||||
exclude: Vec::new(), // Exclude patterns
|
||||
},
|
||||
confidence: ConfidenceLevel::Low,
|
||||
no_validate: true,
|
||||
|
|
@ -151,8 +152,9 @@ impl TestContext {
|
|||
git_repo_timeout: 1800, // 30 minutes
|
||||
output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
|
||||
no_dedup: true,
|
||||
ignore_tests: false,
|
||||
snippet_length: 256,
|
||||
baseline_file: None,
|
||||
manage_baseline: false,
|
||||
};
|
||||
|
||||
let global_args = GlobalArgs {
|
||||
|
|
|
|||
56
tests/smoke_baseline.rs
Normal file
56
tests/smoke_baseline.rs
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
use std::fs;
|
||||
|
||||
use assert_cmd::Command;
|
||||
use predicates::prelude::*;
|
||||
use tempfile::tempdir;
|
||||
|
||||
const GH_PAT: &str = "ghp_1wuHFikBKQtCcH3EB2FBUkyn8krXhP2qLqPa";
|
||||
|
||||
/// End-to-end check of the baseline workflow: the first scan records the
/// finding in a baseline file, and a second scan using that baseline no longer
/// reports it.
#[test]
fn baseline_create_and_filter() -> anyhow::Result<()> {
    let workdir = tempdir()?;
    let scan_root = workdir.path();
    fs::write(scan_root.join("leak.txt"), format!("token = \"{}\"\n", GH_PAT))?;
    let baseline = scan_root.join("baseline.yaml");

    let root_arg = scan_root.to_str().unwrap();
    let baseline_arg = baseline.to_str().unwrap();

    // Every invocation starts with the same scan options.
    let kingfisher_scan = || -> anyhow::Result<Command> {
        let mut cmd = Command::cargo_bin("kingfisher")?;
        cmd.args([
            "scan",
            root_arg,
            "--no-binary",
            "--confidence=low",
            "--no-validate",
            "--format",
            "json",
        ]);
        Ok(cmd)
    };

    // Create baseline with manage flag
    kingfisher_scan()?
        .arg("--manage-baseline")
        .args(["--baseline-file", baseline_arg, "--no-update-check"])
        .assert()
        .code(200)
        .stdout(predicate::str::contains(GH_PAT));

    assert!(baseline.exists(), "baseline file created");

    // Scan again using the baseline
    kingfisher_scan()?
        .args(["--baseline-file", baseline_arg, "--no-update-check"])
        .assert()
        .code(0)
        .stdout(predicate::str::contains(GH_PAT).not());

    Ok(())
}
|
||||
34
tests/smoke_exclude.rs
Normal file
34
tests/smoke_exclude.rs
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
use std::fs;
|
||||
|
||||
use assert_cmd::Command;
|
||||
use predicates::prelude::*;
|
||||
use tempfile::tempdir;
|
||||
|
||||
const SECRET: &str = "ghp_1wuHFikBKQtCcH3EB2FBUkyn8krXhP2qLqPa";
|
||||
|
||||
/// Scans a directory holding the same secret in `foo.py` and `bar.txt` with
/// `--exclude=*.py` and expects only the `.txt` file to show up in the output.
#[test]
fn exclude_pattern_hides_matches() -> anyhow::Result<()> {
    let workdir = tempdir()?;
    let root = workdir.path();

    let leak = format!("token = \"{}\"\n", SECRET);
    for name in ["foo.py", "bar.txt"] {
        fs::write(root.join(name), leak.as_bytes())?;
    }

    let only_txt_reported =
        predicate::str::contains("bar.txt").and(predicate::str::contains("foo.py").not());

    let mut scan = Command::cargo_bin("kingfisher")?;
    scan.args([
        "scan",
        root.to_str().unwrap(),
        "--confidence=low",
        "--no-binary",
        "--no-validate",
        "--format",
        "json",
        "--exclude=*.py",
        "--no-update-check",
    ]);
    scan.assert().code(200).stdout(only_txt_reported);

    Ok(())
}
|
||||
|
|
@ -1,9 +1,4 @@
|
|||
use std::fs::{self, File};
|
||||
|
||||
use flate2::{write::GzEncoder, Compression};
|
||||
use kingfisher::{cli::global::GlobalArgs, update::check_for_update};
|
||||
use tar::Builder;
|
||||
use tempfile::tempdir;
|
||||
use tokio;
|
||||
use wiremock::{
|
||||
matchers::{method, path},
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue