forked from mirrors/kingfisher
commit
f43a26030d
22 changed files with 440 additions and 1332 deletions
|
|
@ -2,6 +2,15 @@
|
|||
|
||||
All notable changes to this project will be documented in this file.
|
||||
|
||||
## [1.48.0]
|
||||
- Improved error message when self-update cannot find the current binary
|
||||
- Optimized memory usage via string interning and extensive data sharing
|
||||
- Replaced quadratic match filtering with a per-rule span map, fixing missed secrets in extremely large files and improving scan performance
|
||||
- Support scanning extremely large files by chunking input into 1 GiB segments with small overlaps, avoiding vectorscan buffer limits while preserving match offsets
|
||||
- Always use chunked vectorscan, eliminating the slow regex fallback for blobs over 4 GiB
|
||||
- Skip Base64 scanning for blobs over 64 MB to avoid a second pass over massive files
|
||||
- Increased max-file-size default to 64 MB (up from 25 MB)
|
||||
|
||||
## [1.47.0]
|
||||
- MongoDB validator now validates `mongodb+srv://` URIs with a fast timeout instead of skipping them
|
||||
- Improved rules: github oauth2, diffbot, mailchimp, aws
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ publish = false
|
|||
|
||||
[package]
|
||||
name = "kingfisher"
|
||||
version = "1.47.0"
|
||||
version = "1.48.0"
|
||||
description = "MongoDB's blazingly fast secret scanning and validation tool"
|
||||
edition.workspace = true
|
||||
rust-version.workspace = true
|
||||
|
|
@ -65,7 +65,7 @@ smallvec = { version = "1", features = [
|
|||
tracing = "0.1.41"
|
||||
indicatif = { version = "0.17", features = ["improved_unicode"] }
|
||||
rayon = "1.10"
|
||||
sha1 = "0.10.6"
|
||||
sha1 = { version = "0.10.6", features = ["asm"] }
|
||||
hex = "0.4.3"
|
||||
vectorscan-rs = "0.0.5"
|
||||
regex = "1.11.1"
|
||||
|
|
@ -151,6 +151,7 @@ tar = "0.4.44"
|
|||
xz2 = "0.1.7"
|
||||
asar = "0.3.0"
|
||||
blake3 = "1.8.2"
|
||||
memchr = "2.7"
|
||||
memmap2 = "0.9.7"
|
||||
futures = "0.3.31"
|
||||
dashmap = "6.1.0"
|
||||
|
|
|
|||
77
README.md
77
README.md
|
|
@ -36,6 +36,64 @@ See ([docs/COMPARISON.md](docs/COMPARISON.md))
|
|||
<img src="docs/runtime-comparison.png" alt="Kingfisher Runtime Comparison" style="vertical-align: center;" />
|
||||
</p>
|
||||
|
||||
- [Kingfisher](#kingfisher)
|
||||
- [Key Features](#key-features)
|
||||
- [Benchmark Results](#benchmark-results)
|
||||
- [Getting Started](#getting-started)
|
||||
- [Installation](#installation)
|
||||
- [Run Kingfisher in Docker](#run-kingfisher-in-docker)
|
||||
- [🔐 Detection Rules at a Glance](#-detection-rules-at-a-glance)
|
||||
- [Write Custom Rules!](#write-custom-rules)
|
||||
- [Usage](#usage)
|
||||
- [Basic Examples](#basic-examples)
|
||||
- [Scan with secret validation](#scan-with-secret-validation)
|
||||
- [Scan a directory containing multiple Git repositories](#scan-a-directory-containing-multiple-git-repositories)
|
||||
- [Scan a Git repository without validation](#scan-a-git-repository-without-validation)
|
||||
- [Display only secrets confirmed active by third‑party APIs](#display-only-secrets-confirmed-active-by-thirdparty-apis)
|
||||
- [Output JSON and capture to a file](#output-json-and-capture-to-a-file)
|
||||
- [Output SARIF directly to disk](#output-sarif-directly-to-disk)
|
||||
- [Pipe any text directly into Kingfisher by passing `-`](#pipe-any-text-directly-into-kingfisher-by-passing--)
|
||||
- [Limit maximum file size scanned (`--max-file-size`)](#limit-maximum-file-size-scanned---max-file-size)
|
||||
- [Scan using a rule _family_ with one flag](#scan-using-a-rule-family-with-one-flag)
|
||||
- [Display rule performance statistics](#display-rule-performance-statistics)
|
||||
- [Scan while ignoring likely test files](#scan-while-ignoring-likely-test-files)
|
||||
- [Exclude specific paths](#exclude-specific-paths)
|
||||
- [Scan an S3 bucket](#scan-an-s3-bucket)
|
||||
- [Scanning Docker Images](#scanning-docker-images)
|
||||
- [Scanning GitHub](#scanning-github)
|
||||
- [Scan GitHub organisation (requires `KF_GITHUB_TOKEN`)](#scan-github-organisation-requires-kf_github_token)
|
||||
- [Scan remote GitHub repository](#scan-remote-github-repository)
|
||||
- [Scanning GitLab](#scanning-gitlab)
|
||||
- [Scan GitLab group (requires `KF_GITLAB_TOKEN`)](#scan-gitlab-group-requires-kf_gitlab_token)
|
||||
- [Scan GitLab user](#scan-gitlab-user)
|
||||
- [Scan remote GitLab repository by URL](#scan-remote-gitlab-repository-by-url)
|
||||
- [List GitLab repositories](#list-gitlab-repositories)
|
||||
- [Scanning Jira](#scanning-jira)
|
||||
- [Scan Jira issues matching a JQL query](#scan-jira-issues-matching-a-jql-query)
|
||||
- [Scan the last 1,000 Jira issues:](#scan-the-last-1000-jira-issues)
|
||||
- [Scanning Confluence](#scanning-confluence)
|
||||
- [Scan Confluence pages matching a CQL query](#scan-confluence-pages-matching-a-cql-query)
|
||||
- [Scanning Slack](#scanning-slack)
|
||||
- [Scan Slack messages matching a search query](#scan-slack-messages-matching-a-search-query)
|
||||
- [Environment Variables for Tokens](#environment-variables-for-tokens)
|
||||
- [Exit Codes](#exit-codes)
|
||||
- [Update Checks](#update-checks)
|
||||
- [Advanced Options](#advanced-options)
|
||||
- [Build a Baseline / Detect New Secrets](#build-a-baseline--detect-new-secrets)
|
||||
- [List Builtin Rules](#list-builtin-rules)
|
||||
- [To scan using **only** your own `my_rules.yaml` you could run:](#to-scan-using-only-your-own-my_rulesyaml-you-could-run)
|
||||
- [To add your rules alongside the built‑ins:](#to-add-your-rules-alongside-the-builtins)
|
||||
- [Other Examples](#other-examples)
|
||||
- [Notable Scan Options](#notable-scan-options)
|
||||
- [Understanding `--confidence`](#understanding---confidence)
|
||||
- [Ignore known false positives](#ignore-known-false-positives)
|
||||
- [Finding Fingerprint](#finding-fingerprint)
|
||||
- [Rule Performance Profiling](#rule-performance-profiling)
|
||||
- [CLI Options](#cli-options)
|
||||
- [Origins and Divergence](#origins-and-divergence)
|
||||
- [Roadmap](#roadmap)
|
||||
- [License](#license)
|
||||
|
||||
# Getting Started
|
||||
## Installation
|
||||
|
||||
|
|
@ -225,9 +283,18 @@ cat /path/to/file.py | kingfisher scan -
|
|||
|
||||
```
|
||||
|
||||
### Limit maximum file size scanned (`--max-file-size`)
|
||||
|
||||
By default, Kingfisher skips files larger than **64 MB**. You can raise or lower this cap per run with `--max-file-size`, which takes a value in **megabytes**.
|
||||
|
||||
```bash
|
||||
# Scan files up to 250 MB in size
|
||||
kingfisher scan /some/file --max-file-size 250
|
||||
```
|
||||
|
||||
### Scan using a rule _family_ with one flag
|
||||
|
||||
_(prefix matching: `--rule kingfisher.aws` loads `kingfisher.aws._`)\*
|
||||
_(prefix matching: `--rule kingfisher.aws` loads `kingfisher.aws.*`)_
|
||||
|
||||
```bash
|
||||
# Only apply AWS-related rules (kingfisher.aws.1 + kingfisher.aws.2)
|
||||
|
|
@ -617,6 +684,14 @@ kingfisher github repos list --organization my-org
|
|||
- `--skip-regex <PATTERN>`: Ignore findings whose text matches this regex (repeatable)
|
||||
- `--skip-word <WORD>`: Ignore findings containing this case-insensitive word (repeatable)
|
||||
|
||||
## Understanding `--confidence`
|
||||
|
||||
The `--confidence` flag sets a minimum confidence threshold, not an exact match.
|
||||
|
||||
- If you pass `--confidence medium`, findings with **medium and higher** confidence (medium + high) will be included.
|
||||
- If you pass `--confidence low`, you’ll see **all levels** (low, medium, high).
|
||||
|
||||
|
||||
### Ignore known false positives
|
||||
|
||||
Use `--skip-regex` and `--skip-word` to suppress findings you know are benign. Both flags may be provided multiple times and are tested against the secret value **and** the full match context.
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ rules:
|
|||
(?x)
|
||||
\b
|
||||
(
|
||||
ey[A-Za-z0-9_-]{12,} (?# header )
|
||||
(?:ey|ewogIC)[A-Za-z0-9_-]{12,} (?# header )
|
||||
\.
|
||||
ey[A-Za-z0-9_-]{12,} (?# payload )
|
||||
\.
|
||||
|
|
|
|||
61
src/blob.rs
61
src/blob.rs
|
|
@ -10,16 +10,20 @@ use anyhow::Result;
|
|||
use bstr::{BString, ByteSlice};
|
||||
use gix::ObjectId;
|
||||
use hex;
|
||||
use once_cell::sync::OnceCell;
|
||||
use parking_lot::Mutex;
|
||||
use rustc_hash::FxHashMap;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use sha1::{Digest, Sha1};
|
||||
use smallvec::SmallVec;
|
||||
use std::sync::atomic::{AtomicU64, Ordering};
|
||||
|
||||
use crate::git_commit_metadata::CommitMetadata;
|
||||
// const LARGE_FILE_THRESHOLD: u64 = 512 * 1024; // 512 KB
|
||||
const LARGE_FILE_THRESHOLD: u64 = 0; // always mmap
|
||||
|
||||
static NEXT_ID: AtomicU64 = AtomicU64::new(1);
|
||||
|
||||
/// The data of a blob, either owned (small files) or memory mapped (large files).
|
||||
pub enum BlobData<'a> {
|
||||
/// Small blobs – remains as-is.
|
||||
|
|
@ -75,47 +79,68 @@ pub type BlobAppearanceSet = SmallVec<[BlobAppearance; 1]>;
|
|||
/// A Git blob, storing its SHA-1 id and its contents.
|
||||
|
||||
pub struct Blob<'a> {
|
||||
pub id: BlobId,
|
||||
pub data: BlobData<'a>,
|
||||
id: OnceCell<BlobId>,
|
||||
data: BlobData<'a>,
|
||||
temp_id: u64,
|
||||
}
|
||||
|
||||
impl Blob<'_> {
|
||||
#[inline]
|
||||
|
||||
pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
|
||||
let mut file = File::open(&path)?;
|
||||
let file_size = file.metadata()?.len();
|
||||
let temp_id = NEXT_ID.fetch_add(1, Ordering::Relaxed);
|
||||
|
||||
if file_size > LARGE_FILE_THRESHOLD {
|
||||
// Large files: one mmap, zero extra copies.
|
||||
let mmap = unsafe { memmap2::Mmap::map(&file)? };
|
||||
let id = BlobId::new(mmap.as_ref());
|
||||
Ok(Blob { id, data: BlobData::Mapped(mmap) })
|
||||
Ok(Blob { id: OnceCell::new(), data: BlobData::Mapped(mmap), temp_id })
|
||||
} else {
|
||||
// Small files: reuse the same handle and pre-allocate exact capacity
|
||||
let mut bytes = Vec::with_capacity(file_size as usize);
|
||||
file.read_to_end(&mut bytes)?;
|
||||
let id = BlobId::new(&bytes);
|
||||
Ok(Blob { id, data: BlobData::Owned(bytes) })
|
||||
Ok(Blob { id: OnceCell::new(), data: BlobData::Owned(bytes), temp_id })
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the blob's bytes as a slice.
|
||||
#[inline]
|
||||
pub fn bytes(&self) -> &[u8] {
|
||||
self.data.as_ref()
|
||||
}
|
||||
|
||||
/// Lazily compute and return the blob's SHA-1 `BlobId`.
|
||||
#[inline]
|
||||
pub fn id(&self) -> BlobId {
|
||||
*self.id.get_or_init(|| BlobId::new(self.bytes()))
|
||||
}
|
||||
|
||||
/// Get a reference to the blob's SHA-1 `BlobId`, computing it if necessary.
|
||||
#[inline]
|
||||
pub fn id_ref(&self) -> &BlobId {
|
||||
self.id.get_or_init(|| BlobId::new(self.bytes()))
|
||||
}
|
||||
|
||||
/// Return the temporary identifier assigned on blob creation.
/// The constructors draw this value from the `NEXT_ID` counter, so reading it
/// does not require hashing the blob's contents.
|
||||
#[inline]
|
||||
pub fn temp_id(&self) -> u64 {
|
||||
self.temp_id
|
||||
}
|
||||
|
||||
/// Create a new `Blob` from a vector of bytes.
|
||||
#[inline]
|
||||
pub fn from_bytes(bytes: Vec<u8>) -> Self {
|
||||
let id = BlobId::compute_from_bytes(&bytes);
|
||||
Blob { id, data: BlobData::Owned(bytes) }
|
||||
let temp_id = NEXT_ID.fetch_add(1, Ordering::Relaxed);
|
||||
Blob { id: OnceCell::new(), data: BlobData::Owned(bytes), temp_id }
|
||||
}
|
||||
|
||||
/// Create a new `Blob` with the given id and data.
|
||||
#[inline]
|
||||
pub fn new(id: BlobId, bytes: Vec<u8>) -> Self {
|
||||
Blob { id, data: BlobData::Owned(bytes) }
|
||||
let temp_id = NEXT_ID.fetch_add(1, Ordering::Relaxed);
|
||||
let cell = OnceCell::new();
|
||||
let _ = cell.set(id);
|
||||
Blob { id: cell, data: BlobData::Owned(bytes), temp_id }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
|
|
@ -269,9 +294,15 @@ impl BlobId {
|
|||
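/// Create a new BlobId from the given input. Inputs larger than 128 KiB are hashed from their length plus only the first and last 64 KiB, so the result is not a full-content SHA-1 for such inputs.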
|
||||
#[inline]
|
||||
pub fn new(input: &[u8]) -> Self {
|
||||
const CHUNK: usize = 64 * 1024; // 64KB from start and end
|
||||
let mut hasher = Sha1::new();
|
||||
write!(&mut hasher, "blob {}\0", input.len()).unwrap();
|
||||
hasher.update(input);
|
||||
if input.len() <= CHUNK * 2 {
|
||||
hasher.update(input);
|
||||
} else {
|
||||
hasher.update(&input[..CHUNK]);
|
||||
hasher.update(&input[input.len() - CHUNK..]);
|
||||
}
|
||||
BlobId(hasher.finalize().as_slice().try_into().expect("SHA-1 output size mismatch"))
|
||||
}
|
||||
|
||||
|
|
@ -360,9 +391,6 @@ pub struct BlobMetadata {
|
|||
/// The guessed multimedia type of the blob
|
||||
pub mime_essence: Option<String>,
|
||||
|
||||
/// The guessed charset of the blob
|
||||
pub charset: Option<String>,
|
||||
|
||||
/// The guessed programming language of the blob
|
||||
pub language: Option<String>,
|
||||
}
|
||||
|
|
@ -384,9 +412,4 @@ impl BlobMetadata {
|
|||
pub fn mime_essence(&self) -> Option<&str> {
|
||||
self.mime_essence.as_deref()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn charset(&self) -> Option<&str> {
|
||||
self.charset.as_deref()
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -169,7 +169,7 @@ pub struct InputSpecifierArgs {
|
|||
#[derive(Args, Debug, Clone)]
|
||||
pub struct ContentFilteringArgs {
|
||||
/// Ignore files larger than the given size in MB
|
||||
#[arg(long("max-file-size"), default_value_t = 25.0)]
|
||||
#[arg(long("max-file-size"), default_value_t = 64.0)]
|
||||
pub max_file_size_mb: f64,
|
||||
|
||||
// /// Use custom path-based ignore rules from the given file(s)
|
||||
|
|
|
|||
|
|
@ -96,9 +96,8 @@ impl ContentInspector {
|
|||
#[inline]
|
||||
#[must_use]
|
||||
pub fn guess_charset(&self, bytes: &[u8]) -> Option<String> {
|
||||
String::from_utf8(bytes.to_vec()).ok().map(|_| "UTF-8".to_string())
|
||||
std::str::from_utf8(bytes).ok().map(|_| "UTF-8".to_string())
|
||||
}
|
||||
|
||||
/// Guess programming language with broad coverage using `tokei`.
|
||||
///
|
||||
/// Strategy (no disk I/O):
|
||||
|
|
|
|||
|
|
@ -161,11 +161,8 @@ fn handle_zip_archive_streaming(
|
|||
Ok(CompressedContent::ArchiveFiles(entries_on_disk))
|
||||
}
|
||||
|
||||
fn handle_asar_archive_in_memory(
|
||||
buffer: Vec<u8>,
|
||||
archive_path: &Path,
|
||||
) -> Result<CompressedContent> {
|
||||
match AsarReader::new(&buffer, None) {
|
||||
fn handle_asar_archive_in_memory(buffer: &[u8], archive_path: &Path) -> Result<CompressedContent> {
|
||||
match AsarReader::new(buffer, None) {
|
||||
Ok(reader) => {
|
||||
let mut contents = Vec::new();
|
||||
for (path_in_asar, file) in reader.files() {
|
||||
|
|
@ -200,7 +197,7 @@ fn decompress_once(path: &Path, base_dir: Option<&Path>) -> Result<CompressedCon
|
|||
match ext {
|
||||
"asar" => {
|
||||
let mmap = unsafe { Mmap::map(&file)? };
|
||||
return handle_asar_archive_in_memory(mmap.to_vec(), path);
|
||||
return handle_asar_archive_in_memory(&mmap, path);
|
||||
}
|
||||
"tar" => {
|
||||
if let Some(base) = base_dir {
|
||||
|
|
|
|||
|
|
@ -18,6 +18,7 @@ use crate::{
|
|||
matcher::Match,
|
||||
origin::{Origin, OriginSet},
|
||||
rules::rule::Rule,
|
||||
util::intern,
|
||||
};
|
||||
|
||||
// share with Arc so every blob/origin is materialised once
|
||||
|
|
@ -151,7 +152,7 @@ impl FindingsStore {
|
|||
.captures
|
||||
.get(1)
|
||||
.or_else(|| m.groups.captures.get(0))
|
||||
.map_or("", |c| c.value.as_ref());
|
||||
.map_or("", |c| c.value);
|
||||
|
||||
let origin_kind = match origin.first() {
|
||||
Origin::GitRepo(_) => "git",
|
||||
|
|
@ -160,7 +161,7 @@ impl FindingsStore {
|
|||
};
|
||||
|
||||
let key = xxh3_64(
|
||||
format!("{}|{}|{}", m.rule_text_id.to_uppercase(), origin_kind, snippet)
|
||||
format!("{}|{}|{}", m.rule.id().to_uppercase(), origin_kind, snippet)
|
||||
.as_bytes(),
|
||||
);
|
||||
|
||||
|
|
@ -280,7 +281,7 @@ impl FindingsStore {
|
|||
pub fn get_summary(&self) -> FxHashMap<&'static str, usize> {
|
||||
self.matches.iter().fold(FxHashMap::default(), |mut acc, msg| {
|
||||
let (_, _, m) = &**msg;
|
||||
*acc.entry(m.rule_name).or_insert(0) += 1; // borrow, no alloc
|
||||
*acc.entry(intern(m.rule.name())).or_insert(0) += 1;
|
||||
acc
|
||||
})
|
||||
}
|
||||
|
|
@ -342,13 +343,13 @@ impl FindingsStore {
|
|||
self.matches.iter().map(|msg| {
|
||||
let (_, _, match_item) = &**msg;
|
||||
finding_data::FindingMetadata {
|
||||
rule_name: match_item.rule_name.to_string(),
|
||||
rule_name: match_item.rule.name().to_string(),
|
||||
num_matches: 1,
|
||||
comment: None,
|
||||
visible: match_item.visible,
|
||||
finding_id: match_item.finding_id(),
|
||||
rule_finding_fingerprint: match_item.rule_finding_fingerprint.to_string(),
|
||||
rule_text_id: match_item.rule_text_id.to_string(),
|
||||
rule_finding_fingerprint: match_item.rule.finding_sha1_fingerprint().to_string(),
|
||||
rule_text_id: match_item.rule.id().to_string(),
|
||||
}
|
||||
})
|
||||
}
|
||||
|
|
@ -362,7 +363,7 @@ impl FindingsStore {
|
|||
.iter()
|
||||
.filter(|msg| {
|
||||
let (_, _, match_item) = &***msg;
|
||||
match_item.rule_name == metadata.rule_name
|
||||
match_item.rule.name() == metadata.rule_name
|
||||
})
|
||||
.map(|msg| {
|
||||
let (origin, blob_metadata, match_item) = &**msg;
|
||||
|
|
@ -373,7 +374,7 @@ impl FindingsStore {
|
|||
match_id: MatchIdInt::from_str(&match_item.finding_id())?,
|
||||
match_comment: None,
|
||||
visible: match_item.visible,
|
||||
match_confidence: match_item.rule_confidence,
|
||||
match_confidence: match_item.rule.confidence(),
|
||||
validation_response_body: match_item.validation_response_body.clone(),
|
||||
validation_response_status: match_item.validation_response_status,
|
||||
validation_success: match_item.validation_success,
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
use core::ops::Range;
|
||||
use std::cell::RefCell;
|
||||
|
||||
use schemars::JsonSchema;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
|
@ -87,25 +88,35 @@ impl std::fmt::Display for SourceSpan {
|
|||
}
|
||||
|
||||
/// Records newline byte offsets to map byte offsets to (line, column) positions.
|
||||
pub struct LocationMapping {
|
||||
newline_offsets: Vec<usize>,
|
||||
pub struct LocationMapping<'a> {
|
||||
bytes: &'a [u8],
|
||||
newline_offsets: RefCell<Vec<usize>>,
|
||||
}
|
||||
|
||||
impl LocationMapping {
|
||||
/// Scan once for all `\n` positions.
|
||||
pub fn new(input: &[u8]) -> Self {
|
||||
let newline_offsets =
|
||||
input.iter().enumerate().filter_map(|(i, &b)| (b == b'\n').then_some(i)).collect();
|
||||
LocationMapping { newline_offsets }
|
||||
impl<'a> LocationMapping<'a> {
|
||||
/// Create a new mapping without pre-scanning the entire input.
|
||||
pub fn new(input: &'a [u8]) -> Self {
|
||||
LocationMapping { bytes: input, newline_offsets: RefCell::new(Vec::new()) }
|
||||
}
|
||||
|
||||
/// Map a byte offset to a `SourcePoint`.
|
||||
pub fn get_source_point(&self, offset: usize) -> SourcePoint {
|
||||
let line = match self.newline_offsets.binary_search(&offset) {
|
||||
Ok(idx) => idx + 2, // exact newline -- next line
|
||||
fn ensure_offsets_up_to(&self, offset: usize) {
|
||||
let mut offsets = self.newline_offsets.borrow_mut();
|
||||
let start = offsets.last().map_or(0, |&last| last + 1);
|
||||
if offset < start {
|
||||
return;
|
||||
}
|
||||
let end = offset.min(self.bytes.len());
|
||||
for nl in memchr::memchr_iter(b'\n', &self.bytes[start..end]) {
|
||||
offsets.push(start + nl);
|
||||
}
|
||||
}
|
||||
|
||||
fn source_point_from_offsets(offsets: &[usize], offset: usize) -> SourcePoint {
|
||||
let line = match offsets.binary_search(&offset) {
|
||||
Ok(idx) => idx + 2,
|
||||
Err(idx) => idx + 1,
|
||||
};
|
||||
let column = if let Some(&last) = self.newline_offsets.get(line.saturating_sub(2)) {
|
||||
let column = if let Some(&last) = offsets.get(line.saturating_sub(2)) {
|
||||
offset.saturating_sub(last + 1)
|
||||
} else {
|
||||
offset
|
||||
|
|
@ -113,10 +124,19 @@ impl LocationMapping {
|
|||
SourcePoint { line, column }
|
||||
}
|
||||
|
||||
/// Map a byte offset to a `SourcePoint`.
|
||||
pub fn get_source_point(&self, offset: usize) -> SourcePoint {
|
||||
self.ensure_offsets_up_to(offset);
|
||||
let offsets = self.newline_offsets.borrow();
|
||||
Self::source_point_from_offsets(&offsets, offset)
|
||||
}
|
||||
|
||||
/// Map an `OffsetSpan` to a `SourceSpan` (closed interval).
|
||||
pub fn get_source_span(&self, span: &OffsetSpan) -> SourceSpan {
|
||||
let start = self.get_source_point(span.start);
|
||||
let end = self.get_source_point(span.end.saturating_sub(1));
|
||||
self.ensure_offsets_up_to(span.end.saturating_sub(1));
|
||||
let offsets = self.newline_offsets.borrow();
|
||||
let start = Self::source_point_from_offsets(&offsets, span.start);
|
||||
let end = Self::source_point_from_offsets(&offsets, span.end.saturating_sub(1));
|
||||
SourceSpan { start, end }
|
||||
}
|
||||
}
|
||||
|
|
|
|||
241
src/matcher.rs
241
src/matcher.rs
|
|
@ -10,7 +10,6 @@ use anyhow::Result;
|
|||
use base64::{engine::general_purpose, Engine};
|
||||
use bstr::BString;
|
||||
use http::StatusCode;
|
||||
use lazy_static::lazy_static;
|
||||
use regex::bytes::Regex;
|
||||
use rustc_hash::{FxHashMap, FxHashSet, FxHasher};
|
||||
use schemars::{
|
||||
|
|
@ -27,7 +26,7 @@ use xxhash_rust::xxh3::xxh3_64;
|
|||
use crate::{
|
||||
blob::{Blob, BlobId, BlobIdMap},
|
||||
entropy::calculate_shannon_entropy,
|
||||
location::{Location, LocationMapping, OffsetSpan},
|
||||
location::{Location, LocationMapping, OffsetSpan, SourcePoint, SourceSpan},
|
||||
origin::OriginSet,
|
||||
parser,
|
||||
parser::{Checker, Language},
|
||||
|
|
@ -40,6 +39,10 @@ use crate::{
|
|||
util::{intern, redact_value},
|
||||
};
|
||||
|
||||
const MAX_CHUNK_SIZE: usize = 1 << 30; // 1 GiB per scan segment
|
||||
const CHUNK_OVERLAP: usize = 64 * 1024; // 64 KiB overlap to catch boundary matches
|
||||
const BASE64_SCAN_LIMIT: usize = 64 * 1024 * 1024; // skip expensive Base64 pass on huge blobs
|
||||
|
||||
// -------------------------------------------------------------------------------------------------
|
||||
// RawMatch
|
||||
// -------------------------------------------------------------------------------------------------
|
||||
|
|
@ -255,17 +258,28 @@ impl<'a> Matcher<'a> {
|
|||
self.user_data.raw_matches_scratch.reserve(prev_capacity.max(64));
|
||||
|
||||
self.user_data.input_len = input.len() as u64;
|
||||
// self.vs_scanner.scan(input, |rid, from, to, _flags| {
|
||||
self.scanner_pool.with(|scanner| {
|
||||
scanner.scan(input, |rule_id, from, to, _flags| {
|
||||
self.user_data.raw_matches_scratch.push(RawMatch {
|
||||
rule_id,
|
||||
start_idx: from,
|
||||
end_idx: to,
|
||||
});
|
||||
vectorscan_rs::Scan::Continue
|
||||
})
|
||||
})?;
|
||||
|
||||
let mut offset: usize = 0;
|
||||
while offset < input.len() {
|
||||
let end = (offset + MAX_CHUNK_SIZE).min(input.len());
|
||||
let slice = &input[offset..end];
|
||||
let base = offset as u64;
|
||||
self.scanner_pool.with(|scanner| {
|
||||
scanner.scan(slice, |rule_id, from, to, _flags| {
|
||||
self.user_data.raw_matches_scratch.push(RawMatch {
|
||||
rule_id,
|
||||
start_idx: from + base,
|
||||
end_idx: to + base,
|
||||
});
|
||||
vectorscan_rs::Scan::Continue
|
||||
})
|
||||
})?;
|
||||
|
||||
if end == input.len() {
|
||||
break;
|
||||
}
|
||||
offset = end.saturating_sub(CHUNK_OVERLAP);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
@ -288,17 +302,6 @@ impl<'a> Matcher<'a> {
|
|||
self.local_stats.blobs_scanned += 1;
|
||||
self.local_stats.bytes_scanned += blob.bytes().len() as u64;
|
||||
|
||||
// Check if blob was already seen and respect no_dedup flag
|
||||
if !no_dedup {
|
||||
if let Some(had_matches) = self.seen_blobs.get(&blob.id) {
|
||||
return Ok(if had_matches {
|
||||
ScanResult::SeenWithMatches
|
||||
} else {
|
||||
ScanResult::SeenSansMatches
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Extract filename from origin
|
||||
let filename = origin
|
||||
.first()
|
||||
|
|
@ -313,24 +316,19 @@ impl<'a> Matcher<'a> {
|
|||
// Opportunistically look for standalone Base64 blobs. If neither
|
||||
// the raw scan nor this check yields anything, we can return early
|
||||
// before doing any heavier work.
|
||||
let mut b64_items = if no_base64 { Vec::new() } else { get_base64_strings(blob.bytes()) };
|
||||
let mut b64_items = if no_base64 || blob.len() > BASE64_SCAN_LIMIT {
|
||||
Vec::new()
|
||||
} else {
|
||||
get_base64_strings(blob.bytes())
|
||||
};
|
||||
|
||||
if self.user_data.raw_matches_scratch.is_empty() && b64_items.is_empty() {
|
||||
// Only record in seen_blobs if deduplication is enabled
|
||||
if !no_dedup {
|
||||
return Ok(match self.seen_blobs.insert(blob.id, false) {
|
||||
None => ScanResult::New(Vec::new()),
|
||||
Some(true) => ScanResult::SeenWithMatches,
|
||||
Some(false) => ScanResult::SeenSansMatches,
|
||||
});
|
||||
} else {
|
||||
return Ok(ScanResult::New(Vec::new()));
|
||||
}
|
||||
return Ok(ScanResult::New(Vec::new()));
|
||||
}
|
||||
|
||||
let rules_db = self.rules_db;
|
||||
let mut seen_matches = FxHashSet::default();
|
||||
let mut previous_matches = Vec::new();
|
||||
let mut previous_matches: FxHashMap<usize, Vec<OffsetSpan>> = FxHashMap::default();
|
||||
let tree_sitter_result = if self.user_data.raw_matches_scratch.is_empty() {
|
||||
None
|
||||
} else {
|
||||
|
|
@ -363,27 +361,19 @@ impl<'a> Matcher<'a> {
|
|||
})
|
||||
.collect::<Vec<_>>()
|
||||
});
|
||||
let mut previous_raw_matches: Vec<(usize, OffsetSpan)> = Vec::new();
|
||||
let mut previous_raw_matches: FxHashMap<usize, Vec<OffsetSpan>> = FxHashMap::default();
|
||||
for &RawMatch { rule_id, start_idx, end_idx } in
|
||||
self.user_data.raw_matches_scratch.iter().rev()
|
||||
{
|
||||
let rule_id_usize: usize = rule_id as usize;
|
||||
// let rule = &rules_db.rules[rule_id_usize];
|
||||
let rule = Arc::clone(&rules_db.rules[rule_id_usize]);
|
||||
let re = &rules_db.anchored_regexes[rule_id_usize];
|
||||
let start_idx_usize = start_idx as usize;
|
||||
let end_idx_usize = end_idx as usize;
|
||||
let current_span = OffsetSpan::from_range(start_idx_usize..end_idx_usize);
|
||||
// Skip if fully contained in a previous match
|
||||
if previous_raw_matches.iter().any(|(prev_id, prev_span): &(usize, OffsetSpan)| {
|
||||
*prev_id == rule_id_usize
|
||||
&& (prev_span.fully_contains(¤t_span)
|
||||
|| current_span.fully_contains(prev_span))
|
||||
}) {
|
||||
if !record_match(&mut previous_raw_matches, rule_id_usize, current_span) {
|
||||
continue;
|
||||
}
|
||||
let matching_input_offset_span = OffsetSpan::from_range(start_idx_usize..end_idx_usize);
|
||||
previous_raw_matches.push((rule_id_usize, matching_input_offset_span));
|
||||
filter_match(
|
||||
blob,
|
||||
rule,
|
||||
|
|
@ -472,9 +462,15 @@ impl<'a> Matcher<'a> {
|
|||
}
|
||||
}
|
||||
// Finalize
|
||||
// Only record in seen_blobs if deduplication is enabled
|
||||
if !no_dedup {
|
||||
self.seen_blobs.insert(blob.id, !matches.is_empty());
|
||||
if !no_dedup && !matches.is_empty() {
|
||||
let blob_id = blob.id();
|
||||
if let Some(had_matches) = self.seen_blobs.insert(blob_id, true) {
|
||||
return Ok(if had_matches {
|
||||
ScanResult::SeenWithMatches
|
||||
} else {
|
||||
ScanResult::SeenSansMatches
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// --- opportunistic capacity cap ---------------------------------
|
||||
|
|
@ -501,6 +497,39 @@ fn compute_match_key(content: &[u8], rule_id: &[u8], start: usize, end: usize) -
|
|||
end.hash(&mut hasher);
|
||||
hasher.finish()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn insert_span(spans: &mut Vec<OffsetSpan>, span: OffsetSpan) -> bool {
|
||||
let mut idx = spans.binary_search_by(|s| s.start.cmp(&span.start)).unwrap_or_else(|i| i);
|
||||
if idx > 0 {
|
||||
if spans[idx - 1].fully_contains(&span) {
|
||||
return false;
|
||||
}
|
||||
if span.fully_contains(&spans[idx - 1]) {
|
||||
spans.remove(idx - 1);
|
||||
idx -= 1;
|
||||
}
|
||||
}
|
||||
if idx < spans.len() {
|
||||
if spans[idx].fully_contains(&span) {
|
||||
return false;
|
||||
}
|
||||
if span.fully_contains(&spans[idx]) {
|
||||
spans.remove(idx);
|
||||
}
|
||||
}
|
||||
spans.insert(idx, span);
|
||||
true
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn record_match(
|
||||
map: &mut FxHashMap<usize, Vec<OffsetSpan>>,
|
||||
rule_id: usize,
|
||||
span: OffsetSpan,
|
||||
) -> bool {
|
||||
insert_span(map.entry(rule_id).or_default(), span)
|
||||
}
|
||||
fn filter_match<'b>(
|
||||
blob: &'b Blob,
|
||||
// rule: &'b Rule,
|
||||
|
|
@ -509,7 +538,7 @@ fn filter_match<'b>(
|
|||
start: usize,
|
||||
end: usize,
|
||||
matches: &mut Vec<BlobMatch<'b>>,
|
||||
previous_matches: &mut Vec<(usize, OffsetSpan)>,
|
||||
previous_matches: &mut FxHashMap<usize, Vec<OffsetSpan>>,
|
||||
rule_id: usize,
|
||||
seen_matches: &mut FxHashSet<u64>,
|
||||
_origin: &OriginSet,
|
||||
|
|
@ -558,11 +587,7 @@ fn filter_match<'b>(
|
|||
if !seen_matches.insert(match_key) {
|
||||
continue;
|
||||
}
|
||||
if previous_matches.iter().any(|(prev_rule_id, prev_loc)| {
|
||||
*prev_rule_id == rule_id
|
||||
&& (prev_loc.fully_contains(&matching_input_offset_span)
|
||||
|| matching_input_offset_span.fully_contains(prev_loc))
|
||||
}) {
|
||||
if !record_match(previous_matches, rule_id, matching_input_offset_span) {
|
||||
continue;
|
||||
}
|
||||
let only_matching_input =
|
||||
|
|
@ -571,7 +596,7 @@ fn filter_match<'b>(
|
|||
SerializableCaptures::from_captures(&captures, byte_slice.as_ref(), re, redact);
|
||||
matches.push(BlobMatch {
|
||||
rule: Arc::clone(&rule),
|
||||
blob_id: &blob.id,
|
||||
blob_id: blob.id_ref(),
|
||||
matching_input: only_matching_input,
|
||||
matching_input_offset_span,
|
||||
captures: groups,
|
||||
|
|
@ -581,7 +606,6 @@ fn filter_match<'b>(
|
|||
calculated_entropy,
|
||||
is_base64,
|
||||
});
|
||||
previous_matches.push((rule_id, matching_input_offset_span));
|
||||
}
|
||||
if let Some(t) = timer.take() {
|
||||
let new_count = (matches.len() - initial_len) as u64;
|
||||
|
|
@ -682,20 +706,20 @@ impl JsonSchema for Groups {
|
|||
// pub end: usize, // End position of the match
|
||||
// pub value: String, // The actual captured value
|
||||
// }
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
|
||||
#[derive(Debug, Clone, Serialize, JsonSchema)]
|
||||
pub struct SerializableCapture {
|
||||
pub name: Option<String>,
|
||||
pub match_number: i32,
|
||||
pub start: usize,
|
||||
pub end: usize,
|
||||
// Instead of storing an owned String, store a borrowed or interned value.
|
||||
// Here we use Cow to allow either borrowing or owning as needed.
|
||||
pub value: std::borrow::Cow<'static, str>,
|
||||
/// Interned value of the capture.
|
||||
pub value: &'static str,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
|
||||
#[derive(Debug, Clone, Serialize, JsonSchema)]
|
||||
pub struct SerializableCaptures {
|
||||
pub captures: Vec<SerializableCapture>, // All captures (named and unnamed)
|
||||
#[schemars(with = "Vec<SerializableCapture>")]
|
||||
pub captures: SmallVec<[SerializableCapture; 2]>, // All captures (named and unnamed)
|
||||
}
|
||||
impl SerializableCaptures {
|
||||
pub fn from_captures(
|
||||
|
|
@ -704,7 +728,7 @@ impl SerializableCaptures {
|
|||
re: &Regex,
|
||||
redact: bool,
|
||||
) -> Self {
|
||||
let mut serialized_captures = Vec::new();
|
||||
let mut serialized_captures: SmallVec<[SerializableCapture; 2]> = SmallVec::new();
|
||||
// Process named captures
|
||||
for name in re.capture_names().flatten() {
|
||||
if let Some(capture) = captures.name(name) {
|
||||
|
|
@ -718,7 +742,7 @@ impl SerializableCaptures {
|
|||
match_number: -1,
|
||||
start: capture.start(),
|
||||
end: capture.end(),
|
||||
value: value.into(),
|
||||
value: intern(&value),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
@ -735,7 +759,7 @@ impl SerializableCaptures {
|
|||
match_number: i32::try_from(i).unwrap_or(0),
|
||||
start: capture.start(),
|
||||
end: capture.end(),
|
||||
value: value.into(),
|
||||
value: intern(&value),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
@ -764,16 +788,9 @@ pub struct Match {
|
|||
pub finding_fingerprint: u64,
|
||||
|
||||
/// The rule that produced this match
|
||||
pub rule_finding_fingerprint: &'static str,
|
||||
|
||||
/// The text identifier of the rule that produced this match
|
||||
pub rule_text_id: &'static str,
|
||||
|
||||
/// The name of the rule that produced this match
|
||||
pub rule_name: &'static str,
|
||||
|
||||
/// The confidence property of the rule that produced this match
|
||||
pub rule_confidence: crate::rules::rule::Confidence,
|
||||
#[serde(skip_serializing)]
|
||||
#[schemars(skip)]
|
||||
pub rule: Arc<Rule>,
|
||||
|
||||
/// Validation Body
|
||||
pub validation_response_body: String,
|
||||
|
|
@ -794,7 +811,7 @@ pub struct Match {
|
|||
impl Match {
|
||||
#[inline]
|
||||
pub fn convert_owned_blobmatch_to_match<'a>(
|
||||
loc_mapping: &'a LocationMapping,
|
||||
loc_mapping: Option<&'a LocationMapping<'a>>,
|
||||
owned_blob_match: &'a OwnedBlobMatch,
|
||||
origin_type: &'a str,
|
||||
) -> Self {
|
||||
|
|
@ -812,9 +829,11 @@ impl Match {
|
|||
// The fingerprint will be based on the content of the secret.
|
||||
let finding_value_for_fp = std::str::from_utf8(matching_finding_bytes).unwrap_or("");
|
||||
|
||||
let source_span = loc_mapping.get_source_span(&offset_span);
|
||||
let rule_finding_fingerprint = owned_blob_match.rule.finding_sha1_fingerprint().to_owned();
|
||||
|
||||
let source_span =
|
||||
loc_mapping.map(|lm| lm.get_source_span(&offset_span)).unwrap_or(SourceSpan {
|
||||
start: SourcePoint { line: 0, column: 0 },
|
||||
end: SourcePoint { line: 0, column: 0 },
|
||||
});
|
||||
let offset_start: u64 =
|
||||
owned_blob_match.matching_input_offset_span.start.try_into().unwrap();
|
||||
let offset_end: u64 = owned_blob_match.matching_input_offset_span.end.try_into().unwrap();
|
||||
|
|
@ -828,10 +847,7 @@ impl Match {
|
|||
|
||||
// matching_snippet
|
||||
Match {
|
||||
rule_finding_fingerprint: intern(&rule_finding_fingerprint),
|
||||
rule_name: intern(owned_blob_match.rule.name()),
|
||||
rule_confidence: owned_blob_match.rule.confidence(),
|
||||
rule_text_id: intern(owned_blob_match.rule.id()),
|
||||
rule: owned_blob_match.rule.clone(),
|
||||
visible: owned_blob_match.rule.visible().to_owned(),
|
||||
location: Location { offset_span, source_span: source_span.clone() },
|
||||
groups: owned_blob_match.captures.clone(),
|
||||
|
|
@ -852,7 +868,7 @@ impl Match {
|
|||
|
||||
pub fn finding_id(&self) -> String {
|
||||
let mut h = Sha1::new();
|
||||
write!(&mut h, "{}\0", self.rule_finding_fingerprint)
|
||||
write!(&mut h, "{}\0", self.rule.finding_sha1_fingerprint())
|
||||
.expect("should be able to write to memory");
|
||||
serde_json::to_writer(&mut h, &self.groups)
|
||||
.expect("should be able to serialize groups as JSON");
|
||||
|
|
@ -892,33 +908,48 @@ pub struct DecodedData {
|
|||
pub pos_start: usize,
|
||||
pub pos_end: usize,
|
||||
}
|
||||
/// Returns `true` when `b` belongs to the standard Base64 alphabet
/// (`A`-`Z`, `a`-`z`, `0`-`9`, `+` and `/`).
///
/// The padding byte `=` is deliberately not part of this set; callers that
/// care about padding must handle it separately.
#[inline]
fn is_base64_byte(b: u8) -> bool {
    b.is_ascii_alphanumeric() || matches!(b, b'+' | b'/')
}
|
||||
|
||||
pub fn get_base64_strings(input: &[u8]) -> Vec<DecodedData> {
|
||||
lazy_static! {
|
||||
// Require a reasonably long run of valid Base64 characters to reduce
|
||||
// noise. 32 bytes corresponds to 24 decoded bytes.
|
||||
static ref RE_BASE64: Regex = Regex::new(r"[A-Za-z0-9+/]{32,}={0,2}").unwrap();
|
||||
}
|
||||
let mut results = Vec::new();
|
||||
for m in RE_BASE64.find_iter(input) {
|
||||
let base64_string = m.as_bytes();
|
||||
// Skip candidates whose length isn't a multiple of four – they cannot
|
||||
// be valid Base64.
|
||||
if base64_string.len() % 4 != 0 {
|
||||
continue;
|
||||
let mut i = 0;
|
||||
while i < input.len() {
|
||||
while i < input.len() && !is_base64_byte(input[i]) {
|
||||
i += 1;
|
||||
}
|
||||
if let Ok(decoded) = general_purpose::STANDARD.decode(base64_string) {
|
||||
if let Ok(decoded_str) = std::str::from_utf8(&decoded) {
|
||||
if decoded_str.is_ascii() {
|
||||
results.push(DecodedData {
|
||||
original: String::from_utf8_lossy(base64_string).into_owned(),
|
||||
decoded: decoded_str.to_string(),
|
||||
pos_start: m.start(),
|
||||
pos_end: m.end(),
|
||||
});
|
||||
let start = i;
|
||||
while i < input.len() && is_base64_byte(input[i]) {
|
||||
i += 1;
|
||||
}
|
||||
|
||||
let mut eq_count = 0;
|
||||
while i < input.len() && input[i] == b'=' && eq_count < 2 {
|
||||
i += 1;
|
||||
eq_count += 1;
|
||||
}
|
||||
let end = i;
|
||||
|
||||
let len = end - start;
|
||||
if len >= 32 && len % 4 == 0 {
|
||||
let base64_slice = &input[start..end];
|
||||
if let Ok(decoded) = general_purpose::STANDARD.decode(base64_slice) {
|
||||
if let Ok(decoded_str) = std::str::from_utf8(&decoded) {
|
||||
if decoded_str.is_ascii() {
|
||||
results.push(DecodedData {
|
||||
original: String::from_utf8_lossy(base64_slice).into_owned(),
|
||||
decoded: decoded_str.to_string(),
|
||||
pos_start: start,
|
||||
pos_end: end,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
results
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -216,7 +216,7 @@ impl DetailsReporter {
|
|||
m: match_item.clone(),
|
||||
comment: None,
|
||||
visible: match_item.visible,
|
||||
match_confidence: match_item.rule_confidence,
|
||||
match_confidence: match_item.rule.confidence(),
|
||||
validation_response_body: match_item.validation_response_body.clone(),
|
||||
validation_response_status: match_item.validation_response_status,
|
||||
validation_success: match_item.validation_success,
|
||||
|
|
@ -366,13 +366,13 @@ impl DetailsReporter {
|
|||
|
||||
FindingReporterRecord {
|
||||
rule: RuleMetadata {
|
||||
name: rm.m.rule_name.to_string(),
|
||||
id: rm.m.rule_text_id.to_string(),
|
||||
name: rm.m.rule.name().to_string(),
|
||||
id: rm.m.rule.id().to_string(),
|
||||
},
|
||||
finding: FindingRecordData {
|
||||
snippet,
|
||||
fingerprint: rm.m.finding_fingerprint.to_string(),
|
||||
confidence: rm.match_confidence.to_string(),
|
||||
confidence: rm.m.rule.confidence().to_string(),
|
||||
entropy: format!("{:.2}", rm.m.calculated_entropy),
|
||||
validation: ValidationInfo { status: validation_status, response: response_body },
|
||||
language: rm
|
||||
|
|
|
|||
|
|
@ -35,6 +35,7 @@ mod tests {
|
|||
use crate::cli::commands::github::GitHistoryMode;
|
||||
use crate::cli::commands::rules::RuleSpecifierArgs;
|
||||
use crate::matcher::{SerializableCapture, SerializableCaptures};
|
||||
use crate::rules::rule::{Confidence, Rule, RuleSyntax};
|
||||
use crate::util::intern;
|
||||
use crate::{
|
||||
blob::BlobId,
|
||||
|
|
@ -49,6 +50,7 @@ mod tests {
|
|||
origin::Origin,
|
||||
reporter::styles::Styles,
|
||||
};
|
||||
use smallvec::smallvec;
|
||||
use std::{
|
||||
io::Cursor,
|
||||
path::PathBuf,
|
||||
|
|
@ -132,12 +134,21 @@ mod tests {
|
|||
}
|
||||
}
|
||||
|
||||
fn create_mock_match(
|
||||
rule_name: &str,
|
||||
rule_text_id: &str,
|
||||
rule_finding_fingerprint: &str,
|
||||
validation_success: bool,
|
||||
) -> Match {
|
||||
fn create_mock_match(rule_name: &str, rule_text_id: &str, validation_success: bool) -> Match {
|
||||
let syntax = RuleSyntax {
|
||||
name: rule_name.to_string(),
|
||||
id: rule_text_id.to_string(),
|
||||
pattern: "dummy".to_string(),
|
||||
min_entropy: 0.0,
|
||||
confidence: Confidence::Medium,
|
||||
visible: true,
|
||||
examples: vec![],
|
||||
negative_examples: vec![],
|
||||
references: vec![],
|
||||
validation: None,
|
||||
depends_on_rule: vec![],
|
||||
};
|
||||
let rule = Arc::new(Rule::new(syntax));
|
||||
Match {
|
||||
location: Location {
|
||||
offset_span: OffsetSpan { start: 10, end: 20 },
|
||||
|
|
@ -147,20 +158,17 @@ mod tests {
|
|||
},
|
||||
},
|
||||
groups: SerializableCaptures {
|
||||
captures: vec![SerializableCapture {
|
||||
captures: smallvec![SerializableCapture {
|
||||
name: Some("token".to_string()),
|
||||
match_number: 1,
|
||||
start: 10,
|
||||
end: 20,
|
||||
value: "mock_token".into(),
|
||||
value: intern("mock_token"),
|
||||
}],
|
||||
},
|
||||
blob_id: BlobId::new(b"mock_blob"),
|
||||
finding_fingerprint: 0123,
|
||||
rule_finding_fingerprint: intern(rule_finding_fingerprint),
|
||||
rule_text_id: intern(rule_text_id),
|
||||
rule_name: intern(rule_name),
|
||||
rule_confidence: Confidence::Medium,
|
||||
rule,
|
||||
validation_response_body: "validation response".to_string(),
|
||||
validation_response_status: 200,
|
||||
validation_success,
|
||||
|
|
@ -177,7 +185,6 @@ mod tests {
|
|||
id: BlobId::new(b"mock_blob"),
|
||||
num_bytes: 1024,
|
||||
mime_essence: Some("text/plain".to_string()),
|
||||
charset: Some("UTF-8".to_string()),
|
||||
language: Some("Rust".to_string()),
|
||||
};
|
||||
let dedup = true;
|
||||
|
|
@ -204,15 +211,13 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn test_json_format() -> Result<()> {
|
||||
let mock_match =
|
||||
create_mock_match("MockRule", "mock_rule_1", "mock_finding_fingerprint", true);
|
||||
let mock_match = create_mock_match("MockRule", "mock_rule_1", true);
|
||||
let matches = vec![ReportMatch {
|
||||
origin: OriginSet::new(Origin::from_file(PathBuf::from("/mock/path/file.rs")), vec![]),
|
||||
blob_metadata: BlobMetadata {
|
||||
id: BlobId::new(b"mock_blob"),
|
||||
num_bytes: 1024,
|
||||
mime_essence: Some("text/plain".to_string()),
|
||||
charset: Some("UTF-8".to_string()),
|
||||
language: Some("Rust".to_string()),
|
||||
},
|
||||
m: mock_match,
|
||||
|
|
@ -238,12 +243,7 @@ mod tests {
|
|||
fn test_validation_status_in_json() -> Result<()> {
|
||||
let test_cases = vec![(true, "Active Credential"), (false, "Inactive Credential")];
|
||||
for (validation_success, expected_status) in test_cases {
|
||||
let mock_match = create_mock_match(
|
||||
"MockRule",
|
||||
"mock_rule_1",
|
||||
"mock_finding_fingerprint",
|
||||
validation_success,
|
||||
);
|
||||
let mock_match = create_mock_match("MockRule", "mock_rule_1", validation_success);
|
||||
let matches = vec![ReportMatch {
|
||||
origin: OriginSet::new(
|
||||
Origin::from_file(PathBuf::from("/mock/path/file.rs")),
|
||||
|
|
@ -253,7 +253,6 @@ mod tests {
|
|||
id: BlobId::new(b"mock_blob"),
|
||||
num_bytes: 1024,
|
||||
mime_essence: Some("text/plain".to_string()),
|
||||
charset: Some("UTF-8".to_string()),
|
||||
language: Some("Rust".to_string()),
|
||||
},
|
||||
m: mock_match,
|
||||
|
|
|
|||
|
|
@ -175,9 +175,11 @@ impl Docker {
|
|||
|
||||
pb.set_length(layer_paths.len() as u64);
|
||||
for p in layer_paths {
|
||||
let mut data = Vec::new();
|
||||
File::open(&p)?.read_to_end(&mut data)?;
|
||||
let digest = format!("{:x}", Sha256::digest(&data));
|
||||
let mut file = File::open(&p)?;
|
||||
let mut hasher = Sha256::new();
|
||||
std::io::copy(&mut file, &mut hasher)?;
|
||||
let digest = format!("{:x}", hasher.finalize());
|
||||
|
||||
let new_path = out_dir.join(format!("layer_{digest}.tar"));
|
||||
std::fs::rename(&p, &new_path)?;
|
||||
// extract layer contents so inner filenames appear in scan results
|
||||
|
|
|
|||
|
|
@ -172,10 +172,15 @@ pub fn enumerate_filesystem_inputs(
|
|||
};
|
||||
if should_skip {
|
||||
progress.suspend(|| {
|
||||
let path = origin
|
||||
.first()
|
||||
.blob_path()
|
||||
.map(|p| p.display().to_string())
|
||||
.unwrap_or_else(|| blob.temp_id().to_string());
|
||||
if is_archive {
|
||||
debug!("Skipping archive: {}", blob.id);
|
||||
debug!("Skipping archive: {path}");
|
||||
} else {
|
||||
debug!("Skipping binary blob: {}", blob.id);
|
||||
debug!("Skipping binary blob: {path}");
|
||||
}
|
||||
});
|
||||
return Ok(());
|
||||
|
|
|
|||
|
|
@ -12,6 +12,8 @@ use crate::{
|
|||
Path,
|
||||
};
|
||||
|
||||
const LOCATION_LIMIT_BYTES: usize = 256 * 1024 * 1024;
|
||||
|
||||
/// A matcher along with parameters that remain constant during a single
|
||||
/// `scan` run
|
||||
pub struct BlobProcessor<'a> {
|
||||
|
|
@ -27,8 +29,7 @@ impl<'a> BlobProcessor<'a> {
|
|||
redact: bool,
|
||||
no_base64: bool,
|
||||
) -> Result<Option<DatastoreMessage>> {
|
||||
let blob_id = blob.id.hex();
|
||||
let _span = debug_span!("matcher", blob_id).entered();
|
||||
let _span = debug_span!("matcher", temp_id = blob.temp_id()).entered();
|
||||
let t1 = Instant::now();
|
||||
let res = self.matcher.scan_blob(&blob, &origin, None, redact, no_dedup, no_base64)?;
|
||||
let scan_us = t1.elapsed().as_micros();
|
||||
|
|
@ -43,10 +44,9 @@ impl<'a> BlobProcessor<'a> {
|
|||
ScanResult::SeenWithMatches => {
|
||||
trace!("({scan_us}us) blob already scanned with matches");
|
||||
let metadata = BlobMetadata {
|
||||
id: blob.id,
|
||||
id: blob.id(),
|
||||
num_bytes: blob.len(),
|
||||
mime_essence: None,
|
||||
charset: None,
|
||||
language: None,
|
||||
};
|
||||
Ok(Some((origin, metadata, Vec::new())))
|
||||
|
|
@ -63,10 +63,9 @@ impl<'a> BlobProcessor<'a> {
|
|||
}
|
||||
let md = MetadataResult::from_blob_and_origin(&blob, &origin);
|
||||
let metadata = BlobMetadata {
|
||||
id: blob.id,
|
||||
id: blob.id(),
|
||||
num_bytes: blob.len(),
|
||||
mime_essence: md.mime_essence,
|
||||
charset: md.charset,
|
||||
language: md.language,
|
||||
};
|
||||
|
||||
|
|
@ -84,12 +83,17 @@ impl<'a> BlobProcessor<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
let loc_mapping = LocationMapping::new(&blob.bytes());
|
||||
let bytes = blob.bytes();
|
||||
let loc_mapping = if bytes.len() <= LOCATION_LIMIT_BYTES {
|
||||
Some(LocationMapping::new(bytes))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let converted_matches: Vec<(Option<f64>, Match)> = matches
|
||||
.into_iter()
|
||||
.map(|m| {
|
||||
let converted_match = Match::convert_owned_blobmatch_to_match(
|
||||
&loc_mapping,
|
||||
loc_mapping.as_ref(),
|
||||
&OwnedBlobMatch::from_blob_match(m),
|
||||
origin_type,
|
||||
);
|
||||
|
|
@ -106,7 +110,6 @@ impl<'a> BlobProcessor<'a> {
|
|||
/// Content metadata guessed for a blob by `MetadataResult::from_blob_and_origin`.
struct MetadataResult {
    /// MIME essence detected from the blob's bytes.
    mime_essence: Option<String>,
    /// Language guessed from the blob's path and bytes; `None` when the blob
    /// has no path or no language could be guessed.
    language: Option<String>,
}
|
||||
impl MetadataResult {
|
||||
fn from_blob_and_origin(blob: &Blob, origin: &OriginSet) -> MetadataResult {
|
||||
|
|
@ -115,7 +118,6 @@ impl MetadataResult {
|
|||
let mime_essence = Some(tree_magic_mini::from_u8(bytes).to_string());
|
||||
let inspector = ContentInspector::default();
|
||||
let language = blob_path.and_then(|p| inspector.guess_language(p, bytes));
|
||||
let charset = inspector.guess_charset(bytes);
|
||||
MetadataResult { mime_essence, language, charset }
|
||||
MetadataResult { mime_essence, language }
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ use futures::{stream, StreamExt};
|
|||
use indicatif::{ProgressBar, ProgressStyle};
|
||||
use liquid::Parser;
|
||||
use reqwest::{Client, StatusCode};
|
||||
use rustc_hash::{FxHashMap, FxHashSet};
|
||||
use rustc_hash::FxHashMap;
|
||||
use tokio::{sync::Notify, time::timeout};
|
||||
|
||||
use crate::{
|
||||
|
|
@ -21,7 +21,6 @@ use crate::{
|
|||
findings_store::{FindingsStore, FindingsStoreMessage},
|
||||
location::OffsetSpan,
|
||||
matcher::{Match, OwnedBlobMatch},
|
||||
rules::rule,
|
||||
validation::{collect_variables_and_dependencies, validate_single_match, CachedResponse},
|
||||
};
|
||||
|
||||
|
|
@ -40,7 +39,7 @@ pub async fn run_secret_validation(
|
|||
let fail_count = Arc::new(AtomicUsize::new(0));
|
||||
|
||||
// ── 2. Fetch rules + matches ────────────────────────────────────────────
|
||||
let (all_rules, all_matches_by_blob) = {
|
||||
let (_all_rules, all_matches_by_blob) = {
|
||||
let ds = datastore.lock().unwrap();
|
||||
let rules = ds.get_rules()?;
|
||||
let mut map: FxHashMap<BlobId, Vec<Arc<FindingsStoreMessage>>> = FxHashMap::default();
|
||||
|
|
@ -51,16 +50,10 @@ pub async fn run_secret_validation(
|
|||
};
|
||||
|
||||
// ── 3. Partition blobs ──────────────────────────────────────────────────
|
||||
let rules_with_deps: FxHashSet<&str> = all_rules
|
||||
.iter()
|
||||
.filter(|r| !r.syntax().depends_on_rule.is_empty())
|
||||
.map(|r| r.id())
|
||||
.collect();
|
||||
|
||||
let mut simple_matches = Vec::new();
|
||||
let mut dependent_blobs = FxHashMap::default(); // blob_id -- Vec<Arc<…>>
|
||||
for (blob_id, matches) in all_matches_by_blob {
|
||||
if matches.iter().any(|m| rules_with_deps.contains(m.2.rule_text_id)) {
|
||||
if matches.iter().any(|m| !m.2.rule.syntax().depends_on_rule.is_empty()) {
|
||||
dependent_blobs.insert(blob_id, matches);
|
||||
} else {
|
||||
simple_matches.extend(matches);
|
||||
|
|
@ -80,11 +73,8 @@ pub async fn run_secret_validation(
|
|||
.captures
|
||||
.get(1)
|
||||
.or_else(|| arc_msg.2.groups.captures.get(0))
|
||||
.map_or("", |c| c.value.as_ref());
|
||||
groups
|
||||
.entry(format!("{}|{}", arc_msg.2.rule_text_id, secret))
|
||||
.or_default()
|
||||
.push(arc_msg);
|
||||
.map_or("", |c| c.value);
|
||||
groups.entry(format!("{}|{}", arc_msg.2.rule.id(), secret)).or_default().push(arc_msg);
|
||||
}
|
||||
|
||||
let validation_results = DashMap::<String, CachedResponse>::new();
|
||||
|
|
@ -109,7 +99,6 @@ pub async fn run_secret_validation(
|
|||
let client = client.clone();
|
||||
let cache_glob = cache.clone();
|
||||
let val_res = &validation_results;
|
||||
let rules = &all_rules;
|
||||
let success = success_count.clone();
|
||||
let fail = fail_count.clone();
|
||||
// *** FIX: Clone the progress bar for each concurrent task ***
|
||||
|
|
@ -122,8 +111,8 @@ pub async fn run_secret_validation(
|
|||
.captures
|
||||
.get(1)
|
||||
.or_else(|| rep_arc.2.groups.captures.get(0))
|
||||
.map_or("", |c| c.value.as_ref());
|
||||
let key = format!("{}|{}", rep_arc.2.rule_text_id, secret);
|
||||
.map_or("", |c| c.value);
|
||||
let key = format!("{}|{}", rep_arc.2.rule.id(), secret);
|
||||
|
||||
match val_res.entry(key.clone()) {
|
||||
dashmap::mapref::entry::Entry::Occupied(_) => return,
|
||||
|
|
@ -138,8 +127,10 @@ pub async fn run_secret_validation(
|
|||
}
|
||||
}
|
||||
|
||||
let rule = find_rule_for_match(rules, rep_arc.2.rule_text_id).unwrap();
|
||||
let mut om = OwnedBlobMatch::convert_match_to_owned_blobmatch(&rep_arc.2, rule);
|
||||
let mut om = OwnedBlobMatch::convert_match_to_owned_blobmatch(
|
||||
&rep_arc.2,
|
||||
rep_arc.2.rule.clone(),
|
||||
);
|
||||
|
||||
validate_single(
|
||||
&mut om,
|
||||
|
|
@ -211,7 +202,6 @@ pub async fn run_secret_validation(
|
|||
|
||||
let val_cache = Arc::new(DashMap::<String, CachedResponse>::new());
|
||||
let in_flight = Arc::new(DashMap::<String, ()>::new());
|
||||
let rules_ref = Arc::new(all_rules.clone());
|
||||
|
||||
for chunk in blob_ids.chunks(chunk_size) {
|
||||
let tasks: Vec<_> = chunk
|
||||
|
|
@ -225,15 +215,15 @@ pub async fn run_secret_validation(
|
|||
let success = success_count.clone();
|
||||
let fail = fail_count.clone();
|
||||
let cache_glob = cache.clone();
|
||||
let rules = rules_ref.clone();
|
||||
|
||||
async move {
|
||||
let owned = matches_for_blob
|
||||
.iter()
|
||||
.map(|arc_msg| {
|
||||
let rule = find_rule_for_match(&rules, arc_msg.2.rule_text_id)
|
||||
.expect("rule");
|
||||
OwnedBlobMatch::convert_match_to_owned_blobmatch(&arc_msg.2, rule)
|
||||
OwnedBlobMatch::convert_match_to_owned_blobmatch(
|
||||
&arc_msg.2,
|
||||
arc_msg.2.rule.clone(),
|
||||
)
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
|
|
@ -338,21 +328,6 @@ pub async fn run_secret_validation(
|
|||
Ok(())
|
||||
}
|
||||
|
||||
/// Returns `Some(Arc<Rule>)` if a matching rule is found; otherwise returns `None`.
|
||||
/// Callers can decide how to handle the `None` case (e.g., skip processing).
|
||||
fn find_rule_for_match(
|
||||
all_rules: &[Arc<rule::Rule>],
|
||||
rule_text_id: &str,
|
||||
) -> Option<Arc<rule::Rule>> {
|
||||
match all_rules.iter().find(|r| r.syntax().id == rule_text_id).cloned() {
|
||||
Some(rule) => Some(rule),
|
||||
None => {
|
||||
eprintln!("Warning: no rule found with id '{}'. Skipping.", rule_text_id);
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------
|
||||
// The core validation logic, used in an async pipeline
|
||||
// ---------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -129,16 +129,29 @@ pub fn check_for_update(global_args: &GlobalArgs, base_url: Option<&str>) -> Opt
|
|||
.apply_to(&format!("Updated to version {}", status.version()))
|
||||
),
|
||||
Err(e) => match e {
|
||||
UpdError::Io(ref io_err) if io_err.kind() == ErrorKind::PermissionDenied => {
|
||||
warn!(
|
||||
"{}",
|
||||
styles.style_finding_active_heading.apply_to(
|
||||
"Cannot replace the current binary - permission denied.\n\
|
||||
If you installed via a package manager, run its upgrade command.\n\
|
||||
Otherwise reinstall to a user-writable directory or re-run with sudo."
|
||||
)
|
||||
);
|
||||
}
|
||||
UpdError::Io(ref io_err) => match io_err.kind() {
|
||||
ErrorKind::PermissionDenied => {
|
||||
warn!(
|
||||
"{}",
|
||||
styles.style_finding_active_heading.apply_to(
|
||||
"Cannot replace the current binary - permission denied.\n\
|
||||
If you installed via a package manager, run its upgrade command.\n\
|
||||
Otherwise reinstall to a user-writable directory or re-run with sudo."
|
||||
)
|
||||
);
|
||||
}
|
||||
ErrorKind::NotFound => {
|
||||
warn!(
|
||||
"{}",
|
||||
styles.style_finding_active_heading.apply_to(
|
||||
"Cannot replace the current binary - file not found.\n\
|
||||
If you installed via a package manager, run its upgrade command.\n\
|
||||
Otherwise reinstall to a user-writable directory."
|
||||
)
|
||||
);
|
||||
}
|
||||
_ => error!("Failed to update: {e}"),
|
||||
},
|
||||
_ => error!("Failed to update: {e}"),
|
||||
},
|
||||
}
|
||||
|
|
|
|||
|
|
@ -910,6 +910,7 @@ mod tests {
|
|||
use crossbeam_skiplist::SkipMap;
|
||||
use http::StatusCode;
|
||||
use rustc_hash::FxHashMap;
|
||||
use smallvec::smallvec;
|
||||
|
||||
use crate::{
|
||||
blob::BlobId,
|
||||
|
|
@ -920,6 +921,7 @@ mod tests {
|
|||
rule::{Confidence, Rule},
|
||||
Rules,
|
||||
},
|
||||
util::intern,
|
||||
validation::{validate_single_match, Cache},
|
||||
};
|
||||
#[tokio::test]
|
||||
|
|
@ -1016,12 +1018,12 @@ rules:
|
|||
// matching_input: token.as_bytes().to_vec(),
|
||||
matching_input_offset_span: OffsetSpan { start: 0, end: token.len() },
|
||||
captures: SerializableCaptures {
|
||||
captures: vec![SerializableCapture {
|
||||
captures: smallvec![SerializableCapture {
|
||||
name: Some("TOKEN".to_string()),
|
||||
match_number: -1,
|
||||
start: 0,
|
||||
end: token.len(),
|
||||
value: token.into(),
|
||||
value: intern(token),
|
||||
}],
|
||||
},
|
||||
validation_response_body: String::new(),
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -22,7 +22,7 @@ pub fn process_captures(captures: &SerializableCaptures) -> Vec<(String, String,
|
|||
.map(|cap| {
|
||||
let name =
|
||||
cap.name.as_ref().map(|n| n.to_uppercase()).unwrap_or_else(|| "TOKEN".to_string());
|
||||
(name, cap.value.clone().into_owned(), cap.start, cap.end)
|
||||
(name, cap.value.to_string(), cap.start, cap.end)
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
|
@ -76,16 +76,17 @@ mod tests {
|
|||
use super::*;
|
||||
use crate::matcher::{SerializableCapture, SerializableCaptures};
|
||||
use pretty_assertions::assert_eq;
|
||||
use smallvec::smallvec;
|
||||
|
||||
#[test]
|
||||
fn single_unnamed_capture_is_returned() {
|
||||
let captures = SerializableCaptures {
|
||||
captures: vec![SerializableCapture {
|
||||
captures: smallvec![SerializableCapture {
|
||||
name: None,
|
||||
match_number: 0,
|
||||
start: 1,
|
||||
end: 4,
|
||||
value: "abc".into(),
|
||||
value: "abc",
|
||||
}],
|
||||
};
|
||||
let result = process_captures(&captures);
|
||||
|
|
@ -95,20 +96,20 @@ mod tests {
|
|||
#[test]
|
||||
fn skips_whole_match_when_multiple() {
|
||||
let captures = SerializableCaptures {
|
||||
captures: vec![
|
||||
captures: smallvec![
|
||||
SerializableCapture {
|
||||
name: None,
|
||||
match_number: 0,
|
||||
start: 0,
|
||||
end: 5,
|
||||
value: "abcde".into(),
|
||||
value: "abcde",
|
||||
},
|
||||
SerializableCapture {
|
||||
name: Some("foo".to_string()),
|
||||
match_number: -1,
|
||||
start: 1,
|
||||
end: 4,
|
||||
value: "bcd".into(),
|
||||
value: "bcd",
|
||||
},
|
||||
],
|
||||
};
|
||||
|
|
@ -119,28 +120,22 @@ mod tests {
|
|||
#[test]
|
||||
fn includes_unnamed_groups_but_skips_whole_match() {
|
||||
let captures = SerializableCaptures {
|
||||
captures: vec![
|
||||
captures: smallvec![
|
||||
SerializableCapture {
|
||||
name: None,
|
||||
match_number: 0,
|
||||
start: 0,
|
||||
end: 6,
|
||||
value: "aabbcc".into(),
|
||||
value: "aabbcc",
|
||||
},
|
||||
SerializableCapture {
|
||||
name: Some("foo".to_string()),
|
||||
match_number: -1,
|
||||
start: 0,
|
||||
end: 2,
|
||||
value: "aa".into(),
|
||||
},
|
||||
SerializableCapture {
|
||||
name: None,
|
||||
match_number: 1,
|
||||
start: 4,
|
||||
end: 6,
|
||||
value: "cc".into(),
|
||||
value: "aa",
|
||||
},
|
||||
SerializableCapture { name: None, match_number: 1, start: 4, end: 6, value: "cc" },
|
||||
],
|
||||
};
|
||||
let result = process_captures(&captures);
|
||||
|
|
|
|||
|
|
@ -14,11 +14,27 @@ use kingfisher::{
|
|||
matcher::{Match, SerializableCapture, SerializableCaptures},
|
||||
origin::{Origin, OriginSet},
|
||||
reporter::{styles::Styles, DetailsReporter, ReportMatch},
|
||||
rules::rule::Confidence,
|
||||
rules::rule::{Confidence, Rule, RuleSyntax},
|
||||
util::intern,
|
||||
};
|
||||
use smallvec::smallvec;
|
||||
// ---- helpers -------------------------------------------------------------------------------
|
||||
|
||||
fn make_match(fp: u64) -> Match {
|
||||
let syntax = RuleSyntax {
|
||||
name: "Example Rule".to_string(),
|
||||
id: "RULE.1".to_string(),
|
||||
pattern: "dummy".to_string(),
|
||||
min_entropy: 0.0,
|
||||
confidence: Confidence::Medium,
|
||||
visible: true,
|
||||
examples: vec![],
|
||||
negative_examples: vec![],
|
||||
references: vec![],
|
||||
validation: None,
|
||||
depends_on_rule: vec![],
|
||||
};
|
||||
let rule = Arc::new(Rule::new(syntax));
|
||||
Match {
|
||||
location: Location {
|
||||
offset_span: OffsetSpan { start: 0, end: 10 },
|
||||
|
|
@ -28,20 +44,17 @@ fn make_match(fp: u64) -> Match {
|
|||
},
|
||||
},
|
||||
groups: SerializableCaptures {
|
||||
captures: vec![SerializableCapture {
|
||||
captures: smallvec![SerializableCapture {
|
||||
name: None,
|
||||
match_number: 0,
|
||||
start: 0,
|
||||
end: 10,
|
||||
value: "dummy".into(),
|
||||
value: intern("dummy"),
|
||||
}],
|
||||
},
|
||||
blob_id: BlobId::new(b"dummy"),
|
||||
finding_fingerprint: fp,
|
||||
rule_finding_fingerprint: "structural.1".into(),
|
||||
rule_text_id: "RULE.1".into(),
|
||||
rule_name: "Example Rule".into(),
|
||||
rule_confidence: Confidence::Medium,
|
||||
rule,
|
||||
validation_response_body: String::new(),
|
||||
validation_response_status: 0,
|
||||
validation_success: false,
|
||||
|
|
@ -106,7 +119,6 @@ fn reporter_deduplicates_across_git_commits() -> Result<()> {
|
|||
id: BlobId::new(b"dummy"),
|
||||
num_bytes: 10,
|
||||
mime_essence: None,
|
||||
charset: None,
|
||||
language: None,
|
||||
},
|
||||
m: m1,
|
||||
|
|
@ -123,7 +135,6 @@ fn reporter_deduplicates_across_git_commits() -> Result<()> {
|
|||
id: BlobId::new(b"dummy"),
|
||||
num_bytes: 10,
|
||||
mime_essence: None,
|
||||
charset: None,
|
||||
language: None,
|
||||
},
|
||||
m: m2,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue