forked from mirrors/kingfisher
Optimized memory usage via string interning and extensive data sharing
This commit is contained in:
parent
23102f4b59
commit
c3513ea206
12 changed files with 114 additions and 125 deletions
|
|
@ -3,7 +3,8 @@
|
|||
All notable changes to this project will be documented in this file.
|
||||
|
||||
## [Unreleased]
|
||||
- Improved error message when self-update cannot find the current binary.
|
||||
- Improved error message when self-update cannot find the current binary
|
||||
- Optimized memory usage via string interning and extensive data sharing
|
||||
|
||||
## [1.47.0]
|
||||
- MongoDB validator now validates `mongodb+srv://` URIs with a fast timeout instead of skipping them
|
||||
|
|
|
|||
|
|
@ -96,9 +96,8 @@ impl ContentInspector {
|
|||
#[inline]
|
||||
#[must_use]
|
||||
pub fn guess_charset(&self, bytes: &[u8]) -> Option<String> {
|
||||
String::from_utf8(bytes.to_vec()).ok().map(|_| "UTF-8".to_string())
|
||||
std::str::from_utf8(bytes).ok().map(|_| "UTF-8".to_string())
|
||||
}
|
||||
|
||||
/// Guess programming language with broad coverage using `tokei`.
|
||||
///
|
||||
/// Strategy (no disk I/O):
|
||||
|
|
|
|||
|
|
@ -162,10 +162,10 @@ fn handle_zip_archive_streaming(
|
|||
}
|
||||
|
||||
fn handle_asar_archive_in_memory(
|
||||
buffer: Vec<u8>,
|
||||
buffer: &[u8],
|
||||
archive_path: &Path,
|
||||
) -> Result<CompressedContent> {
|
||||
match AsarReader::new(&buffer, None) {
|
||||
match AsarReader::new(buffer, None) {
|
||||
Ok(reader) => {
|
||||
let mut contents = Vec::new();
|
||||
for (path_in_asar, file) in reader.files() {
|
||||
|
|
@ -200,7 +200,7 @@ fn decompress_once(path: &Path, base_dir: Option<&Path>) -> Result<CompressedCon
|
|||
match ext {
|
||||
"asar" => {
|
||||
let mmap = unsafe { Mmap::map(&file)? };
|
||||
return handle_asar_archive_in_memory(mmap.to_vec(), path);
|
||||
return handle_asar_archive_in_memory(&mmap, path);
|
||||
}
|
||||
"tar" => {
|
||||
if let Some(base) = base_dir {
|
||||
|
|
@ -525,4 +525,4 @@ mod tests {
|
|||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -18,6 +18,7 @@ use crate::{
|
|||
matcher::Match,
|
||||
origin::{Origin, OriginSet},
|
||||
rules::rule::Rule,
|
||||
util::intern,
|
||||
};
|
||||
|
||||
// share with Arc so every blob/origin is materialised once
|
||||
|
|
@ -151,7 +152,7 @@ impl FindingsStore {
|
|||
.captures
|
||||
.get(1)
|
||||
.or_else(|| m.groups.captures.get(0))
|
||||
.map_or("", |c| c.value.as_ref());
|
||||
.map_or("", |c| c.value);
|
||||
|
||||
let origin_kind = match origin.first() {
|
||||
Origin::GitRepo(_) => "git",
|
||||
|
|
@ -160,7 +161,7 @@ impl FindingsStore {
|
|||
};
|
||||
|
||||
let key = xxh3_64(
|
||||
format!("{}|{}|{}", m.rule_text_id.to_uppercase(), origin_kind, snippet)
|
||||
format!("{}|{}|{}", m.rule.id().to_uppercase(), origin_kind, snippet)
|
||||
.as_bytes(),
|
||||
);
|
||||
|
||||
|
|
@ -280,7 +281,7 @@ impl FindingsStore {
|
|||
pub fn get_summary(&self) -> FxHashMap<&'static str, usize> {
|
||||
self.matches.iter().fold(FxHashMap::default(), |mut acc, msg| {
|
||||
let (_, _, m) = &**msg;
|
||||
*acc.entry(m.rule_name).or_insert(0) += 1; // borrow, no alloc
|
||||
*acc.entry(intern(m.rule.name())).or_insert(0) += 1;
|
||||
acc
|
||||
})
|
||||
}
|
||||
|
|
@ -342,13 +343,13 @@ impl FindingsStore {
|
|||
self.matches.iter().map(|msg| {
|
||||
let (_, _, match_item) = &**msg;
|
||||
finding_data::FindingMetadata {
|
||||
rule_name: match_item.rule_name.to_string(),
|
||||
rule_name: match_item.rule.name().to_string(),
|
||||
num_matches: 1,
|
||||
comment: None,
|
||||
visible: match_item.visible,
|
||||
finding_id: match_item.finding_id(),
|
||||
rule_finding_fingerprint: match_item.rule_finding_fingerprint.to_string(),
|
||||
rule_text_id: match_item.rule_text_id.to_string(),
|
||||
rule_finding_fingerprint: match_item.rule.finding_sha1_fingerprint().to_string(),
|
||||
rule_text_id: match_item.rule.id().to_string(),
|
||||
}
|
||||
})
|
||||
}
|
||||
|
|
@ -362,7 +363,7 @@ impl FindingsStore {
|
|||
.iter()
|
||||
.filter(|msg| {
|
||||
let (_, _, match_item) = &***msg;
|
||||
match_item.rule_name == metadata.rule_name
|
||||
match_item.rule.name() == metadata.rule_name
|
||||
})
|
||||
.map(|msg| {
|
||||
let (origin, blob_metadata, match_item) = &**msg;
|
||||
|
|
@ -373,7 +374,7 @@ impl FindingsStore {
|
|||
match_id: MatchIdInt::from_str(&match_item.finding_id())?,
|
||||
match_comment: None,
|
||||
visible: match_item.visible,
|
||||
match_confidence: match_item.rule_confidence,
|
||||
match_confidence: match_item.rule.confidence(),
|
||||
validation_response_body: match_item.validation_response_body.clone(),
|
||||
validation_response_status: match_item.validation_response_status,
|
||||
validation_success: match_item.validation_success,
|
||||
|
|
|
|||
|
|
@ -682,20 +682,20 @@ impl JsonSchema for Groups {
|
|||
// pub end: usize, // End position of the match
|
||||
// pub value: String, // The actual captured value
|
||||
// }
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
|
||||
#[derive(Debug, Clone, Serialize, JsonSchema)]
|
||||
pub struct SerializableCapture {
|
||||
pub name: Option<String>,
|
||||
pub match_number: i32,
|
||||
pub start: usize,
|
||||
pub end: usize,
|
||||
// Instead of storing an owned String, store a borrowed or interned value.
|
||||
// Here we use Cow to allow either borrowing or owning as needed.
|
||||
pub value: std::borrow::Cow<'static, str>,
|
||||
/// Interned value of the capture.
|
||||
pub value: &'static str,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
|
||||
#[derive(Debug, Clone, Serialize, JsonSchema)]
|
||||
pub struct SerializableCaptures {
|
||||
pub captures: Vec<SerializableCapture>, // All captures (named and unnamed)
|
||||
#[schemars(with = "Vec<SerializableCapture>")]
|
||||
pub captures: SmallVec<[SerializableCapture; 2]>, // All captures (named and unnamed)
|
||||
}
|
||||
impl SerializableCaptures {
|
||||
pub fn from_captures(
|
||||
|
|
@ -704,7 +704,7 @@ impl SerializableCaptures {
|
|||
re: &Regex,
|
||||
redact: bool,
|
||||
) -> Self {
|
||||
let mut serialized_captures = Vec::new();
|
||||
let mut serialized_captures: SmallVec<[SerializableCapture; 2]> = SmallVec::new();
|
||||
// Process named captures
|
||||
for name in re.capture_names().flatten() {
|
||||
if let Some(capture) = captures.name(name) {
|
||||
|
|
@ -718,7 +718,7 @@ impl SerializableCaptures {
|
|||
match_number: -1,
|
||||
start: capture.start(),
|
||||
end: capture.end(),
|
||||
value: value.into(),
|
||||
value: intern(&value),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
@ -735,7 +735,7 @@ impl SerializableCaptures {
|
|||
match_number: i32::try_from(i).unwrap_or(0),
|
||||
start: capture.start(),
|
||||
end: capture.end(),
|
||||
value: value.into(),
|
||||
value: intern(&value),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
@ -764,16 +764,9 @@ pub struct Match {
|
|||
pub finding_fingerprint: u64,
|
||||
|
||||
/// The rule that produced this match
|
||||
pub rule_finding_fingerprint: &'static str,
|
||||
|
||||
/// The text identifier of the rule that produced this match
|
||||
pub rule_text_id: &'static str,
|
||||
|
||||
/// The name of the rule that produced this match
|
||||
pub rule_name: &'static str,
|
||||
|
||||
/// The confidence property of the rule that produced this match
|
||||
pub rule_confidence: crate::rules::rule::Confidence,
|
||||
#[serde(skip_serializing)]
|
||||
#[schemars(skip)]
|
||||
pub rule: Arc<Rule>,
|
||||
|
||||
/// Validation Body
|
||||
pub validation_response_body: String,
|
||||
|
|
@ -813,8 +806,6 @@ impl Match {
|
|||
let finding_value_for_fp = std::str::from_utf8(matching_finding_bytes).unwrap_or("");
|
||||
|
||||
let source_span = loc_mapping.get_source_span(&offset_span);
|
||||
let rule_finding_fingerprint = owned_blob_match.rule.finding_sha1_fingerprint().to_owned();
|
||||
|
||||
let offset_start: u64 =
|
||||
owned_blob_match.matching_input_offset_span.start.try_into().unwrap();
|
||||
let offset_end: u64 = owned_blob_match.matching_input_offset_span.end.try_into().unwrap();
|
||||
|
|
@ -828,10 +819,7 @@ impl Match {
|
|||
|
||||
// matching_snippet
|
||||
Match {
|
||||
rule_finding_fingerprint: intern(&rule_finding_fingerprint),
|
||||
rule_name: intern(owned_blob_match.rule.name()),
|
||||
rule_confidence: owned_blob_match.rule.confidence(),
|
||||
rule_text_id: intern(owned_blob_match.rule.id()),
|
||||
rule: owned_blob_match.rule.clone(),
|
||||
visible: owned_blob_match.rule.visible().to_owned(),
|
||||
location: Location { offset_span, source_span: source_span.clone() },
|
||||
groups: owned_blob_match.captures.clone(),
|
||||
|
|
@ -852,7 +840,7 @@ impl Match {
|
|||
|
||||
pub fn finding_id(&self) -> String {
|
||||
let mut h = Sha1::new();
|
||||
write!(&mut h, "{}\0", self.rule_finding_fingerprint)
|
||||
write!(&mut h, "{}\0", self.rule.finding_sha1_fingerprint())
|
||||
.expect("should be able to write to memory");
|
||||
serde_json::to_writer(&mut h, &self.groups)
|
||||
.expect("should be able to serialize groups as JSON");
|
||||
|
|
|
|||
|
|
@ -216,7 +216,7 @@ impl DetailsReporter {
|
|||
m: match_item.clone(),
|
||||
comment: None,
|
||||
visible: match_item.visible,
|
||||
match_confidence: match_item.rule_confidence,
|
||||
match_confidence: match_item.rule.confidence(),
|
||||
validation_response_body: match_item.validation_response_body.clone(),
|
||||
validation_response_status: match_item.validation_response_status,
|
||||
validation_success: match_item.validation_success,
|
||||
|
|
@ -366,13 +366,13 @@ impl DetailsReporter {
|
|||
|
||||
FindingReporterRecord {
|
||||
rule: RuleMetadata {
|
||||
name: rm.m.rule_name.to_string(),
|
||||
id: rm.m.rule_text_id.to_string(),
|
||||
name: rm.m.rule.name().to_string(),
|
||||
id: rm.m.rule.id().to_string(),
|
||||
},
|
||||
finding: FindingRecordData {
|
||||
snippet,
|
||||
fingerprint: rm.m.finding_fingerprint.to_string(),
|
||||
confidence: rm.match_confidence.to_string(),
|
||||
confidence: rm.m.rule.confidence().to_string(),
|
||||
entropy: format!("{:.2}", rm.m.calculated_entropy),
|
||||
validation: ValidationInfo { status: validation_status, response: response_body },
|
||||
language: rm
|
||||
|
|
|
|||
|
|
@ -35,6 +35,7 @@ mod tests {
|
|||
use crate::cli::commands::github::GitHistoryMode;
|
||||
use crate::cli::commands::rules::RuleSpecifierArgs;
|
||||
use crate::matcher::{SerializableCapture, SerializableCaptures};
|
||||
use crate::rules::rule::{Rule, RuleSyntax, Confidence};
|
||||
use crate::util::intern;
|
||||
use crate::{
|
||||
blob::BlobId,
|
||||
|
|
@ -54,6 +55,7 @@ mod tests {
|
|||
path::PathBuf,
|
||||
sync::{Arc, Mutex},
|
||||
};
|
||||
use smallvec::smallvec;
|
||||
use url::Url;
|
||||
fn create_default_args() -> cli::commands::scan::ScanArgs {
|
||||
use crate::cli::commands::gitlab::GitLabRepoType; // bring enum into scope
|
||||
|
|
@ -132,12 +134,21 @@ mod tests {
|
|||
}
|
||||
}
|
||||
|
||||
fn create_mock_match(
|
||||
rule_name: &str,
|
||||
rule_text_id: &str,
|
||||
rule_finding_fingerprint: &str,
|
||||
validation_success: bool,
|
||||
) -> Match {
|
||||
fn create_mock_match(rule_name: &str, rule_text_id: &str, validation_success: bool) -> Match {
|
||||
let syntax = RuleSyntax {
|
||||
name: rule_name.to_string(),
|
||||
id: rule_text_id.to_string(),
|
||||
pattern: "dummy".to_string(),
|
||||
min_entropy: 0.0,
|
||||
confidence: Confidence::Medium,
|
||||
visible: true,
|
||||
examples: vec![],
|
||||
negative_examples: vec![],
|
||||
references: vec![],
|
||||
validation: None,
|
||||
depends_on_rule: vec![],
|
||||
};
|
||||
let rule = Arc::new(Rule::new(syntax));
|
||||
Match {
|
||||
location: Location {
|
||||
offset_span: OffsetSpan { start: 10, end: 20 },
|
||||
|
|
@ -147,20 +158,17 @@ mod tests {
|
|||
},
|
||||
},
|
||||
groups: SerializableCaptures {
|
||||
captures: vec![SerializableCapture {
|
||||
captures: smallvec![SerializableCapture {
|
||||
name: Some("token".to_string()),
|
||||
match_number: 1,
|
||||
start: 10,
|
||||
end: 20,
|
||||
value: "mock_token".into(),
|
||||
value: intern("mock_token"),
|
||||
}],
|
||||
},
|
||||
blob_id: BlobId::new(b"mock_blob"),
|
||||
finding_fingerprint: 0123,
|
||||
rule_finding_fingerprint: intern(rule_finding_fingerprint),
|
||||
rule_text_id: intern(rule_text_id),
|
||||
rule_name: intern(rule_name),
|
||||
rule_confidence: Confidence::Medium,
|
||||
rule,
|
||||
validation_response_body: "validation response".to_string(),
|
||||
validation_response_status: 200,
|
||||
validation_success,
|
||||
|
|
@ -204,8 +212,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn test_json_format() -> Result<()> {
|
||||
let mock_match =
|
||||
create_mock_match("MockRule", "mock_rule_1", "mock_finding_fingerprint", true);
|
||||
let mock_match = create_mock_match("MockRule", "mock_rule_1", true);
|
||||
let matches = vec![ReportMatch {
|
||||
origin: OriginSet::new(Origin::from_file(PathBuf::from("/mock/path/file.rs")), vec![]),
|
||||
blob_metadata: BlobMetadata {
|
||||
|
|
@ -238,12 +245,8 @@ mod tests {
|
|||
fn test_validation_status_in_json() -> Result<()> {
|
||||
let test_cases = vec![(true, "Active Credential"), (false, "Inactive Credential")];
|
||||
for (validation_success, expected_status) in test_cases {
|
||||
let mock_match = create_mock_match(
|
||||
"MockRule",
|
||||
"mock_rule_1",
|
||||
"mock_finding_fingerprint",
|
||||
validation_success,
|
||||
);
|
||||
let mock_match =
|
||||
create_mock_match("MockRule", "mock_rule_1", validation_success);
|
||||
let matches = vec![ReportMatch {
|
||||
origin: OriginSet::new(
|
||||
Origin::from_file(PathBuf::from("/mock/path/file.rs")),
|
||||
|
|
|
|||
|
|
@ -175,9 +175,11 @@ impl Docker {
|
|||
|
||||
pb.set_length(layer_paths.len() as u64);
|
||||
for p in layer_paths {
|
||||
let mut data = Vec::new();
|
||||
File::open(&p)?.read_to_end(&mut data)?;
|
||||
let digest = format!("{:x}", Sha256::digest(&data));
|
||||
let mut file = File::open(&p)?;
|
||||
let mut hasher = Sha256::new();
|
||||
std::io::copy(&mut file, &mut hasher)?;
|
||||
let digest = format!("{:x}", hasher.finalize());
|
||||
|
||||
let new_path = out_dir.join(format!("layer_{digest}.tar"));
|
||||
std::fs::rename(&p, &new_path)?;
|
||||
// extract layer contents so inner filenames appear in scan results
|
||||
|
|
@ -280,4 +282,4 @@ mod tests {
|
|||
fn docker_struct_new() {
|
||||
let _ = Docker::new();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -13,7 +13,7 @@ use futures::{stream, StreamExt};
|
|||
use indicatif::{ProgressBar, ProgressStyle};
|
||||
use liquid::Parser;
|
||||
use reqwest::{Client, StatusCode};
|
||||
use rustc_hash::{FxHashMap, FxHashSet};
|
||||
use rustc_hash::FxHashMap;
|
||||
use tokio::{sync::Notify, time::timeout};
|
||||
|
||||
use crate::{
|
||||
|
|
@ -21,7 +21,6 @@ use crate::{
|
|||
findings_store::{FindingsStore, FindingsStoreMessage},
|
||||
location::OffsetSpan,
|
||||
matcher::{Match, OwnedBlobMatch},
|
||||
rules::rule,
|
||||
validation::{collect_variables_and_dependencies, validate_single_match, CachedResponse},
|
||||
};
|
||||
|
||||
|
|
@ -40,7 +39,7 @@ pub async fn run_secret_validation(
|
|||
let fail_count = Arc::new(AtomicUsize::new(0));
|
||||
|
||||
// ── 2. Fetch rules + matches ────────────────────────────────────────────
|
||||
let (all_rules, all_matches_by_blob) = {
|
||||
let (_all_rules, all_matches_by_blob) = {
|
||||
let ds = datastore.lock().unwrap();
|
||||
let rules = ds.get_rules()?;
|
||||
let mut map: FxHashMap<BlobId, Vec<Arc<FindingsStoreMessage>>> = FxHashMap::default();
|
||||
|
|
@ -51,16 +50,13 @@ pub async fn run_secret_validation(
|
|||
};
|
||||
|
||||
// ── 3. Partition blobs ──────────────────────────────────────────────────
|
||||
let rules_with_deps: FxHashSet<&str> = all_rules
|
||||
.iter()
|
||||
.filter(|r| !r.syntax().depends_on_rule.is_empty())
|
||||
.map(|r| r.id())
|
||||
.collect();
|
||||
|
||||
let mut simple_matches = Vec::new();
|
||||
let mut dependent_blobs = FxHashMap::default(); // blob_id -- Vec<Arc<…>>
|
||||
for (blob_id, matches) in all_matches_by_blob {
|
||||
if matches.iter().any(|m| rules_with_deps.contains(m.2.rule_text_id)) {
|
||||
if matches
|
||||
.iter()
|
||||
.any(|m| !m.2.rule.syntax().depends_on_rule.is_empty())
|
||||
{
|
||||
dependent_blobs.insert(blob_id, matches);
|
||||
} else {
|
||||
simple_matches.extend(matches);
|
||||
|
|
@ -80,9 +76,9 @@ pub async fn run_secret_validation(
|
|||
.captures
|
||||
.get(1)
|
||||
.or_else(|| arc_msg.2.groups.captures.get(0))
|
||||
.map_or("", |c| c.value.as_ref());
|
||||
.map_or("", |c| c.value);
|
||||
groups
|
||||
.entry(format!("{}|{}", arc_msg.2.rule_text_id, secret))
|
||||
.entry(format!("{}|{}", arc_msg.2.rule.id(), secret))
|
||||
.or_default()
|
||||
.push(arc_msg);
|
||||
}
|
||||
|
|
@ -109,7 +105,6 @@ pub async fn run_secret_validation(
|
|||
let client = client.clone();
|
||||
let cache_glob = cache.clone();
|
||||
let val_res = &validation_results;
|
||||
let rules = &all_rules;
|
||||
let success = success_count.clone();
|
||||
let fail = fail_count.clone();
|
||||
// *** FIX: Clone the progress bar for each concurrent task ***
|
||||
|
|
@ -122,8 +117,8 @@ pub async fn run_secret_validation(
|
|||
.captures
|
||||
.get(1)
|
||||
.or_else(|| rep_arc.2.groups.captures.get(0))
|
||||
.map_or("", |c| c.value.as_ref());
|
||||
let key = format!("{}|{}", rep_arc.2.rule_text_id, secret);
|
||||
.map_or("", |c| c.value);
|
||||
let key = format!("{}|{}", rep_arc.2.rule.id(), secret);
|
||||
|
||||
match val_res.entry(key.clone()) {
|
||||
dashmap::mapref::entry::Entry::Occupied(_) => return,
|
||||
|
|
@ -138,8 +133,8 @@ pub async fn run_secret_validation(
|
|||
}
|
||||
}
|
||||
|
||||
let rule = find_rule_for_match(rules, rep_arc.2.rule_text_id).unwrap();
|
||||
let mut om = OwnedBlobMatch::convert_match_to_owned_blobmatch(&rep_arc.2, rule);
|
||||
let mut om =
|
||||
OwnedBlobMatch::convert_match_to_owned_blobmatch(&rep_arc.2, rep_arc.2.rule.clone());
|
||||
|
||||
validate_single(
|
||||
&mut om,
|
||||
|
|
@ -211,7 +206,6 @@ pub async fn run_secret_validation(
|
|||
|
||||
let val_cache = Arc::new(DashMap::<String, CachedResponse>::new());
|
||||
let in_flight = Arc::new(DashMap::<String, ()>::new());
|
||||
let rules_ref = Arc::new(all_rules.clone());
|
||||
|
||||
for chunk in blob_ids.chunks(chunk_size) {
|
||||
let tasks: Vec<_> = chunk
|
||||
|
|
@ -225,15 +219,15 @@ pub async fn run_secret_validation(
|
|||
let success = success_count.clone();
|
||||
let fail = fail_count.clone();
|
||||
let cache_glob = cache.clone();
|
||||
let rules = rules_ref.clone();
|
||||
|
||||
async move {
|
||||
let owned = matches_for_blob
|
||||
.iter()
|
||||
.map(|arc_msg| {
|
||||
let rule = find_rule_for_match(&rules, arc_msg.2.rule_text_id)
|
||||
.expect("rule");
|
||||
OwnedBlobMatch::convert_match_to_owned_blobmatch(&arc_msg.2, rule)
|
||||
OwnedBlobMatch::convert_match_to_owned_blobmatch(
|
||||
&arc_msg.2,
|
||||
arc_msg.2.rule.clone(),
|
||||
)
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
|
|
@ -338,21 +332,6 @@ pub async fn run_secret_validation(
|
|||
Ok(())
|
||||
}
|
||||
|
||||
/// Returns `Some(Arc<Rule>)` if a matching rule is found; otherwise returns `None`.
|
||||
/// Callers can decide how to handle the `None` case (e.g., skip processing).
|
||||
fn find_rule_for_match(
|
||||
all_rules: &[Arc<rule::Rule>],
|
||||
rule_text_id: &str,
|
||||
) -> Option<Arc<rule::Rule>> {
|
||||
match all_rules.iter().find(|r| r.syntax().id == rule_text_id).cloned() {
|
||||
Some(rule) => Some(rule),
|
||||
None => {
|
||||
eprintln!("Warning: no rule found with id '{}'. Skipping.", rule_text_id);
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------
|
||||
// The core validation logic, used in an async pipeline
|
||||
// ---------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -910,6 +910,7 @@ mod tests {
|
|||
use crossbeam_skiplist::SkipMap;
|
||||
use http::StatusCode;
|
||||
use rustc_hash::FxHashMap;
|
||||
use smallvec::smallvec;
|
||||
|
||||
use crate::{
|
||||
blob::BlobId,
|
||||
|
|
@ -920,6 +921,7 @@ mod tests {
|
|||
rule::{Confidence, Rule},
|
||||
Rules,
|
||||
},
|
||||
util::intern,
|
||||
validation::{validate_single_match, Cache},
|
||||
};
|
||||
#[tokio::test]
|
||||
|
|
@ -1016,12 +1018,12 @@ rules:
|
|||
// matching_input: token.as_bytes().to_vec(),
|
||||
matching_input_offset_span: OffsetSpan { start: 0, end: token.len() },
|
||||
captures: SerializableCaptures {
|
||||
captures: vec![SerializableCapture {
|
||||
captures: smallvec![SerializableCapture {
|
||||
name: Some("TOKEN".to_string()),
|
||||
match_number: -1,
|
||||
start: 0,
|
||||
end: token.len(),
|
||||
value: token.into(),
|
||||
value: intern(token),
|
||||
}],
|
||||
},
|
||||
validation_response_body: String::new(),
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ pub fn process_captures(captures: &SerializableCaptures) -> Vec<(String, String,
|
|||
.map(|cap| {
|
||||
let name =
|
||||
cap.name.as_ref().map(|n| n.to_uppercase()).unwrap_or_else(|| "TOKEN".to_string());
|
||||
(name, cap.value.clone().into_owned(), cap.start, cap.end)
|
||||
(name, cap.value.to_string(), cap.start, cap.end)
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
|
@ -75,17 +75,18 @@ pub async fn check_url_resolvable(url: &Url) -> Result<(), Box<dyn std::error::E
|
|||
mod tests {
|
||||
use super::*;
|
||||
use crate::matcher::{SerializableCapture, SerializableCaptures};
|
||||
use smallvec::smallvec;
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
#[test]
|
||||
fn single_unnamed_capture_is_returned() {
|
||||
let captures = SerializableCaptures {
|
||||
captures: vec![SerializableCapture {
|
||||
captures: smallvec![SerializableCapture {
|
||||
name: None,
|
||||
match_number: 0,
|
||||
start: 1,
|
||||
end: 4,
|
||||
value: "abc".into(),
|
||||
value: "abc",
|
||||
}],
|
||||
};
|
||||
let result = process_captures(&captures);
|
||||
|
|
@ -95,20 +96,20 @@ mod tests {
|
|||
#[test]
|
||||
fn skips_whole_match_when_multiple() {
|
||||
let captures = SerializableCaptures {
|
||||
captures: vec![
|
||||
captures: smallvec![
|
||||
SerializableCapture {
|
||||
name: None,
|
||||
match_number: 0,
|
||||
start: 0,
|
||||
end: 5,
|
||||
value: "abcde".into(),
|
||||
value: "abcde",
|
||||
},
|
||||
SerializableCapture {
|
||||
name: Some("foo".to_string()),
|
||||
match_number: -1,
|
||||
start: 1,
|
||||
end: 4,
|
||||
value: "bcd".into(),
|
||||
value: "bcd",
|
||||
},
|
||||
],
|
||||
};
|
||||
|
|
@ -119,27 +120,27 @@ mod tests {
|
|||
#[test]
|
||||
fn includes_unnamed_groups_but_skips_whole_match() {
|
||||
let captures = SerializableCaptures {
|
||||
captures: vec![
|
||||
captures: smallvec![
|
||||
SerializableCapture {
|
||||
name: None,
|
||||
match_number: 0,
|
||||
start: 0,
|
||||
end: 6,
|
||||
value: "aabbcc".into(),
|
||||
value: "aabbcc",
|
||||
},
|
||||
SerializableCapture {
|
||||
name: Some("foo".to_string()),
|
||||
match_number: -1,
|
||||
start: 0,
|
||||
end: 2,
|
||||
value: "aa".into(),
|
||||
value: "aa",
|
||||
},
|
||||
SerializableCapture {
|
||||
name: None,
|
||||
match_number: 1,
|
||||
start: 4,
|
||||
end: 6,
|
||||
value: "cc".into(),
|
||||
value: "cc",
|
||||
},
|
||||
],
|
||||
};
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ use std::{
|
|||
|
||||
use anyhow::Result;
|
||||
use gix::{date, ObjectId};
|
||||
use smallvec::smallvec;
|
||||
use kingfisher::{
|
||||
blob::{BlobId, BlobMetadata},
|
||||
findings_store::FindingsStore,
|
||||
|
|
@ -14,11 +15,26 @@ use kingfisher::{
|
|||
matcher::{Match, SerializableCapture, SerializableCaptures},
|
||||
origin::{Origin, OriginSet},
|
||||
reporter::{styles::Styles, DetailsReporter, ReportMatch},
|
||||
rules::rule::Confidence,
|
||||
rules::rule::{Confidence, Rule, RuleSyntax},
|
||||
util::intern,
|
||||
};
|
||||
// ---- helpers -------------------------------------------------------------------------------
|
||||
|
||||
fn make_match(fp: u64) -> Match {
|
||||
let syntax = RuleSyntax {
|
||||
name: "Example Rule".to_string(),
|
||||
id: "RULE.1".to_string(),
|
||||
pattern: "dummy".to_string(),
|
||||
min_entropy: 0.0,
|
||||
confidence: Confidence::Medium,
|
||||
visible: true,
|
||||
examples: vec![],
|
||||
negative_examples: vec![],
|
||||
references: vec![],
|
||||
validation: None,
|
||||
depends_on_rule: vec![],
|
||||
};
|
||||
let rule = Arc::new(Rule::new(syntax));
|
||||
Match {
|
||||
location: Location {
|
||||
offset_span: OffsetSpan { start: 0, end: 10 },
|
||||
|
|
@ -28,20 +44,17 @@ fn make_match(fp: u64) -> Match {
|
|||
},
|
||||
},
|
||||
groups: SerializableCaptures {
|
||||
captures: vec![SerializableCapture {
|
||||
captures: smallvec![SerializableCapture {
|
||||
name: None,
|
||||
match_number: 0,
|
||||
start: 0,
|
||||
end: 10,
|
||||
value: "dummy".into(),
|
||||
value: intern("dummy"),
|
||||
}],
|
||||
},
|
||||
blob_id: BlobId::new(b"dummy"),
|
||||
finding_fingerprint: fp,
|
||||
rule_finding_fingerprint: "structural.1".into(),
|
||||
rule_text_id: "RULE.1".into(),
|
||||
rule_name: "Example Rule".into(),
|
||||
rule_confidence: Confidence::Medium,
|
||||
rule,
|
||||
validation_response_body: String::new(),
|
||||
validation_response_status: 0,
|
||||
validation_success: false,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue