diff --git a/CHANGELOG.md b/CHANGELOG.md index cb5b3e8..e49a4e5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,9 @@ All notable changes to this project will be documented in this file. +## [1.42.0] +- Internal refactoring of rule loader and git enumerator + ## [1.41.0] - Added support for scanning gitlab subgroups, with `kingfisher scan --gitlab-group my-group --gitlab-include-subgroups` - Added rule for Vercel diff --git a/src/git_commit_metadata.rs b/src/git_commit_metadata.rs index 75540d3..2588d73 100644 --- a/src/git_commit_metadata.rs +++ b/src/git_commit_metadata.rs @@ -141,14 +141,4 @@ pub struct CommitMetadata { #[serde(with = "TextTime")] pub committer_timestamp: Time, - // #[serde(with = "BStringLossyUtf8")] - // pub author_name: BString, - - // #[serde(with = "BStringLossyUtf8")] - // pub author_email: BString, - - // #[serde(with = "TextTime")] - // pub author_timestamp: Time, - // #[serde(with = "BStringLossyUtf8")] - // pub message: BString, } diff --git a/src/rule_loader.rs b/src/rule_loader.rs index 13a7c2f..acc8b89 100644 --- a/src/rule_loader.rs +++ b/src/rule_loader.rs @@ -1,5 +1,5 @@ use std::{ - collections::{HashMap, HashSet}, + collections::{BTreeMap, HashSet}, path::{Path, PathBuf}, }; @@ -31,8 +31,9 @@ pub enum RuleLoaderError { pub struct RuleLoader { load_builtins: bool, additional_load_paths: Vec, - enabled_rule_ids: Option>, + enabled_rule_ids: Option>, } + impl RuleLoader { pub fn new() -> Self { Self { @@ -57,31 +58,36 @@ impl RuleLoader { pub fn enable_rule_ids, I: IntoIterator>(mut self, ids: I) -> Self { let ids: Vec = ids.into_iter().map(|s| s.as_ref().to_string()).collect(); - if ids.contains(&"all".to_string()) { + if ids.iter().any(|id| id == "all") { self.enabled_rule_ids = None; // Reset to "all rules enabled" } else { - self.enabled_rule_ids = Some(ids.into_iter().collect()); + self.enabled_rule_ids = Some(ids); } self } pub fn load(&self, args: &cli::commands::scan::ScanArgs) -> Result { let confidence = Confidence::from(args.confidence); - let mut rules = Rules::new(); + let mut id_to_rule: BTreeMap = BTreeMap::new(); + if self.load_builtins { let builtin_rules = get_builtin_rules(Some(confidence)).context(RuleLoaderError::BuiltinLoadError)?; - rules.update(builtin_rules); + for rule_syntax in builtin_rules { + let id = rule_syntax.id.clone(); + id_to_rule.insert(id, Rule::new(rule_syntax)); + } } + if !self.additional_load_paths.is_empty() { - let custom = Rules::from_paths(&self.additional_load_paths, confidence) + let custom_rules = Rules::from_paths(&self.additional_load_paths, confidence) .context(RuleLoaderError::AdditionalPathLoadError)?; - rules.update(custom); + for rule_syntax in custom_rules { + let id = rule_syntax.id.clone(); + id_to_rule.insert(id, Rule::new(rule_syntax)); + } } - let mut rules = rules.rules; - rules.sort_by(|r1, r2| r1.id.cmp(&r2.id)); - let id_to_rule: HashMap = - rules.into_iter().map(|r| (r.id.clone(), Rule::new(r))).collect(); + Ok(LoadedRules { id_to_rule, enabled_rule_ids: self.enabled_rule_ids.clone() }) } @@ -92,10 +98,12 @@ impl RuleLoader { .enable_rule_ids(specs.rule.iter()) } } + pub struct LoadedRules { - id_to_rule: HashMap, - enabled_rule_ids: Option>, + id_to_rule: BTreeMap, + enabled_rule_ids: Option>, } + impl LoadedRules { #[inline] pub fn num_rules(&self) -> usize { @@ -118,6 +126,7 @@ impl LoadedRules { // At least one selector was given Some(selectors) => { let mut resolved = Vec::new(); + let mut seen = HashSet::new(); // For each selector, collect rules that match it for selector in selectors { @@ -129,8 +138,10 @@ impl LoadedRules { || (id.starts_with(selector) && id.as_bytes().get(selector.len()) == Some(&b'.')) { - resolved.push(rule); matched_any = true; + if seen.insert(id.clone()) { + resolved.push(rule); + } } } @@ -145,18 +156,10 @@ impl LoadedRules { } }; - // Deduplicate & sort for deterministic order - let mut resolved_rules = resolved_rules; - sort_and_deduplicate_rules(&mut resolved_rules); - - info!("Loaded {}", Counted::regular(resolved_rules.len(), "rule"),); + info!("Loaded {}", Counted::regular(resolved_rules.len(), "rule")); for rule in &resolved_rules { trace!("Using rule `{}`: {}", rule.id(), rule.name()); } Ok(resolved_rules) } } -fn sort_and_deduplicate_rules(rules: &mut Vec<&Rule>) { - rules.sort_by(|r1, r2| r1.id().cmp(r2.id())); - rules.dedup_by(|r1, r2| r1.id() == r2.id()); -} diff --git a/src/rules.rs b/src/rules.rs index ac7e2fb..d0484e7 100644 --- a/src/rules.rs +++ b/src/rules.rs @@ -1,17 +1,15 @@ -use anyhow::{bail, Result}; +use anyhow::{bail, Context, Result}; use ignore::{types::TypesBuilder, WalkBuilder}; -use serde::{Deserialize, Serialize}; +use serde::Deserialize; use thiserror::Error; use tracing::{debug, debug_span, error}; pub mod rule; -use std::{fs::File, io::BufReader, path::Path}; +use std::{collections::BTreeMap, fs::File, io::BufReader, path::Path}; -use anyhow::Context; use rule::{Confidence, RuleSyntax, Validation}; use serde::de::DeserializeOwned; -/// Custom error type for more granular rules loading errors. #[derive(Debug, Error)] pub enum RulesError { #[error("Failed to parse YAML file at path: {0}")] @@ -33,35 +31,37 @@ pub enum RulesError { MissingResponseMatcher { path: String, rule_id: String }, } -/// Represents a collection of rule syntaxes. -#[derive(Serialize, Deserialize, Clone)] +#[derive(Clone, Default)] pub struct Rules { - pub rules: Vec, + pub rules: BTreeMap, +} + +#[derive(Deserialize)] +struct RawRules { + rules: Vec, } impl Rules { - /// Creates a new empty set of rules. pub fn new() -> Self { - Self { rules: Vec::new() } + Self { rules: BTreeMap::new() } } - /// Updates the current set with the rules from another set. pub fn update(&mut self, other: Rules) { self.rules.extend(other.rules); } - /// Loads rules from an iterator over (path, contents) pairs. - /// Only rules with a confidence level at least as high as `confidence` are retained. pub fn from_paths_and_contents<'a, I: IntoIterator>( iterable: I, confidence: Confidence, ) -> Result { let mut rules = Self::new(); - for (path, contents) in iterable.into_iter() { - match serde_yaml::from_reader::<_, Rules>(contents) { - Ok(mut rs) => { - rs.rules.retain(|rule| rule.confidence.is_at_least(&confidence)); - for rule_syntax in &rs.rules { + for (path, contents) in iterable { + match serde_yaml::from_slice::(contents) { + Ok(rs) => { + for rule_syntax in rs.rules { + if !rule_syntax.confidence.is_at_least(&confidence) { + continue; + } if let Some(Validation::Http(http_val)) = &rule_syntax.validation { if http_val .request @@ -75,8 +75,8 @@ impl Rules { }); } } + rules.rules.insert(rule_syntax.id.clone(), rule_syntax); } - rules.update(rs); } Err(e) => { if let Some(location) = e.location() { @@ -90,7 +90,7 @@ impl Rules { bail!(RulesError::InvalidResponseMatcherVariant( path.display().to_string(), location.line(), - location.column() + location.column(), )); } else { error!("Failed to parse rules YAML from {}: {}", path.display(), e); @@ -106,8 +106,6 @@ impl Rules { Ok(rules) } - /// Loads rules from the given paths. - /// Each path may be a file or a directory. pub fn from_paths, I: IntoIterator>( paths: I, confidence: Confidence, @@ -130,13 +128,27 @@ impl Rules { Ok(rules) } - /// Loads rules from a YAML file. pub fn from_yaml_file>(path: P, confidence: Confidence) -> Result { let path = path.as_ref(); let _span = debug_span!("Rules::from_yaml_file", "{}", path.display()).entered(); - match load_yaml_file::(path) { - Ok(mut rules) => { - rules.rules.retain(|rule| rule.confidence.is_at_least(&confidence)); + match load_yaml_file::(path) { + Ok(rs) => { + let mut rules = Rules::new(); + for rule_syntax in rs.rules { + if !rule_syntax.confidence.is_at_least(&confidence) { + continue; + } + if let Some(Validation::Http(http_val)) = &rule_syntax.validation { + if http_val.request.response_matcher.as_ref().map_or(true, |m| m.is_empty()) + { + bail!(RulesError::MissingResponseMatcher { + path: path.display().to_string(), + rule_id: rule_syntax.id.clone(), + }); + } + } + rules.rules.insert(rule_syntax.id.clone(), rule_syntax); + } debug!("Loaded {} rules from {}", rules.num_rules(), path.display()); Ok(rules) } @@ -151,7 +163,6 @@ impl Rules { } } - /// Loads rules from multiple YAML files. pub fn from_yaml_files, I: IntoIterator>( paths: I, confidence: Confidence, @@ -166,7 +177,6 @@ impl Rules { Ok(rules) } - /// Loads rules from all YAML files in a directory. pub fn from_directory>(path: P, confidence: Confidence) -> Result { let path = path.as_ref(); let _span = debug_span!("Rules::from_directory", "{}", path.display()).entered(); @@ -198,32 +208,31 @@ impl Rules { Self::from_yaml_files(&yaml_files, confidence) } - /// Returns the number of rules. #[inline] pub fn num_rules(&self) -> usize { self.rules.len() } - /// Returns true if no rules are present. #[inline] pub fn is_empty(&self) -> bool { self.rules.is_empty() } - /// Returns an iterator over the rules. #[inline] - pub fn iter_rules(&self) -> std::slice::Iter<'_, RuleSyntax> { - self.rules.iter() + pub fn iter_rules(&self) -> std::collections::btree_map::Values<'_, String, RuleSyntax> { + self.rules.values() } } -impl Default for Rules { - fn default() -> Self { - Self::new() +impl IntoIterator for Rules { + type Item = RuleSyntax; + type IntoIter = std::collections::btree_map::IntoValues; + + fn into_iter(self) -> Self::IntoIter { + self.rules.into_values() } } -/// Loads and deserializes a YAML file into a value of type `T`. pub fn load_yaml_file>(path: P) -> Result { let path = path.as_ref(); let file = File::open(path) diff --git a/src/rules.rs.orig b/src/rules.rs.orig new file mode 100644 index 0000000..888e198 --- /dev/null +++ b/src/rules.rs.orig @@ -0,0 +1,250 @@ +use anyhow::{bail, Context, Result}; +use ignore::{types::TypesBuilder, WalkBuilder}; +use serde::Deserialize; +use thiserror::Error; +use tracing::{debug, debug_span, error}; + +pub mod rule; +use std::{collections::BTreeMap, fs::File, io::BufReader, path::Path}; + +use rule::{Confidence, RuleSyntax, Validation}; +use serde::de::DeserializeOwned; + +#[derive(Debug, Error)] +pub enum RulesError { + #[error("Failed to parse YAML file at path: {0}")] + YamlParseError(String), + + #[error("Invalid input: {0} is neither a file nor a directory")] + InvalidInputError(String), + + #[error("File system error: {0}")] + FileSystemError(#[from] std::io::Error), + + #[error("Error building YAML types: {0}")] + YamlTypesBuildError(String), + + #[error("Invalid ResponseMatcher variant in file: {0}, at line: {1}, column: {2}")] + InvalidResponseMatcherVariant(String, usize, usize), + + #[error("HTTP validation for rule `{rule_id}` in file {path} missing response_matcher")] + MissingResponseMatcher { path: String, rule_id: String }, +} + +#[derive(Clone, Default)] +pub struct Rules { + pub rules: BTreeMap, +} + +#[derive(Deserialize)] +struct RawRules { + rules: Vec, +} + +impl Rules { + pub fn new() -> Self { + Self { rules: BTreeMap::new() } + } + + pub fn update(&mut self, other: Rules) { + self.rules.extend(other.rules); + } + + pub fn from_paths_and_contents<'a, I: IntoIterator>( + iterable: I, + confidence: Confidence, + ) -> Result { + let mut rules = Self::new(); + for (path, contents) in iterable { + match serde_yaml::from_slice::(contents) { + Ok(rs) => { + for rule_syntax in rs.rules { + if !rule_syntax.confidence.is_at_least(&confidence) { + continue; + } + if let Some(Validation::Http(http_val)) = &rule_syntax.validation { + if http_val + .request + .response_matcher + .as_ref() + .map_or(true, |m| m.is_empty()) + { + bail!(RulesError::MissingResponseMatcher { + path: path.display().to_string(), + rule_id: rule_syntax.id.clone(), + }); + } + } + rules.rules.insert(rule_syntax.id.clone(), rule_syntax); + } + } + Err(e) => { + if let Some(location) = e.location() { + error!( + "Failed to parse rules YAML from {}: {}, at line: {}, column: {}", + path.display(), + e, + location.line(), + location.column() + ); + bail!(RulesError::InvalidResponseMatcherVariant( + path.display().to_string(), + location.line(), + location.column(), + )); + } else { + error!("Failed to parse rules YAML from {}: {}", path.display(), e); + bail!(RulesError::YamlParseError(format!( + "Failed to load rules YAML from {}: {}", + path.display(), + e + ))); + } + } + } + } + Ok(rules) + } + + pub fn from_paths, I: IntoIterator>( + paths: I, + confidence: Confidence, + ) -> Result { + let mut num_paths = 0; + let mut rules = Rules::new(); + for input in paths { + num_paths += 1; + let input = input.as_ref(); + if input.is_file() { + rules.update(Rules::from_yaml_file(input, confidence)?); + } else if input.is_dir() { + rules.update(Rules::from_directory(input, confidence)?); + } else { + error!("Invalid input type: {} is neither a file nor a directory", input.display()); + bail!(RulesError::InvalidInputError(input.display().to_string())); + } + } + debug!("Loaded {} rules from {} paths", rules.num_rules(), num_paths); + Ok(rules) + } + + pub fn from_yaml_file>(path: P, confidence: Confidence) -> Result { + let path = path.as_ref(); + let _span = debug_span!("Rules::from_yaml_file", "{}", path.display()).entered(); + match load_yaml_file::(path) { + Ok(rs) => { + let mut rules = Rules::new(); + for rule_syntax in rs.rules { + if !rule_syntax.confidence.is_at_least(&confidence) { + continue; + } + if let Some(Validation::Http(http_val)) = &rule_syntax.validation { + if http_val.request.response_matcher.as_ref().map_or(true, |m| m.is_empty()) + { + bail!(RulesError::MissingResponseMatcher { + path: path.display().to_string(), + rule_id: rule_syntax.id.clone(), + }); + } + } + rules.rules.insert(rule_syntax.id.clone(), rule_syntax); + } + debug!("Loaded {} rules from {}", rules.num_rules(), path.display()); + Ok(rules) + } + Err(e) => { + error!("Failed to load rules YAML from {}: {}", path.display(), e); + bail!(RulesError::YamlParseError(format!( + "Failed to load rules YAML from {}: {}", + path.display(), + e + ))) + } + } + } + + pub fn from_yaml_files, I: IntoIterator>( + paths: I, + confidence: Confidence, + ) -> Result { + let mut num_paths = 0; + let mut rules = Rules::new(); + for path in paths { + num_paths += 1; + rules.update(Rules::from_yaml_file(path.as_ref(), confidence)?); + } + debug!("Loaded {} rules from {} YAML files", rules.num_rules(), num_paths); + Ok(rules) + } + + pub fn from_directory>(path: P, confidence: Confidence) -> Result { + let path = path.as_ref(); + let _span = debug_span!("Rules::from_directory", "{}", path.display()).entered(); + let yaml_types = + TypesBuilder::new().add_defaults().select("yaml").build().map_err(|e| { + error!("Failed to build YAML types: {}", e); + RulesError::YamlTypesBuildError(e.to_string()) + })?; + let walker = WalkBuilder::new(path) + .types(yaml_types) + .follow_links(true) + .standard_filters(false) + .build(); + let mut yaml_files = Vec::new(); + for entry in walker { + match entry { + Ok(entry) => { + if entry.file_type().map_or(false, |t| !t.is_dir()) { + yaml_files.push(entry.into_path()); + } + } + Err(e) => { + debug!("Failed to read directory entry: {}", e); + } + } + } + yaml_files.sort(); + debug!("Found {} YAML files in {}", yaml_files.len(), path.display()); + Self::from_yaml_files(&yaml_files, confidence) + } + + #[inline] + pub fn num_rules(&self) -> usize { + self.rules.len() + } + + #[inline] + pub fn is_empty(&self) -> bool { + self.rules.is_empty() + } + + #[inline] + pub fn iter_rules(&self) -> std::collections::btree_map::Values<'_, String, RuleSyntax> { + self.rules.values() + } +} + +impl Default for Rules { + fn default() -> Self { + Self::new() + } +} + +impl IntoIterator for Rules { + type Item = RuleSyntax; + type IntoIter = std::collections::btree_map::IntoValues; + + fn into_iter(self) -> Self::IntoIter { + self.rules.into_values() + } +} + +pub fn load_yaml_file>(path: P) -> Result { + let path = path.as_ref(); + let file = File::open(path) + .with_context(|| format!("Failed to open YAML file: {}", path.display()))?; + let reader = BufReader::new(file); + let data = serde_yaml::from_reader(reader) + .with_context(|| format!("Failed to parse YAML from file: {}", path.display()))?; + Ok(data) +} diff --git a/src/rules.rs.rej b/src/rules.rs.rej new file mode 100644 index 0000000..a92df2f --- /dev/null +++ b/src/rules.rs.rej @@ -0,0 +1,282 @@ +@@ -1,235 +1,244 @@ +-use anyhow::{bail, Result}; ++use anyhow::{bail, Context, Result}; + use ignore::{types::TypesBuilder, WalkBuilder}; +-use serde::{Deserialize, Serialize}; ++use serde::Deserialize; + use thiserror::Error; + use tracing::{debug, debug_span, error}; + + pub mod rule; +-use std::{fs::File, io::BufReader, path::Path}; ++use std::{collections::BTreeMap, fs::File, io::BufReader, path::Path}; + +-use anyhow::Context; + use rule::{Confidence, RuleSyntax, Validation}; + use serde::de::DeserializeOwned; + +-/// Custom error type for more granular rules loading errors. + #[derive(Debug, Error)] + pub enum RulesError { + #[error("Failed to parse YAML file at path: {0}")] + YamlParseError(String), + + #[error("Invalid input: {0} is neither a file nor a directory")] + InvalidInputError(String), + + #[error("File system error: {0}")] + FileSystemError(#[from] std::io::Error), + + #[error("Error building YAML types: {0}")] + YamlTypesBuildError(String), + + #[error("Invalid ResponseMatcher variant in file: {0}, at line: {1}, column: {2}")] + InvalidResponseMatcherVariant(String, usize, usize), + + #[error("HTTP validation for rule `{rule_id}` in file {path} missing response_matcher")] + MissingResponseMatcher { path: String, rule_id: String }, + } + +-/// Represents a collection of rule syntaxes. +-#[derive(Serialize, Deserialize, Clone)] ++#[derive(Clone, Default)] + pub struct Rules { +- pub rules: Vec, ++ pub rules: BTreeMap, ++} ++ ++#[derive(Deserialize)] ++struct RawRules { ++ rules: Vec, + } + + impl Rules { +- /// Creates a new empty set of rules. + pub fn new() -> Self { +- Self { rules: Vec::new() } ++ Self { rules: BTreeMap::new() } + } + +- /// Updates the current set with the rules from another set. + pub fn update(&mut self, other: Rules) { + self.rules.extend(other.rules); + } + +- /// Loads rules from an iterator over (path, contents) pairs. +- /// Only rules with a confidence level at least as high as `confidence` are retained. + pub fn from_paths_and_contents<'a, I: IntoIterator>( + iterable: I, + confidence: Confidence, + ) -> Result { + let mut rules = Self::new(); +- for (path, contents) in iterable.into_iter() { +- match serde_yaml::from_reader::<_, Rules>(contents) { +- Ok(mut rs) => { +- rs.rules.retain(|rule| rule.confidence.is_at_least(&confidence)); +- for rule_syntax in &rs.rules { ++ for (path, contents) in iterable { ++ match serde_yaml::from_slice::(contents) { ++ Ok(rs) => { ++ for rule_syntax in rs.rules { ++ if !rule_syntax.confidence.is_at_least(&confidence) { ++ continue; ++ } + if let Some(Validation::Http(http_val)) = &rule_syntax.validation { + if http_val + .request + .response_matcher + .as_ref() + .map_or(true, |m| m.is_empty()) + { + bail!(RulesError::MissingResponseMatcher { + path: path.display().to_string(), + rule_id: rule_syntax.id.clone(), + }); + } + } ++ rules.rules.insert(rule_syntax.id.clone(), rule_syntax); + } +- rules.update(rs); + } + Err(e) => { + if let Some(location) = e.location() { + error!( + "Failed to parse rules YAML from {}: {}, at line: {}, column: {}", + path.display(), + e, + location.line(), + location.column() + ); + bail!(RulesError::InvalidResponseMatcherVariant( + path.display().to_string(), + location.line(), +- location.column() ++ location.column(), + )); + } else { + error!("Failed to parse rules YAML from {}: {}", path.display(), e); + bail!(RulesError::YamlParseError(format!( + "Failed to load rules YAML from {}: {}", + path.display(), + e + ))); + } + } + } + } + Ok(rules) + } + +- /// Loads rules from the given paths. +- /// Each path may be a file or a directory. + pub fn from_paths, I: IntoIterator>( + paths: I, + confidence: Confidence, + ) -> Result { + let mut num_paths = 0; + let mut rules = Rules::new(); + for input in paths { + num_paths += 1; + let input = input.as_ref(); + if input.is_file() { + rules.update(Rules::from_yaml_file(input, confidence)?); + } else if input.is_dir() { + rules.update(Rules::from_directory(input, confidence)?); + } else { + error!("Invalid input type: {} is neither a file nor a directory", input.display()); + bail!(RulesError::InvalidInputError(input.display().to_string())); + } + } + debug!("Loaded {} rules from {} paths", rules.num_rules(), num_paths); + Ok(rules) + } + +- /// Loads rules from a YAML file. + pub fn from_yaml_file>(path: P, confidence: Confidence) -> Result { + let path = path.as_ref(); + let _span = debug_span!("Rules::from_yaml_file", "{}", path.display()).entered(); +- match load_yaml_file::(path) { +- Ok(mut rules) => { +- rules.rules.retain(|rule| rule.confidence.is_at_least(&confidence)); ++ match load_yaml_file::(path) { ++ Ok(rs) => { ++ let mut rules = Rules::new(); ++ for rule_syntax in rs.rules { ++ if !rule_syntax.confidence.is_at_least(&confidence) { ++ continue; ++ } ++ if let Some(Validation::Http(http_val)) = &rule_syntax.validation { ++ if http_val.request.response_matcher.as_ref().map_or(true, |m| m.is_empty()) ++ { ++ bail!(RulesError::MissingResponseMatcher { ++ path: path.display().to_string(), ++ rule_id: rule_syntax.id.clone(), ++ }); ++ } ++ } ++ rules.rules.insert(rule_syntax.id.clone(), rule_syntax); ++ } + debug!("Loaded {} rules from {}", rules.num_rules(), path.display()); + Ok(rules) + } + Err(e) => { + error!("Failed to load rules YAML from {}: {}", path.display(), e); + bail!(RulesError::YamlParseError(format!( + "Failed to load rules YAML from {}: {}", + path.display(), + e + ))) + } + } + } + +- /// Loads rules from multiple YAML files. + pub fn from_yaml_files, I: IntoIterator>( + paths: I, + confidence: Confidence, + ) -> Result { + let mut num_paths = 0; + let mut rules = Rules::new(); + for path in paths { + num_paths += 1; + rules.update(Rules::from_yaml_file(path.as_ref(), confidence)?); + } + debug!("Loaded {} rules from {} YAML files", rules.num_rules(), num_paths); + Ok(rules) + } + +- /// Loads rules from all YAML files in a directory. + pub fn from_directory>(path: P, confidence: Confidence) -> Result { + let path = path.as_ref(); + let _span = debug_span!("Rules::from_directory", "{}", path.display()).entered(); + let yaml_types = + TypesBuilder::new().add_defaults().select("yaml").build().map_err(|e| { + error!("Failed to build YAML types: {}", e); + RulesError::YamlTypesBuildError(e.to_string()) + })?; + let walker = WalkBuilder::new(path) + .types(yaml_types) + .follow_links(true) + .standard_filters(false) + .build(); + let mut yaml_files = Vec::new(); + for entry in walker { + match entry { + Ok(entry) => { + if entry.file_type().map_or(false, |t| !t.is_dir()) { + yaml_files.push(entry.into_path()); + } + } + Err(e) => { + debug!("Failed to read directory entry: {}", e); + } + } + } + yaml_files.sort(); + debug!("Found {} YAML files in {}", yaml_files.len(), path.display()); + Self::from_yaml_files(&yaml_files, confidence) + } + +- /// Returns the number of rules. + #[inline] + pub fn num_rules(&self) -> usize { + self.rules.len() + } + +- /// Returns true if no rules are present. + #[inline] + pub fn is_empty(&self) -> bool { + self.rules.is_empty() + } + +- /// Returns an iterator over the rules. + #[inline] +- pub fn iter_rules(&self) -> std::slice::Iter<'_, RuleSyntax> { +- self.rules.iter() ++ pub fn iter_rules(&self) -> std::collections::btree_map::Values<'_, String, RuleSyntax> { ++ self.rules.values() + } + } + +-impl Default for Rules { +- fn default() -> Self { +- Self::new() ++impl IntoIterator for Rules { ++ type Item = RuleSyntax; ++ type IntoIter = std::collections::btree_map::IntoValues; ++ ++ fn into_iter(self) -> Self::IntoIter { ++ self.rules.into_values() + } + } + +-/// Loads and deserializes a YAML file into a value of type `T`. + pub fn load_yaml_file>(path: P) -> Result { + let path = path.as_ref(); + let file = File::open(path) + .with_context(|| format!("Failed to open YAML file: {}", path.display()))?; + let reader = BufReader::new(file); + let data = serde_yaml::from_reader(reader) + .with_context(|| format!("Failed to parse YAML from file: {}", path.display()))?; + Ok(data) + } diff --git a/src/validation.rs b/src/validation.rs index cff95c0..3e981ea 100644 --- a/src/validation.rs +++ b/src/validation.rs @@ -990,8 +990,7 @@ rules: let rules = Rules::from_paths_and_contents(data, Confidence::Low)?; // Find the PyPI rule we just loaded let pypi_rule_syntax = rules - .rules - .iter() + .iter_rules() .find(|r| r.id == "kingfisher.pypi.1") .expect("Failed to find PyPI rule in test YAML") .clone(); // Clone so we can create a `Rule` from it