forked from mirrors/kingfisher
refactored rule loading
This commit is contained in:
parent
9b282cb33f
commit
5b8e83f5e7
7 changed files with 609 additions and 73 deletions
|
|
@ -2,6 +2,9 @@
|
|||
|
||||
All notable changes to this project will be documented in this file.
|
||||
|
||||
## [1.42.0]
|
||||
- Internal refactoring of rule loader and git enumerator
|
||||
|
||||
## [1.41.0]
|
||||
- Added support for scanning GitLab subgroups, with `kingfisher scan --gitlab-group my-group --gitlab-include-subgroups`
|
||||
- Added rule for Vercel
|
||||
|
|
|
|||
|
|
@ -141,14 +141,4 @@ pub struct CommitMetadata {
|
|||
|
||||
#[serde(with = "TextTime")]
|
||||
pub committer_timestamp: Time,
|
||||
// #[serde(with = "BStringLossyUtf8")]
|
||||
// pub author_name: BString,
|
||||
|
||||
// #[serde(with = "BStringLossyUtf8")]
|
||||
// pub author_email: BString,
|
||||
|
||||
// #[serde(with = "TextTime")]
|
||||
// pub author_timestamp: Time,
|
||||
// #[serde(with = "BStringLossyUtf8")]
|
||||
// pub message: BString,
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
use std::{
|
||||
collections::{HashMap, HashSet},
|
||||
collections::{BTreeMap, HashSet},
|
||||
path::{Path, PathBuf},
|
||||
};
|
||||
|
||||
|
|
@ -31,8 +31,9 @@ pub enum RuleLoaderError {
|
|||
pub struct RuleLoader {
|
||||
load_builtins: bool,
|
||||
additional_load_paths: Vec<PathBuf>,
|
||||
enabled_rule_ids: Option<HashSet<String>>,
|
||||
enabled_rule_ids: Option<Vec<String>>,
|
||||
}
|
||||
|
||||
impl RuleLoader {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
|
|
@ -57,31 +58,36 @@ impl RuleLoader {
|
|||
|
||||
pub fn enable_rule_ids<S: AsRef<str>, I: IntoIterator<Item = S>>(mut self, ids: I) -> Self {
|
||||
let ids: Vec<String> = ids.into_iter().map(|s| s.as_ref().to_string()).collect();
|
||||
if ids.contains(&"all".to_string()) {
|
||||
if ids.iter().any(|id| id == "all") {
|
||||
self.enabled_rule_ids = None; // Reset to "all rules enabled"
|
||||
} else {
|
||||
self.enabled_rule_ids = Some(ids.into_iter().collect());
|
||||
self.enabled_rule_ids = Some(ids);
|
||||
}
|
||||
self
|
||||
}
|
||||
|
||||
pub fn load(&self, args: &cli::commands::scan::ScanArgs) -> Result<LoadedRules> {
|
||||
let confidence = Confidence::from(args.confidence);
|
||||
let mut rules = Rules::new();
|
||||
let mut id_to_rule: BTreeMap<String, Rule> = BTreeMap::new();
|
||||
|
||||
if self.load_builtins {
|
||||
let builtin_rules =
|
||||
get_builtin_rules(Some(confidence)).context(RuleLoaderError::BuiltinLoadError)?;
|
||||
rules.update(builtin_rules);
|
||||
for rule_syntax in builtin_rules {
|
||||
let id = rule_syntax.id.clone();
|
||||
id_to_rule.insert(id, Rule::new(rule_syntax));
|
||||
}
|
||||
}
|
||||
|
||||
if !self.additional_load_paths.is_empty() {
|
||||
let custom = Rules::from_paths(&self.additional_load_paths, confidence)
|
||||
let custom_rules = Rules::from_paths(&self.additional_load_paths, confidence)
|
||||
.context(RuleLoaderError::AdditionalPathLoadError)?;
|
||||
rules.update(custom);
|
||||
for rule_syntax in custom_rules {
|
||||
let id = rule_syntax.id.clone();
|
||||
id_to_rule.insert(id, Rule::new(rule_syntax));
|
||||
}
|
||||
}
|
||||
let mut rules = rules.rules;
|
||||
rules.sort_by(|r1, r2| r1.id.cmp(&r2.id));
|
||||
let id_to_rule: HashMap<String, Rule> =
|
||||
rules.into_iter().map(|r| (r.id.clone(), Rule::new(r))).collect();
|
||||
|
||||
Ok(LoadedRules { id_to_rule, enabled_rule_ids: self.enabled_rule_ids.clone() })
|
||||
}
|
||||
|
||||
|
|
@ -92,10 +98,12 @@ impl RuleLoader {
|
|||
.enable_rule_ids(specs.rule.iter())
|
||||
}
|
||||
}
|
||||
|
||||
pub struct LoadedRules {
|
||||
id_to_rule: HashMap<String, Rule>,
|
||||
enabled_rule_ids: Option<HashSet<String>>,
|
||||
id_to_rule: BTreeMap<String, Rule>,
|
||||
enabled_rule_ids: Option<Vec<String>>,
|
||||
}
|
||||
|
||||
impl LoadedRules {
|
||||
#[inline]
|
||||
pub fn num_rules(&self) -> usize {
|
||||
|
|
@ -118,6 +126,7 @@ impl LoadedRules {
|
|||
// At least one selector was given
|
||||
Some(selectors) => {
|
||||
let mut resolved = Vec::new();
|
||||
let mut seen = HashSet::new();
|
||||
|
||||
// For each selector, collect rules that match it
|
||||
for selector in selectors {
|
||||
|
|
@ -129,8 +138,10 @@ impl LoadedRules {
|
|||
|| (id.starts_with(selector)
|
||||
&& id.as_bytes().get(selector.len()) == Some(&b'.'))
|
||||
{
|
||||
resolved.push(rule);
|
||||
matched_any = true;
|
||||
if seen.insert(id.clone()) {
|
||||
resolved.push(rule);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -145,18 +156,10 @@ impl LoadedRules {
|
|||
}
|
||||
};
|
||||
|
||||
// Deduplicate & sort for deterministic order
|
||||
let mut resolved_rules = resolved_rules;
|
||||
sort_and_deduplicate_rules(&mut resolved_rules);
|
||||
|
||||
info!("Loaded {}", Counted::regular(resolved_rules.len(), "rule"),);
|
||||
info!("Loaded {}", Counted::regular(resolved_rules.len(), "rule"));
|
||||
for rule in &resolved_rules {
|
||||
trace!("Using rule `{}`: {}", rule.id(), rule.name());
|
||||
}
|
||||
Ok(resolved_rules)
|
||||
}
|
||||
}
|
||||
fn sort_and_deduplicate_rules(rules: &mut Vec<&Rule>) {
|
||||
rules.sort_by(|r1, r2| r1.id().cmp(r2.id()));
|
||||
rules.dedup_by(|r1, r2| r1.id() == r2.id());
|
||||
}
|
||||
|
|
|
|||
83
src/rules.rs
83
src/rules.rs
|
|
@ -1,17 +1,15 @@
|
|||
use anyhow::{bail, Result};
|
||||
use anyhow::{bail, Context, Result};
|
||||
use ignore::{types::TypesBuilder, WalkBuilder};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde::Deserialize;
|
||||
use thiserror::Error;
|
||||
use tracing::{debug, debug_span, error};
|
||||
|
||||
pub mod rule;
|
||||
use std::{fs::File, io::BufReader, path::Path};
|
||||
use std::{collections::BTreeMap, fs::File, io::BufReader, path::Path};
|
||||
|
||||
use anyhow::Context;
|
||||
use rule::{Confidence, RuleSyntax, Validation};
|
||||
use serde::de::DeserializeOwned;
|
||||
|
||||
/// Custom error type for more granular rules loading errors.
|
||||
#[derive(Debug, Error)]
|
||||
pub enum RulesError {
|
||||
#[error("Failed to parse YAML file at path: {0}")]
|
||||
|
|
@ -33,35 +31,37 @@ pub enum RulesError {
|
|||
MissingResponseMatcher { path: String, rule_id: String },
|
||||
}
|
||||
|
||||
/// Represents a collection of rule syntaxes.
|
||||
#[derive(Serialize, Deserialize, Clone)]
|
||||
#[derive(Clone, Default)]
|
||||
pub struct Rules {
|
||||
pub rules: Vec<RuleSyntax>,
|
||||
pub rules: BTreeMap<String, RuleSyntax>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct RawRules {
|
||||
rules: Vec<RuleSyntax>,
|
||||
}
|
||||
|
||||
impl Rules {
|
||||
/// Creates a new empty set of rules.
|
||||
pub fn new() -> Self {
|
||||
Self { rules: Vec::new() }
|
||||
Self { rules: BTreeMap::new() }
|
||||
}
|
||||
|
||||
/// Updates the current set with the rules from another set.
|
||||
pub fn update(&mut self, other: Rules) {
|
||||
self.rules.extend(other.rules);
|
||||
}
|
||||
|
||||
/// Loads rules from an iterator over (path, contents) pairs.
|
||||
/// Only rules with a confidence level at least as high as `confidence` are retained.
|
||||
pub fn from_paths_and_contents<'a, I: IntoIterator<Item = (&'a Path, &'a [u8])>>(
|
||||
iterable: I,
|
||||
confidence: Confidence,
|
||||
) -> Result<Self> {
|
||||
let mut rules = Self::new();
|
||||
for (path, contents) in iterable.into_iter() {
|
||||
match serde_yaml::from_reader::<_, Rules>(contents) {
|
||||
Ok(mut rs) => {
|
||||
rs.rules.retain(|rule| rule.confidence.is_at_least(&confidence));
|
||||
for rule_syntax in &rs.rules {
|
||||
for (path, contents) in iterable {
|
||||
match serde_yaml::from_slice::<RawRules>(contents) {
|
||||
Ok(rs) => {
|
||||
for rule_syntax in rs.rules {
|
||||
if !rule_syntax.confidence.is_at_least(&confidence) {
|
||||
continue;
|
||||
}
|
||||
if let Some(Validation::Http(http_val)) = &rule_syntax.validation {
|
||||
if http_val
|
||||
.request
|
||||
|
|
@ -75,8 +75,8 @@ impl Rules {
|
|||
});
|
||||
}
|
||||
}
|
||||
rules.rules.insert(rule_syntax.id.clone(), rule_syntax);
|
||||
}
|
||||
rules.update(rs);
|
||||
}
|
||||
Err(e) => {
|
||||
if let Some(location) = e.location() {
|
||||
|
|
@ -90,7 +90,7 @@ impl Rules {
|
|||
bail!(RulesError::InvalidResponseMatcherVariant(
|
||||
path.display().to_string(),
|
||||
location.line(),
|
||||
location.column()
|
||||
location.column(),
|
||||
));
|
||||
} else {
|
||||
error!("Failed to parse rules YAML from {}: {}", path.display(), e);
|
||||
|
|
@ -106,8 +106,6 @@ impl Rules {
|
|||
Ok(rules)
|
||||
}
|
||||
|
||||
/// Loads rules from the given paths.
|
||||
/// Each path may be a file or a directory.
|
||||
pub fn from_paths<P: AsRef<Path>, I: IntoIterator<Item = P>>(
|
||||
paths: I,
|
||||
confidence: Confidence,
|
||||
|
|
@ -130,13 +128,27 @@ impl Rules {
|
|||
Ok(rules)
|
||||
}
|
||||
|
||||
/// Loads rules from a YAML file.
|
||||
pub fn from_yaml_file<P: AsRef<Path>>(path: P, confidence: Confidence) -> Result<Self> {
|
||||
let path = path.as_ref();
|
||||
let _span = debug_span!("Rules::from_yaml_file", "{}", path.display()).entered();
|
||||
match load_yaml_file::<Rules, _>(path) {
|
||||
Ok(mut rules) => {
|
||||
rules.rules.retain(|rule| rule.confidence.is_at_least(&confidence));
|
||||
match load_yaml_file::<RawRules, _>(path) {
|
||||
Ok(rs) => {
|
||||
let mut rules = Rules::new();
|
||||
for rule_syntax in rs.rules {
|
||||
if !rule_syntax.confidence.is_at_least(&confidence) {
|
||||
continue;
|
||||
}
|
||||
if let Some(Validation::Http(http_val)) = &rule_syntax.validation {
|
||||
if http_val.request.response_matcher.as_ref().map_or(true, |m| m.is_empty())
|
||||
{
|
||||
bail!(RulesError::MissingResponseMatcher {
|
||||
path: path.display().to_string(),
|
||||
rule_id: rule_syntax.id.clone(),
|
||||
});
|
||||
}
|
||||
}
|
||||
rules.rules.insert(rule_syntax.id.clone(), rule_syntax);
|
||||
}
|
||||
debug!("Loaded {} rules from {}", rules.num_rules(), path.display());
|
||||
Ok(rules)
|
||||
}
|
||||
|
|
@ -151,7 +163,6 @@ impl Rules {
|
|||
}
|
||||
}
|
||||
|
||||
/// Loads rules from multiple YAML files.
|
||||
pub fn from_yaml_files<P: AsRef<Path>, I: IntoIterator<Item = P>>(
|
||||
paths: I,
|
||||
confidence: Confidence,
|
||||
|
|
@ -166,7 +177,6 @@ impl Rules {
|
|||
Ok(rules)
|
||||
}
|
||||
|
||||
/// Loads rules from all YAML files in a directory.
|
||||
pub fn from_directory<P: AsRef<Path>>(path: P, confidence: Confidence) -> Result<Self> {
|
||||
let path = path.as_ref();
|
||||
let _span = debug_span!("Rules::from_directory", "{}", path.display()).entered();
|
||||
|
|
@ -198,32 +208,31 @@ impl Rules {
|
|||
Self::from_yaml_files(&yaml_files, confidence)
|
||||
}
|
||||
|
||||
/// Returns the number of rules.
|
||||
#[inline]
|
||||
pub fn num_rules(&self) -> usize {
|
||||
self.rules.len()
|
||||
}
|
||||
|
||||
/// Returns true if no rules are present.
|
||||
#[inline]
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.rules.is_empty()
|
||||
}
|
||||
|
||||
/// Returns an iterator over the rules.
|
||||
#[inline]
|
||||
pub fn iter_rules(&self) -> std::slice::Iter<'_, RuleSyntax> {
|
||||
self.rules.iter()
|
||||
pub fn iter_rules(&self) -> std::collections::btree_map::Values<'_, String, RuleSyntax> {
|
||||
self.rules.values()
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for Rules {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
impl IntoIterator for Rules {
|
||||
type Item = RuleSyntax;
|
||||
type IntoIter = std::collections::btree_map::IntoValues<String, RuleSyntax>;
|
||||
|
||||
fn into_iter(self) -> Self::IntoIter {
|
||||
self.rules.into_values()
|
||||
}
|
||||
}
|
||||
|
||||
/// Loads and deserializes a YAML file into a value of type `T`.
|
||||
pub fn load_yaml_file<T: DeserializeOwned, P: AsRef<Path>>(path: P) -> Result<T> {
|
||||
let path = path.as_ref();
|
||||
let file = File::open(path)
|
||||
|
|
|
|||
250
src/rules.rs.orig
Normal file
250
src/rules.rs.orig
Normal file
|
|
@ -0,0 +1,250 @@
|
|||
use anyhow::{bail, Context, Result};
|
||||
use ignore::{types::TypesBuilder, WalkBuilder};
|
||||
use serde::Deserialize;
|
||||
use thiserror::Error;
|
||||
use tracing::{debug, debug_span, error};
|
||||
|
||||
pub mod rule;
|
||||
use std::{collections::BTreeMap, fs::File, io::BufReader, path::Path};
|
||||
|
||||
use rule::{Confidence, RuleSyntax, Validation};
|
||||
use serde::de::DeserializeOwned;
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
pub enum RulesError {
|
||||
#[error("Failed to parse YAML file at path: {0}")]
|
||||
YamlParseError(String),
|
||||
|
||||
#[error("Invalid input: {0} is neither a file nor a directory")]
|
||||
InvalidInputError(String),
|
||||
|
||||
#[error("File system error: {0}")]
|
||||
FileSystemError(#[from] std::io::Error),
|
||||
|
||||
#[error("Error building YAML types: {0}")]
|
||||
YamlTypesBuildError(String),
|
||||
|
||||
#[error("Invalid ResponseMatcher variant in file: {0}, at line: {1}, column: {2}")]
|
||||
InvalidResponseMatcherVariant(String, usize, usize),
|
||||
|
||||
#[error("HTTP validation for rule `{rule_id}` in file {path} missing response_matcher")]
|
||||
MissingResponseMatcher { path: String, rule_id: String },
|
||||
}
|
||||
|
||||
#[derive(Clone, Default)]
|
||||
pub struct Rules {
|
||||
pub rules: BTreeMap<String, RuleSyntax>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct RawRules {
|
||||
rules: Vec<RuleSyntax>,
|
||||
}
|
||||
|
||||
impl Rules {
|
||||
pub fn new() -> Self {
|
||||
Self { rules: BTreeMap::new() }
|
||||
}
|
||||
|
||||
pub fn update(&mut self, other: Rules) {
|
||||
self.rules.extend(other.rules);
|
||||
}
|
||||
|
||||
pub fn from_paths_and_contents<'a, I: IntoIterator<Item = (&'a Path, &'a [u8])>>(
|
||||
iterable: I,
|
||||
confidence: Confidence,
|
||||
) -> Result<Self> {
|
||||
let mut rules = Self::new();
|
||||
for (path, contents) in iterable {
|
||||
match serde_yaml::from_slice::<RawRules>(contents) {
|
||||
Ok(rs) => {
|
||||
for rule_syntax in rs.rules {
|
||||
if !rule_syntax.confidence.is_at_least(&confidence) {
|
||||
continue;
|
||||
}
|
||||
if let Some(Validation::Http(http_val)) = &rule_syntax.validation {
|
||||
if http_val
|
||||
.request
|
||||
.response_matcher
|
||||
.as_ref()
|
||||
.map_or(true, |m| m.is_empty())
|
||||
{
|
||||
bail!(RulesError::MissingResponseMatcher {
|
||||
path: path.display().to_string(),
|
||||
rule_id: rule_syntax.id.clone(),
|
||||
});
|
||||
}
|
||||
}
|
||||
rules.rules.insert(rule_syntax.id.clone(), rule_syntax);
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
if let Some(location) = e.location() {
|
||||
error!(
|
||||
"Failed to parse rules YAML from {}: {}, at line: {}, column: {}",
|
||||
path.display(),
|
||||
e,
|
||||
location.line(),
|
||||
location.column()
|
||||
);
|
||||
bail!(RulesError::InvalidResponseMatcherVariant(
|
||||
path.display().to_string(),
|
||||
location.line(),
|
||||
location.column(),
|
||||
));
|
||||
} else {
|
||||
error!("Failed to parse rules YAML from {}: {}", path.display(), e);
|
||||
bail!(RulesError::YamlParseError(format!(
|
||||
"Failed to load rules YAML from {}: {}",
|
||||
path.display(),
|
||||
e
|
||||
)));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(rules)
|
||||
}
|
||||
|
||||
pub fn from_paths<P: AsRef<Path>, I: IntoIterator<Item = P>>(
|
||||
paths: I,
|
||||
confidence: Confidence,
|
||||
) -> Result<Self> {
|
||||
let mut num_paths = 0;
|
||||
let mut rules = Rules::new();
|
||||
for input in paths {
|
||||
num_paths += 1;
|
||||
let input = input.as_ref();
|
||||
if input.is_file() {
|
||||
rules.update(Rules::from_yaml_file(input, confidence)?);
|
||||
} else if input.is_dir() {
|
||||
rules.update(Rules::from_directory(input, confidence)?);
|
||||
} else {
|
||||
error!("Invalid input type: {} is neither a file nor a directory", input.display());
|
||||
bail!(RulesError::InvalidInputError(input.display().to_string()));
|
||||
}
|
||||
}
|
||||
debug!("Loaded {} rules from {} paths", rules.num_rules(), num_paths);
|
||||
Ok(rules)
|
||||
}
|
||||
|
||||
pub fn from_yaml_file<P: AsRef<Path>>(path: P, confidence: Confidence) -> Result<Self> {
|
||||
let path = path.as_ref();
|
||||
let _span = debug_span!("Rules::from_yaml_file", "{}", path.display()).entered();
|
||||
match load_yaml_file::<RawRules, _>(path) {
|
||||
Ok(rs) => {
|
||||
let mut rules = Rules::new();
|
||||
for rule_syntax in rs.rules {
|
||||
if !rule_syntax.confidence.is_at_least(&confidence) {
|
||||
continue;
|
||||
}
|
||||
if let Some(Validation::Http(http_val)) = &rule_syntax.validation {
|
||||
if http_val.request.response_matcher.as_ref().map_or(true, |m| m.is_empty())
|
||||
{
|
||||
bail!(RulesError::MissingResponseMatcher {
|
||||
path: path.display().to_string(),
|
||||
rule_id: rule_syntax.id.clone(),
|
||||
});
|
||||
}
|
||||
}
|
||||
rules.rules.insert(rule_syntax.id.clone(), rule_syntax);
|
||||
}
|
||||
debug!("Loaded {} rules from {}", rules.num_rules(), path.display());
|
||||
Ok(rules)
|
||||
}
|
||||
Err(e) => {
|
||||
error!("Failed to load rules YAML from {}: {}", path.display(), e);
|
||||
bail!(RulesError::YamlParseError(format!(
|
||||
"Failed to load rules YAML from {}: {}",
|
||||
path.display(),
|
||||
e
|
||||
)))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_yaml_files<P: AsRef<Path>, I: IntoIterator<Item = P>>(
|
||||
paths: I,
|
||||
confidence: Confidence,
|
||||
) -> Result<Self> {
|
||||
let mut num_paths = 0;
|
||||
let mut rules = Rules::new();
|
||||
for path in paths {
|
||||
num_paths += 1;
|
||||
rules.update(Rules::from_yaml_file(path.as_ref(), confidence)?);
|
||||
}
|
||||
debug!("Loaded {} rules from {} YAML files", rules.num_rules(), num_paths);
|
||||
Ok(rules)
|
||||
}
|
||||
|
||||
pub fn from_directory<P: AsRef<Path>>(path: P, confidence: Confidence) -> Result<Self> {
|
||||
let path = path.as_ref();
|
||||
let _span = debug_span!("Rules::from_directory", "{}", path.display()).entered();
|
||||
let yaml_types =
|
||||
TypesBuilder::new().add_defaults().select("yaml").build().map_err(|e| {
|
||||
error!("Failed to build YAML types: {}", e);
|
||||
RulesError::YamlTypesBuildError(e.to_string())
|
||||
})?;
|
||||
let walker = WalkBuilder::new(path)
|
||||
.types(yaml_types)
|
||||
.follow_links(true)
|
||||
.standard_filters(false)
|
||||
.build();
|
||||
let mut yaml_files = Vec::new();
|
||||
for entry in walker {
|
||||
match entry {
|
||||
Ok(entry) => {
|
||||
if entry.file_type().map_or(false, |t| !t.is_dir()) {
|
||||
yaml_files.push(entry.into_path());
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
debug!("Failed to read directory entry: {}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
yaml_files.sort();
|
||||
debug!("Found {} YAML files in {}", yaml_files.len(), path.display());
|
||||
Self::from_yaml_files(&yaml_files, confidence)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn num_rules(&self) -> usize {
|
||||
self.rules.len()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.rules.is_empty()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn iter_rules(&self) -> std::collections::btree_map::Values<'_, String, RuleSyntax> {
|
||||
self.rules.values()
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for Rules {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl IntoIterator for Rules {
|
||||
type Item = RuleSyntax;
|
||||
type IntoIter = std::collections::btree_map::IntoValues<String, RuleSyntax>;
|
||||
|
||||
fn into_iter(self) -> Self::IntoIter {
|
||||
self.rules.into_values()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn load_yaml_file<T: DeserializeOwned, P: AsRef<Path>>(path: P) -> Result<T> {
|
||||
let path = path.as_ref();
|
||||
let file = File::open(path)
|
||||
.with_context(|| format!("Failed to open YAML file: {}", path.display()))?;
|
||||
let reader = BufReader::new(file);
|
||||
let data = serde_yaml::from_reader(reader)
|
||||
.with_context(|| format!("Failed to parse YAML from file: {}", path.display()))?;
|
||||
Ok(data)
|
||||
}
|
||||
282
src/rules.rs.rej
Normal file
282
src/rules.rs.rej
Normal file
|
|
@ -0,0 +1,282 @@
|
|||
@@ -1,235 +1,244 @@
|
||||
-use anyhow::{bail, Result};
|
||||
+use anyhow::{bail, Context, Result};
|
||||
use ignore::{types::TypesBuilder, WalkBuilder};
|
||||
-use serde::{Deserialize, Serialize};
|
||||
+use serde::Deserialize;
|
||||
use thiserror::Error;
|
||||
use tracing::{debug, debug_span, error};
|
||||
|
||||
pub mod rule;
|
||||
-use std::{fs::File, io::BufReader, path::Path};
|
||||
+use std::{collections::BTreeMap, fs::File, io::BufReader, path::Path};
|
||||
|
||||
-use anyhow::Context;
|
||||
use rule::{Confidence, RuleSyntax, Validation};
|
||||
use serde::de::DeserializeOwned;
|
||||
|
||||
-/// Custom error type for more granular rules loading errors.
|
||||
#[derive(Debug, Error)]
|
||||
pub enum RulesError {
|
||||
#[error("Failed to parse YAML file at path: {0}")]
|
||||
YamlParseError(String),
|
||||
|
||||
#[error("Invalid input: {0} is neither a file nor a directory")]
|
||||
InvalidInputError(String),
|
||||
|
||||
#[error("File system error: {0}")]
|
||||
FileSystemError(#[from] std::io::Error),
|
||||
|
||||
#[error("Error building YAML types: {0}")]
|
||||
YamlTypesBuildError(String),
|
||||
|
||||
#[error("Invalid ResponseMatcher variant in file: {0}, at line: {1}, column: {2}")]
|
||||
InvalidResponseMatcherVariant(String, usize, usize),
|
||||
|
||||
#[error("HTTP validation for rule `{rule_id}` in file {path} missing response_matcher")]
|
||||
MissingResponseMatcher { path: String, rule_id: String },
|
||||
}
|
||||
|
||||
-/// Represents a collection of rule syntaxes.
|
||||
-#[derive(Serialize, Deserialize, Clone)]
|
||||
+#[derive(Clone, Default)]
|
||||
pub struct Rules {
|
||||
- pub rules: Vec<RuleSyntax>,
|
||||
+ pub rules: BTreeMap<String, RuleSyntax>,
|
||||
+}
|
||||
+
|
||||
+#[derive(Deserialize)]
|
||||
+struct RawRules {
|
||||
+ rules: Vec<RuleSyntax>,
|
||||
}
|
||||
|
||||
impl Rules {
|
||||
- /// Creates a new empty set of rules.
|
||||
pub fn new() -> Self {
|
||||
- Self { rules: Vec::new() }
|
||||
+ Self { rules: BTreeMap::new() }
|
||||
}
|
||||
|
||||
- /// Updates the current set with the rules from another set.
|
||||
pub fn update(&mut self, other: Rules) {
|
||||
self.rules.extend(other.rules);
|
||||
}
|
||||
|
||||
- /// Loads rules from an iterator over (path, contents) pairs.
|
||||
- /// Only rules with a confidence level at least as high as `confidence` are retained.
|
||||
pub fn from_paths_and_contents<'a, I: IntoIterator<Item = (&'a Path, &'a [u8])>>(
|
||||
iterable: I,
|
||||
confidence: Confidence,
|
||||
) -> Result<Self> {
|
||||
let mut rules = Self::new();
|
||||
- for (path, contents) in iterable.into_iter() {
|
||||
- match serde_yaml::from_reader::<_, Rules>(contents) {
|
||||
- Ok(mut rs) => {
|
||||
- rs.rules.retain(|rule| rule.confidence.is_at_least(&confidence));
|
||||
- for rule_syntax in &rs.rules {
|
||||
+ for (path, contents) in iterable {
|
||||
+ match serde_yaml::from_slice::<RawRules>(contents) {
|
||||
+ Ok(rs) => {
|
||||
+ for rule_syntax in rs.rules {
|
||||
+ if !rule_syntax.confidence.is_at_least(&confidence) {
|
||||
+ continue;
|
||||
+ }
|
||||
if let Some(Validation::Http(http_val)) = &rule_syntax.validation {
|
||||
if http_val
|
||||
.request
|
||||
.response_matcher
|
||||
.as_ref()
|
||||
.map_or(true, |m| m.is_empty())
|
||||
{
|
||||
bail!(RulesError::MissingResponseMatcher {
|
||||
path: path.display().to_string(),
|
||||
rule_id: rule_syntax.id.clone(),
|
||||
});
|
||||
}
|
||||
}
|
||||
+ rules.rules.insert(rule_syntax.id.clone(), rule_syntax);
|
||||
}
|
||||
- rules.update(rs);
|
||||
}
|
||||
Err(e) => {
|
||||
if let Some(location) = e.location() {
|
||||
error!(
|
||||
"Failed to parse rules YAML from {}: {}, at line: {}, column: {}",
|
||||
path.display(),
|
||||
e,
|
||||
location.line(),
|
||||
location.column()
|
||||
);
|
||||
bail!(RulesError::InvalidResponseMatcherVariant(
|
||||
path.display().to_string(),
|
||||
location.line(),
|
||||
- location.column()
|
||||
+ location.column(),
|
||||
));
|
||||
} else {
|
||||
error!("Failed to parse rules YAML from {}: {}", path.display(), e);
|
||||
bail!(RulesError::YamlParseError(format!(
|
||||
"Failed to load rules YAML from {}: {}",
|
||||
path.display(),
|
||||
e
|
||||
)));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(rules)
|
||||
}
|
||||
|
||||
- /// Loads rules from the given paths.
|
||||
- /// Each path may be a file or a directory.
|
||||
pub fn from_paths<P: AsRef<Path>, I: IntoIterator<Item = P>>(
|
||||
paths: I,
|
||||
confidence: Confidence,
|
||||
) -> Result<Self> {
|
||||
let mut num_paths = 0;
|
||||
let mut rules = Rules::new();
|
||||
for input in paths {
|
||||
num_paths += 1;
|
||||
let input = input.as_ref();
|
||||
if input.is_file() {
|
||||
rules.update(Rules::from_yaml_file(input, confidence)?);
|
||||
} else if input.is_dir() {
|
||||
rules.update(Rules::from_directory(input, confidence)?);
|
||||
} else {
|
||||
error!("Invalid input type: {} is neither a file nor a directory", input.display());
|
||||
bail!(RulesError::InvalidInputError(input.display().to_string()));
|
||||
}
|
||||
}
|
||||
debug!("Loaded {} rules from {} paths", rules.num_rules(), num_paths);
|
||||
Ok(rules)
|
||||
}
|
||||
|
||||
- /// Loads rules from a YAML file.
|
||||
pub fn from_yaml_file<P: AsRef<Path>>(path: P, confidence: Confidence) -> Result<Self> {
|
||||
let path = path.as_ref();
|
||||
let _span = debug_span!("Rules::from_yaml_file", "{}", path.display()).entered();
|
||||
- match load_yaml_file::<Rules, _>(path) {
|
||||
- Ok(mut rules) => {
|
||||
- rules.rules.retain(|rule| rule.confidence.is_at_least(&confidence));
|
||||
+ match load_yaml_file::<RawRules, _>(path) {
|
||||
+ Ok(rs) => {
|
||||
+ let mut rules = Rules::new();
|
||||
+ for rule_syntax in rs.rules {
|
||||
+ if !rule_syntax.confidence.is_at_least(&confidence) {
|
||||
+ continue;
|
||||
+ }
|
||||
+ if let Some(Validation::Http(http_val)) = &rule_syntax.validation {
|
||||
+ if http_val.request.response_matcher.as_ref().map_or(true, |m| m.is_empty())
|
||||
+ {
|
||||
+ bail!(RulesError::MissingResponseMatcher {
|
||||
+ path: path.display().to_string(),
|
||||
+ rule_id: rule_syntax.id.clone(),
|
||||
+ });
|
||||
+ }
|
||||
+ }
|
||||
+ rules.rules.insert(rule_syntax.id.clone(), rule_syntax);
|
||||
+ }
|
||||
debug!("Loaded {} rules from {}", rules.num_rules(), path.display());
|
||||
Ok(rules)
|
||||
}
|
||||
Err(e) => {
|
||||
error!("Failed to load rules YAML from {}: {}", path.display(), e);
|
||||
bail!(RulesError::YamlParseError(format!(
|
||||
"Failed to load rules YAML from {}: {}",
|
||||
path.display(),
|
||||
e
|
||||
)))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
- /// Loads rules from multiple YAML files.
|
||||
pub fn from_yaml_files<P: AsRef<Path>, I: IntoIterator<Item = P>>(
|
||||
paths: I,
|
||||
confidence: Confidence,
|
||||
) -> Result<Self> {
|
||||
let mut num_paths = 0;
|
||||
let mut rules = Rules::new();
|
||||
for path in paths {
|
||||
num_paths += 1;
|
||||
rules.update(Rules::from_yaml_file(path.as_ref(), confidence)?);
|
||||
}
|
||||
debug!("Loaded {} rules from {} YAML files", rules.num_rules(), num_paths);
|
||||
Ok(rules)
|
||||
}
|
||||
|
||||
- /// Loads rules from all YAML files in a directory.
|
||||
pub fn from_directory<P: AsRef<Path>>(path: P, confidence: Confidence) -> Result<Self> {
|
||||
let path = path.as_ref();
|
||||
let _span = debug_span!("Rules::from_directory", "{}", path.display()).entered();
|
||||
let yaml_types =
|
||||
TypesBuilder::new().add_defaults().select("yaml").build().map_err(|e| {
|
||||
error!("Failed to build YAML types: {}", e);
|
||||
RulesError::YamlTypesBuildError(e.to_string())
|
||||
})?;
|
||||
let walker = WalkBuilder::new(path)
|
||||
.types(yaml_types)
|
||||
.follow_links(true)
|
||||
.standard_filters(false)
|
||||
.build();
|
||||
let mut yaml_files = Vec::new();
|
||||
for entry in walker {
|
||||
match entry {
|
||||
Ok(entry) => {
|
||||
if entry.file_type().map_or(false, |t| !t.is_dir()) {
|
||||
yaml_files.push(entry.into_path());
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
debug!("Failed to read directory entry: {}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
yaml_files.sort();
|
||||
debug!("Found {} YAML files in {}", yaml_files.len(), path.display());
|
||||
Self::from_yaml_files(&yaml_files, confidence)
|
||||
}
|
||||
|
||||
- /// Returns the number of rules.
|
||||
#[inline]
|
||||
pub fn num_rules(&self) -> usize {
|
||||
self.rules.len()
|
||||
}
|
||||
|
||||
- /// Returns true if no rules are present.
|
||||
#[inline]
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.rules.is_empty()
|
||||
}
|
||||
|
||||
- /// Returns an iterator over the rules.
|
||||
#[inline]
|
||||
- pub fn iter_rules(&self) -> std::slice::Iter<'_, RuleSyntax> {
|
||||
- self.rules.iter()
|
||||
+ pub fn iter_rules(&self) -> std::collections::btree_map::Values<'_, String, RuleSyntax> {
|
||||
+ self.rules.values()
|
||||
}
|
||||
}
|
||||
|
||||
-impl Default for Rules {
|
||||
- fn default() -> Self {
|
||||
- Self::new()
|
||||
+impl IntoIterator for Rules {
|
||||
+ type Item = RuleSyntax;
|
||||
+ type IntoIter = std::collections::btree_map::IntoValues<String, RuleSyntax>;
|
||||
+
|
||||
+ fn into_iter(self) -> Self::IntoIter {
|
||||
+ self.rules.into_values()
|
||||
}
|
||||
}
|
||||
|
||||
-/// Loads and deserializes a YAML file into a value of type `T`.
|
||||
pub fn load_yaml_file<T: DeserializeOwned, P: AsRef<Path>>(path: P) -> Result<T> {
|
||||
let path = path.as_ref();
|
||||
let file = File::open(path)
|
||||
.with_context(|| format!("Failed to open YAML file: {}", path.display()))?;
|
||||
let reader = BufReader::new(file);
|
||||
let data = serde_yaml::from_reader(reader)
|
||||
.with_context(|| format!("Failed to parse YAML from file: {}", path.display()))?;
|
||||
Ok(data)
|
||||
}
|
||||
|
|
@ -990,8 +990,7 @@ rules:
|
|||
let rules = Rules::from_paths_and_contents(data, Confidence::Low)?;
|
||||
// Find the PyPI rule we just loaded
|
||||
let pypi_rule_syntax = rules
|
||||
.rules
|
||||
.iter()
|
||||
.iter_rules()
|
||||
.find(|r| r.id == "kingfisher.pypi.1")
|
||||
.expect("Failed to find PyPI rule in test YAML")
|
||||
.clone(); // Clone so we can create a `Rule` from it
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue