diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 5f625a6..cc46915 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -175,13 +175,12 @@ jobs: name: kingfisher-windows-x64 path: dist/kingfisher-*windows-x64*.* - # ──────────────── Draft public release ──────────────── release: name: Public GitHub Release - needs: [linux-x64, linux-arm64, windows, macos-x64, macos-arm64] # wait for all builds to finish + needs: [linux-x64, linux-arm64, windows, macos-x64, macos-arm64] runs-on: ubuntu-latest permissions: - contents: write # allow release upload + contents: write steps: - uses: actions/checkout@v4 - name: Read version from Cargo.toml @@ -193,11 +192,23 @@ jobs: with: path: target/release/kingfisher-* merge-multiple: true + - name: Extract latest changelog section + run: | + awk ' + BEGIN { grabbing = 0 } + /^## \[/ { + if (grabbing) exit; # already grabbed latest entry + grabbing = 1 + } + grabbing { print } + ' CHANGELOG.md > .latest_changelog.md + + # ── create the release using just that snippet ───────────────────── - name: Create release & upload assets uses: ncipollo/release-action@v1 with: tag: v${{ steps.version.outputs.version }} name: "Kingfisher v${{ steps.version.outputs.version }}" - bodyFile: CHANGELOG.md # use existing changelog - generateReleaseNotes: false # turn off auto-notes - artifacts: target/release/** + bodyFile: .latest_changelog.md # ← only the most-recent entry + generateReleaseNotes: false + artifacts: target/release/** \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 342e0c8..689cb46 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,11 @@ All notable changes to this project will be documented in this file. 
+## [1.13.0] +- Added new rules for PlanetScale, Postman, OpenWeather, Opsgenie, PagerDuty, Pastebin, PayPal, Netlify, netrc, New Relic, ngrok, npm, NuGet, Mandrill, Mapbox, Microsoft Teams, Stripe, LinkedIn, Mailchimp, Mailgun, Linear, LINE, Hugging Face, IBM Cloud, Intercom, ipstack, Heroku, Gradle, Grafana +- Added a `--rule-stats` command-line flag that displays rule performance statistics during a scan. Useful when creating or debugging rules. + + ## [1.12.0] - Added automatic update checks using GitHub releases. - New `--self-update` flag installs updates when available diff --git a/Cargo.toml b/Cargo.toml index 4d9d103..2b30bee 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ publish = false [package] name = "kingfisher" -version = "1.12.0" +version = "1.13.0" edition.workspace = true rust-version.workspace = true license.workspace = true diff --git a/README.md b/README.md index 0a24069..67098a3 100644 --- a/README.md +++ b/README.md @@ -122,6 +122,11 @@ cat /path/to/file.py | kingfisher scan - kingfisher scan /path/to/repo --rule kingfisher.aws ``` +### Display rule performance statistics +```bash +kingfisher scan /path/to/repo --rule-stats +``` + --- ## Scanning GitHub @@ -244,6 +249,10 @@ The document below details the four-field formula (rule SHA-1, origin label, st See ([docs/FINGERPRINT.md](docs/FINGERPRINT.md)) +## Rule Performance Profiling +Use `--rule-stats` to collect timing information for every rule. After scanning, the summary prints a **Rule Performance Stats** section showing how many matches each rule produced along with its slowest and average match times. This is useful when creating or debugging rules. 
+ + ## CLI Options ```bash kingfisher scan --help diff --git a/src/matcher.rs b/src/matcher.rs index 9048206..89aed1b 100644 --- a/src/matcher.rs +++ b/src/matcher.rs @@ -25,6 +25,7 @@ use smallvec::SmallVec; use tracing::debug; use xxhash_rust::xxh3::xxh3_64; +use crate::rule_profiling::RuleTimer; use crate::{ blob::{Blob, BlobId, BlobIdMap}, entropy::calculate_shannon_entropy, @@ -386,6 +387,8 @@ impl<'a> Matcher<'a> { origin, None, redact, + &filename, + self.profiler.as_ref(), ); } // If tree-sitter produced base64-decoded matches, try them against all rules @@ -407,6 +410,8 @@ impl<'a> Matcher<'a> { origin, Some(ts_match.clone()), redact, + &filename, + self.profiler.as_ref(), ); } } @@ -456,7 +461,21 @@ fn filter_match<'b>( _origin: &OriginSet, ts_match: Option, redact: bool, + filename: &str, + profiler: Option<&Arc>, ) { + let mut timer = profiler.map(|p| { + RuleTimer::new( + p, + rule.id(), + rule.name(), + &rule.syntax.pattern, + filename, + ) + }); + + let initial_len = matches.len(); + // Use Cow to avoid unnecessary copying when ts_match is None let byte_slice: Cow<[u8]> = match ts_match { Some(ts_match_value) => Cow::Owned(ts_match_value.into_bytes()), @@ -515,6 +534,10 @@ fn filter_match<'b>( }); previous_matches.push((rule_id, matching_input_offset_span)); } + if let Some(t) = timer.take() { + let new_count = (matches.len() - initial_len) as u64; + t.end(new_count > 0, new_count, 0); + } } fn get_language_and_queries(lang: &str) -> Option<(Language, FxHashMap)> { match lang.to_lowercase().as_str() { diff --git a/src/scanner/runner.rs b/src/scanner/runner.rs index 86165d4..112a04e 100644 --- a/src/scanner/runner.rs +++ b/src/scanner/runner.rs @@ -75,7 +75,7 @@ pub async fn run_async_scan( progress_enabled, rules_db, enable_profiling, - shared_profiler, + Arc::clone(&shared_profiler), &matcher_stats, )?; @@ -117,7 +117,15 @@ pub async fn run_async_scan( // // Call cmd_report here crate::reporter::run(global_args, Arc::clone(&datastore), args) 
.context("Failed to run report command")?; - print_scan_summary(start_time, &datastore, global_args, args, rules_db, &matcher_stats); + print_scan_summary( + start_time, + &datastore, + global_args, + args, + rules_db, + &matcher_stats, + if enable_profiling { Some(shared_profiler.as_ref()) } else { None }, + ); Ok(()) } diff --git a/src/scanner/summary.rs b/src/scanner/summary.rs index ff331c4..11a45f9 100644 --- a/src/scanner/summary.rs +++ b/src/scanner/summary.rs @@ -17,6 +17,7 @@ use crate::{ }, findings_store, matcher::MatcherStats, + rule_profiling::ConcurrentRuleProfiler, rules_database::RulesDatabase, }; @@ -42,6 +43,7 @@ pub fn print_scan_summary( // inputs: &FilesystemEnumeratorResult, rules_db: &RulesDatabase, matcher_stats: &Mutex, + profiler: Option<&ConcurrentRuleProfiler>, ) { // let duration = start_time.elapsed(); let ds = datastore.lock().unwrap(); @@ -152,6 +154,47 @@ pub fn print_scan_summary( humantime::format_duration(duration) ); } + + if args.rule_stats { + if let Some(prof) = profiler { + let stats = prof.generate_report(); + if !stats.is_empty() { + // Calculate dynamic column widths + let name_w = stats.iter().map(|s| s.rule_name.len()).max().unwrap_or(4); + let id_w = stats.iter().map(|s| s.rule_id.len()).max().unwrap_or(2); + + // Header + safe_println!("\n{:-^1$}", " Rule Performance Stats ", name_w + id_w + 47); + safe_println!( + "{: 8} {: >15} {: >15}", + "Rule", + "ID", + "Matches", + "Slowest", + "Average", + name_w = name_w, + id_w = id_w + ); + safe_println!("{:-8} {: >15?} {: >15?}", + rs.rule_name, + rs.rule_id, + rs.total_matches, + rs.slowest_match_time, + rs.average_match_time, + name_w = name_w, + id_w = id_w + ); + } + } + } + } + + debug!("\nAll Rules with Matches:"); debug!("======================="); let max_rule_length = sorted_findings.iter().map(|(rule, _)| rule.len()).max().unwrap_or(0);