Updated GitHub Actions to extract only the latest changelog entry. Added --rule-stats flag to display rule performance statistics.

This commit is contained in:
Mick Grove 2025-06-25 17:23:35 -07:00
commit 9b5c220182
7 changed files with 108 additions and 9 deletions

View file

@ -175,13 +175,12 @@ jobs:
name: kingfisher-windows-x64
path: dist/kingfisher-*windows-x64*.*
# ──────────────── Draft public release ────────────────
release:
name: Public GitHub Release
needs: [linux-x64, linux-arm64, windows, macos-x64, macos-arm64] # wait for all builds to finish
needs: [linux-x64, linux-arm64, windows, macos-x64, macos-arm64]
runs-on: ubuntu-latest
permissions:
contents: write # allow release upload
contents: write
steps:
- uses: actions/checkout@v4
- name: Read version from Cargo.toml
@ -193,11 +192,23 @@ jobs:
with:
path: target/release/kingfisher-*
merge-multiple: true
- name: Extract latest changelog section
run: |
awk '
BEGIN { grabbing = 0 }
/^## \[/ {
if (grabbing) exit; # already grabbed latest entry
grabbing = 1
}
grabbing { print }
' CHANGELOG.md > .latest_changelog.md
# ── create the release using just that snippet ─────────────────────
- name: Create release & upload assets
uses: ncipollo/release-action@v1
with:
tag: v${{ steps.version.outputs.version }}
name: "Kingfisher v${{ steps.version.outputs.version }}"
bodyFile: CHANGELOG.md # use existing changelog
generateReleaseNotes: false # turn off auto-notes
artifacts: target/release/**
bodyFile: .latest_changelog.md # ← only the most-recent entry
generateReleaseNotes: false
artifacts: target/release/**

View file

@ -2,6 +2,11 @@
All notable changes to this project will be documented in this file.
## [1.13.0]
- Added new rules for PlanetScale, Postman, OpenWeather, Opsgenie, PagerDuty, Pastebin, PayPal, Netlify, netrc, New Relic, ngrok, npm, NuGet, Mandrill, Mapbox, Microsoft Teams, Stripe, LinkedIn, Mailchimp, Mailgun, Linear, LINE, Hugging Face, IBM Cloud, Intercom, ipstack, Heroku, Gradle, and Grafana.
- Added `--rule-stats` command-line flag that collects rule performance statistics during a scan and displays them in the scan summary. Useful when creating or debugging rules.
## [1.12.0]
- Added automatic update checks using GitHub releases.
- New `--self-update` flag installs updates when available

View file

@ -10,7 +10,7 @@ publish = false
[package]
name = "kingfisher"
version = "1.12.0"
version = "1.13.0"
edition.workspace = true
rust-version.workspace = true
license.workspace = true

View file

@ -122,6 +122,11 @@ cat /path/to/file.py | kingfisher scan -
kingfisher scan /path/to/repo --rule kingfisher.aws
```
### Display rule performance statistics
```bash
kingfisher scan /path/to/repo --rule-stats
```
---
## Scanning GitHub
@ -244,6 +249,10 @@ The document below details the four-field formula (rule SHA-1, origin label, st
See ([docs/FINGERPRINT.md](docs/FINGERPRINT.md))
## Rule Performance Profiling
Use `--rule-stats` to collect timing information for every rule. After scanning, the summary prints a **Rule Performance Stats** section showing how many matches each rule produced, along with its slowest and average match times. This is useful when creating or debugging rules.
## CLI Options
```bash
kingfisher scan --help

View file

@ -25,6 +25,7 @@ use smallvec::SmallVec;
use tracing::debug;
use xxhash_rust::xxh3::xxh3_64;
use crate::rule_profiling::RuleTimer;
use crate::{
blob::{Blob, BlobId, BlobIdMap},
entropy::calculate_shannon_entropy,
@ -386,6 +387,8 @@ impl<'a> Matcher<'a> {
origin,
None,
redact,
&filename,
self.profiler.as_ref(),
);
}
// If tree-sitter produced base64-decoded matches, try them against all rules
@ -407,6 +410,8 @@ impl<'a> Matcher<'a> {
origin,
Some(ts_match.clone()),
redact,
&filename,
self.profiler.as_ref(),
);
}
}
@ -456,7 +461,21 @@ fn filter_match<'b>(
_origin: &OriginSet,
ts_match: Option<String>,
redact: bool,
filename: &str,
profiler: Option<&Arc<ConcurrentRuleProfiler>>,
) {
let mut timer = profiler.map(|p| {
RuleTimer::new(
p,
rule.id(),
rule.name(),
&rule.syntax.pattern,
filename,
)
});
let initial_len = matches.len();
// Use Cow to avoid unnecessary copying when ts_match is None
let byte_slice: Cow<[u8]> = match ts_match {
Some(ts_match_value) => Cow::Owned(ts_match_value.into_bytes()),
@ -515,6 +534,10 @@ fn filter_match<'b>(
});
previous_matches.push((rule_id, matching_input_offset_span));
}
if let Some(t) = timer.take() {
let new_count = (matches.len() - initial_len) as u64;
t.end(new_count > 0, new_count, 0);
}
}
fn get_language_and_queries(lang: &str) -> Option<(Language, FxHashMap<String, String>)> {
match lang.to_lowercase().as_str() {

View file

@ -75,7 +75,7 @@ pub async fn run_async_scan(
progress_enabled,
rules_db,
enable_profiling,
shared_profiler,
Arc::clone(&shared_profiler),
&matcher_stats,
)?;
@ -117,7 +117,15 @@ pub async fn run_async_scan(
// // Call cmd_report here
crate::reporter::run(global_args, Arc::clone(&datastore), args)
.context("Failed to run report command")?;
print_scan_summary(start_time, &datastore, global_args, args, rules_db, &matcher_stats);
print_scan_summary(
start_time,
&datastore,
global_args,
args,
rules_db,
&matcher_stats,
if enable_profiling { Some(shared_profiler.as_ref()) } else { None },
);
Ok(())
}

View file

@ -17,6 +17,7 @@ use crate::{
},
findings_store,
matcher::MatcherStats,
rule_profiling::ConcurrentRuleProfiler,
rules_database::RulesDatabase,
};
@ -42,6 +43,7 @@ pub fn print_scan_summary(
// inputs: &FilesystemEnumeratorResult,
rules_db: &RulesDatabase,
matcher_stats: &Mutex<MatcherStats>,
profiler: Option<&ConcurrentRuleProfiler>,
) {
// let duration = start_time.elapsed();
let ds = datastore.lock().unwrap();
@ -152,6 +154,47 @@ pub fn print_scan_summary(
humantime::format_duration(duration)
);
}
// Optional "Rule Performance Stats" table: printed only when `args.rule_stats`
// is set AND the caller supplied a profiler.
if args.rule_stats {
if let Some(prof) = profiler {
// Each report entry is read below for `rule_name`, `rule_id`, `total_matches`,
// `slowest_match_time` and `average_match_time`.
// NOTE(review): presumably one entry per profiled rule — confirm against
// `ConcurrentRuleProfiler::generate_report`.
let stats = prof.generate_report();
if !stats.is_empty() {
// Calculate dynamic column widths
// NOTE(review): `stats` is non-empty here, so `max()` always yields `Some` and the
// `unwrap_or(4)` / `unwrap_or(2)` fallbacks never apply. If every rule name is shorter
// than "Rule" (or every id shorter than "ID"), the header label is wider than the
// column and the table misaligns — confirm whether a minimum width was intended.
let name_w = stats.iter().map(|s| s.rule_name.len()).max().unwrap_or(4);
let id_w = stats.iter().map(|s| s.rule_id.len()).max().unwrap_or(2);
// Header
// `{:-^1$}` centres the title and pads both sides with '-' up to the width passed as
// the second argument.
// NOTE(review): the title rule uses `+ 47` while the separator below uses `+ 49`, so
// the two rules differ in length by two characters — confirm which is intended.
safe_println!("\n{:-^1$}", " Rule Performance Stats ", name_w + id_w + 47);
safe_println!(
"{: <name_w$} {: <id_w$} {: >8} {: >15} {: >15}",
"Rule",
"ID",
"Matches",
"Slowest",
"Average",
name_w = name_w,
id_w = id_w
);
// Separator: an empty string left-aligned and filled with '-' to `width`.
safe_println!("{:-<width$}", "", width = name_w + id_w + 49);
// Rows
// The two timing columns are rendered with `{:?}` (Debug formatting).
// NOTE(review): presumably these are `Duration` values — confirm; padding is only
// applied if the type's Debug impl honours the width.
for rs in stats {
safe_println!(
"{: <name_w$} {: <id_w$} {: >8} {: >15?} {: >15?}",
rs.rule_name,
rs.rule_id,
rs.total_matches,
rs.slowest_match_time,
rs.average_match_time,
name_w = name_w,
id_w = id_w
);
}
}
}
}
debug!("\nAll Rules with Matches:");
debug!("=======================");
let max_rule_length = sorted_findings.iter().map(|(rule, _)| rule.len()).max().unwrap_or(0);