diff --git a/CHANGELOG.md b/CHANGELOG.md index 689cb46..5ba1012 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ All notable changes to this project will be documented in this file. +## [1.14.0] +- Fixed several malformed rules +- Now validating that response_matcher is present in validation section of all rules + ## [1.13.0] - Added new rules for Planetscale, Postman, Openweather, opsgenie, pagerduty, pastebin, paypal, netlify, netrc, newrelic, ngrok, npm, nuget, mandrill, mapbox, microsoft teams, stripe, linkedin, mailchimp, mailgun, linear, line, huggingface, ibm cloud, intercom, ipstack, heroku, gradle, grafana - Added `--rule-stats` command-line flag that will display rule performance statistics during a scan. Useful when creating or debugging rules diff --git a/Cargo.toml b/Cargo.toml index 2b30bee..7ced54e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ publish = false [package] name = "kingfisher" -version = "1.13.0" +version = "1.14.0" edition.workspace = true rust-version.workspace = true license.workspace = true diff --git a/data/rules/digitalocean.yml b/data/rules/digitalocean.yml index 3206ab7..b02bb52 100644 --- a/data/rules/digitalocean.yml +++ b/data/rules/digitalocean.yml @@ -14,9 +14,7 @@ rules: examples: - dop_v1_1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef - 'token = "dop_v1_ef0e04edc13918192246e0c90f0735c7f4db7a5a036a857e48d6cc98f1c9576b"' - categories: - - api - - secret + validation: type: Http content: @@ -60,9 +58,9 @@ rules: "grant_type": "refresh_token", "refresh_token": "{{ TOKEN }}" } - response_matcher: - - report_response: true - - type: StatusMatch - status: - - 200 - - type: JsonValid + response_matcher: + - report_response: true + - type: StatusMatch + status: + - 200 + - type: JsonValid diff --git a/data/rules/doppler.yml b/data/rules/doppler.yml index fe21244..0afdba5 100644 --- a/data/rules/doppler.yml +++ b/data/rules/doppler.yml @@ -23,11 +23,11 @@ rules: headers: Authorization: Bearer {{ TOKEN }} Accept: application/json - response_matcher: - - report_response: true - - type: StatusMatch - status: - - 200 + response_matcher: + - report_response: true + - type: StatusMatch + status: + - 200 - name: Doppler Personal Token id: kingfisher.doppler.2 pattern: | @@ -52,11 +52,11 @@ rules: headers: Authorization: Bearer {{ TOKEN }} Accept: application/json - response_matcher: - - report_response: true - - type: StatusMatch - status: - - 200 + response_matcher: + - report_response: true + - type: StatusMatch + status: + - 200 - name: Doppler Service Token id: kingfisher.doppler.3 @@ -82,11 +82,11 @@ rules: headers: Authorization: Bearer {{ TOKEN }} Accept: application/json - response_matcher: - - report_response: true - - type: StatusMatch - status: - - 200 + response_matcher: + - report_response: true + - type: StatusMatch + status: + - 200 - name: Doppler Service Account Token id: kingfisher.doppler.4 @@ -112,11 +112,11 @@ rules: headers: Authorization: Bearer {{ TOKEN }} Accept: application/json - response_matcher: - - report_response: true - - type: StatusMatch - status: - - 200 + response_matcher: + - report_response: true + - type: StatusMatch + status: + - 200 - name: Doppler SCIM Token id: kingfisher.doppler.5 @@ -142,11 +142,11 @@ rules: headers: Authorization: Bearer {{ TOKEN }} Accept: application/json - response_matcher: - - report_response: true - - type: StatusMatch - status: - - 200 + response_matcher: + - report_response: true + - type: StatusMatch + status: + - 200 - name: Doppler Audit Token id: kingfisher.doppler.6 @@ -172,8 +172,8 @@ rules: headers: Authorization: Bearer {{ TOKEN }} Accept: application/json - response_matcher: - - report_response: true - - type: StatusMatch - status: - - 200 \ No newline at end of file + response_matcher: + - report_response: true + - type: StatusMatch + status: + - 200 \ No newline at end of file diff --git a/data/rules/figma.yml b/data/rules/figma.yml index 444a003..9863b6f 100644 --- a/data/rules/figma.yml +++ b/data/rules/figma.yml @@ -23,6 +23,12 @@ rules: X-Figma-Token: '{{ TOKEN }}' method: GET url: https://api.figma.com/v1/me + response_matcher: + - report_response: true + - type: WordMatch + words: + - "Invalid token" + negative: true - name: Figma Personal Access Header Token id: kingfisher.figma.2 diff --git a/data/rules/ibm.yml b/data/rules/ibm.yml index 0cf2e24..55a33be 100644 --- a/data/rules/ibm.yml +++ b/data/rules/ibm.yml @@ -3,8 +3,11 @@ rules: id: kingfisher.ibm.1 pattern: | (?xi) + \b (?:ibm(?:cloud)?|bx) (?:.|[\n\r]){0,32}? + (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) + (?:.|[\n\r]){0,32}? \b ( [0-9A-Z_-]{42,44} diff --git a/data/rules/linear.yml b/data/rules/linear.yml index 9adf6e8..62749ea 100644 --- a/data/rules/linear.yml +++ b/data/rules/linear.yml @@ -30,9 +30,9 @@ rules: "query": "query { issues(first: 1) { nodes { id } } }" } url: https://api.linear.app/graphql - response_matcher: - - report_response: true - - type: StatusMatch - status: [200] - - type: WordMatch - words: ['"issues":', '"nodes":'] \ No newline at end of file + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: WordMatch + words: ['"issues":', '"nodes":'] \ No newline at end of file diff --git a/data/rules/microsoft_teams.yml b/data/rules/microsoft_teams.yml index 13cc044..37e4030 100644 --- a/data/rules/microsoft_teams.yml +++ b/data/rules/microsoft_teams.yml @@ -42,11 +42,11 @@ rules: headers: Content-Type: application/json body: '{"text":""}' - response_matcher: - - report_response: true - - type: StatusMatch - status: - - 400 - - type: WordMatch - words: - - 'Text is required' \ No newline at end of file + response_matcher: + - report_response: true + - type: StatusMatch + status: + - 400 + - type: WordMatch + words: + - 'Text is required' \ No newline at end of file diff --git a/data/rules/pagerdutyapikey.yml b/data/rules/pagerdutyapikey.yml index be4b24f..d65bced 100644 --- a/data/rules/pagerdutyapikey.yml +++ b/data/rules/pagerdutyapikey.yml @@ -2,35 +2,46 @@ rules: - name: PagerDuty API Key id: kingfisher.pagerduty.1 pattern: | - (?xi) + (?xi) \b - (?:pagerduty|pager[_-]duty|pd[-_\]=\)]|pd\.webhook?) - (?:.|[\n\r]){0,16}? - ( - u\+[A-Z0-9_+-]{18} # new personal tokens - | - [A-Z0-9_-]{20} # legacy personal tokens - | - [A-F0-9]{32} # integration keys / routing keys + (?: + Token | + Authorization | + pd[_-]? | + pd[_-]? | + pagerduty[_-]? | + pagerduty ) - \b - min_entropy: 3.3 + \W{0,20} + (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) + (?:.|[\n\r]){0,16}? + \b + ( + u\+[A-Z0-9_+-]{18} | # personal user token (20 chars) + [A-Z0-9_-]{20} | # legacy PAT (20 chars, mixed case) + [a-f0-9]{32} # integration / routing key (32 hex, lower case) + ) + \b + min_entropy: 3.5 confidence: medium examples: - - pagerduty_key = u+Lyhd2_N2MCy+ZoH-S5 + - "Authorization: Token token=u+Lyhd2_N2MCy+ZoH-S5" - pd_key = u+3xVszZ-b4m+T6d23KA + - Token token=ABCDEF1234567890ABCDEF1234567890 + references: + - https://developer.pagerduty.com/api-reference/4555ca1c983d0-get-the-current-user validation: type: Http content: request: method: GET - url: https://api.pagerduty.com/abilities + url: https://api.pagerduty.com/users headers: Authorization: Token token={{ TOKEN }} - Accept: application/vnd.pagerduty+json;version=2 - response_matcher: - - report_response: true - - type: StatusMatch - status: [200] - - type: WordMatch - words: ['"abilities":'] \ No newline at end of file + Accept: application/json + response_matcher: + - report_response: true + - type: JsonValid + - type: WordMatch + words: + - '"users":' diff --git a/data/rules/particle.io.yml b/data/rules/particle.io.yml index 8237ebd..76d6f27 100644 --- a/data/rules/particle.io.yml +++ b/data/rules/particle.io.yml @@ -29,13 +29,13 @@ rules: request: method: GET url: https://api.particle.io/v1/user?access_token={{ TOKEN }} - response_matcher: - - report_response: true - - type: StatusMatch - status: [200] - - type: WordMatch - match_all_words: true - words: ['"username":'] + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: WordMatch + match_all_words: true + words: ['"username":'] - name: particle.io Access Token id: kingfisher.particleio.2 @@ -65,10 +65,10 @@ rules: request: method: GET url: https://api.particle.io/v1/user?access_token={{ TOKEN }} - response_matcher: - - report_response: true - - type: StatusMatch - status: [200] - - type: WordMatch - match_all_words: true - words: ['"username":'] \ No newline at end of file + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: WordMatch + match_all_words: true + words: ['"username":'] \ No newline at end of file diff --git a/data/rules/pastebin.yml b/data/rules/pastebin.yml index 26a55dd..2173766 100644 --- a/data/rules/pastebin.yml +++ b/data/rules/pastebin.yml @@ -28,10 +28,10 @@ rules: Content-Type: application/x-www-form-urlencoded body: | api_dev_key={{ TOKEN }}&api_user_name=dummy&api_user_password=dummy - response_matcher: - - report_response: true - - type: StatusMatch - status: [200] - - type: WordMatch - words: ['invalid api_dev_key'] - negative: true \ No newline at end of file + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: WordMatch + words: ['invalid api_dev_key'] + negative: true \ No newline at end of file diff --git a/data/rules/paypal.yml b/data/rules/paypal.yml index 2441a68..4011eee 100644 --- a/data/rules/paypal.yml +++ b/data/rules/paypal.yml @@ -47,10 +47,10 @@ rules: Authorization: | Basic {{ CLIENTID | append: ':' | append: TOKEN | b64enc }} body: grant_type=client_credentials - response_matcher: - - report_response: true - - type: StatusMatch - status: [200] + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] depends_on_rule: - rule_id: kingfisher.paypal.1 variable: CLIENTID diff --git a/data/rules/pypi.yml b/data/rules/pypi.yml index 0767afd..d0be047 100644 --- a/data/rules/pypi.yml +++ b/data/rules/pypi.yml @@ -18,11 +18,11 @@ rules: request: method: POST url: https://upload.pypi.org/legacy/ - response_is_html: true + response_is_html: true response_matcher: - report_response: true - type: WordMatch - words: + words: - "isn't allowed to upload to project" headers: Authorization: 'Basic {{ "__token__:" | append: TOKEN | b64enc }}' diff --git a/data/rules/tailscale.yml b/data/rules/tailscale.yml index 8ac7e50..b06f16b 100644 --- a/data/rules/tailscale.yml +++ b/data/rules/tailscale.yml @@ -25,7 +25,7 @@ rules: headers: Authorization: "Bearer {{ TOKEN }}" Accept: application/json - response_matcher: - - report_response: true - - type: StatusMatch - status: [200] + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] diff --git a/data/rules/travisci.yml b/data/rules/travisci.yml index 4299937..dd0bd05 100644 --- a/data/rules/travisci.yml +++ b/data/rules/travisci.yml @@ -28,7 +28,7 @@ rules: Authorization: token {{ TOKEN }} Accept: application/vnd.travis-ci.3+json Travis-API-Version: "3" - response_matcher: - - report_response: true - - type: StatusMatch - status: [200] + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] diff --git a/src/decompress.rs b/src/decompress.rs index 0ac0e0e..88a11ae 100644 --- a/src/decompress.rs +++ b/src/decompress.rs @@ -382,16 +382,11 @@ mod tests { Ok(()) } - /// 3) Nested archive: - /// outer.tar.gz ──▶ outer.tar (contains inner.tar.gz) - /// └──▶ inner.tar.gz ──▶ inner.tar (contains secret.txt) + /// 3) Nested archive: outer.tar.gz ──▶ outer.tar (contains inner.tar.gz) └──▶ inner.tar.gz + /// ──▶ inner.tar (contains secret.txt) #[test] fn smoke_decompress_nested_tar_gz_archives() -> anyhow::Result<()> { - use std::{ - fs::File, - io::Read, - path::PathBuf, - }; + use std::{fs::File, io::Read, path::PathBuf}; use flate2::{write::GzEncoder, Compression}; use tar::Builder; @@ -468,10 +463,7 @@ mod tests { for (logical, path) in files { if logical.ends_with("!secret.txt") { let txt = std::fs::read_to_string(&path)?; - assert!( - txt.contains("nested_secret=shh"), - "secret.txt content corrupted" - ); + assert!(txt.contains("nested_secret=shh"), "secret.txt content corrupted"); found = true; } } diff --git a/src/matcher.rs b/src/matcher.rs index 89aed1b..d7ae76d 100644 --- a/src/matcher.rs +++ b/src/matcher.rs @@ -25,7 +25,6 @@ use smallvec::SmallVec; use tracing::debug; use xxhash_rust::xxh3::xxh3_64; -use crate::rule_profiling::RuleTimer; use crate::{ blob::{Blob, BlobId, BlobIdMap}, entropy::calculate_shannon_entropy, @@ -33,7 +32,7 @@ use crate::{ origin::OriginSet, parser, parser::{Checker, Language}, - rule_profiling::{ConcurrentRuleProfiler, RuleStats}, + rule_profiling::{ConcurrentRuleProfiler, RuleStats, RuleTimer}, rules::rule::Rule, rules_database::RulesDatabase, safe_list::is_safe_match, @@ -464,15 +463,8 @@ fn filter_match<'b>( filename: &str, profiler: Option<&Arc>, ) { - let mut timer = profiler.map(|p| { - RuleTimer::new( - p, - rule.id(), - rule.name(), - &rule.syntax.pattern, - filename, - ) - }); + let mut timer = + profiler.map(|p| RuleTimer::new(p, rule.id(), rule.name(), &rule.syntax.pattern, filename)); let initial_len = matches.len(); @@ -989,7 +981,7 @@ mod test { method: "GET".to_string(), url: "https://example.com".to_string(), headers: BTreeMap::new(), - response_matcher: vec![], + response_matcher: Some(vec![]), multipart: None, response_is_html: false, }, diff --git a/src/rules.rs b/src/rules.rs index ad3c9f2..ac7e2fb 100644 --- a/src/rules.rs +++ b/src/rules.rs @@ -8,7 +8,7 @@ pub mod rule; use std::{fs::File, io::BufReader, path::Path}; use anyhow::Context; -use rule::{Confidence, RuleSyntax}; +use rule::{Confidence, RuleSyntax, Validation}; use serde::de::DeserializeOwned; /// Custom error type for more granular rules loading errors. @@ -28,6 +28,9 @@ pub enum RulesError { #[error("Invalid ResponseMatcher variant in file: {0}, at line: {1}, column: {2}")] InvalidResponseMatcherVariant(String, usize, usize), + + #[error("HTTP validation for rule `{rule_id}` in file {path} missing response_matcher")] + MissingResponseMatcher { path: String, rule_id: String }, } /// Represents a collection of rule syntaxes. @@ -58,6 +61,21 @@ impl Rules { match serde_yaml::from_reader::<_, Rules>(contents) { Ok(mut rs) => { rs.rules.retain(|rule| rule.confidence.is_at_least(&confidence)); + for rule_syntax in &rs.rules { + if let Some(Validation::Http(http_val)) = &rule_syntax.validation { + if http_val + .request + .response_matcher + .as_ref() + .map_or(true, |m| m.is_empty()) + { + bail!(RulesError::MissingResponseMatcher { + path: path.display().to_string(), + rule_id: rule_syntax.id.clone(), + }); + } + } + } rules.update(rs); } Err(e) => { diff --git a/src/rules/rule.rs b/src/rules/rule.rs index 6d3c6e9..83bb844 100644 --- a/src/rules/rule.rs +++ b/src/rules/rule.rs @@ -65,7 +65,7 @@ pub struct HttpRequest { #[serde(default)] pub headers: BTreeMap, #[serde(default)] - pub response_matcher: Vec, + pub response_matcher: Option>, #[serde(default)] pub multipart: Option, // allow HTML only when explicitly set true diff --git a/src/scanner/summary.rs b/src/scanner/summary.rs index 11a45f9..7555d22 100644 --- a/src/scanner/summary.rs +++ b/src/scanner/summary.rs @@ -161,7 +161,7 @@ pub fn print_scan_summary( if !stats.is_empty() { // Calculate dynamic column widths let name_w = stats.iter().map(|s| s.rule_name.len()).max().unwrap_or(4); - let id_w = stats.iter().map(|s| s.rule_id.len()).max().unwrap_or(2); + let id_w = stats.iter().map(|s| s.rule_id.len()).max().unwrap_or(2); // Header safe_println!("\n{:-^1$}", " Rule Performance Stats ", name_w + id_w + 47); @@ -173,7 +173,7 @@ pub fn print_scan_summary( "Slowest", "Average", name_w = name_w, - id_w = id_w + id_w = id_w ); safe_println!("{:-( m.validation_response_status = status; m.validation_response_body = body.clone(); + let matchers = http_validation + .request + .response_matcher + .as_ref() + .expect("missing response_matcher"); + m.validation_success = httpvalidation::validate_response( - &http_validation.request.response_matcher, + matchers, &body, &status, &headers, @@ -880,6 +886,12 @@ rules: request: method: POST url: https://upload.pypi.org/legacy/ + response_is_html: true + response_matcher: + - report_response: true + - type: WordMatch + words: + - "isn't allowed to upload to project" headers: Authorization: 'Basic {{ "__token__:" | append: TOKEN | b64enc }}' multipart: diff --git a/tests/cli_failure.rs b/tests/cli_failure.rs index ca2e715..7e18195 100644 --- a/tests/cli_failure.rs +++ b/tests/cli_failure.rs @@ -30,7 +30,7 @@ fn scan_fails_for_bad_rule_yaml() { "--rules-path", tmp.path().to_str().unwrap(), // point loader at bad YAML "--no-validate", // keep the test fast - "--no-update-check", // skip update check to avoid network calls + "--no-update-check", // skip update check to avoid network calls ]) .assert() .failure() @@ -72,7 +72,7 @@ rules: tmp.path().to_str().unwrap(), // only the custom rule "--no-dedup", "--load-builtins=false", // skip the builtin rules - "--no-update-check", // skip update check to avoid network calls + "--no-update-check", // skip update check to avoid network calls ]) .assert() .failure() // CLI exits 0 diff --git a/tests/int_validation_cache.rs b/tests/int_validation_cache.rs index 657a40a..4b9af83 100644 --- a/tests/int_validation_cache.rs +++ b/tests/int_validation_cache.rs @@ -76,6 +76,12 @@ async fn test_validation_cache_and_depvars() -> Result<()> { request: method: GET url: '{base}/validate?token={{ {{ TOKEN }} }}' + response_matcher: + - report_response: true + - type: WordMatch + words: + - '"error_code":"403003"' + negative: true "#, base = server.uri() ); diff --git a/tests/smoke_archive.rs b/tests/smoke_archive.rs index addba57..c2afc7f 100644 --- a/tests/smoke_archive.rs +++ b/tests/smoke_archive.rs @@ -30,7 +30,14 @@ fn smoke_scan_tar_gz_archive() -> anyhow::Result<()> { // ── 1) extraction ENABLED -- secret should be found ───────────────────────── Command::cargo_bin("kingfisher")? - .args(["scan", tar_gz.to_str().unwrap(), "--confidence=low", "--format", "json", "--no-update-check"]) + .args([ + "scan", + tar_gz.to_str().unwrap(), + "--confidence=low", + "--format", + "json", + "--no-update-check", + ]) .assert() .code(findings_code) .stdout(predicates::str::contains(github_pat));