Merge pull request #11 from mongodb/development

Preparing for v1.14.0

This PR prepares for v1.14.0 by making the response_matcher field optional at the deserialization level (an `Option` with a serde default), enforcing its presence when rules are loaded, and updating related tests and rule data.

Made response_matcher required in rules that use HTTP validation.
Added a validation step to error on missing or empty response_matcher in rules.
Reformatted tests, scanner code, and rule data; bumped the version; and updated the changelog.
This commit is contained in:
Mick Grove 2025-06-25 23:53:59 -07:00 committed by GitHub
commit ff0cfb1012
24 changed files with 191 additions and 143 deletions

View file

@ -2,6 +2,10 @@
All notable changes to this project will be documented in this file.
## [1.14.0]
- Fixed several malformed rules
- Now validating that response_matcher is present in the validation section of all rules that use HTTP validation
## [1.13.0]
- Added new rules for Planetscale, Postman, Openweather, opsgenie, pagerduty, pastebin, paypal, netlify, netrc, newrelic, ngrok, npm, nuget, mandrill, mapbox, microsoft teams, stripe, linkedin, mailchimp, mailgun, linear, line, huggingface, ibm cloud, intercom, ipstack, heroku, gradle, grafana
- Added `--rule-stats` command-line flag that will display rule performance statistics during a scan. Useful when creating or debugging rules

View file

@ -10,7 +10,7 @@ publish = false
[package]
name = "kingfisher"
version = "1.13.0"
version = "1.14.0"
edition.workspace = true
rust-version.workspace = true
license.workspace = true

View file

@ -14,9 +14,7 @@ rules:
examples:
- dop_v1_1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef
- 'token = "dop_v1_ef0e04edc13918192246e0c90f0735c7f4db7a5a036a857e48d6cc98f1c9576b"'
categories:
- api
- secret
validation:
type: Http
content:
@ -60,9 +58,9 @@ rules:
"grant_type": "refresh_token",
"refresh_token": "{{ TOKEN }}"
}
response_matcher:
- report_response: true
- type: StatusMatch
status:
- 200
- type: JsonValid
response_matcher:
- report_response: true
- type: StatusMatch
status:
- 200
- type: JsonValid

View file

@ -23,11 +23,11 @@ rules:
headers:
Authorization: Bearer {{ TOKEN }}
Accept: application/json
response_matcher:
- report_response: true
- type: StatusMatch
status:
- 200
response_matcher:
- report_response: true
- type: StatusMatch
status:
- 200
- name: Doppler Personal Token
id: kingfisher.doppler.2
pattern: |
@ -52,11 +52,11 @@ rules:
headers:
Authorization: Bearer {{ TOKEN }}
Accept: application/json
response_matcher:
- report_response: true
- type: StatusMatch
status:
- 200
response_matcher:
- report_response: true
- type: StatusMatch
status:
- 200
- name: Doppler Service Token
id: kingfisher.doppler.3
@ -82,11 +82,11 @@ rules:
headers:
Authorization: Bearer {{ TOKEN }}
Accept: application/json
response_matcher:
- report_response: true
- type: StatusMatch
status:
- 200
response_matcher:
- report_response: true
- type: StatusMatch
status:
- 200
- name: Doppler Service Account Token
id: kingfisher.doppler.4
@ -112,11 +112,11 @@ rules:
headers:
Authorization: Bearer {{ TOKEN }}
Accept: application/json
response_matcher:
- report_response: true
- type: StatusMatch
status:
- 200
response_matcher:
- report_response: true
- type: StatusMatch
status:
- 200
- name: Doppler SCIM Token
id: kingfisher.doppler.5
@ -142,11 +142,11 @@ rules:
headers:
Authorization: Bearer {{ TOKEN }}
Accept: application/json
response_matcher:
- report_response: true
- type: StatusMatch
status:
- 200
response_matcher:
- report_response: true
- type: StatusMatch
status:
- 200
- name: Doppler Audit Token
id: kingfisher.doppler.6
@ -172,8 +172,8 @@ rules:
headers:
Authorization: Bearer {{ TOKEN }}
Accept: application/json
response_matcher:
- report_response: true
- type: StatusMatch
status:
- 200
response_matcher:
- report_response: true
- type: StatusMatch
status:
- 200

View file

@ -23,6 +23,12 @@ rules:
X-Figma-Token: '{{ TOKEN }}'
method: GET
url: https://api.figma.com/v1/me
response_matcher:
- report_response: true
- type: WordMatch
words:
- "Invalid token"
negative: true
- name: Figma Personal Access Header Token
id: kingfisher.figma.2

View file

@ -3,8 +3,11 @@ rules:
id: kingfisher.ibm.1
pattern: |
(?xi)
\b
(?:ibm(?:cloud)?|bx)
(?:.|[\n\r]){0,32}?
(?:SECRET|PRIVATE|ACCESS|KEY|TOKEN)
(?:.|[\n\r]){0,32}?
\b
(
[0-9A-Z_-]{42,44}

View file

@ -30,9 +30,9 @@ rules:
"query": "query { issues(first: 1) { nodes { id } } }"
}
url: https://api.linear.app/graphql
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
- type: WordMatch
words: ['"issues":', '"nodes":']
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
- type: WordMatch
words: ['"issues":', '"nodes":']

View file

@ -42,11 +42,11 @@ rules:
headers:
Content-Type: application/json
body: '{"text":""}'
response_matcher:
- report_response: true
- type: StatusMatch
status:
- 400
- type: WordMatch
words:
- 'Text is required'
response_matcher:
- report_response: true
- type: StatusMatch
status:
- 400
- type: WordMatch
words:
- 'Text is required'

View file

@ -2,35 +2,46 @@ rules:
- name: PagerDuty API Key
id: kingfisher.pagerduty.1
pattern: |
(?xi)
(?xi)
\b
(?:pagerduty|pager[_-]duty|pd[-_\]=\)]|pd\.webhook?)
(?:.|[\n\r]){0,16}?
(
u\+[A-Z0-9_+-]{18} # new personal tokens
|
[A-Z0-9_-]{20} # legacy personal tokens
|
[A-F0-9]{32} # integration keys / routing keys
(?:
Token |
Authorization |
pd[_-]? |
pd[_-]? |
pagerduty[_-]? |
pagerduty
)
\b
min_entropy: 3.3
\W{0,20}
(?:SECRET|PRIVATE|ACCESS|KEY|TOKEN)
(?:.|[\n\r]){0,16}?
\b
(
u\+[A-Z0-9_+-]{18} | # personal user token (20 chars)
[A-Z0-9_-]{20} | # legacy PAT (20 chars, mixed case)
[a-f0-9]{32} # integration / routing key (32 hex, lower case)
)
\b
min_entropy: 3.5
confidence: medium
examples:
- pagerduty_key = u+Lyhd2_N2MCy+ZoH-S5
- "Authorization: Token token=u+Lyhd2_N2MCy+ZoH-S5"
- pd_key = u+3xVszZ-b4m+T6d23KA
- Token token=ABCDEF1234567890ABCDEF1234567890
references:
- https://developer.pagerduty.com/api-reference/4555ca1c983d0-get-the-current-user
validation:
type: Http
content:
request:
method: GET
url: https://api.pagerduty.com/abilities
url: https://api.pagerduty.com/users
headers:
Authorization: Token token={{ TOKEN }}
Accept: application/vnd.pagerduty+json;version=2
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
- type: WordMatch
words: ['"abilities":']
Accept: application/json
response_matcher:
- report_response: true
- type: JsonValid
- type: WordMatch
words:
- '"users":'

View file

@ -29,13 +29,13 @@ rules:
request:
method: GET
url: https://api.particle.io/v1/user?access_token={{ TOKEN }}
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
- type: WordMatch
match_all_words: true
words: ['"username":']
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
- type: WordMatch
match_all_words: true
words: ['"username":']
- name: particle.io Access Token
id: kingfisher.particleio.2
@ -65,10 +65,10 @@ rules:
request:
method: GET
url: https://api.particle.io/v1/user?access_token={{ TOKEN }}
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
- type: WordMatch
match_all_words: true
words: ['"username":']
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
- type: WordMatch
match_all_words: true
words: ['"username":']

View file

@ -28,10 +28,10 @@ rules:
Content-Type: application/x-www-form-urlencoded
body: |
api_dev_key={{ TOKEN }}&api_user_name=dummy&api_user_password=dummy
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
- type: WordMatch
words: ['invalid api_dev_key']
negative: true
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
- type: WordMatch
words: ['invalid api_dev_key']
negative: true

View file

@ -47,10 +47,10 @@ rules:
Authorization: |
Basic {{ CLIENTID | append: ':' | append: TOKEN | b64enc }}
body: grant_type=client_credentials
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
depends_on_rule:
- rule_id: kingfisher.paypal.1
variable: CLIENTID

View file

@ -18,11 +18,11 @@ rules:
request:
method: POST
url: https://upload.pypi.org/legacy/
response_is_html: true
response_is_html: true
response_matcher:
- report_response: true
- type: WordMatch
words:
words:
- "isn't allowed to upload to project"
headers:
Authorization: 'Basic {{ "__token__:" | append: TOKEN | b64enc }}'

View file

@ -25,7 +25,7 @@ rules:
headers:
Authorization: "Bearer {{ TOKEN }}"
Accept: application/json
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]

View file

@ -28,7 +28,7 @@ rules:
Authorization: token {{ TOKEN }}
Accept: application/vnd.travis-ci.3+json
Travis-API-Version: "3"
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]
response_matcher:
- report_response: true
- type: StatusMatch
status: [200]

View file

@ -382,16 +382,11 @@ mod tests {
Ok(())
}
/// 3) Nested archive:
/// outer.tar.gz ──▶ outer.tar (contains inner.tar.gz)
/// └──▶ inner.tar.gz ──▶ inner.tar (contains secret.txt)
/// 3) Nested archive: outer.tar.gz ──▶ outer.tar (contains inner.tar.gz) └──▶ inner.tar.gz
/// ──▶ inner.tar (contains secret.txt)
#[test]
fn smoke_decompress_nested_tar_gz_archives() -> anyhow::Result<()> {
use std::{
fs::File,
io::Read,
path::PathBuf,
};
use std::{fs::File, io::Read, path::PathBuf};
use flate2::{write::GzEncoder, Compression};
use tar::Builder;
@ -468,10 +463,7 @@ mod tests {
for (logical, path) in files {
if logical.ends_with("!secret.txt") {
let txt = std::fs::read_to_string(&path)?;
assert!(
txt.contains("nested_secret=shh"),
"secret.txt content corrupted"
);
assert!(txt.contains("nested_secret=shh"), "secret.txt content corrupted");
found = true;
}
}

View file

@ -25,7 +25,6 @@ use smallvec::SmallVec;
use tracing::debug;
use xxhash_rust::xxh3::xxh3_64;
use crate::rule_profiling::RuleTimer;
use crate::{
blob::{Blob, BlobId, BlobIdMap},
entropy::calculate_shannon_entropy,
@ -33,7 +32,7 @@ use crate::{
origin::OriginSet,
parser,
parser::{Checker, Language},
rule_profiling::{ConcurrentRuleProfiler, RuleStats},
rule_profiling::{ConcurrentRuleProfiler, RuleStats, RuleTimer},
rules::rule::Rule,
rules_database::RulesDatabase,
safe_list::is_safe_match,
@ -464,15 +463,8 @@ fn filter_match<'b>(
filename: &str,
profiler: Option<&Arc<ConcurrentRuleProfiler>>,
) {
let mut timer = profiler.map(|p| {
RuleTimer::new(
p,
rule.id(),
rule.name(),
&rule.syntax.pattern,
filename,
)
});
let mut timer =
profiler.map(|p| RuleTimer::new(p, rule.id(), rule.name(), &rule.syntax.pattern, filename));
let initial_len = matches.len();
@ -989,7 +981,7 @@ mod test {
method: "GET".to_string(),
url: "https://example.com".to_string(),
headers: BTreeMap::new(),
response_matcher: vec![],
response_matcher: Some(vec![]),
multipart: None,
response_is_html: false,
},

View file

@ -8,7 +8,7 @@ pub mod rule;
use std::{fs::File, io::BufReader, path::Path};
use anyhow::Context;
use rule::{Confidence, RuleSyntax};
use rule::{Confidence, RuleSyntax, Validation};
use serde::de::DeserializeOwned;
/// Custom error type for more granular rules loading errors.
@ -28,6 +28,9 @@ pub enum RulesError {
#[error("Invalid ResponseMatcher variant in file: {0}, at line: {1}, column: {2}")]
InvalidResponseMatcherVariant(String, usize, usize),
#[error("HTTP validation for rule `{rule_id}` in file {path} missing response_matcher")]
MissingResponseMatcher { path: String, rule_id: String },
}
/// Represents a collection of rule syntaxes.
@ -58,6 +61,21 @@ impl Rules {
match serde_yaml::from_reader::<_, Rules>(contents) {
Ok(mut rs) => {
rs.rules.retain(|rule| rule.confidence.is_at_least(&confidence));
for rule_syntax in &rs.rules {
if let Some(Validation::Http(http_val)) = &rule_syntax.validation {
if http_val
.request
.response_matcher
.as_ref()
.map_or(true, |m| m.is_empty())
{
bail!(RulesError::MissingResponseMatcher {
path: path.display().to_string(),
rule_id: rule_syntax.id.clone(),
});
}
}
}
rules.update(rs);
}
Err(e) => {

View file

@ -65,7 +65,7 @@ pub struct HttpRequest {
#[serde(default)]
pub headers: BTreeMap<String, String>,
#[serde(default)]
pub response_matcher: Vec<ResponseMatcher>,
pub response_matcher: Option<Vec<ResponseMatcher>>,
#[serde(default)]
pub multipart: Option<MultipartConfig>,
// allow HTML only when explicitly set true

View file

@ -161,7 +161,7 @@ pub fn print_scan_summary(
if !stats.is_empty() {
// Calculate dynamic column widths
let name_w = stats.iter().map(|s| s.rule_name.len()).max().unwrap_or(4);
let id_w = stats.iter().map(|s| s.rule_id.len()).max().unwrap_or(2);
let id_w = stats.iter().map(|s| s.rule_id.len()).max().unwrap_or(2);
// Header
safe_println!("\n{:-^1$}", " Rule Performance Stats ", name_w + id_w + 47);
@ -173,7 +173,7 @@ pub fn print_scan_summary(
"Slowest",
"Average",
name_w = name_w,
id_w = id_w
id_w = id_w
);
safe_println!("{:-<width$}", "", width = name_w + id_w + 49);
@ -187,14 +187,13 @@ pub fn print_scan_summary(
rs.slowest_match_time,
rs.average_match_time,
name_w = name_w,
id_w = id_w
id_w = id_w
);
}
}
}
}
debug!("\nAll Rules with Matches:");
debug!("=======================");
let max_rule_length = sorted_findings.iter().map(|(rule, _)| rule.len()).max().unwrap_or(0);

View file

@ -514,8 +514,14 @@ async fn timed_validate_single_match<'a>(
m.validation_response_status = status;
m.validation_response_body = body.clone();
let matchers = http_validation
.request
.response_matcher
.as_ref()
.expect("missing response_matcher");
m.validation_success = httpvalidation::validate_response(
&http_validation.request.response_matcher,
matchers,
&body,
&status,
&headers,
@ -880,6 +886,12 @@ rules:
request:
method: POST
url: https://upload.pypi.org/legacy/
response_is_html: true
response_matcher:
- report_response: true
- type: WordMatch
words:
- "isn't allowed to upload to project"
headers:
Authorization: 'Basic {{ "__token__:" | append: TOKEN | b64enc }}'
multipart:

View file

@ -30,7 +30,7 @@ fn scan_fails_for_bad_rule_yaml() {
"--rules-path",
tmp.path().to_str().unwrap(), // point loader at bad YAML
"--no-validate", // keep the test fast
"--no-update-check", // skip update check to avoid network calls
"--no-update-check", // skip update check to avoid network calls
])
.assert()
.failure()
@ -72,7 +72,7 @@ rules:
tmp.path().to_str().unwrap(), // only the custom rule
"--no-dedup",
"--load-builtins=false", // skip the builtin rules
"--no-update-check", // skip update check to avoid network calls
"--no-update-check", // skip update check to avoid network calls
])
.assert()
.failure() // CLI exits 0

View file

@ -76,6 +76,12 @@ async fn test_validation_cache_and_depvars() -> Result<()> {
request:
method: GET
url: '{base}/validate?token={{ {{ TOKEN }} }}'
response_matcher:
- report_response: true
- type: WordMatch
words:
- '"error_code":"403003"'
negative: true
"#,
base = server.uri()
);

View file

@ -30,7 +30,14 @@ fn smoke_scan_tar_gz_archive() -> anyhow::Result<()> {
// ── 1) extraction ENABLED -- secret should be found ─────────────────────────
Command::cargo_bin("kingfisher")?
.args(["scan", tar_gz.to_str().unwrap(), "--confidence=low", "--format", "json", "--no-update-check"])
.args([
"scan",
tar_gz.to_str().unwrap(),
"--confidence=low",
"--format",
"json",
"--no-update-check",
])
.assert()
.code(findings_code)
.stdout(predicates::str::contains(github_pat));