From 951b62d61e2b93e501a55459a5f6581ea400a546 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Mon, 18 Aug 2025 22:56:34 -0700 Subject: [PATCH] - Added rules for clearbit, kickbox, azure container registry, improved Azure Storage key - Grouped JSON and JSONL outputs by rule, restoring matches arrays in reports --- CHANGELOG.md | 4 ++ Cargo.toml | 2 +- data/rules/azure.yml | 58 +++++++++++++++++------- data/rules/azurestorage.yml | 6 +-- data/rules/clearbit.yml | 33 ++++++++++++++ data/rules/intercom.yml | 6 +-- data/rules/kickbox.yml | 32 +++++++++++++ src/reporter.rs | 6 +++ src/reporter/json_format.rs | 33 +++++++++++--- tests/int_rules_no_validated_findings.rs | 25 ++++++---- 10 files changed, 166 insertions(+), 39 deletions(-) create mode 100644 data/rules/clearbit.yml create mode 100644 data/rules/kickbox.yml diff --git a/CHANGELOG.md b/CHANGELOG.md index 05e327e..5973cff 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ All notable changes to this project will be documented in this file. +## [1.43.0] +- Added rules for clearbit, kickbox, azure container registry, improved Azure Storage key +- Grouped JSON and JSONL outputs by rule, restoring `matches` arrays in reports + ## [1.42.0] - Fixed pagination issue when calling gitlab api - Expanded directory exclusion handling to interpret plain patterns as prefixes, ensuring options like --exclude .git also skip all nested paths diff --git a/Cargo.toml b/Cargo.toml index 7d79adc..1a8eebe 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ publish = false [package] name = "kingfisher" -version = "1.42.0" +version = "1.43.0" description = "MongoDB's blazingly fast secret scanning and validation tool" edition.workspace = true rust-version.workspace = true diff --git a/data/rules/azure.yml b/data/rules/azure.yml index 277bea7..a7971ad 100644 --- a/data/rules/azure.yml +++ b/data/rules/azure.yml @@ -27,17 +27,10 @@ rules: "AZURE_STORAGE_CONNECTION_STRING": { "value": "DefaultEndpointsProtocol=https;AccountName=d1biblobstor521;AccountKey=NjEwGHd9+piK+iCi2C2XURWPmeDDjif9UKN1HAszYptL4iQ+yD7/dgjLMZc3VOpURsa53aJ4HZfbVWzL429C5g==;EndpointSuffix=core.windows.net" } - negative_examples: - - 'InstrumentationKey=00000000-0000-0000-0000-000000000000;EndpointSuffix=ai.contoso.com;' - - 'InstrumentationKey=00000000-0000-0000-0000-000000000000;IngestionEndpoint=https://custom.com:111/;LiveEndpoint=https://custom.com:222/;ProfilerEndpoint=https://custom.com:333/;SnapshotEndpoint=https://custom.com:444/;' references: - https://azure.microsoft.com/en-us/blog/windows-azure-web-sites-how-application-strings-and-connection-strings-work/ - https://docs.microsoft.com/en-us/azure/storage/common/storage-configure-connection-string - https://learn.microsoft.com/en-us/azure/service-bus-messaging/service-bus-sas#best-practices-when-using-sas - categories: - - api - - fuzzy - - secret - name: Azure App Configuration Connection String id: kingfisher.azure.2 @@ -53,18 +46,10 @@ rules: - 'https://foo-nonprod-appconfig.azconfig.io;Id=ABCD-E6-s0:tl6ABcdefGHi7kLMno/p;Secret=abCD1EF+GHIJxLMnOA53ST8uVWX05zaBCdE/fg9hi4k=' - 'Endpoint=https://appconfig-test01.azconfig.io;Id=09pv-l0-s0:opFCQMC6+9485xJgN5Ws;Secret=GcoEA53t7GLRNJ910M46IrbHO/Vg0tt4HujRdsaCoTY=' - ' private static string appConfigurationConnectionString = "Endpoint=https://appcs-fg-pwc.azconfig.io;Id=pi5x-l9-s0:SZLlhHA53Nz2MpAl04cU;Secret=CQ+mlfQqkzfZv4XA53gigJ/seeXMKwNsqW/rM3wmtuE=";' - negative_examples: - - | - text: - az appconfig feature delete --connection-string Endpoint=https://contoso.azconfig.io;Id=xxx;Secret=xxx --feature color --label MyLabel references: - https://docs.microsoft.com/en-us/azure/azure-app-configuration/ - https://docs.microsoft.com/en-us/azure/azure-app-configuration/howto-best-practices - https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/appconfiguration/azure-appconfiguration/azure/appconfiguration/_utils.py - categories: - - api - - fuzzy - - secret - name: Azure Personal Access Token id: kingfisher.azure.3 @@ -84,4 +69,45 @@ rules: $token = "58oo4mvqr2tpw7b4w3loeckwfu5o6nw3sihfckvlwoxgqimlddza" - | if __name__ == "__main__": - ado_pat = "iyfmob6xjrfmit67anxbot64umfx2clwx7dz5ynxi4q2z3uqegvq" \ No newline at end of file + ado_pat = "iyfmob6xjrfmit67anxbot64umfx2clwx7dz5ynxi4q2z3uqegvq" + - name: Azure Container Registry URL + id: kingfisher.azure.4 + pattern: | + (?xi) + ( + [a-z0-9][a-z0-9-]{1,100}[a-z0-9] + )\.azurecr\.io + confidence: medium + min_entropy: 2.0 + examples: + - "myregistry.azurecr.io" + - name: Azure Container Registry Password + id: kingfisher.azure.5 + pattern: | + (?xi) + \b + ( + [A-Z0-9+/]{42}\+ACR[A-Z0-9]{6} + ) + \b + confidence: medium + min_entropy: 4.0 + validation: + type: Http + content: + request: + method: GET + url: "https://{{ACR_USERNAME}}.azurecr.io/v2/_catalog" + headers: + Authorization: "Basic {{ ACR_USERNAME | append: ':' | append: TOKEN | b64enc }}" + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + examples: + - "Abcdefghijklmnopqrstuvwxyz1234567890ABCD+ACRefg123" + depends_on_rule: + - rule_id: "kingfisher.azure.4" + variable: ACR_USERNAME + references: + - https://learn.microsoft.com/en-us/azure/container-registry/container-registry-authentication diff --git a/data/rules/azurestorage.yml b/data/rules/azurestorage.yml index 01dad46..3313d8b 100644 --- a/data/rules/azurestorage.yml +++ b/data/rules/azurestorage.yml @@ -31,9 +31,9 @@ rules: (?xi) \b azure - (?:.|[\n\r]){0,32}? - (?i:(?:Access|Account|Storage)[_.-]?Key) - (?:.|[\n\r]){0,25}? + (?:.|[\n\r]){0,128}? + (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) + (?:.|[\n\r]){0,128}? ( [A-Z0-9+\\/-]{86,88}={0,2} ) diff --git a/data/rules/clearbit.yml b/data/rules/clearbit.yml new file mode 100644 index 0000000..caa9e7a --- /dev/null +++ b/data/rules/clearbit.yml @@ -0,0 +1,33 @@ +rules: + - name: Clearbit API Key + id: kingfisher.clearbit.1 + pattern: | + (?xi) + \b + clearbit + (?:.|[\n\r]){0,16}? + (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) + (?:.|[\n\r]){0,32}? + \b + ( + [0-9a-z_]{35} + ) + \b + min_entropy: 3.5 + confidence: medium + examples: + - clearbit_token = tq50141fm92fl4nid9c1c7liouhbertbvg1 + validation: + type: Http + content: + request: + method: GET + url: https://discovery.clearbit.com/v1/companies/entities?name=kingfisher + headers: + Authorization: "Bearer {{ TOKEN }}" + response_matcher: + - report_response: true + - type: WordMatch + words: + - '"Invalid API key provided"' + negative: true \ No newline at end of file diff --git a/data/rules/intercom.yml b/data/rules/intercom.yml index 01fdf0d..e521759 100644 --- a/data/rules/intercom.yml +++ b/data/rules/intercom.yml @@ -3,10 +3,10 @@ rules: id: kingfisher.intercom.1 pattern: | (?xi) - (?:intercom|ic) - (?:.|[\n\r]){0,16}? + (?:intercom|ic) + (?:.|[\n\r]){0,16}? (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) - (?:.|[\n\r]){0,16}? + (?:.|[\n\r]){0,16}? ( [0-9A-Z+/]{59}= ) diff --git a/data/rules/kickbox.yml b/data/rules/kickbox.yml new file mode 100644 index 0000000..35a4e38 --- /dev/null +++ b/data/rules/kickbox.yml @@ -0,0 +1,32 @@ +rules: + - name: Kickbox API Key + id: kingfisher.kickbox.1 + pattern: | + (?xi) + \b + kickbox + (?:.|[\n\r]){0,32}? + (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) + (?:.|[\n\r]){0,32}? + \b + ( + [A-Z0-9_]+[A-Z0-9]{64} + ) + \b + min_entropy: 3.5 + confidence: medium + examples: + - kickbox_key=test_abcdefghijklmnopqrstuvwxyzbu9JFVJtII3FINL1rOKcNpveXD4hSMtSDx7opOWd + - kickbox_token=live_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789efgh + validation: + type: Http + content: + request: + method: GET + url: "https://api.kickbox.com/v2/verify?apikey={{ TOKEN }}&email=kingfisher" + response_matcher: + - report_response: true + - type: JsonValid + - type: WordMatch + words: + - '"success":true' diff --git a/src/reporter.rs b/src/reporter.rs index a42e682..0606669 100644 --- a/src/reporter.rs +++ b/src/reporter.rs @@ -517,6 +517,12 @@ pub struct FindingRecordData { pub git_metadata: Option, } +#[derive(Serialize, JsonSchema, Clone, Debug)] +pub struct RuleMatches { + pub id: String, + pub matches: Vec, +} + impl From for ReportMatch { fn from(e: finding_data::FindingDataEntry) -> Self { ReportMatch { diff --git a/src/reporter/json_format.rs b/src/reporter/json_format.rs index 4020cee..f43637e 100644 --- a/src/reporter/json_format.rs +++ b/src/reporter/json_format.rs @@ -1,4 +1,5 @@ use super::*; +use std::collections::BTreeMap; impl DetailsReporter { pub fn json_format( @@ -8,7 +9,13 @@ impl DetailsReporter { ) -> Result<()> { let records = self.build_finding_records(args)?; if !records.is_empty() { - serde_json::to_writer_pretty(&mut writer, &records)?; + let mut grouped: BTreeMap> = BTreeMap::new(); + for record in records { + grouped.entry(record.rule.id.clone()).or_default().push(record); + } + let groups: Vec = + grouped.into_iter().map(|(id, matches)| RuleMatches { id, matches }).collect(); + serde_json::to_writer_pretty(&mut writer, &groups)?; writeln!(writer)?; } Ok(()) @@ -20,9 +27,16 @@ impl DetailsReporter { args: &cli::commands::scan::ScanArgs, ) -> Result<()> { let records = self.build_finding_records(args)?; - for record in records { - serde_json::to_writer(&mut writer, &record)?; - writeln!(writer)?; + if !records.is_empty() { + let mut grouped: BTreeMap> = BTreeMap::new(); + for record in records { + grouped.entry(record.rule.id.clone()).or_default().push(record); + } + for (id, matches) in grouped { + let group = RuleMatches { id, matches }; + serde_json::to_writer(&mut writer, &group)?; + writeln!(writer)?; + } } Ok(()) } @@ -223,7 +237,10 @@ mod tests { reporter.json_format(&mut output, &create_default_args())?; let json_output: Vec = serde_json::from_slice(&output.into_inner())?; assert!(!json_output.is_empty(), "JSON output should not be empty"); - let first = &json_output[0]; + let first_group = &json_output[0]; + assert_eq!(first_group["id"], "mock_rule_1"); + let matches = first_group["matches"].as_array().unwrap(); + let first = &matches[0]; assert_eq!(first["rule"]["name"], "MockRule"); assert_eq!(first["finding"]["language"], "Rust"); Ok(()) @@ -264,8 +281,10 @@ mod tests { reporter.json_format(&mut output, &create_default_args())?; let json_output: Vec = serde_json::from_slice(&output.into_inner())?; assert!(!json_output.is_empty(), "JSON output should not be empty"); - let first = &json_output[0]; - let validation_status = first["finding"]["validation"]["status"].as_str().unwrap(); + let first_group = &json_output[0]; + let first_match = &first_group["matches"][0]; + let validation_status = + first_match["finding"]["validation"]["status"].as_str().unwrap(); assert_eq!(validation_status, expected_status); } Ok(()) diff --git a/tests/int_rules_no_validated_findings.rs b/tests/int_rules_no_validated_findings.rs index 4692c4e..5cc4359 100644 --- a/tests/int_rules_no_validated_findings.rs +++ b/tests/int_rules_no_validated_findings.rs @@ -41,16 +41,23 @@ fn scan_rules_has_no_validated_findings() -> Result<()> { return Ok(()); } - let findings: Vec = serde_json::from_str(json_array_str)?; + let groups: Vec = serde_json::from_str(json_array_str)?; - for finding in findings { - let rule_id = finding["rule"]["id"].as_str().unwrap_or("unknown"); - - let status = - finding["finding"]["validation"]["status"].as_str().unwrap_or("").to_ascii_lowercase(); - - // Fail only on genuinely validated secrets - assert_ne!(&status, "active credential", "Validated finding detected in rule {rule_id}"); + for group in groups { + let rule_id = group["id"].as_str().unwrap_or("unknown"); + if let Some(matches) = group["matches"].as_array() { + for finding in matches { + let status = finding["finding"]["validation"]["status"] + .as_str() + .unwrap_or("") + .to_ascii_lowercase(); + // Fail only on genuinely validated secrets + assert_ne!( + &status, "active credential", + "Validated finding detected in rule {rule_id}" + ); + } + } } Ok(())