forked from mirrors/kingfisher
commit
22b111fd96
20 changed files with 318 additions and 130 deletions
5
.github/workflows/ci.yml
vendored
5
.github/workflows/ci.yml
vendored
|
|
@ -77,9 +77,10 @@ jobs:
|
|||
C:\vcpkg\downloads
|
||||
C:\vcpkg\archives
|
||||
C:\Users\runneradmin\AppData\Local\vcpkg\archives
|
||||
key: ${{ runner.os }}-vcpkg-hyperscan-542
|
||||
key: vcpkg-${{ runner.os }}-hs-542
|
||||
restore-keys: |
|
||||
${{ runner.os }}-vcpkg-
|
||||
vcpkg-${{ runner.os }}-
|
||||
vcpkg-
|
||||
|
||||
# Ensure downloads dir exists and seed PCRE 8.45 zip from a working mirror
|
||||
- name: Pre-seed PCRE 8.45 for vcpkg (bypass SourceForge redirect)
|
||||
|
|
|
|||
5
.github/workflows/release.yml
vendored
5
.github/workflows/release.yml
vendored
|
|
@ -212,9 +212,10 @@ jobs:
|
|||
C:\vcpkg\downloads
|
||||
C:\vcpkg\archives
|
||||
C:\Users\runneradmin\AppData\Local\vcpkg\archives
|
||||
key: ${{ runner.os }}-vcpkg-hyperscan-542
|
||||
key: vcpkg-${{ runner.os }}-hs-542
|
||||
restore-keys: |
|
||||
${{ runner.os }}-vcpkg-
|
||||
vcpkg-${{ runner.os }}-
|
||||
vcpkg-
|
||||
|
||||
# Ensure downloads dir exists and seed PCRE 8.45 zip from a working mirror
|
||||
- name: Pre-seed PCRE 8.45 for vcpkg (bypass SourceForge redirect)
|
||||
|
|
|
|||
|
|
@ -2,6 +2,11 @@
|
|||
|
||||
All notable changes to this project will be documented in this file.
|
||||
|
||||
## [v1.63.0]
|
||||
- Fixed bug when retrieving some finding values and injecting them as TOKENS in the rule templates
|
||||
- Improved Datadog rule
|
||||
- Improved AWS rule
|
||||
|
||||
## [v1.62.0]
|
||||
- Added `pattern_requirements` checks to rules, providing lightweight post-regex character-class validation without lookarounds. See docs/RULES.md for detail
|
||||
- Added an `ignore_if_contains` option to `pattern_requirements` to drop matches containing case-insensitive placeholder words, with tests covering the new behavior.
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ publish = false
|
|||
|
||||
[package]
|
||||
name = "kingfisher"
|
||||
version = "1.62.0"
|
||||
version = "1.63.0"
|
||||
description = "MongoDB's blazingly fast and accurate secret scanning and validation tool"
|
||||
edition.workspace = true
|
||||
rust-version.workspace = true
|
||||
|
|
|
|||
|
|
@ -1,16 +1,16 @@
|
|||
rules:
|
||||
- name: Datadog API Key
|
||||
id: kingfisher.datadog.1
|
||||
id: kingfisher.datadog.3
|
||||
pattern: |
|
||||
(?xi)
|
||||
(?xi)
|
||||
\b
|
||||
datadog
|
||||
(?:datadog|dd)
|
||||
(?:.|[\n\r]){0,64}?
|
||||
(?:SECRET|PRIVATE|ACCESS|KEY|TOKEN)
|
||||
(?:SECRET|PRIVATE|ACCESS|KEY|TOKEN)?
|
||||
(?:.|[\n\r]){0,32}?
|
||||
\b
|
||||
\b
|
||||
(
|
||||
[a-z0-9]{32}
|
||||
[A-Za-z0-9]{32}
|
||||
)
|
||||
\b
|
||||
pattern_requirements:
|
||||
|
|
@ -18,48 +18,96 @@ rules:
|
|||
min_entropy: 3.3
|
||||
confidence: medium
|
||||
examples:
|
||||
- datadog-secrettoken-0024a29224affe29d173c0bf99e5a89d
|
||||
- DD_API_KEY=0024a29224affe29d173c0bf99e5a89d
|
||||
references:
|
||||
- https://docs.datadoghq.com/account_management/api-app-keys/
|
||||
validation:
|
||||
type: Http
|
||||
content:
|
||||
request:
|
||||
method: GET
|
||||
url: https://api.datadoghq.com/api/v1/validate
|
||||
headers:
|
||||
Accept: application/json
|
||||
DD-API-KEY: '{{ TOKEN }}'
|
||||
DD-APPLICATION-KEY: '{{ APPKEY }}'
|
||||
method: GET
|
||||
DD-API-KEY: "{{ TOKEN }}"
|
||||
response_matcher:
|
||||
- report_response: true
|
||||
- status:
|
||||
- 200
|
||||
type: StatusMatch
|
||||
url: https://api.datadoghq.com/api/v2/current_user
|
||||
depends_on_rule:
|
||||
- rule_id: kingfisher.datadog.2
|
||||
variable: APPKEY
|
||||
- type: StatusMatch
|
||||
status: [200]
|
||||
|
||||
- name: Datadog Application Secret
|
||||
id: kingfisher.datadog.2
|
||||
pattern: |
|
||||
(?xi)
|
||||
\b
|
||||
datadog
|
||||
(?:.|[\n\r]){0,64}?
|
||||
(?:SECRET|PRIVATE|ACCESS|KEY|TOKEN)
|
||||
(?:.|[\n\r]){0,16}?
|
||||
(
|
||||
[a-z0-9]{40}
|
||||
)
|
||||
pattern_requirements:
|
||||
min_digits: 2
|
||||
min_uppercase: 1
|
||||
min_lowercase: 1
|
||||
min_entropy: 3.3
|
||||
confidence: medium
|
||||
examples:
|
||||
- datadog_secret_key-3c0c3965368a6b10f7640dbda46abfdca981c2d3
|
||||
- datadog_token = BzHpkcs7LujMb3Q1vLRRjbpBNxxYV0ousumYoKJS
|
||||
references:
|
||||
- https://docs.datadoghq.com/account_management/api-app-keys/
|
||||
# - name: Datadog API Key
|
||||
# id: kingfisher.datadog.1
|
||||
# pattern: |
|
||||
# (?xi)
|
||||
# \b
|
||||
# datadog
|
||||
# (?:.|[\n\r]){0,64}?
|
||||
# (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN)
|
||||
# (?:.|[\n\r]){0,32}?
|
||||
# \b
|
||||
# (
|
||||
# [a-z0-9]{32}
|
||||
# )
|
||||
# \b
|
||||
# pattern_requirements:
|
||||
# min_digits: 2
|
||||
# min_entropy: 3.3
|
||||
# confidence: medium
|
||||
# examples:
|
||||
# - datadog-secrettoken-0024a29224affe29d173c0bf99e5a89d
|
||||
# references:
|
||||
# - https://docs.datadoghq.com/account_management/api-app-keys/
|
||||
# validation:
|
||||
# type: Http
|
||||
# content:
|
||||
# request:
|
||||
# headers:
|
||||
# Accept: application/json
|
||||
# DD-API-KEY: '{{ TOKEN }}'
|
||||
# DD-APPLICATION-KEY: '{{ APPKEY }}'
|
||||
# method: GET
|
||||
# response_matcher:
|
||||
# - report_response: true
|
||||
# - status:
|
||||
# - 200
|
||||
# type: StatusMatch
|
||||
# url: https://api.datadoghq.com/api/v2/current_user
|
||||
# depends_on_rule:
|
||||
# - rule_id: kingfisher.datadog.2
|
||||
# variable: APPKEY
|
||||
|
||||
# - name: Datadog API Key (API-only validation)
|
||||
# id: kingfisher.datadog.3
|
||||
# pattern: |
|
||||
# (?xi)
|
||||
# \b
|
||||
# (?:datadog|dd)
|
||||
# (?:.|[\n\r]){0,64}?
|
||||
# (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN)?
|
||||
# (?:.|[\n\r]){0,32}?
|
||||
# \b
|
||||
# (
|
||||
# [A-Za-z0-9]{32}
|
||||
# )
|
||||
# \b
|
||||
# pattern_requirements:
|
||||
# min_digits: 2
|
||||
# min_entropy: 3.3
|
||||
# confidence: medium
|
||||
# examples:
|
||||
# - DD_API_KEY=0024a29224affe29d173c0bf99e5a89d
|
||||
# references:
|
||||
# - https://docs.datadoghq.com/account_management/api-app-keys/
|
||||
# validation:
|
||||
# type: Http
|
||||
# content:
|
||||
# request:
|
||||
# method: GET
|
||||
# url: https://api.datadoghq.com/api/v1/validate
|
||||
# headers:
|
||||
# Accept: application/json
|
||||
# DD-API-KEY: "{{ TOKEN }}"
|
||||
# response_matcher:
|
||||
# - report_response: true
|
||||
# - type: StatusMatch
|
||||
# status: [200]
|
||||
|
|
|
|||
|
|
@ -340,7 +340,7 @@ mod tests {
|
|||
fn smoke_decompress_tar_gz_archive() -> anyhow::Result<()> {
|
||||
let dir = tempdir()?;
|
||||
let tar_gz = dir.path().join("payload.tar.gz");
|
||||
let github_pat = "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"; // this is not a real secret
|
||||
let github_pat = "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"; // this is not a real secret
|
||||
|
||||
// build payload.tar.gz containing secret.txt
|
||||
{
|
||||
|
|
@ -393,7 +393,7 @@ mod tests {
|
|||
fn smoke_decompress_without_extract_archives() -> anyhow::Result<()> {
|
||||
let dir = tempdir()?;
|
||||
let tar_gz = dir.path().join("payload.tar.gz");
|
||||
let github_pat = "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";
|
||||
let github_pat = "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";
|
||||
|
||||
// ── build payload.tar.gz containing secret.txt ──────────────────────────────
|
||||
{
|
||||
|
|
|
|||
30
src/main.rs
30
src/main.rs
|
|
@ -5,27 +5,27 @@
|
|||
// * Fallback - system allocator (`system-alloc` feature)
|
||||
// ────────────────────────────────────────────────────────────
|
||||
|
||||
// --- jemalloc (opt-in) ---
|
||||
#[cfg(feature = "use-jemalloc")]
|
||||
#[global_allocator]
|
||||
static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
|
||||
// // --- jemalloc (opt-in) ---
|
||||
// #[cfg(feature = "use-jemalloc")]
|
||||
// #[global_allocator]
|
||||
// static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
|
||||
|
||||
// --- mimalloc (default) ---
|
||||
#[cfg(all(not(feature = "use-jemalloc"), not(feature = "system-alloc")))]
|
||||
#[global_allocator]
|
||||
static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc;
|
||||
|
||||
// --- system allocator (explicit opt-out) ---
|
||||
#[cfg(feature = "system-alloc")]
|
||||
use std::alloc::System;
|
||||
#[cfg(feature = "system-alloc")]
|
||||
#[global_allocator]
|
||||
static GLOBAL: System = System;
|
||||
// // --- mimalloc (default) ---
|
||||
// #[cfg(all(not(feature = "use-jemalloc"), not(feature = "system-alloc")))]
|
||||
// #[global_allocator]
|
||||
// static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc;
|
||||
|
||||
// // --- system allocator (explicit opt-out) ---
|
||||
// #[cfg(feature = "system-alloc")]
|
||||
// use std::alloc::System;
|
||||
// #[cfg(feature = "system-alloc")]
|
||||
// #[global_allocator]
|
||||
// static GLOBAL: System = System;
|
||||
|
||||
use std::alloc::System;
|
||||
#[global_allocator]
|
||||
static GLOBAL: System = System;
|
||||
|
||||
use std::{
|
||||
io::{IsTerminal, Read},
|
||||
sync::{Arc, Mutex},
|
||||
|
|
|
|||
121
src/matcher.rs
121
src/matcher.rs
|
|
@ -374,7 +374,9 @@ impl<'a> Matcher<'a> {
|
|||
} else {
|
||||
None
|
||||
};
|
||||
/////////////////////////////
|
||||
// Process matches
|
||||
/////////////////////////////
|
||||
let mut matches = Vec::new();
|
||||
let owned_ts_results = tree_sitter_result.map(|ts_results| {
|
||||
ts_results
|
||||
|
|
@ -565,9 +567,11 @@ fn record_match(
|
|||
) -> bool {
|
||||
insert_span(map.entry(rule_id).or_default(), span)
|
||||
}
|
||||
// in src/matcher.rs
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn filter_match<'b>(
|
||||
blob: &'b Blob,
|
||||
// rule: &'b Rule,
|
||||
rule: Arc<Rule>,
|
||||
re: &Regex,
|
||||
start: usize,
|
||||
|
|
@ -596,14 +600,44 @@ fn filter_match<'b>(
|
|||
|
||||
for captures in re.captures_iter(haystack) {
|
||||
let full_capture = captures.get(0).unwrap();
|
||||
let matching_input = captures.get(1).unwrap_or(full_capture);
|
||||
|
||||
// --- LOGIC TO FIND THE "SECRET" FOR ENTROPY/SAFE-LISTING ---
|
||||
let matching_input_for_entropy = 'block: {
|
||||
// 1. Prefer a named capture called TOKEN (case-insensitive).
|
||||
if let Some(token_cap) = re.capture_names().enumerate().find_map(|(i, name_opt)| {
|
||||
name_opt
|
||||
.filter(|name| name.eq_ignore_ascii_case("TOKEN"))
|
||||
.and_then(|_| captures.get(i))
|
||||
}) {
|
||||
break 'block token_cap;
|
||||
}
|
||||
|
||||
// 2. Otherwise, prefer the first *matched* named capture.
|
||||
if let Some(named_cap) = re.capture_names().enumerate().find_map(|(i, name_opt)| {
|
||||
name_opt.and_then(|_| captures.get(i)) // find(i > 0 && name_opt.is_some())
|
||||
}) {
|
||||
break 'block named_cap;
|
||||
}
|
||||
|
||||
// 3. Otherwise, fall back to the first positional capture (group 1).
|
||||
if let Some(pos_cap) = captures.get(1) {
|
||||
break 'block pos_cap;
|
||||
}
|
||||
|
||||
// 4. Finally, fall back to the full match (group 0).
|
||||
break 'block full_capture;
|
||||
};
|
||||
// --- END LOGIC ---
|
||||
|
||||
let min_entropy = rule.min_entropy();
|
||||
let mi_bytes = matching_input.as_bytes();
|
||||
let entropy_bytes = matching_input_for_entropy.as_bytes();
|
||||
let full_bytes = full_capture.as_bytes();
|
||||
let calculated_entropy = calculate_shannon_entropy(mi_bytes);
|
||||
let calculated_entropy = calculate_shannon_entropy(entropy_bytes);
|
||||
|
||||
// Check entropy and safe-listing against the *selected* secret bytes
|
||||
if calculated_entropy <= min_entropy
|
||||
|| is_safe_match(mi_bytes)
|
||||
|| is_user_match(mi_bytes, full_bytes)
|
||||
|| is_safe_match(entropy_bytes)
|
||||
|| is_user_match(entropy_bytes, full_bytes)
|
||||
{
|
||||
debug!(
|
||||
"Skipping match with entropy {} <= {} or safe match",
|
||||
|
|
@ -619,7 +653,15 @@ fn filter_match<'b>(
|
|||
captures: &captures,
|
||||
full_match: full_bytes,
|
||||
};
|
||||
match char_reqs.validate(mi_bytes, Some(context), respect_ignore_if_contains) {
|
||||
|
||||
// --- FIX IS HERE ---
|
||||
//
|
||||
// The `validate` function (and thus `{{ MATCH }}`) should *always*
|
||||
// operate on the *full match* (group 0), not just the entropy bytes.
|
||||
// This aligns the scan logic with the unit test's logic.
|
||||
match char_reqs.validate(full_bytes, Some(context), respect_ignore_if_contains) {
|
||||
//
|
||||
// --- END FIX ---
|
||||
PatternValidationResult::Passed => {}
|
||||
PatternValidationResult::Failed => {
|
||||
debug!(
|
||||
|
|
@ -647,6 +689,9 @@ fn filter_match<'b>(
|
|||
}
|
||||
}
|
||||
|
||||
// Use the `matching_input_for_entropy` as the span/key for the finding.
|
||||
let matching_input = matching_input_for_entropy;
|
||||
|
||||
let matching_input_offset_span = OffsetSpan::from_range(
|
||||
(start + matching_input.start())..(start + matching_input.end()),
|
||||
);
|
||||
|
|
@ -668,7 +713,10 @@ fn filter_match<'b>(
|
|||
}
|
||||
let only_matching_input =
|
||||
&blob.bytes()[matching_input_offset_span.start..matching_input_offset_span.end];
|
||||
|
||||
// Pass the *full* capture object to from_captures
|
||||
let groups = SerializableCaptures::from_captures(&captures, haystack, re, redact);
|
||||
|
||||
matches.push(BlobMatch {
|
||||
rule: Arc::clone(&rule),
|
||||
blob_id: blob.id_ref(),
|
||||
|
|
@ -687,6 +735,7 @@ fn filter_match<'b>(
|
|||
t.end(new_count > 0, new_count, 0);
|
||||
}
|
||||
}
|
||||
|
||||
fn get_language_and_queries(lang: &str) -> Option<(Language, FxHashMap<String, String>)> {
|
||||
match lang.to_lowercase().as_str() {
|
||||
"bash" | "shell" => Some((Language::Bash, parser::queries::bash::get_bash_queries())),
|
||||
|
|
@ -796,6 +845,7 @@ pub struct SerializableCaptures {
|
|||
#[schemars(with = "Vec<SerializableCapture>")]
|
||||
pub captures: SmallVec<[SerializableCapture; 2]>, // All captures (named and unnamed)
|
||||
}
|
||||
|
||||
impl SerializableCaptures {
|
||||
pub fn from_captures(
|
||||
captures: &regex::bytes::Captures,
|
||||
|
|
@ -808,26 +858,51 @@ impl SerializableCaptures {
|
|||
let capture_names: SmallVec<[Option<String>; 4]> =
|
||||
re.capture_names().map(|name| name.map(str::to_string)).collect();
|
||||
|
||||
for i in 0..captures.len() {
|
||||
if let Some(cap) = captures.get(i) {
|
||||
// If there are explicit capture groups (e.g., group 1, 2, ...),
|
||||
// only serialize those.
|
||||
if captures.len() > 1 {
|
||||
for i in 1..captures.len() {
|
||||
// Start from 1
|
||||
if let Some(cap) = captures.get(i) {
|
||||
let value = if redact {
|
||||
redact_value(&String::from_utf8_lossy(cap.as_bytes()))
|
||||
} else {
|
||||
String::from_utf8_lossy(cap.as_bytes()).to_string()
|
||||
};
|
||||
let interned = intern(&value);
|
||||
let name = capture_names.get(i).and_then(|opt| opt.as_ref()).cloned();
|
||||
|
||||
serialized_captures.push(SerializableCapture {
|
||||
name,
|
||||
match_number: i32::try_from(i).unwrap_or(0),
|
||||
start: cap.start(),
|
||||
end: cap.end(),
|
||||
value: interned,
|
||||
});
|
||||
}
|
||||
}
|
||||
} else if captures.len() == 1 {
|
||||
// ELSE, if there is ONLY the full match (len == 1),
|
||||
// serialize just that full match (group 0) as the fallback.
|
||||
if let Some(cap) = captures.get(0) {
|
||||
let value = if redact {
|
||||
redact_value(&String::from_utf8_lossy(cap.as_bytes()))
|
||||
} else {
|
||||
String::from_utf8_lossy(cap.as_bytes()).to_string()
|
||||
};
|
||||
let interned = intern(&value);
|
||||
|
||||
let name = capture_names.get(i).and_then(|opt| opt.as_ref()).cloned();
|
||||
let name = capture_names.get(0).and_then(|opt| opt.as_ref()).cloned();
|
||||
|
||||
serialized_captures.push(SerializableCapture {
|
||||
name,
|
||||
match_number: i32::try_from(i).unwrap_or(0),
|
||||
match_number: 0,
|
||||
start: cap.start(),
|
||||
end: cap.end(),
|
||||
value: interned,
|
||||
});
|
||||
}
|
||||
}
|
||||
// If len == 0 (no match), loop is skipped, empty vec is returned.
|
||||
|
||||
SerializableCaptures { captures: serialized_captures }
|
||||
}
|
||||
|
|
@ -950,7 +1025,8 @@ pub struct DecodedData {
|
|||
}
|
||||
#[inline]
|
||||
fn is_base64_byte(b: u8) -> bool {
|
||||
matches!(b, b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'+' | b'/')
|
||||
// Include URL-safe characters '-' and '_'
|
||||
matches!(b, b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'+' | b'/' | b'-' | b'_')
|
||||
}
|
||||
|
||||
pub fn get_base64_strings(input: &[u8]) -> Vec<DecodedData> {
|
||||
|
|
@ -975,7 +1051,14 @@ pub fn get_base64_strings(input: &[u8]) -> Vec<DecodedData> {
|
|||
let len = end - start;
|
||||
if len >= 32 && len % 4 == 0 {
|
||||
let base64_slice = &input[start..end];
|
||||
if let Ok(decoded) = general_purpose::STANDARD.decode(base64_slice) {
|
||||
|
||||
// Try decoding with STANDARD, then URL_SAFE, then URL_SAFE_NO_PAD
|
||||
let decode_result = general_purpose::STANDARD
|
||||
.decode(base64_slice)
|
||||
.or_else(|_| general_purpose::URL_SAFE.decode(base64_slice))
|
||||
.or_else(|_| general_purpose::URL_SAFE_NO_PAD.decode(base64_slice));
|
||||
|
||||
if let Ok(decoded) = decode_result {
|
||||
if let Ok(decoded_str) = std::str::from_utf8(&decoded) {
|
||||
if decoded_str.is_ascii() {
|
||||
results.push(DecodedData {
|
||||
|
|
@ -1521,10 +1604,10 @@ line2
|
|||
.map(|cap| (cap.name.as_deref(), cap.match_number, cap.value))
|
||||
.collect();
|
||||
|
||||
assert_eq!(entries.len(), 4);
|
||||
assert_eq!(entries[0], (None, 0, "ghp_ABC12"));
|
||||
assert_eq!(entries[1], (None, 1, "ghp_ABC12"));
|
||||
assert_eq!(entries[2], (Some("body"), 2, "ABC"));
|
||||
assert_eq!(entries[3], (Some("checksum"), 3, "12"));
|
||||
assert_eq!(entries.len(), 3);
|
||||
|
||||
assert_eq!(entries[0], (None, 1, "ghp_ABC12"));
|
||||
assert_eq!(entries[1], (Some("body"), 2, "ABC"));
|
||||
assert_eq!(entries[2], (Some("checksum"), 3, "12"));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -417,15 +417,19 @@ impl DetailsReporter {
|
|||
let source_span = &rm.m.location.source_span;
|
||||
let line_num = source_span.start.line;
|
||||
|
||||
// --- FIX IS HERE ---
|
||||
// We now correctly serialize *only* the explicit capture groups (or group 0
|
||||
// as a fallback). The primary "secret" is therefore always at index 0
|
||||
// of the captures SmallVec.
|
||||
let snippet = Escaped(
|
||||
rm.m.groups
|
||||
.captures
|
||||
.get(1)
|
||||
.or_else(|| rm.m.groups.captures.get(0))
|
||||
.get(0) // Get the first (and primary) serialized capture
|
||||
.map(|capture| capture.value.as_bytes())
|
||||
.unwrap_or_default(),
|
||||
)
|
||||
.to_string();
|
||||
// --- END FIX ---
|
||||
|
||||
let validation_status = if rm.validation_success {
|
||||
"Active Credential".to_string()
|
||||
|
|
|
|||
|
|
@ -731,7 +731,7 @@ mod tests {
|
|||
}),
|
||||
};
|
||||
|
||||
let token = b"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";
|
||||
let token = b"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";
|
||||
let regex =
|
||||
BytesRegex::new(r"(?x) ghp_(?P<body>[A-Za-z0-9]{30})(?P<checksum>[A-Za-z0-9]{6})")
|
||||
.unwrap();
|
||||
|
|
|
|||
|
|
@ -321,6 +321,10 @@ async fn timed_validate_single_match<'a>(
|
|||
for dep in m.rule.syntax().depends_on_rule.iter().flatten() {
|
||||
if let Some(vals) = dependent_variables.get(&dep.variable.to_uppercase()) {
|
||||
for (val, span) in vals {
|
||||
// Skip adding captured values for TOKEN dependencies
|
||||
if dep.variable.eq_ignore_ascii_case("TOKEN") {
|
||||
continue;
|
||||
}
|
||||
captured_values.push((
|
||||
dep.variable.to_uppercase(),
|
||||
val.clone(),
|
||||
|
|
|
|||
|
|
@ -187,9 +187,7 @@ pub fn validate_aws_credentials_input(access_key_id: &str, secret_key: &str) ->
|
|||
if !access_key_id.chars().all(|c| c.is_ascii_alphanumeric()) {
|
||||
return Err("AWS access key ID contains invalid characters".to_string());
|
||||
}
|
||||
if !secret_key.chars().all(|c| c.is_ascii_alphanumeric() || c == '/' || c == '+') {
|
||||
return Err("AWS secret key contains invalid characters".to_string());
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -3,22 +3,46 @@ use tokio::net::lookup_host;
|
|||
|
||||
use crate::validation::SerializableCaptures;
|
||||
|
||||
/// Return (NAME, value, start, end) for every capture we care about.
|
||||
/// Return (NAME, value, start, end) for the captures we care about.
|
||||
///
|
||||
/// * If a capture has a name, use that (upper-cased)
|
||||
/// * If it’s unnamed, fall back to `"TOKEN"`
|
||||
/// * Named captures keep their (upper-cased) name
|
||||
/// * Among unnamed captures, keep **only the first one** and call it "TOKEN"
|
||||
pub fn process_captures(captures: &SerializableCaptures) -> Vec<(String, String, usize, usize)> {
|
||||
let mut saw_unnamed = false;
|
||||
|
||||
captures
|
||||
.captures
|
||||
.iter()
|
||||
.map(|cap| {
|
||||
let name =
|
||||
cap.name.as_ref().map(|n| n.to_uppercase()).unwrap_or_else(|| "TOKEN".to_string());
|
||||
(name, cap.value.to_string(), cap.start, cap.end)
|
||||
.filter_map(|cap| {
|
||||
if let Some(name) = &cap.name {
|
||||
Some((name.to_uppercase(), cap.value.to_string(), cap.start, cap.end))
|
||||
} else if !saw_unnamed {
|
||||
saw_unnamed = true;
|
||||
Some(("TOKEN".to_string(), cap.value.to_string(), cap.start, cap.end))
|
||||
} else {
|
||||
// Ignore any additional unnamed captures (e.g., from unintended groups)
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
// /// Return (NAME, value, start, end) for every capture we care about.
|
||||
// ///
|
||||
// /// * If a capture has a name, use that (upper-cased)
|
||||
// /// * If it’s unnamed, fall back to `"TOKEN"`
|
||||
// pub fn process_captures(captures: &SerializableCaptures) -> Vec<(String, String, usize, usize)> {
|
||||
// captures
|
||||
// .captures
|
||||
// .iter()
|
||||
// .map(|cap| {
|
||||
// let name =
|
||||
// cap.name.as_ref().map(|n| n.to_uppercase()).unwrap_or_else(|| "TOKEN".to_string());
|
||||
// (name, cap.value.to_string(), cap.start, cap.end)
|
||||
// })
|
||||
// .collect()
|
||||
// }
|
||||
|
||||
pub fn find_closest_variable(
|
||||
captures: &[(String, String, usize, usize)],
|
||||
target_value: &String,
|
||||
|
|
@ -108,7 +132,7 @@ pub async fn check_url_resolvable(url: &Url) -> Result<(), Box<dyn std::error::E
|
|||
// -----------------------------------------------------------------------------
|
||||
// tests
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
//
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
|
@ -121,7 +145,7 @@ mod tests {
|
|||
let captures = SerializableCaptures {
|
||||
captures: smallvec![SerializableCapture {
|
||||
name: None,
|
||||
match_number: 0,
|
||||
match_number: 0, // This test is for a rule with *no* explicit captures
|
||||
start: 1,
|
||||
end: 4,
|
||||
value: "abc",
|
||||
|
|
@ -130,21 +154,26 @@ mod tests {
|
|||
let result = process_captures(&captures);
|
||||
assert_eq!(result, vec![("TOKEN".to_string(), "abc".to_string(), 1usize, 4usize)]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn includes_whole_match_when_multiple() {
|
||||
let captures = SerializableCaptures {
|
||||
captures: smallvec![
|
||||
// --- FIX ---
|
||||
// This test simulated a regex like `(abc)de(?P<foo>bcd)`.
|
||||
// With our fix, group 0 ("abcde") is NOT serialized.
|
||||
// We only get the explicit captures (group 1 and "foo").
|
||||
SerializableCapture {
|
||||
// This is group 1 (unnamed)
|
||||
name: None,
|
||||
match_number: 0,
|
||||
start: 0,
|
||||
end: 5,
|
||||
value: "abcde",
|
||||
match_number: 1, // Corrected match_number
|
||||
start: 1,
|
||||
end: 4,
|
||||
value: "bcd",
|
||||
},
|
||||
SerializableCapture {
|
||||
// This is group 2 (named "foo")
|
||||
name: Some("foo".to_string()),
|
||||
match_number: -1,
|
||||
match_number: 2, // Corrected match_number
|
||||
start: 1,
|
||||
end: 4,
|
||||
value: "bcd",
|
||||
|
|
@ -152,45 +181,60 @@ mod tests {
|
|||
],
|
||||
};
|
||||
let result = process_captures(&captures);
|
||||
|
||||
// --- FIX ---
|
||||
// The expected result now only contains the explicit captures.
|
||||
// The first unnamed capture ("bcd") becomes "TOKEN".
|
||||
assert_eq!(
|
||||
result,
|
||||
vec![
|
||||
("TOKEN".to_string(), "abcde".to_string(), 0usize, 5usize),
|
||||
("TOKEN".to_string(), "bcd".to_string(), 1usize, 4usize),
|
||||
("FOO".to_string(), "bcd".to_string(), 1usize, 4usize),
|
||||
]
|
||||
);
|
||||
// --- END FIX ---
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[test]
|
||||
fn includes_whole_match_and_unnamed_groups() {
|
||||
let captures = SerializableCaptures {
|
||||
captures: smallvec![
|
||||
// --- FIX ---
|
||||
// This test simulated a regex like `(?P<foo>aa)bb(cc)`.
|
||||
// With our fix, group 0 ("aabbcc") is NOT serialized.
|
||||
// We only get the explicit captures ("foo" and group 2).
|
||||
SerializableCapture {
|
||||
name: None,
|
||||
match_number: 0,
|
||||
start: 0,
|
||||
end: 6,
|
||||
value: "aabbcc",
|
||||
},
|
||||
SerializableCapture {
|
||||
// This is group 1 (named "foo")
|
||||
name: Some("foo".to_string()),
|
||||
match_number: -1,
|
||||
match_number: 1, // Corrected match_number
|
||||
start: 0,
|
||||
end: 2,
|
||||
value: "aa",
|
||||
},
|
||||
SerializableCapture { name: None, match_number: 1, start: 4, end: 6, value: "cc" },
|
||||
SerializableCapture {
|
||||
// This is group 2 (unnamed)
|
||||
name: None,
|
||||
match_number: 2, // Corrected match_number
|
||||
start: 4,
|
||||
end: 6,
|
||||
value: "cc"
|
||||
},
|
||||
],
|
||||
};
|
||||
let result = process_captures(&captures);
|
||||
|
||||
// --- FIX ---
|
||||
// The expected result no longer contains the full match ("aabbcc").
|
||||
// The first (and only) unnamed capture ("cc") is now correctly labeled "TOKEN".
|
||||
assert_eq!(
|
||||
result,
|
||||
vec![
|
||||
("TOKEN".to_string(), "aabbcc".to_string(), 0usize, 6usize),
|
||||
("FOO".to_string(), "aa".to_string(), 0usize, 2usize),
|
||||
("TOKEN".to_string(), "cc".to_string(), 4usize, 6usize),
|
||||
("FOO".to_string(), "aa".to_string(), 0usize, 2usize), // From named group 1
|
||||
("TOKEN".to_string(), "cc".to_string(), 4usize, 6usize), // From unnamed group 2
|
||||
]
|
||||
);
|
||||
// --- END FIX ---
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
|
|||
|
|
@ -8,8 +8,8 @@ use tempfile::tempdir;
|
|||
fn detects_base64_encoded_secret() -> anyhow::Result<()> {
|
||||
let dir = tempdir()?;
|
||||
let file_path = dir.path().join("secret.txt");
|
||||
// Base64 for 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
|
||||
let encoded = "Z2hwXzF3dUhGaWtCS1F0Q2NIM0VCMkZCVWt5bjhrclhoUDBNV0h4cw==";
|
||||
// Base64 for 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
|
||||
let encoded = "Z2hwX0Vab3BaRE1XZWlsZGZvRnp5SDBLbld5UTVZeTN2eTBZMlNVNg==";
|
||||
fs::write(&file_path, encoded)?;
|
||||
|
||||
Command::new(assert_cmd::cargo::cargo_bin!("kingfisher"))
|
||||
|
|
@ -26,7 +26,7 @@ fn detects_base64_encoded_secret() -> anyhow::Result<()> {
|
|||
.assert()
|
||||
.code(200)
|
||||
.stdout(
|
||||
predicate::str::contains("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")
|
||||
predicate::str::contains("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")
|
||||
.and(predicate::str::contains("\"encoding\": \"base64\"")),
|
||||
);
|
||||
|
||||
|
|
@ -39,7 +39,7 @@ fn detects_base64_encoded_secret() -> anyhow::Result<()> {
|
|||
fn skips_base64_when_disabled() -> anyhow::Result<()> {
|
||||
let dir = tempdir()?;
|
||||
let file_path = dir.path().join("secret.txt");
|
||||
let encoded = "Z2hwXzF3dUhGaWtCS1F0Q2NIM0VCMkZCVWt5bjhrclhoUDBNV0h4cw==";
|
||||
let encoded = "Z2hwX0Vab3BaRE1XZWlsZGZvRnp5SDBLbld5UTVZeTN2eTBZMlNVNg==";
|
||||
fs::write(&file_path, encoded)?;
|
||||
|
||||
Command::new(assert_cmd::cargo::cargo_bin!("kingfisher"))
|
||||
|
|
@ -92,8 +92,8 @@ fn no_base64_skips_empty_files() -> anyhow::Result<()> {
|
|||
fn detects_base64_in_code_with_tree_sitter() -> anyhow::Result<()> {
|
||||
let dir = tempdir()?;
|
||||
let file_path = dir.path().join("secret.py");
|
||||
// Base64 for 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
|
||||
let encoded = "Z2hwXzF3dUhGaWtCS1F0Q2NIM0VCMkZCVWt5bjhrclhoUDBNV0h4cw==";
|
||||
// Base64 for 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
|
||||
let encoded = "Z2hwX0Vab3BaRE1XZWlsZGZvRnp5SDBLbld5UTVZeTN2eTBZMlNVNg==";
|
||||
fs::write(&file_path, format!("token = \"{}\"\n", encoded))?;
|
||||
|
||||
Command::new(assert_cmd::cargo::cargo_bin!("kingfisher"))
|
||||
|
|
@ -110,7 +110,7 @@ fn detects_base64_in_code_with_tree_sitter() -> anyhow::Result<()> {
|
|||
.assert()
|
||||
.code(200)
|
||||
.stdout(
|
||||
predicate::str::contains("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")
|
||||
predicate::str::contains("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")
|
||||
.and(predicate::str::contains("\"encoding\": \"base64\"")),
|
||||
);
|
||||
|
||||
|
|
|
|||
|
|
@ -159,7 +159,7 @@ async fn test_scan_slack_messages() -> Result<()> {
|
|||
"messages": {
|
||||
"matches": [{
|
||||
"permalink": "https://example.slack.com/archives/C123/p1234",
|
||||
"text": "This contains a github token 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",
|
||||
"text": "This contains a github token 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",
|
||||
"ts": "1234.56",
|
||||
"channel": {"id": "C123", "name": "general"}
|
||||
}],
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ fn smoke_scan_tar_gz_archive() -> anyhow::Result<()> {
|
|||
|
||||
let dir = tempfile::tempdir()?;
|
||||
let tar_gz = dir.path().join("payload.tar.gz");
|
||||
let github_pat = "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";
|
||||
let github_pat = "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";
|
||||
|
||||
// --- build a payload.tar.gz -------------------------------------------------
|
||||
{
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ use clap::Parser;
|
|||
use predicates::prelude::*;
|
||||
use tempfile::tempdir;
|
||||
|
||||
const GH_PAT: &str = "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";
|
||||
const GH_PAT: &str = "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";
|
||||
|
||||
#[test]
|
||||
fn manage_baseline_enables_no_dedup() -> anyhow::Result<()> {
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ use assert_cmd::Command;
|
|||
use predicates::prelude::*;
|
||||
use tempfile::tempdir;
|
||||
|
||||
const SECRET: &str = "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";
|
||||
const SECRET: &str = "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";
|
||||
|
||||
#[test]
|
||||
fn exclude_pattern_hides_matches() -> anyhow::Result<()> {
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ use assert_cmd::prelude::*;
|
|||
use predicates::prelude::*;
|
||||
use tempfile::tempdir;
|
||||
|
||||
const GITHUB_PAT: &str = "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";
|
||||
const GITHUB_PAT: &str = "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";
|
||||
|
||||
#[test]
|
||||
fn smoke_scan_filesystem_text_and_binary() -> anyhow::Result<()> {
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ fn smoke_scan_git_history() -> anyhow::Result<()> {
|
|||
|
||||
// commit v1
|
||||
let file_path = repo_dir.join("config.yml");
|
||||
fs::write(&file_path, b"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")?;
|
||||
fs::write(&file_path, b"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")?;
|
||||
let mut idx = repo.index()?;
|
||||
idx.add_path(std::path::Path::new("config.yml"))?;
|
||||
let oid1 = idx.write_tree()?;
|
||||
|
|
@ -23,7 +23,7 @@ fn smoke_scan_git_history() -> anyhow::Result<()> {
|
|||
repo.commit(Some("HEAD"), &sig, &sig, "init", &tree1, &[])?;
|
||||
|
||||
// commit v2 (same leak, will test dedup)
|
||||
fs::write(&file_path, b"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 # unchanged")?;
|
||||
fs::write(&file_path, b"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 # unchanged")?;
|
||||
idx.add_path(std::path::Path::new("config.yml"))?;
|
||||
let oid2 = idx.write_tree()?;
|
||||
let tree2 = repo.find_tree(oid2)?;
|
||||
|
|
@ -44,7 +44,7 @@ fn smoke_scan_git_history() -> anyhow::Result<()> {
|
|||
])
|
||||
.assert()
|
||||
.code(200) // ← kingfisher’s “findings present” status
|
||||
.stdout(predicate::str::contains("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"));
|
||||
.stdout(predicate::str::contains("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"));
|
||||
|
||||
dir.close()?;
|
||||
Ok(())
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue