updated for v1.61.0

This commit is contained in:
Mick Grove 2025-10-30 22:50:41 -07:00
commit ca3f175427
5 changed files with 208 additions and 27 deletions

View file

@ -6,6 +6,7 @@ All notable changes to this project will be documented in this file.
- Fixed local filesystem scans to keep `open_path_as_is` enabled when opening Git repositories and only disable it for diff-based scans.
- Created Linux- and Windows-specific installer scripts
- Updated diff-focused scanning so `--branch-root-commit` can be provided alongside `--branch`, letting you diff from a chosen commit while targeting a specific branch tip (still defaulting back to the `--branch` ref when the commit is omitted).
- Updated rules
## [v1.60.0]
- Removed the `--bitbucket-username`, `--bitbucket-token`, and `--bitbucket-oauth-token` flags in favour of `KF_BITBUCKET_*` environment variables when authenticating to Bitbucket.

View file

@ -24,8 +24,6 @@ rules:
- AccountName=mystorageaccount
- mystorageaccount.blob.core.windows.net
- azure_storage_name="prodblob2024"
- name: Azure Storage Account Key
id: kingfisher.azurestorage.2
pattern: |
@ -36,7 +34,7 @@ rules:
(?:SECRET|PRIVATE|ACCESS|KEY|TOKEN)
(?:.|[\n\r]){0,128}?
(
[A-Za-z0-9+/]{86,88}={0,2}
[A-Z0-9+\\/-]{86,88}={0,2}
)
min_entropy: 4.0
confidence: medium
@ -46,4 +44,4 @@ rules:
type: AzureStorage
depends_on_rule:
- rule_id: kingfisher.azurestorage.1
variable: AZURENAME
variable: AZURENAME

View file

@ -3,12 +3,11 @@ rules:
id: kingfisher.gitlab.1
pattern: |
(?xi)
\b
(
\b
(
glpat-
[0-9A-Z_-]{20}
)
(?:\b|$)
)
min_entropy: 3.5
confidence: medium
examples:
@ -114,4 +113,32 @@ rules:
- '"token is missing"'
- '"403 Forbidden"'
negative: true
url: https://gitlab.com/api/v4/ci/pipeline_triggers/{{ TOKEN }}
url: https://gitlab.com/api/v4/ci/pipeline_triggers/{{ TOKEN }}
- name: GitLab Private Token - Updated Format
id: kingfisher.gitlab.4
pattern: |
(?x)
\b
(
glpat-[A-Za-z0-9_-]{36,38}\.01\.[a-z0-9]{9}
)
min_entropy: 3.5
confidence: medium
examples:
- glpat-5m8CwMZi4bwlRSCKzG0-3W86MQp1OmV5Y2UK.01.1012mzo24
references:
- https://github.com/diffblue/gitlab/blob/39c63ee83369bf5353256a6b95f3116728edd102/doc/api/personal_access_tokens.md
- https://docs.gitlab.com/api/personal_access_tokens/
validation:
type: Http
content:
request:
headers:
PRIVATE-TOKEN: '{{ TOKEN }}'
method: GET
response_matcher:
- report_response: true
- type: WordMatch
words:
- '"id"'
url: https://gitlab.com/api/v4/personal_access_tokens/self

View file

@ -10,7 +10,7 @@ use chrono::Local;
use serde::{Deserialize, Serialize};
use tracing::debug;
use crate::{findings_store::FindingsStore, matcher::compute_finding_fingerprint};
use crate::findings_store::FindingsStore;
#[derive(Debug, Default, Serialize, Deserialize)]
pub struct BaselineFile {
@ -53,20 +53,6 @@ fn normalize_path(p: &Path, roots: &[PathBuf]) -> String {
p.to_string_lossy().replace('\\', "/")
}
/// Returns the fingerprint of `secret` and `path` as a 16-character,
/// zero-padded, lowercase hexadecimal string.
///
/// The two trailing arguments of `compute_finding_fingerprint` are always
/// passed as `0`, so the result depends only on `secret` and `path`.
fn compute_hash(secret: &str, path: &str) -> String {
    format!("{:016x}", compute_finding_fingerprint(secret, path, 0, 0))
}
/// Returns the text of the match's capture at index 1, falling back to the
/// capture at index 0 when index 1 is absent.
///
/// Yields an empty string when the match holds no captures at all.
fn extract_secret(m: &crate::matcher::Match) -> String {
    let captures = &m.groups.captures;
    match captures.get(1).or_else(|| captures.get(0)) {
        Some(capture) => capture.value.to_string(),
        None => String::new(),
    }
}
pub fn apply_baseline(
store: &mut FindingsStore,
baseline_path: &Path,
@ -87,10 +73,10 @@ pub fn apply_baseline(
for arc_msg in store.get_matches_mut() {
let (origin, _blob, m) = Arc::make_mut(arc_msg);
let file_path = origin.iter().filter_map(|o| o.full_path()).next();
let hash = format!("{:016x}", m.finding_fingerprint);
if let Some(fp) = file_path {
let normalized = normalize_path(&fp, roots);
let secret = extract_secret(m);
let hash = compute_hash(&secret, &normalized);
if known.contains(&hash) {
debug!("Skipping {} due to baseline (hash {})", normalized, hash);
m.visible = false;
@ -108,6 +94,11 @@ pub fn apply_baseline(
};
new_entries.push(entry);
}
} else if known.contains(&hash) {
m.visible = false;
if manage {
encountered.insert(hash.clone());
}
}
}
if manage {
@ -127,3 +118,136 @@ pub fn apply_baseline(
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
use crate::{
blob::{BlobId, BlobMetadata},
location::{Location, OffsetSpan, SourcePoint, SourceSpan},
matcher::{Match, SerializableCapture, SerializableCaptures},
origin::{Origin, OriginSet},
rules::rule::{Confidence, Rule, RuleSyntax},
};
use anyhow::Result;
use smallvec::SmallVec;
use std::{path::Path, sync::Arc};
use tempfile::TempDir;
/// Builds a minimal shared rule for the baseline tests: low confidence,
/// zero entropy threshold, no examples, references, validation or
/// dependencies.
fn test_rule() -> Arc<Rule> {
    let syntax = RuleSyntax {
        name: String::from("test"),
        id: String::from("test.rule"),
        pattern: String::from("test"),
        min_entropy: 0.0,
        confidence: Confidence::Low,
        visible: true,
        examples: Vec::new(),
        negative_examples: Vec::new(),
        references: Vec::new(),
        validation: None,
        depends_on_rule: Vec::new(),
    };
    Arc::new(Rule::new(syntax))
}
fn empty_captures() -> SerializableCaptures {
SerializableCaptures { captures: SmallVec::<[SerializableCapture; 2]>::new() }
}
/// Creates a findings store rooted at `"."` that holds exactly one visible
/// match carrying `fingerprint`, attributed to the file at `file_path`.
///
/// Location, blob id, blob metadata and validation fields are placeholder
/// values; only the fingerprint and the origin path are caller-controlled.
fn make_store_with_match(fingerprint: u64, file_path: &Path) -> FindingsStore {
    let mut store = FindingsStore::new(PathBuf::from("."));

    // A one-byte span on the first line.
    let location = Location {
        offset_span: OffsetSpan { start: 0, end: 1 },
        source_span: SourceSpan {
            start: SourcePoint { line: 1, column: 0 },
            end: SourcePoint { line: 1, column: 1 },
        },
    };

    let finding = Match {
        location,
        groups: empty_captures(),
        blob_id: BlobId::default(),
        finding_fingerprint: fingerprint,
        rule: test_rule(),
        validation_response_body: String::new(),
        validation_response_status: 0,
        validation_success: false,
        calculated_entropy: 0.0,
        visible: true,
        is_base64: false,
    };

    let origins = Arc::new(OriginSet::from(Origin::from_file(file_path.to_path_buf())));
    let metadata = Arc::new(BlobMetadata {
        id: BlobId::default(),
        num_bytes: 0,
        mime_essence: None,
        language: None,
    });

    store.get_matches_mut().push(Arc::new((origins, metadata, finding)));
    store
}
/// Computes the path string the tests expect for `file`: the final
/// component of `root`, followed by `file`'s path relative to `root` when
/// `file` lies under it, with every backslash replaced by a forward slash.
///
/// When `file` is not under `root`, only the final component of `root` is
/// returned.
///
/// # Panics
///
/// Panics if `root` has no final component (`file_name()` returns `None`).
fn expected_relative_path(root: &Path, file: &Path) -> String {
    let base = PathBuf::from(root.file_name().unwrap());
    let joined = match file.strip_prefix(root) {
        Ok(tail) => base.join(tail),
        Err(_) => base,
    };
    joined.to_string_lossy().replace('\\', "/")
}
// Checks that a fingerprint recorded by a first `apply_baseline` run (third
// argument `true`) hides the same finding on a later run (third argument
// `false`).
#[test]
fn apply_baseline_filters_existing_fingerprints() -> Result<()> {
// Fixture: a temporary directory used as the only scan root, holding one
// file; the baseline file lives in the same directory.
let tmp = TempDir::new()?;
let roots = [tmp.path().to_path_buf()];
let secret_file = tmp.path().join("secret.txt");
fs::write(&secret_file, "dummy")?;
let baseline_path = tmp.path().join("baseline.yaml");
let fingerprint = 0x1234_u64;
// First run: third argument is `true` (presumably the `manage` flag —
// confirm against the `apply_baseline` signature).
let mut store = make_store_with_match(fingerprint, &secret_file);
apply_baseline(&mut store, &baseline_path, true, &roots)?;
// The baseline on disk must now contain exactly one entry for the match.
let baseline = load_baseline(&baseline_path)?;
assert_eq!(baseline.exact_findings.matches.len(), 1);
let entry = &baseline.exact_findings.matches[0];
// The stored fingerprint is the match fingerprint as 16 zero-padded hex digits.
assert_eq!(entry.fingerprint, format!("{:016x}", fingerprint));
assert_eq!(entry.filepath, expected_relative_path(roots[0].as_path(), &secret_file));
// The finding stays visible on the run that first records it.
let (_, _, recorded) = store.get_matches()[0].as_ref();
assert!(recorded.visible);
// Second run with a fresh store and third argument `false`: the same
// fingerprint must now be hidden.
let mut follow_up = make_store_with_match(fingerprint, &secret_file);
apply_baseline(&mut follow_up, &baseline_path, false, &roots)?;
let (_, _, filtered) = follow_up.get_matches()[0].as_ref();
assert!(!filtered.visible);
Ok(())
}
// Checks that running `apply_baseline` twice with the third argument `true`
// over the same finding leaves the baseline file's contents unchanged, and
// that the second run hides the finding.
#[test]
fn managing_baseline_is_idempotent() -> Result<()> {
// Fixture: a temporary directory used as the only scan root, holding one
// file; the baseline file lives in the same directory.
let tmp = TempDir::new()?;
let roots = [tmp.path().to_path_buf()];
let secret_file = tmp.path().join("secret.txt");
fs::write(&secret_file, "dummy")?;
let baseline_path = tmp.path().join("baseline.yaml");
let fingerprint = 0xfeed_beef_dade_f00d_u64;
// First run writes the baseline; capture its raw text for comparison.
let mut initial = make_store_with_match(fingerprint, &secret_file);
apply_baseline(&mut initial, &baseline_path, true, &roots)?;
let baseline_before = fs::read_to_string(&baseline_path)?;
// Second run over an identical store, with the same arguments.
let mut rerun = make_store_with_match(fingerprint, &secret_file);
apply_baseline(&mut rerun, &baseline_path, true, &roots)?;
let baseline_after = fs::read_to_string(&baseline_path)?;
// The file text must be identical before and after the second run.
assert_eq!(baseline_before, baseline_after);
// The already-recorded finding is hidden on the re-run.
let (_, _, suppressed) = rerun.get_matches()[0].as_ref();
assert!(!suppressed.visible);
Ok(())
}
}

View file

@ -26,6 +26,7 @@ fn baseline_create_and_filter() -> anyhow::Result<()> {
"--manage-baseline",
"--baseline-file",
baseline.to_str().unwrap(),
"--git-history=none",
"--no-update-check",
])
.assert()
@ -34,7 +35,10 @@ fn baseline_create_and_filter() -> anyhow::Result<()> {
assert!(baseline.exists(), "baseline file created");
// Scan again using the baseline
let initial_baseline = fs::read_to_string(&baseline)?;
// Scanning with the baseline should suppress the existing finding and leave
// the baseline untouched.
Command::cargo_bin("kingfisher")?
.args([
"scan",
@ -46,12 +50,39 @@ fn baseline_create_and_filter() -> anyhow::Result<()> {
"json",
"--baseline-file",
baseline.to_str().unwrap(),
"--git-history=none",
"--no-update-check",
])
.assert()
.code(0)
.stdout(predicate::str::contains(GH_PAT).not());
let baseline_after_scan = fs::read_to_string(&baseline)?;
assert_eq!(initial_baseline, baseline_after_scan, "baseline remains stable after reuse");
// Managing the baseline again should not churn entries or report the secret
Command::cargo_bin("kingfisher")?
.args([
"scan",
dir.path().to_str().unwrap(),
"--no-binary",
"--confidence=low",
"--no-validate",
"--format",
"json",
"--manage-baseline",
"--baseline-file",
baseline.to_str().unwrap(),
"--git-history=none",
"--no-update-check",
])
.assert()
.code(0)
.stdout(predicate::str::contains(GH_PAT).not());
let rerun_baseline = fs::read_to_string(&baseline)?;
assert_eq!(initial_baseline, rerun_baseline, "baseline remains stable");
Ok(())
}