diff --git a/CHANGELOG.md b/CHANGELOG.md index fbe4eaf..7e23102 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ All notable changes to this project will be documented in this file. - Fixed local filesystem scans to keep `open_path_as_is` enabled when opening Git repositories and only disable it for diff-based scans. - Created Linux and Windows specific installer script - Updated diff-focused scanning so `--branch-root-commit` can be provided alongside `--branch`, letting you diff from a chosen commit while targeting a specific branch tip (still defaulting back to the `--branch` ref when the commit is omitted). +- Updated the Azure Storage and GitLab rules, and added a rule for the updated GitLab private token format. ## [v1.60.0] - Removed the `--bitbucket-username`, `--bitbucket-token`, and `--bitbucket-oauth-token` flags in favour of `KF_BITBUCKET_*` environment variables when authenticating to Bitbucket. diff --git a/data/rules/azurestorage.yml b/data/rules/azurestorage.yml index 8445dbb..aea15a9 100644 --- a/data/rules/azurestorage.yml +++ b/data/rules/azurestorage.yml @@ -24,8 +24,6 @@ rules: - AccountName=mystorageaccount - mystorageaccount.blob.core.windows.net - azure_storage_name="prodblob2024" - - - name: Azure Storage Account Key id: kingfisher.azurestorage.2 pattern: | @@ -36,7 +34,7 @@ rules: (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) (?:.|[\n\r]){0,128}? 
( - [A-Za-z0-9+/]{86,88}={0,2} + [A-Z0-9+\\/-]{86,88}={0,2} ) min_entropy: 4.0 confidence: medium @@ -46,4 +44,4 @@ rules: type: AzureStorage depends_on_rule: - rule_id: kingfisher.azurestorage.1 - variable: AZURENAME + variable: AZURENAME \ No newline at end of file diff --git a/data/rules/gitlab.yml b/data/rules/gitlab.yml index c7475d6..1cdf48c 100644 --- a/data/rules/gitlab.yml +++ b/data/rules/gitlab.yml @@ -3,12 +3,11 @@ rules: id: kingfisher.gitlab.1 pattern: | (?xi) - \b - ( + \b + ( glpat- [0-9A-Z_-]{20} - ) - (?:\b|$) + ) min_entropy: 3.5 confidence: medium examples: @@ -114,4 +113,32 @@ rules: - '"token is missing"' - '"403 Forbidden"' negative: true - url: https://gitlab.com/api/v4/ci/pipeline_triggers/{{ TOKEN }} \ No newline at end of file + url: https://gitlab.com/api/v4/ci/pipeline_triggers/{{ TOKEN }} + - name: GitLab Private Token - Updated Format + id: kingfisher.gitlab.4 + pattern: | + (?x) + \b + ( + glpat-[A-Za-z0-9_-]{36,38}\.01\.[a-z0-9]{9} + ) + min_entropy: 3.5 + confidence: medium + examples: + - glpat-5m8CwMZi4bwlRSCKzG0-3W86MQp1OmV5Y2UK.01.1012mzo24 + references: + - https://github.com/diffblue/gitlab/blob/39c63ee83369bf5353256a6b95f3116728edd102/doc/api/personal_access_tokens.md + - https://docs.gitlab.com/api/personal_access_tokens/ + validation: + type: Http + content: + request: + headers: + PRIVATE-TOKEN: '{{ TOKEN }}' + method: GET + response_matcher: + - report_response: true + - type: WordMatch + words: + - '"id"' + url: https://gitlab.com/api/v4/personal_access_tokens/self \ No newline at end of file diff --git a/src/baseline.rs b/src/baseline.rs index 7616dd5..6f3dab5 100644 --- a/src/baseline.rs +++ b/src/baseline.rs @@ -10,7 +10,7 @@ use chrono::Local; use serde::{Deserialize, Serialize}; use tracing::debug; -use crate::{findings_store::FindingsStore, matcher::compute_finding_fingerprint}; +use crate::findings_store::FindingsStore; #[derive(Debug, Default, Serialize, Deserialize)] pub struct BaselineFile { @@ -53,20 +53,6 @@ 
fn normalize_path(p: &Path, roots: &[PathBuf]) -> String { p.to_string_lossy().replace('\\', "/") } -fn compute_hash(secret: &str, path: &str) -> String { - let fp = compute_finding_fingerprint(secret, path, 0, 0); - format!("{:016x}", fp) -} - -fn extract_secret(m: &crate::matcher::Match) -> String { - m.groups - .captures - .get(1) - .or_else(|| m.groups.captures.get(0)) - .map(|c| c.value.to_string()) - .unwrap_or_default() -} - pub fn apply_baseline( store: &mut FindingsStore, baseline_path: &Path, @@ -87,10 +73,10 @@ pub fn apply_baseline( for arc_msg in store.get_matches_mut() { let (origin, _blob, m) = Arc::make_mut(arc_msg); let file_path = origin.iter().filter_map(|o| o.full_path()).next(); + let hash = format!("{:016x}", m.finding_fingerprint); + if let Some(fp) = file_path { let normalized = normalize_path(&fp, roots); - let secret = extract_secret(m); - let hash = compute_hash(&secret, &normalized); if known.contains(&hash) { debug!("Skipping {} due to baseline (hash {})", normalized, hash); m.visible = false; @@ -108,6 +94,11 @@ pub fn apply_baseline( }; new_entries.push(entry); } + } else if known.contains(&hash) { + m.visible = false; + if manage { + encountered.insert(hash.clone()); + } } } if manage { @@ -127,3 +118,136 @@ pub fn apply_baseline( Ok(()) } + +#[cfg(test)] +mod tests { + use super::*; + use crate::{ + blob::{BlobId, BlobMetadata}, + location::{Location, OffsetSpan, SourcePoint, SourceSpan}, + matcher::{Match, SerializableCapture, SerializableCaptures}, + origin::{Origin, OriginSet}, + rules::rule::{Confidence, Rule, RuleSyntax}, + }; + use anyhow::Result; + use smallvec::SmallVec; + use std::{path::Path, sync::Arc}; + use tempfile::TempDir; + + fn test_rule() -> Arc { + Arc::new(Rule::new(RuleSyntax { + name: "test".to_string(), + id: "test.rule".to_string(), + pattern: "test".to_string(), + min_entropy: 0.0, + confidence: Confidence::Low, + visible: true, + examples: vec![], + negative_examples: vec![], + references: vec![], + 
validation: None, + depends_on_rule: vec![], + })) + } + + fn empty_captures() -> SerializableCaptures { + SerializableCaptures { captures: SmallVec::<[SerializableCapture; 2]>::new() } + } + + fn make_store_with_match(fingerprint: u64, file_path: &Path) -> FindingsStore { + let mut store = FindingsStore::new(PathBuf::from(".")); + let rule = test_rule(); + let match_item = Match { + location: Location { + offset_span: OffsetSpan { start: 0, end: 1 }, + source_span: SourceSpan { + start: SourcePoint { line: 1, column: 0 }, + end: SourcePoint { line: 1, column: 1 }, + }, + }, + groups: empty_captures(), + blob_id: BlobId::default(), + finding_fingerprint: fingerprint, + rule: Arc::clone(&rule), + validation_response_body: String::new(), + validation_response_status: 0, + validation_success: false, + calculated_entropy: 0.0, + visible: true, + is_base64: false, + }; + + let origin = OriginSet::from(Origin::from_file(file_path.to_path_buf())); + let blob_meta = Arc::new(BlobMetadata { + id: BlobId::default(), + num_bytes: 0, + mime_essence: None, + language: None, + }); + + let entry = Arc::new((Arc::new(origin), blob_meta, match_item)); + store.get_matches_mut().push(entry); + store + } + + fn expected_relative_path(root: &Path, file: &Path) -> String { + let mut expected = PathBuf::from(root.file_name().unwrap()); + if let Ok(stripped) = file.strip_prefix(root) { + expected = expected.join(stripped); + } + expected.to_string_lossy().replace('\\', "/") + } + + #[test] + fn apply_baseline_filters_existing_fingerprints() -> Result<()> { + let tmp = TempDir::new()?; + let roots = [tmp.path().to_path_buf()]; + let secret_file = tmp.path().join("secret.txt"); + fs::write(&secret_file, "dummy")?; + let baseline_path = tmp.path().join("baseline.yaml"); + let fingerprint = 0x1234_u64; + + let mut store = make_store_with_match(fingerprint, &secret_file); + apply_baseline(&mut store, &baseline_path, true, &roots)?; + + let baseline = load_baseline(&baseline_path)?; + 
assert_eq!(baseline.exact_findings.matches.len(), 1); + let entry = &baseline.exact_findings.matches[0]; + assert_eq!(entry.fingerprint, format!("{:016x}", fingerprint)); + assert_eq!(entry.filepath, expected_relative_path(roots[0].as_path(), &secret_file)); + + let (_, _, recorded) = store.get_matches()[0].as_ref(); + assert!(recorded.visible); + + let mut follow_up = make_store_with_match(fingerprint, &secret_file); + apply_baseline(&mut follow_up, &baseline_path, false, &roots)?; + let (_, _, filtered) = follow_up.get_matches()[0].as_ref(); + assert!(!filtered.visible); + + Ok(()) + } + + #[test] + fn managing_baseline_is_idempotent() -> Result<()> { + let tmp = TempDir::new()?; + let roots = [tmp.path().to_path_buf()]; + let secret_file = tmp.path().join("secret.txt"); + fs::write(&secret_file, "dummy")?; + let baseline_path = tmp.path().join("baseline.yaml"); + let fingerprint = 0xfeed_beef_dade_f00d_u64; + + let mut initial = make_store_with_match(fingerprint, &secret_file); + apply_baseline(&mut initial, &baseline_path, true, &roots)?; + let baseline_before = fs::read_to_string(&baseline_path)?; + + let mut rerun = make_store_with_match(fingerprint, &secret_file); + apply_baseline(&mut rerun, &baseline_path, true, &roots)?; + let baseline_after = fs::read_to_string(&baseline_path)?; + assert_eq!(baseline_before, baseline_after); + + let (_, _, suppressed) = rerun.get_matches()[0].as_ref(); + assert!(!suppressed.visible); + + Ok(()) + } +} diff --git a/tests/smoke_baseline.rs b/tests/smoke_baseline.rs index 1c53a0f..f69be7c 100644 --- a/tests/smoke_baseline.rs +++ b/tests/smoke_baseline.rs @@ -26,6 +26,7 @@ fn baseline_create_and_filter() -> anyhow::Result<()> { "--manage-baseline", "--baseline-file", baseline.to_str().unwrap(), + "--git-history=none", "--no-update-check", ]) .assert() @@ -34,7 +35,10 @@ fn baseline_create_and_filter() -> anyhow::Result<()> { assert!(baseline.exists(), "baseline file created"); - // Scan again using the baseline + let 
initial_baseline = fs::read_to_string(&baseline)?; + + // Scanning with the baseline should suppress the existing finding and leave + // the baseline untouched. Command::cargo_bin("kingfisher")? .args([ "scan", @@ -46,12 +50,39 @@ fn baseline_create_and_filter() -> anyhow::Result<()> { "json", "--baseline-file", baseline.to_str().unwrap(), + "--git-history=none", "--no-update-check", ]) .assert() .code(0) .stdout(predicate::str::contains(GH_PAT).not()); + let baseline_after_scan = fs::read_to_string(&baseline)?; + assert_eq!(initial_baseline, baseline_after_scan, "baseline remains stable after reuse"); + + // Managing the baseline again should not churn entries or report the secret + Command::cargo_bin("kingfisher")? + .args([ + "scan", + dir.path().to_str().unwrap(), + "--no-binary", + "--confidence=low", + "--no-validate", + "--format", + "json", + "--manage-baseline", + "--baseline-file", + baseline.to_str().unwrap(), + "--git-history=none", + "--no-update-check", + ]) + .assert() + .code(0) + .stdout(predicate::str::contains(GH_PAT).not()); + + let rerun_baseline = fs::read_to_string(&baseline)?; + assert_eq!(initial_baseline, rerun_baseline, "baseline remains stable"); + Ok(()) }