From edc88c4714563c4d9444b7c49a90a7a2a67e778b Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sat, 28 Jun 2025 07:08:22 -0700 Subject: [PATCH 1/9] Updated README --- Makefile | 7 ++++ README.md | 107 ++++++++++++++++++++++++++++++++++-------------------- 2 files changed, 75 insertions(+), 39 deletions(-) diff --git a/Makefile b/Makefile index fb8b5e3..da375d4 100644 --- a/Makefile +++ b/Makefile @@ -400,6 +400,13 @@ check-rust: fi tests: + @echo "🔍 checking for cargo-nextest …" + @if command -v cargo-nextest >/dev/null 2>&1; then \ + echo "✅ cargo-nextest already present"; \ + else \ + echo "📦 installing cargo-nextest …"; \ + cargo install --locked cargo-nextest || true; \ + fi @echo "▶ running tests …"; \ if command -v cargo-nextest >/dev/null 2>&1; then \ cargo nextest run --workspace --all-targets; \ diff --git a/README.md b/README.md index 67098a3..3af9c20 100644 --- a/README.md +++ b/README.md @@ -6,9 +6,13 @@ [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) Kingfisher is a blazingly fast secret‑scanning and validation tool built in Rust. It combines Intel’s hardware‑accelerated Hyperscan regex engine with language‑aware parsing via Tree‑Sitter, and **ships with hundreds of built‑in rules** to detect, validate, and triage secrets before they ever reach production +

-**MongoDB Blog**: [Introducing Kingfisher: Real-Time Secret Detection and Validation](https://www.mongodb.com/blog/post/product-release-announcements/introducing-kingfisher-real-time-secret-detection-validation) +Kingfisher originated as a fork of **[Nosey Parker](https://github.com/praetorian-inc/noseyparker)** by Praetorian Security, Inc, and is built atop the incredible work contributed by the Nosey Parker community. + + +- **MongoDB Blog**: [Introducing Kingfisher: Real-Time Secret Detection and Validation](https://www.mongodb.com/blog/post/product-release-announcements/introducing-kingfisher-real-time-secret-detection-validation) ## Key Features @@ -17,12 +21,12 @@ Kingfisher is a blazingly fast secret‑scanning and validation tool built in Ru - **Built-In Validation**: Hundreds of built-in detection rules, many with live-credential validators that call the relevant service APIs (AWS, Azure, GCP, Stripe, etc.) to confirm a secret is active. You can extend or override the library by adding YAML-defined rules on the command line—see [docs/RULES.md](/docs/RULES.md) for details - **Git History Scanning**: Scan local repos, remote GitHub/GitLab orgs/users, or arbitrary GitHub/GitLab repos - ## Getting Started ### Installation On macOS, you can simply + ```bash brew install kingfisher ``` @@ -53,7 +57,6 @@ make darwin-all # builds both x64 and arm64 make all # builds for every OS and architecture supported ``` - # Write Custom Rules! Kingfisher ships with hundreds of rules with HTTP and service‑specific validation checks (AWS, Azure, GCP, etc.) to confirm if a detected string is a live credential. @@ -71,6 +74,7 @@ Once you've done that, you can provide your custom rules (defined in a YAML file > **Note**  `kingfisher scan` detects whether the input is a Git repository or a plain directory—no extra flags required. 
### Scan with secret validation + ```bash kingfisher scan /path/to/code ## NOTE: This path can refer to: @@ -82,28 +86,32 @@ kingfisher scan /path/to/code # `--git-history=none` ``` - ### Scan a directory containing multiple Git repositories + ```bash kingfisher scan /projects/mono‑repo‑dir ``` ### Scan a Git repository without validation + ```bash kingfisher scan ~/src/myrepo --no-validate ``` ### Display only secrets confirmed active by third‑party APIs + ```bash kingfisher scan ./service --only-valid ``` ### Output JSON and capture to a file + ```bash kingfisher scan . --format json | tee kingfisher.json ``` ### Output SARIF directly to disk + ```bash kingfisher scan . --format sarif --output findings.sarif ``` @@ -114,8 +122,9 @@ kingfisher scan . --format sarif --output findings.sarif cat /path/to/file.py | kingfisher scan - ``` -### Scan using a rule *family* with one flag -*(prefix matching: `--rule kingfisher.aws` loads `kingfisher.aws.*`)* +### Scan using a rule _family_ with one flag + +_(prefix matching: `--rule kingfisher.aws` loads `kingfisher.aws._`)\* ```bash # Only apply AWS-related rules (kingfisher.aws.1 + kingfisher.aws.2) @@ -123,19 +132,23 @@ kingfisher scan /path/to/repo --rule kingfisher.aws ``` ### Display rule performance statistics + ```bash kingfisher scan /path/to/repo --rule-stats ``` --- + ## Scanning GitHub ### Scan GitHub organisation (requires `KF_GITHUB_TOKEN`) + ```bash kingfisher scan --github-organization my-org ``` ### Scan remote GitHub repository + ```bash kingfisher scan --git-url https://github.com/org/repo.git @@ -143,70 +156,84 @@ kingfisher scan --git-url https://github.com/org/repo.git KF_GITHUB_TOKEN="ghp_…" kingfisher scan --git-url https://github.com/org/private_repo.git ``` + --- + ## Scanning GitLab ### Scan GitLab group (requires `KF_GITLAB_TOKEN`) + ```bash kingfisher scan --gitlab-group my-group ``` ### Scan GitLab user + ```bash kingfisher scan --gitlab-user johndoe ``` ### Scan remote GitLab repository by URL 
+ ```bash kingfisher scan --git-url https://gitlab.com/group/project.git ``` ### List GitLab repositories + ```bash kingfisher gitlab repos list --group my-group ``` --- + ## Environment Variables for Tokens -| Variable | Purpose | -|---------------------|---------------------------------------| -| `KF_GITHUB_TOKEN` | GitHub Personal Access Token | -| `KF_GITLAB_TOKEN` | GitLab Personal Access Token | +| Variable | Purpose | +| ----------------- | ---------------------------- | +| `KF_GITHUB_TOKEN` | GitHub Personal Access Token | +| `KF_GITLAB_TOKEN` | GitLab Personal Access Token | Set them temporarily per command: + ```bash KF_GITLAB_TOKEN="glpat-…" kingfisher scan --gitlab-group my-group ``` + Or export for the session: + ```bash export KF_GITLAB_TOKEN="glpat-…" ``` -*If no token is provided Kingfisher still works for public repositories.* +_If no token is provided Kingfisher still works for public repositories._ --- + ## Exit Codes -| Code | Meaning | -|------|-------------------------------------| -| 0 | No findings | -| 200 | Findings discovered | -| 205 | Validated findings discovered | +| Code | Meaning | +| ---- | ----------------------------- | +| 0 | No findings | +| 200 | Findings discovered | +| 205 | Validated findings discovered | --- ### Update Checks + Kingfisher checks for newer releases on GitHub each time it starts and exits, printing whether a new version is available. Use `--self-update` to automatically download and replace the binary when an update is found. Add `--no-update-check` to disable these checks entirely. 
--- - ### List Builtin Rules + ```bash kingfisher rules list ``` + ### To scan using **only** your own `my_rules.yaml` you could run: + ```bash kingfisher scan \ --load-builtins=false \ @@ -224,6 +251,7 @@ kingfisher scan \ ``` ## Other Examples + ```bash # Check custom rules - this ensures all regular expressions compile, and can match the rule's `examples` in the YML file kingfisher rules check --rules-path ./my_rules.yml @@ -235,6 +263,7 @@ kingfisher github repos list --organization my-org ``` ## Notable Scan Options + - `--no-dedup`: Report every occurrence of a finding (disable the default de-duplicate behavior) - `--confidence `: (low|medium|high) - `--min-entropy `: Override default threshold @@ -245,15 +274,15 @@ kingfisher github repos list --organization my-org ## Finding Fingerprint -The document below details the four-field formula (rule SHA-1, origin label, start & end offsets) hashed with XXH3-64 to create Kingfisher’s 64-bit finding fingerprint, and explains how this ID powers safe deduplication; plus how `--no-dedup` can be used shows every raw match. +The document below details the four-field formula (rule SHA-1, origin label, start & end offsets) hashed with XXH3-64 to create Kingfisher’s 64-bit finding fingerprint, and explains how this ID powers safe deduplication; plus how `--no-dedup` can be used shows every raw match. See ([docs/FINGERPRINT.md](docs/FINGERPRINT.md)) - ## Rule Performance Profiling + Use `--rule-stats` to collect timing information for every rule. After scanning, the summary prints a **Rule Performance Stats** section showing how many matches each rule produced along with its slowest and average match times. Useful when creating rules or debugging rules. 
- ## CLI Options + ```bash kingfisher scan --help ``` @@ -265,36 +294,39 @@ By integrating Kingfisher into your development lifecycle, you can: - **Prevent Costly Breaches** Early detection of embedded credentials avoids expensive incident response, legal fees, and reputation damage - **Automate Compliance** - Enforce secret‑scanning policies across GitOps, CI/CD, and pull requests to help satisfy SOC 2, PCI‑DSS, GDPR, and other standards + Enforce secret‑scanning policies across GitOps, CI/CD, and pull requests to help satisfy SOC 2, PCI‑DSS, GDPR, and other standards - **Reduce Noise, Focus on Real Threats** Validation logic filters out false positives and highlights only active, valid secrets (`--only-valid`) - **Accelerate Dev Workflows** Run in parallel across dozens of languages, integrate with GitHub Actions or any pipeline, and shift security left to minimize delays - ## The Risk of Leaked Secrets Embedding credentials in code repositories is a pervasive, ever‑present risk that leads directly to data breaches: 1. **Uber (2016)** - - *Incident*: Attackers stole GitHub credentials, retrieved an AWS key from a developer’s private repo, and accessed data on 57 million riders and 600 000 drivers. - - *Sources*: [BBC News](https://www.bbc.com/news/technology-42075306), [Ars Technica](https://arstechnica.com/tech-policy/2017/11/report-uber-paid-hackers-100000-to-keep-2016-data-breach-quiet/) + + - _Incident_: Attackers stole GitHub credentials, retrieved an AWS key from a developer’s private repo, and accessed data on 57 million riders and 600 000 drivers. + - _Sources_: [BBC News](https://www.bbc.com/news/technology-42075306), [Ars Technica](https://arstechnica.com/tech-policy/2017/11/report-uber-paid-hackers-100000-to-keep-2016-data-breach-quiet/) 2. **AWS** - - *Incident*: An AWS engineer accidentally published log files and CloudFormation templates containing AWS key pairs (including “rootkey.csv”) to a public GitHub repo. 
- - *Sources*: [The Register](https://www.theregister.com/2020/01/23/aws_engineer_credentials_github/), [UpGuard](https://www.upguard.com/breaches/identity-and-access-misstep-how-an-amazon-engineer-exposed-credentials-and-more) + + - _Incident_: An AWS engineer accidentally published log files and CloudFormation templates containing AWS key pairs (including “rootkey.csv”) to a public GitHub repo. + - _Sources_: [The Register](https://www.theregister.com/2020/01/23/aws_engineer_credentials_github/), [UpGuard](https://www.upguard.com/breaches/identity-and-access-misstep-how-an-amazon-engineer-exposed-credentials-and-more) 3. **Infosys** - - *Incident*: Infosys published an internal PyPI package embedding a FullAdminAccess AWS key for a Johns Hopkins data bucket; the key remained active for over a year. - - *Sources*: [The Stack](https://www.thestack.technology/infosys-leak-aws-key-exposed-on-pypi/), [Tom Forbes Blog](https://tomforb.es/blog/infosys-leak/) + + - _Incident_: Infosys published an internal PyPI package embedding a FullAdminAccess AWS key for a Johns Hopkins data bucket; the key remained active for over a year. + - _Sources_: [The Stack](https://www.thestack.technology/infosys-leak-aws-key-exposed-on-pypi/), [Tom Forbes Blog](https://tomforb.es/blog/infosys-leak/) 4. **Microsoft** - - *Incident*: Microsoft’s AI research GitHub repo included an overly permissive Azure SAS token, exposing 38 TB of private data (workstation backups, 30,000+ Teams messages). - - *Sources*: [Wiz Blog](https://www.wiz.io/blog/38-terabytes-of-private-data-accidentally-exposed-by-microsoft-ai-researchers), [TechCrunch](https://techcrunch.com/2023/09/18/microsoft-ai-researchers-accidentally-exposed-terabytes-of-internal-sensitive-data/) + + - _Incident_: Microsoft’s AI research GitHub repo included an overly permissive Azure SAS token, exposing 38 TB of private data (workstation backups, 30,000+ Teams messages). 
+ - _Sources_: [Wiz Blog](https://www.wiz.io/blog/38-terabytes-of-private-data-accidentally-exposed-by-microsoft-ai-researchers), [TechCrunch](https://techcrunch.com/2023/09/18/microsoft-ai-researchers-accidentally-exposed-terabytes-of-internal-sensitive-data/) 5. **GitHub** - - *Incident*: GitHub discovered its RSA SSH host private key was briefly exposed in a public repository and rotated it out of caution. - - *Sources*: [GitHub Blog](https://github.blog/news-insights/company-news/we-updated-our-rsa-ssh-host-key/) + - _Incident_: GitHub discovered its RSA SSH host private key was briefly exposed in a public repository and rotated it out of caution. + - _Sources_: [GitHub Blog](https://github.blog/news-insights/company-news/we-updated-our-rsa-ssh-host-key/) Left unchecked, leaked secrets can lead to unauthorized access, pivoting within your environment, regulatory fines, and brand‑damaging incident response costs. @@ -304,14 +336,11 @@ See ([docs/COMPARISON.md](docs/COMPARISON.md)) # Roadmap - - More rules - - Auto-updater - - Packages for Linux (deb, rpm) - - Please file a [feature request](https://github.com/mongodb/kingfisher/issues) if you have specific features you'd like added - +- More rules +- Auto-updater +- Packages for Linux (deb, rpm) +- Please file a [feature request](https://github.com/mongodb/kingfisher/issues) if you have specific features you'd like added # License [Apache2 License](LICENSE) - - From 26b5b7918fa87cc88dc0ae75ba365fba62863704 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sat, 28 Jun 2025 08:34:15 -0700 Subject: [PATCH 2/9] Added rules for sonarcloud, sonarqube, sourcegraph --- README.md | 4 +- data/rules/netlify.yml | 8 ++-- data/rules/sonarcloud.yml | 35 ++++++++++++++++ data/rules/sonarqube.yml | 58 ++++++++++++++++++++++++++ data/rules/sourcegraph.yml | 85 ++++++++++++++++++++++++++++++++++++++ src/matcher.rs | 2 - 6 files changed, 183 insertions(+), 9 deletions(-) create mode 100644 data/rules/sonarcloud.yml create mode 100644 data/rules/sonarqube.yml create mode
100644 data/rules/sonarqube.yml create mode 100644 data/rules/sourcegraph.yml diff --git a/README.md b/README.md index 3af9c20..8f7af32 100644 --- a/README.md +++ b/README.md @@ -9,10 +9,10 @@ Kingfisher is a blazingly fast secret‑scanning and validation tool built in Ru

-Kingfisher originated as a fork of **[Nosey Parker](https://github.com/praetorian-inc/noseyparker)** by Praetorian Security, Inc, and is built atop the incredible work contributed by the Nosey Parker community. +Kingfisher originated as a fork of **[Nosey Parker](https://github.com/praetorian-inc/noseyparker)** by Praetorian Security, Inc, and is built atop their incredible work and the work contributed by the Nosey Parker community. -- **MongoDB Blog**: [Introducing Kingfisher: Real-Time Secret Detection and Validation](https://www.mongodb.com/blog/post/product-release-announcements/introducing-kingfisher-real-time-secret-detection-validation) +**MongoDB Blog**: [Introducing Kingfisher: Real-Time Secret Detection and Validation](https://www.mongodb.com/blog/post/product-release-announcements/introducing-kingfisher-real-time-secret-detection-validation) ## Key Features diff --git a/data/rules/netlify.yml b/data/rules/netlify.yml index eaf8399..0a29d25 100644 --- a/data/rules/netlify.yml +++ b/data/rules/netlify.yml @@ -14,8 +14,6 @@ rules: examples: - netlify_token=3cdfad7b885a6daceff3fb820389115750b373763fb30b10ca0382648b55872d - netlify_secret=7a9ef2c84d6b3e5f1c8a0b9d2e4f6a8c7b3d5e9f2a1c8b4d6e3f5a9c7b2d8e4 - references: - - https://howtorotate.com/docs/tutorials/netlify/ validation: type: Http content: @@ -39,15 +37,15 @@ rules: (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) (?:.|[\n\r]){0,32}? 
\b - ([A-Z0-9_-]{43,45}) + ( + [A-Z0-9_-]{43,45} + ) \b min_entropy: 3.5 confidence: medium examples: - netlify_token=G5yT54abRasekrOpe7SaArsowiuHTeR45sfEhsH-K1L2 - netlify_key=H7xZ98cdWbsemqNpv8UaXtsnyjKgVeQ34rsDkpM-N5P6 - references: - - https://howtorotate.com/docs/tutorials/netlify/ validation: type: Http content: diff --git a/data/rules/sonarcloud.yml b/data/rules/sonarcloud.yml new file mode 100644 index 0000000..f0b874d --- /dev/null +++ b/data/rules/sonarcloud.yml @@ -0,0 +1,35 @@ +rules: + - name: SonarCloud API Token + id: kingfisher.sonarcloud.1 + pattern: | + (?xi) + \b + sonar + (?:.|[\n\r]){0,32}? + (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) + (?:.|[\n\r]){0,32}? + \b + ( + [0-9a-z]{40} + ) + \b + min_entropy: 2.5 + examples: + - sonar_api_token=abcdef0123456789abcdef0123456789abcdef23 + validation: + type: Http + content: + request: + headers: + Authorization: "Basic {{ TOKEN | append: ':' | b64enc }}" + Accept: application/json + method: GET + url: https://sonarcloud.io/api/user_tokens/search + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: WordMatch + match_all_words: true + words: + - '"tokens":' diff --git a/data/rules/sonarqube.yml b/data/rules/sonarqube.yml new file mode 100644 index 0000000..ceddbee --- /dev/null +++ b/data/rules/sonarqube.yml @@ -0,0 +1,58 @@ +rules: + - name: SonarQube API Key + id: kingfisher.sonarqube.1 + pattern: | + (?xi) + \b + ( + (?:sq[pua]) + _[a-z0-9]{40} + ) + min_entropy: 3.5 + examples: + - sonar.login=sqp_4b78f8494075e310d62dfdcaeb14be2c78fca2fc + - sonar.login=squ_4b78f8494075e310d62dfdcaeb14be2c78fca2fc + validation: + type: Http + content: + request: + headers: + Authorization: "Basic {{ TOKEN | append: ':' | b64enc }}" + method: GET + response_matcher: + - report_response: true + - status: + - 200 + type: StatusMatch + url: '{{ SONARHOST }}/api/user_tokens/search' + depends_on_rule: + - rule_id: kingfisher.sonarqube.2 + variable: SONARHOST + + - name: SonarQube Host + 
id: kingfisher.sonarqube.2 + pattern: | + (?xi) + sonar.{0,8}host + (?:.|[\n\r]){0,64}? + \b + ( + https?://.*?:\d{2,6} + ) + \b + min_entropy: 3.5 + visible: false + examples: + - sonar.host=https://sonar.internal.company.com:9000 + + - name: SonarQube Token + id: kingfisher.sonarqube.3 + pattern: '(?i)sonar.{0,5}login.{0,5}\s*\b([a-f0-9]{40})\b' + min_entropy: 3.3 + confidence: medium + examples: + - 'sonar.host.url=https://sonarcloud.io -Dsonar.login=5524bf449ca45fcace54698371466398321f3a82' + - "sonar.login', '826de5590c75919a8317fdface58206eebe7ebbc" + - '$sonarLogin = "4924be8f51f3e738c97db2c4ace51db7e938f28b"' + references: + - https://docs.sonarqube.org/latest/user-guide/user-token/ \ No newline at end of file diff --git a/data/rules/sourcegraph.yml b/data/rules/sourcegraph.yml new file mode 100644 index 0000000..965d99a --- /dev/null +++ b/data/rules/sourcegraph.yml @@ -0,0 +1,85 @@ +rules: + - name: Sourcegraph Access Token + id: kingfisher.sourcegraph.1 + pattern: | + (?xi) + \b + sgp_(?:[a-f0-9]{16}_local_)?[a-f0-9]{40} + \b + min_entropy: 3.3 + examples: + - sgp_210f1131b08e93adcfc3f05faa2d768ff883a61f + validation: + type: Http + content: + request: + method: POST + url: https://sourcegraph.com/.api/graphql + headers: + Authorization: "token {{ TOKEN }}" + Content-Type: application/json + body: | + { "query": "query ValidateToken { site { id } }" } + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: WordMatch + words: ['"site":{'] + match_all_words: true + + - name: Sourcegraph _Legacy_ API Key + id: kingfisher.sourcegraph.2 + pattern: | + (?xi) + \b + (?:sgp_(?:[a-f0-9]{16}_local_)?[a-f0-9]{40}|[a-f0-9]{40}) + \b + min_entropy: 3.5 + confidence: medium + examples: + - sgp_abcdef1234567890_local_abcdef12345678901234567890abcdef12345678 + validation: + type: Http + content: + request: + method: POST + url: https://sourcegraph.com/.api/graphql + headers: + Authorization: "token {{ TOKEN }}" + Content-Type: 
application/json + body: | + { "query": "query ValidateToken { site { id } }" } + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: WordMatch + words: ['"site":{'] + + - name: Sourcegraph Cody Gateway Key + id: kingfisher.sourcegraph.3 + pattern: | + (?xi) + \b + slk_[a-f0-9]{64} + \b + min_entropy: 3.5 + confidence: medium + examples: + - slk_27b0a1f1926e7376dd8bdfcb0ade3c397c462b6e68c854a5521a17dd2b704ce6 + validation: + type: Http + content: + request: + method: GET + url: https://cody-gateway.sourcegraph.com/v1/limits + headers: + Authorization: "Bearer {{ TOKEN }}" + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: WordMatch + words: ['"token"', '"limit"'] + match_all_words: true diff --git a/src/matcher.rs b/src/matcher.rs index d7ae76d..d90c5ef 100644 --- a/src/matcher.rs +++ b/src/matcher.rs @@ -1,6 +1,5 @@ use std::{ borrow::Cow, - // collections::{HashMap, HashSet}, hash::{Hash, Hasher}, io::Write, str, @@ -60,7 +59,6 @@ pub struct OwnedBlobMatch { pub blob_id: BlobId, /// The unique content-based identifier of this match pub finding_fingerprint: u64, - // pub matching_input: Vec, pub matching_input_offset_span: OffsetSpan, pub captures: SerializableCaptures, pub validation_response_body: String, From 0a5cabbb5113de6d2632c3fc2a590be14afc985c Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sat, 28 Jun 2025 08:44:36 -0700 Subject: [PATCH 3/9] Added rules for shopify, truenas --- data/rules/shopify.yml | 40 ++++++++++++++++++++++++++++++++++ data/rules/truenas.yml | 49 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 89 insertions(+) create mode 100644 data/rules/shopify.yml create mode 100644 data/rules/truenas.yml diff --git a/data/rules/shopify.yml b/data/rules/shopify.yml new file mode 100644 index 0000000..da340e5 --- /dev/null +++ b/data/rules/shopify.yml @@ -0,0 +1,40 @@ +rules: + - name: Shopify access token + id: kingfisher.shopify.1 + pattern: | + (?xi) 
+ \b + ( + (?:shpat|shpca|shppa|shpss)_[a-f0-9]{30,34} + ) + \b + min_entropy: 3.5 + examples: + - 'shopify_app_secret: "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"' + validation: + type: Http + content: + request: + headers: + X-Shopify-Access-Token: '{{ TOKEN }}' + method: GET + response_matcher: + - report_response: true + - type: StatusMatch + status: + - 200 + - type: WordMatch + match_all_words: true + words: ['"shop":'] + url: https://{{ DOMAIN }}/admin/api/2024-10/shop.json + depends_on_rule: + - rule_id: "kingfisher.shopify.2" + variable: DOMAIN + + - name: Shopify Domain + id: kingfisher.shopify.2 + pattern: (?xi)([a-z0-9-]+\.myshopify\.com) + min_entropy: 3.0 + visible: false + examples: + - example.myshopify.com \ No newline at end of file diff --git a/data/rules/truenas.yml b/data/rules/truenas.yml new file mode 100644 index 0000000..9e0067a --- /dev/null +++ b/data/rules/truenas.yml @@ -0,0 +1,49 @@ +rules: + - name: TrueNAS API Key (WebSocket) + id: kingfisher.truenas.1 + pattern: | + (?x) + "params"\s*:\s*\[\s*" + (\d+-[a-zA-Z0-9]{64}) + "\s*\] + min_entropy: 3.3 + confidence: medium + examples: + - '{"id":"3286a508-a6ca-278a-c078-85b2b515d8d2", "msg":"method", "method":"auth.login_with_api_key", "params":["8-Lp22ov7halMBLUpG97Wg4y7fibQi3CW19VJiZcCu746zgCs0mdDdTCoOcpgEucgu"]}' + - '{"id":"677d9914-f598-f497-e77e-2a3aadbb822e", "msg":"method", "method":"auth.login_with_api_key", "params" : ["9-hTSZDBPyg0PjRZvWb8omoxJ7X2gAjRGmiPKql9ENGIUP9OPtEAzz5f6g9YIMVbZT"]}' + - '{"id":"2755dad4-cc12-94bb-a894-ba0f85c3fdbf", "msg":"method", "method":"auth.login_with_api_key", "params" : [ "10-6LZBVhNq8zze0rzXJptfSWDBoskWuThnQb3fUVw4sVNgJ7GKT3ITVIovhwPf34oL" ]}' + - | + { + "id": "2755dad4-cc12-94bb-a894-ba0f85c3fdbf", + "msg": "method", + "method": "auth.login_with_api_key", + "params": [ + "10-6LZBVhNq8zze0rzXJptfSWDBoskWuThnQb3fUVw4sVNgJ7GKT3ITVIovhwPf34oL" + ] + } + references: + - https://www.truenas.com/docs/api/core_websocket_api.html + - 
https://www.truenas.com/docs/api/scale_rest_api.html + - https://www.truenas.com/docs/scale/scaletutorials/toptoolbar/managingapikeys/ + - https://www.truenas.com/docs/scale/scaleclireference/auth/cliapikey/ + - https://www.truenas.com/docs/scale/api/ + - https://www.truenas.com/community/threads/api-examples-in-perl-python.108053/ + + - name: TrueNAS API Key (REST API) + id: kingfisher.truenas.2 + pattern: | + (?x) + Bearer\s* + (\d+-[a-zA-Z0-9]{64}) + \b + min_entropy: 3.3 + confidence: medium + examples: + - 'curl -X POST "http://192.168.0.30/api/v2.0/device/get_info" -H "Content-Type: application/json" -H "Authorization: Bearer 8-Lp22ov7halMBLUpG97Wg4y7fibQi3CW19VJiZcCu746zgCs0mdDdTCoOcpgEucgu" -d "\"SERIAL\""' + references: + - https://www.truenas.com/docs/api/core_websocket_api.html + - https://www.truenas.com/docs/api/scale_rest_api.html + - https://www.truenas.com/docs/scale/scaletutorials/toptoolbar/managingapikeys/ + - https://www.truenas.com/docs/scale/scaleclireference/auth/cliapikey/ + - https://www.truenas.com/docs/scale/api/ + - https://www.truenas.com/community/threads/api-examples-in-perl-python.108053/ \ No newline at end of file From 1bf87935c862e186d1f3b2c0115f60c4ccf57d0b Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sat, 28 Jun 2025 09:05:19 -0700 Subject: [PATCH 4/9] added rules for nasa, teamcity --- CHANGELOG.md | 4 +++ Cargo.toml | 2 +- data/rules/nasa.yml | 35 ++++++++++++++++++ data/rules/sendgrid.yml | 38 ++++++++++++++++++++ data/rules/square.yml | 80 +++++++++++++++++++++++++++++++++++++++++ data/rules/teamcity.yml | 18 ++++++++++ 6 files changed, 176 insertions(+), 1 deletion(-) create mode 100644 data/rules/nasa.yml create mode 100644 data/rules/sendgrid.yml create mode 100644 data/rules/square.yml create mode 100644 data/rules/teamcity.yml diff --git a/CHANGELOG.md b/CHANGELOG.md index a9a31a6..05e4d68 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ All notable changes to this project will be documented in this 
file. +## [1.17.0] +- Updated README to give proper attribution to Nosey Parker! +- Added rules for sonarcloud, sonarqube, sourcegraph, shopify, truenas, squaare, sendgrid + ## [1.16.0] - Fix: HTML detection now requires both HTML content-type and "' + references: + - https://www.jetbrains.com/help/teamcity/rest/teamcity-rest-api-documentation.html From 28af26b23aaa9c60c9ab446b9d45d9099c0ad67a Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sat, 28 Jun 2025 09:16:42 -0700 Subject: [PATCH 5/9] =?UTF-8?q?Introduced=20=20flag=20=E2=80=93=20skip=20f?= =?UTF-8?q?iles/dirs=20whose=20path=20resembles=20tests=20(,=20,=20,=20,?= =?UTF-8?q?=20),=20reducing=20noise.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 2 +- README.md | 1 + data/rules/nasa.yml | 14 +- src/cli/commands/scan.rs | 4 + src/main.rs | 1 + src/reporter/json_format.rs | 1 + src/reporter/pretty_format.rs | 1 + src/update.rs | 37 ++---- src/util.rs | 62 ++++++++- src/validation.rs | 217 ++++++++++++++----------------- src/validation/httpvalidation.rs | 13 +- tests/int_dedup.rs | 1 + tests/int_github.rs | 1 + tests/int_gitlab.rs | 1 + tests/int_validation_cache.rs | 1 + tests/int_vulnerable_files.rs | 2 + tests/smoke_check_rules.rs | 2 - 17 files changed, 201 insertions(+), 160 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 05e4d68..3c2c6ab 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,7 @@ All notable changes to this project will be documented in this file. ## [1.17.0] - Updated README to give proper attribution to Nosey Parker! - Added rules for sonarcloud, sonarqube, sourcegraph, shopify, truenas, squaare, sendgrid - +- Introduced `--ignore-tests` flag – skip files/dirs whose path resembles tests (`test`, `spec`, `fixture`, `example`, `sample`), reducing noise. 
## [1.16.0] - Fix: HTML detection now requires both HTML content-type and "`: Specifies how deep nested archives should be extracted and scanned (default: 2) - `--redact`: Replaces discovered secrets with a one-way hash for secure output +- `--ignore-tests`: Skip files or directories whose path component contains _test_, _spec_, _fixture_, _example_, or _sample_ (case-insensitive) ## Finding Fingerprint diff --git a/data/rules/nasa.yml b/data/rules/nasa.yml index c7b56d6..efbcc42 100644 --- a/data/rules/nasa.yml +++ b/data/rules/nasa.yml @@ -26,10 +26,10 @@ rules: url: https://api.nasa.gov/planetary/apod?date=1995-06-16&api_key={{ TOKEN }} headers: Accept: application/json - response_matcher: - - report_response: true - - type: StatusMatch - status: [200] - - type: WordMatch - words: - - '"url":' + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: WordMatch + words: + - '"url":' diff --git a/src/cli/commands/scan.rs b/src/cli/commands/scan.rs index 9d3fc1f..36726e0 100644 --- a/src/cli/commands/scan.rs +++ b/src/cli/commands/scan.rs @@ -88,6 +88,10 @@ pub struct ScanArgs { #[arg(long, default_value_t = false)] pub no_dedup: bool, + /// Ignore matches that appear to come from test files + #[arg(long, default_value_t = false)] + pub ignore_tests: bool, + /// Redact findings values using a secure hash #[arg(long, short = 'r', default_value_t = false)] pub redact: bool, diff --git a/src/main.rs b/src/main.rs index 0a1e07b..280422c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -289,6 +289,7 @@ fn create_default_scan_args() -> cli::commands::scan::ScanArgs { redact: false, git_repo_timeout: 1800, no_dedup: false, + ignore_tests: false, snippet_length: 256, output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, } diff --git a/src/reporter/json_format.rs b/src/reporter/json_format.rs index 9ccbc29..ec45e83 100644 --- a/src/reporter/json_format.rs +++ b/src/reporter/json_format.rs @@ -389,6 +389,7 @@ mod tests 
{ cli::commands::scan::ScanArgs { num_jobs: 1, no_dedup: false, + ignore_tests: false, rules: RuleSpecifierArgs { rules_path: Vec::new(), rule: vec!["all".into()], diff --git a/src/reporter/pretty_format.rs b/src/reporter/pretty_format.rs index 6d6f398..740f82a 100644 --- a/src/reporter/pretty_format.rs +++ b/src/reporter/pretty_format.rs @@ -312,6 +312,7 @@ fn test_pretty_format_with_nan_entropy_panics() { // core execution / performance num_jobs: 1, no_dedup: false, + ignore_tests: false, // rule selection rules: RuleSpecifierArgs { diff --git a/src/update.rs b/src/update.rs index 4758ab1..bd0f212 100644 --- a/src/update.rs +++ b/src/update.rs @@ -7,8 +7,8 @@ // // Version handling logic covers three scenarios: // 1. Running version == latest release → "up to date". -// 2. Running version > latest release → print a notice that the binary is -// **newer** than anything on GitHub (e.g. a dev build). +// 2. Running version > latest release → print a notice that the binary is **newer** than +// anything on GitHub (e.g. a dev build). // 3. Latest release > running version → offer to self‑update. // // All informational messages are printed with the @@ -25,10 +25,7 @@ use self_update::{backends::github::Update, cargo_crate_version, errors::Error a use semver::Version; use tracing::{error, info, warn}; -use crate::{ - cli::global::GlobalArgs, - reporter::styles::Styles, -}; +use crate::{cli::global::GlobalArgs, reporter::styles::Styles}; /// Return `true` when the canonical executable path lives inside a Homebrew Cellar. 
/// Works for Intel macOS (/usr/local/Cellar), Apple‑Silicon macOS (/opt/homebrew/Cellar) @@ -38,16 +35,13 @@ fn installed_via_homebrew() -> bool { std::env::current_exe().ok().and_then(|p| fs::canonicalize(p).ok()) } - canonical_exe() - .map(|p| p.components().any(|c| c.as_os_str() == "Cellar")) - .unwrap_or(false) + canonical_exe().map(|p| p.components().any(|c| c.as_os_str() == "Cellar")).unwrap_or(false) } /// Check GitHub for a newer Kingfisher release and optionally self‑update. /// /// * `base_url` lets tests point at a mock server. -/// * Self‑update is skipped when the user disabled it **or** the binary is a -/// Homebrew install. +/// * Self‑update is skipped when the user disabled it **or** the binary is a Homebrew install. pub fn check_for_update(global_args: &GlobalArgs, base_url: Option<&str>) -> Option { if global_args.no_update_check { return None; @@ -61,14 +55,13 @@ pub fn check_for_update(global_args: &GlobalArgs, base_url: Option<&str>) -> Opt if is_brew { info!( "{}", - styles - .style_finding_active_heading - .apply_to("Homebrew install detected – will notify about updates but not self‑update") + styles.style_finding_active_heading.apply_to( + "Homebrew install detected – will notify about updates but not self‑update" + ) ); } - info!( - "{}","Checking for updates…"); + info!("{}", "Checking for updates…"); let mut builder = Update::configure(); builder @@ -124,15 +117,11 @@ pub fn check_for_update(global_args: &GlobalArgs, base_url: Option<&str>) -> Opt // Try semantic version comparison. If parsing fails, fall back to the // self‑update code‑path (which will treat the strings lexicographically). 
- if let (Ok(curr), Ok(latest)) = ( - Version::parse(running_v), - Version::parse(&release.version), - ) { + if let (Ok(curr), Ok(latest)) = (Version::parse(running_v), Version::parse(&release.version)) { // ───────── Case 2: running > latest (dev build) ───────── if curr > latest { - let plain = format!( - "Running Kingfisher {curr} which is newer than latest released {latest}" - ); + let plain = + format!("Running Kingfisher {curr} which is newer than latest released {latest}"); info!("{}", styles.style_finding_active_heading.apply_to(&plain)); return Some(plain); } @@ -171,7 +160,7 @@ pub fn check_for_update(global_args: &GlobalArgs, base_url: Option<&str>) -> Opt "{}", styles .style_finding_active_heading - .apply_to("Run `brew upgrade kingfisher` to install the new version.") + .apply_to("Run `brew upgrade kingfisher` to install the new version.") ); } diff --git a/src/util.rs b/src/util.rs index 3edf138..153e1a1 100644 --- a/src/util.rs +++ b/src/util.rs @@ -108,6 +108,27 @@ pub fn is_base64(input: &str) -> bool { .bytes() .all(|b| matches!(b, b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'+' | b'/' | b'=')) } + +/// Heuristic check whether a path points to test files or directories. +/// +/// Looks for common substrings like "test", "tests", "spec", "fixture", or +/// "example" in any path component. Case-insensitive. +pub fn is_test_like_path(path: &Path) -> bool { + path.components().any(|c| { + if let std::path::Component::Normal(os) = c { + if let Some(name) = os.to_str() { + let name = name.to_ascii_lowercase(); + return name.contains("test") + || name.contains("spec") + || name.contains("fixture") + || name.contains("example") + || name.contains("sample"); + } + } + false + }) +} + #[cfg(test)] mod tests { use std::{ @@ -115,7 +136,46 @@ mod tests { path::PathBuf, }; - use super::*; + use super::{is_test_like_path, *}; + + /// Paths that **should** be classified as test-like. 
+ #[test] + fn test_is_test_like_path_positive() { + let positives = [ + "src/tests/helpers.rs", + "/project/spec/controllers/user_spec.rb", + "C:\\repo\\fixtures\\config.json", + "examples/hello_world/main.go", + "samples/data/sample_input.txt", + ]; + + for p in positives { + assert!( + is_test_like_path(Path::new(p)), + "Path {p:?} was expected to be test-like but was not" + ); + } + } + + /// Paths that **should not** be classified as test-like. + #[test] + fn test_is_test_like_path_negative() { + let negatives = [ + "src/main.rs", + "/opt/service/config/production.yml", + "C:\\Program Files\\app\\README.md", + "docs/architecture/overview.md", + "assets/images/logo.png", + ]; + + for p in negatives { + assert!( + !is_test_like_path(Path::new(p)), + "Path {p:?} was incorrectly classified as test-like" + ); + } + } + #[test] fn test_counted_display_regular() { let single = Counted::regular(1, "rule"); diff --git a/src/validation.rs b/src/validation.rs index b45ff3b..4a056f3 100644 --- a/src/validation.rs +++ b/src/validation.rs @@ -990,131 +990,116 @@ rules: Ok(()) } -// // ──────────────────────────────────────────────────────────────── -// // Slack Webhook – end-to-end validation test -// // ──────────────────────────────────────────────────────────────── -// #[tokio::test] -// async fn test_actual_slack_webhook_validation() -> anyhow::Result<()> { -// use std::sync::Arc; + // // ──────────────────────────────────────────────────────────────── + // // Slack Webhook – end-to-end validation test + // // ──────────────────────────────────────────────────────────────── + // #[tokio::test] + // async fn test_actual_slack_webhook_validation() -> anyhow::Result<()> { + // use std::sync::Arc; -// use crossbeam_skiplist::SkipMap; -// use http::StatusCode; -// use rustc_hash::FxHashMap; + // use crossbeam_skiplist::SkipMap; + // use http::StatusCode; + // use rustc_hash::FxHashMap; -// use crate::{ -// blob::BlobId, -// liquid_filters::register_all, -// 
location::OffsetSpan, -// matcher::{OwnedBlobMatch, SerializableCapture, SerializableCaptures}, -// rules::{ -// rule::{Confidence, Rule}, -// Rules, -// }, -// validation::{validate_single_match, Cache}, -// }; + // use crate::{ + // blob::BlobId, + // liquid_filters::register_all, + // location::OffsetSpan, + // matcher::{OwnedBlobMatch, SerializableCapture, SerializableCaptures}, + // rules::{ + // rule::{Confidence, Rule}, + // Rules, + // }, + // validation::{validate_single_match, Cache}, + // }; -// // 1️⃣ YAML snippet with the **exact** Slack rule -// let slack_yaml = r#" -// rules: -// - name: Slack Webhook -// id: kingfisher.slack.4 -// pattern: | -// (?xi) -// \b -// ( -// https://hooks\.slack\.com/services/ -// T[a-z0-9_-]{8,12}/ -// B[a-z0-9_-]{8,12}/ -// [a-z0-9_-]{20,30} -// ) -// \b -// min_entropy: 3.3 -// confidence: medium -// examples: -// - https://hooks.slack.com/services/TY40v9sZ9/BxIqhIXIi/NGUyXK6nK7HMAqd0ASzXluoV -// - https://hooks.slack.com/services/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 -// validation: -// type: Http -// content: -// request: -// headers: -// Content-Type: application/json -// method: POST -// response_matcher: -// - report_response: true -// - type: WordMatch -// words: -// - invalid_payload -// - type: WordMatch -// words: -// - "invalid_token" -// negative: true -// url: "{{ TOKEN }}" -// "#; + // // 1️⃣ YAML snippet with the **exact** Slack rule + // let slack_yaml = r#" + // rules: + // - name: Slack Webhook id: kingfisher.slack.4 pattern: | (?xi) \b ( https://hooks\.slack\.com/services/ + // T[a-z0-9_-]{8,12}/ B[a-z0-9_-]{8,12}/ [a-z0-9_-]{20,30} ) \b min_entropy: 3.3 confidence: + // medium examples: + // - https://hooks.slack.com/services/TY40v9sZ9/BxIqhIXIi/NGUyXK6nK7HMAqd0ASzXluoV + // - https://hooks.slack.com/services/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 + // validation: + // type: Http + // content: + // request: + // headers: + // Content-Type: application/json + // method: POST + // 
response_matcher: + // - report_response: true + // - type: WordMatch words: + // - invalid_payload + // - type: WordMatch words: + // - "invalid_token" + // negative: true + // url: "{{ TOKEN }}" + // "#; -// // 2️⃣ Load that YAML into a Rules object -// let data = vec![(std::path::Path::new("slack_test.yaml"), slack_yaml.as_bytes())]; -// let rules = Rules::from_paths_and_contents(data, Confidence::Low)?; + // // 2️⃣ Load that YAML into a Rules object + // let data = vec![(std::path::Path::new("slack_test.yaml"), slack_yaml.as_bytes())]; + // let rules = Rules::from_paths_and_contents(data, Confidence::Low)?; -// // 3️⃣ Pull the rule syntax & wrap into a Rule -// let slack_rule_syntax = rules -// .rules -// .iter() -// .find(|r| r.id == "kingfisher.slack.4") -// .expect("Slack rule not found") -// .clone(); -// let slack_rule = Rule::new(slack_rule_syntax); + // // 3️⃣ Pull the rule syntax & wrap into a Rule + // let slack_rule_syntax = rules + // .rules + // .iter() + // .find(|r| r.id == "kingfisher.slack.4") + // .expect("Slack rule not found") + // .clone(); + // let slack_rule = Rule::new(slack_rule_syntax); -// // 4️⃣ Provide a real-looking webhook URL (use one of the examples) -// let token = "ENTER YOUR SLACK WEBHOOK URL HERE"; + // // 4️⃣ Provide a real-looking webhook URL (use one of the examples) + // let token = "ENTER YOUR SLACK WEBHOOK URL HERE"; -// // 5️⃣ Build OwnedBlobMatch stub -// let blob_id = BlobId::new(&token.as_bytes()); -// let mut owned_blob_match = OwnedBlobMatch { -// rule: slack_rule.into(), -// blob_id, -// finding_fingerprint: 0, -// matching_input_offset_span: OffsetSpan { start: 0, end: token.len() }, -// captures: SerializableCaptures { -// captures: vec![SerializableCapture { -// name: Some("TOKEN".to_string()), -// match_number: -1, -// start: 0, -// end: token.len(), -// value: token.into(), -// }], -// }, -// validation_response_body: String::new(), -// validation_response_status: StatusCode::OK, -// validation_success: 
false, -// calculated_entropy: 5.0, -// }; + // // 5️⃣ Build OwnedBlobMatch stub + // let blob_id = BlobId::new(&token.as_bytes()); + // let mut owned_blob_match = OwnedBlobMatch { + // rule: slack_rule.into(), + // blob_id, + // finding_fingerprint: 0, + // matching_input_offset_span: OffsetSpan { start: 0, end: token.len() }, + // captures: SerializableCaptures { + // captures: vec![SerializableCapture { + // name: Some("TOKEN".to_string()), + // match_number: -1, + // start: 0, + // end: token.len(), + // value: token.into(), + // }], + // }, + // validation_response_body: String::new(), + // validation_response_status: StatusCode::OK, + // validation_success: false, + // calculated_entropy: 5.0, + // }; -// // 6️⃣ Prepare helpers and run validation -// let parser = register_all(liquid::ParserBuilder::with_stdlib()).build()?; -// let client = reqwest::Client::new(); -// let cache: Cache = Arc::new(SkipMap::new()); -// let dependent_vars = FxHashMap::default(); -// let missing_deps = FxHashMap::default(); + // // 6️⃣ Prepare helpers and run validation + // let parser = register_all(liquid::ParserBuilder::with_stdlib()).build()?; + // let client = reqwest::Client::new(); + // let cache: Cache = Arc::new(SkipMap::new()); + // let dependent_vars = FxHashMap::default(); + // let missing_deps = FxHashMap::default(); -// validate_single_match( -// &mut owned_blob_match, -// &parser, -// &client, -// &dependent_vars, -// &missing_deps, -// &cache, -// ) -// .await; + // validate_single_match( + // &mut owned_blob_match, + // &parser, + // &client, + // &dependent_vars, + // &missing_deps, + // &cache, + // ) + // .await; -// // 7️⃣ Inspect outcome (true ⇒ credential considered ACTIVE) -// assert!( -// owned_blob_match.validation_success, -// "Slack webhook should be reported ACTIVE; body was {:?}", -// owned_blob_match.validation_response_body -// ); - -// Ok(()) -// } + // // 7️⃣ Inspect outcome (true ⇒ credential considered ACTIVE) + // assert!( + // 
owned_blob_match.validation_success, + // "Slack webhook should be reported ACTIVE; body was {:?}", + // owned_blob_match.validation_response_body + // ); + // Ok(()) + // } } diff --git a/src/validation/httpvalidation.rs b/src/validation/httpvalidation.rs index d2e9290..03fc10f 100644 --- a/src/validation/httpvalidation.rs +++ b/src/validation/httpvalidation.rs @@ -269,7 +269,6 @@ pub async fn retry_request( } /// Return `true` when the body is very likely HTML. -/// fn body_looks_like_html(body: &str, headers: &HeaderMap) -> bool { // ---- 1. header heuristic --------------------------------------------- let header_says_html = headers @@ -289,7 +288,6 @@ fn body_looks_like_html(body: &str, headers: &HeaderMap) -> bool { header_says_html && body_looks_htmlish } - /// Validate the response by checking word and status matchers. pub fn validate_response( matchers: &[ResponseMatcher], @@ -298,7 +296,8 @@ pub fn validate_response( headers: &HeaderMap, html_allowed: bool, ) -> bool { - // Since match_all_types is always true here, we simply require all word and status conditions to hold. + // Since match_all_types is always true here, we simply require all word and status conditions + // to hold. 
let word_ok = matchers .iter() .filter_map(|m| { @@ -490,7 +489,7 @@ mod tests { r#type: "word-match".to_string(), words: vec!["invalid_token".to_string()], match_all_words: false, - negative: true, // body must *not* contain “invalid_token” + negative: true, // body must *not* contain “invalid_token” }, ]; @@ -498,10 +497,7 @@ mod tests { let body = "invalid_payload"; let status = StatusCode::BAD_REQUEST; // 400 let mut headers = HeaderMap::new(); - headers.insert( - header::CONTENT_TYPE, - HeaderValue::from_static("text/plain"), - ); + headers.insert(header::CONTENT_TYPE, HeaderValue::from_static("text/plain")); // 3️⃣ Call validate_response with html_allowed = false let ok = validate_response(&matchers, body, &status, &headers, false); @@ -509,5 +505,4 @@ mod tests { // 4️⃣ It *should* be valid (true) because all matcher conditions hold assert!(ok, "Slack webhook response should be considered ACTIVE"); } - } diff --git a/tests/int_dedup.rs b/tests/int_dedup.rs index 739765a..94206d0 100644 --- a/tests/int_dedup.rs +++ b/tests/int_dedup.rs @@ -101,6 +101,7 @@ rules: git_repo_timeout: 1800, // 30 minutes output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, no_dedup, + ignore_tests: false, snippet_length: 64, }; diff --git a/tests/int_github.rs b/tests/int_github.rs index 19763ff..e541812 100644 --- a/tests/int_github.rs +++ b/tests/int_github.rs @@ -88,6 +88,7 @@ fn test_github_remote_scan() -> Result<()> { git_repo_timeout: 1800, // 30 minutes output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, no_dedup: true, + ignore_tests: false, snippet_length: 256, }; // Create global arguments diff --git a/tests/int_gitlab.rs b/tests/int_gitlab.rs index fa13f05..ea0acaf 100644 --- a/tests/int_gitlab.rs +++ b/tests/int_gitlab.rs @@ -85,6 +85,7 @@ fn test_gitlab_remote_scan() -> Result<()> { git_repo_timeout: 1800, // 30 minutes output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, no_dedup: true, + 
ignore_tests: false, snippet_length: 256, }; diff --git a/tests/int_validation_cache.rs b/tests/int_validation_cache.rs index 4b9af83..7f9ca80 100644 --- a/tests/int_validation_cache.rs +++ b/tests/int_validation_cache.rs @@ -144,6 +144,7 @@ async fn test_validation_cache_and_depvars() -> Result<()> { git_repo_timeout: 1800, // 30 minutes output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, no_dedup: true, // keep duplicates so the cache is stressed + ignore_tests: false, snippet_length: 128, }; diff --git a/tests/int_vulnerable_files.rs b/tests/int_vulnerable_files.rs index 6ff3672..20ad438 100644 --- a/tests/int_vulnerable_files.rs +++ b/tests/int_vulnerable_files.rs @@ -87,6 +87,7 @@ impl TestContext { git_repo_timeout: 1800, // 30 minutes output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, no_dedup: true, + ignore_tests: false, snippet_length: 256, }; @@ -150,6 +151,7 @@ impl TestContext { git_repo_timeout: 1800, // 30 minutes output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, no_dedup: true, + ignore_tests: false, snippet_length: 256, }; diff --git a/tests/smoke_check_rules.rs b/tests/smoke_check_rules.rs index d220285..7dff499 100644 --- a/tests/smoke_check_rules.rs +++ b/tests/smoke_check_rules.rs @@ -4,10 +4,8 @@ use std::process::Command; use assert_cmd::prelude::*; use predicates::prelude::*; - #[test] fn check_rules() -> anyhow::Result<()> { - // ── run kingfisher ──────────────────────────────────────────────── Command::cargo_bin("kingfisher")? 
.args([ From 441595689c671236977596c5288c8b2065936531 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sat, 28 Jun 2025 09:18:28 -0700 Subject: [PATCH 6/9] =?UTF-8?q?Introduced=20=20flag=20=E2=80=93=20skip=20f?= =?UTF-8?q?iles/dirs=20whose=20path=20resembles=20tests=20(,=20,=20,=20,?= =?UTF-8?q?=20),=20reducing=20noise.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 75fa268..96e91f3 100644 --- a/README.md +++ b/README.md @@ -101,7 +101,7 @@ kingfisher scan ~/src/myrepo --no-validate ### Display only secrets confirmed active by third‑party APIs ```bash -kingfisher scan ./service --only-valid +kingfisher scan /path/to/repo --only-valid ``` ### Output JSON and capture to a file @@ -113,7 +113,7 @@ kingfisher scan . --format json | tee kingfisher.json ### Output SARIF directly to disk ```bash -kingfisher scan . --format sarif --output findings.sarif +kingfisher scan /path/to/repo --format sarif --output findings.sarif ``` ### Pipe any text directly into Kingfisher by passing `-` @@ -137,6 +137,12 @@ kingfisher scan /path/to/repo --rule kingfisher.aws kingfisher scan /path/to/repo --rule-stats ``` +### Scan while ignoring likely test files +```bash +# Scan source but skip unit / integration tests +kingfisher scan ./my-project --ignore-tests +``` + --- ## Scanning GitHub From 488635e9c6e902e67195c05d91572bc7a196ee6a Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sat, 28 Jun 2025 09:20:44 -0700 Subject: [PATCH 7/9] updated test --- src/util.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/util.rs b/src/util.rs index 153e1a1..0113df4 100644 --- a/src/util.rs +++ b/src/util.rs @@ -146,6 +146,7 @@ mod tests { "/project/spec/controllers/user_spec.rb", "C:\\repo\\fixtures\\config.json", "examples/hello_world/main.go", + "/home/user/scripts/local-testCert.pem", "samples/data/sample_input.txt", ]; From 
331c04823909edb5e64cc224a787db3d1983b6e3 Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sat, 28 Jun 2025 09:22:13 -0700 Subject: [PATCH 8/9] updated test --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3c2c6ab..c583719 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,7 @@ All notable changes to this project will be documented in this file. ## [1.17.0] - Updated README to give proper attribution to Nosey Parker! -- Added rules for sonarcloud, sonarqube, sourcegraph, shopify, truenas, squaare, sendgrid +- Added rules for sonarcloud, sonarqube, sourcegraph, shopify, truenas, square, sendgrid, nasa, teamcity, truenas, shopify - Introduced `--ignore-tests` flag – skip files/dirs whose path resembles tests (`test`, `spec`, `fixture`, `example`, `sample`), reducing noise. ## [1.16.0] - Fix: HTML detection now requires both HTML content-type and " Date: Sat, 28 Jun 2025 09:30:22 -0700 Subject: [PATCH 9/9] updated test --- src/scanner/enumerate.rs | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/src/scanner/enumerate.rs b/src/scanner/enumerate.rs index d58b50c..d9ca0f8 100644 --- a/src/scanner/enumerate.rs +++ b/src/scanner/enumerate.rs @@ -37,6 +37,7 @@ use crate::{ util::is_compressed_file, }, scanner_pool::ScannerPool, + util::is_test_like_path, EnumeratorConfig, EnumeratorFileResult, FileResult, FilesystemEnumerator, FoundInput, GitRepoEnumerator, GitRepoResult, GitRepoWithMetadataEnumerator, PathBuf, }; @@ -188,11 +189,23 @@ pub fn enumerate_filesystem_inputs( Ok(Some((origin_set, blob_metadata, vec_of_matches))) => { for (_, single_match) in vec_of_matches { // Send each match - send_ds.send(( - Arc::new(origin_set.clone()), - Arc::new(blob_metadata.clone()), - single_match, - ))?; + let is_test = if args.ignore_tests { + origin_set + .iter() + .filter_map(|o| o.full_path()) + .any(|p| is_test_like_path(&p)) + } else { + false + }; + + if !is_test { + 
// Send each match + send_ds.send(( + Arc::new(origin_set.clone()), + Arc::new(blob_metadata.clone()), + single_match, + ))?; + } } } Err(e) => {