From edc88c4714563c4d9444b7c49a90a7a2a67e778b Mon Sep 17 00:00:00 2001
From: Mick Grove
Date: Sat, 28 Jun 2025 07:08:22 -0700
Subject: [PATCH 1/9] Updated README
---
Makefile | 7 ++++
README.md | 107 ++++++++++++++++++++++++++++++++++--------------------
2 files changed, 75 insertions(+), 39 deletions(-)
diff --git a/Makefile b/Makefile
index fb8b5e3..da375d4 100644
--- a/Makefile
+++ b/Makefile
@@ -400,6 +400,13 @@ check-rust:
fi
tests:
+ @echo "🔍 checking for cargo-nextest …"
+ @if command -v cargo-nextest >/dev/null 2>&1; then \
+ echo "✅ cargo-nextest already present"; \
+ else \
+ echo "📦 installing cargo-nextest …"; \
+ cargo install --locked cargo-nextest || true; \
+ fi
@echo "▶ running tests …"; \
if command -v cargo-nextest >/dev/null 2>&1; then \
cargo nextest run --workspace --all-targets; \
diff --git a/README.md b/README.md
index 67098a3..3af9c20 100644
--- a/README.md
+++ b/README.md
@@ -6,9 +6,13 @@
[](https://opensource.org/licenses/Apache-2.0)
Kingfisher is a blazingly fast secret‑scanning and validation tool built in Rust. It combines Intel’s hardware‑accelerated Hyperscan regex engine with language‑aware parsing via Tree‑Sitter, and **ships with hundreds of built‑in rules** to detect, validate, and triage secrets before they ever reach production
+
-**MongoDB Blog**: [Introducing Kingfisher: Real-Time Secret Detection and Validation](https://www.mongodb.com/blog/post/product-release-announcements/introducing-kingfisher-real-time-secret-detection-validation)
+Kingfisher originated as a fork of **[Nosey Parker](https://github.com/praetorian-inc/noseyparker)** by Praetorian Security, Inc, and is built atop the incredible work contributed by the Nosey Parker community.
+
+
+- **MongoDB Blog**: [Introducing Kingfisher: Real-Time Secret Detection and Validation](https://www.mongodb.com/blog/post/product-release-announcements/introducing-kingfisher-real-time-secret-detection-validation)
## Key Features
@@ -17,12 +21,12 @@ Kingfisher is a blazingly fast secret‑scanning and validation tool built in Ru
- **Built-In Validation**: Hundreds of built-in detection rules, many with live-credential validators that call the relevant service APIs (AWS, Azure, GCP, Stripe, etc.) to confirm a secret is active. You can extend or override the library by adding YAML-defined rules on the command line—see [docs/RULES.md](/docs/RULES.md) for details
- **Git History Scanning**: Scan local repos, remote GitHub/GitLab orgs/users, or arbitrary GitHub/GitLab repos
-
## Getting Started
### Installation
On macOS, you can simply
+
```bash
brew install kingfisher
```
@@ -53,7 +57,6 @@ make darwin-all # builds both x64 and arm64
make all # builds for every OS and architecture supported
```
-
# Write Custom Rules!
Kingfisher ships with hundreds of rules with HTTP and service‑specific validation checks (AWS, Azure, GCP, etc.) to confirm if a detected string is a live credential.
@@ -71,6 +74,7 @@ Once you've done that, you can provide your custom rules (defined in a YAML file
> **Note** `kingfisher scan` detects whether the input is a Git repository or a plain directory—no extra flags required.
### Scan with secret validation
+
```bash
kingfisher scan /path/to/code
## NOTE: This path can refer to:
@@ -82,28 +86,32 @@ kingfisher scan /path/to/code
# `--git-history=none`
```
-
### Scan a directory containing multiple Git repositories
+
```bash
kingfisher scan /projects/mono‑repo‑dir
```
### Scan a Git repository without validation
+
```bash
kingfisher scan ~/src/myrepo --no-validate
```
### Display only secrets confirmed active by third‑party APIs
+
```bash
kingfisher scan ./service --only-valid
```
### Output JSON and capture to a file
+
```bash
kingfisher scan . --format json | tee kingfisher.json
```
### Output SARIF directly to disk
+
```bash
kingfisher scan . --format sarif --output findings.sarif
```
@@ -114,8 +122,9 @@ kingfisher scan . --format sarif --output findings.sarif
cat /path/to/file.py | kingfisher scan -
```
-### Scan using a rule *family* with one flag
-*(prefix matching: `--rule kingfisher.aws` loads `kingfisher.aws.*`)*
+### Scan using a rule _family_ with one flag
+
+_(prefix matching: `--rule kingfisher.aws` loads `kingfisher.aws.*`)_
```bash
# Only apply AWS-related rules (kingfisher.aws.1 + kingfisher.aws.2)
@@ -123,19 +132,23 @@ kingfisher scan /path/to/repo --rule kingfisher.aws
```
### Display rule performance statistics
+
```bash
kingfisher scan /path/to/repo --rule-stats
```
---
+
## Scanning GitHub
### Scan GitHub organisation (requires `KF_GITHUB_TOKEN`)
+
```bash
kingfisher scan --github-organization my-org
```
### Scan remote GitHub repository
+
```bash
kingfisher scan --git-url https://github.com/org/repo.git
@@ -143,70 +156,84 @@ kingfisher scan --git-url https://github.com/org/repo.git
KF_GITHUB_TOKEN="ghp_…" kingfisher scan --git-url https://github.com/org/private_repo.git
```
+
---
+
## Scanning GitLab
### Scan GitLab group (requires `KF_GITLAB_TOKEN`)
+
```bash
kingfisher scan --gitlab-group my-group
```
### Scan GitLab user
+
```bash
kingfisher scan --gitlab-user johndoe
```
### Scan remote GitLab repository by URL
+
```bash
kingfisher scan --git-url https://gitlab.com/group/project.git
```
### List GitLab repositories
+
```bash
kingfisher gitlab repos list --group my-group
```
---
+
## Environment Variables for Tokens
-| Variable | Purpose |
-|---------------------|---------------------------------------|
-| `KF_GITHUB_TOKEN` | GitHub Personal Access Token |
-| `KF_GITLAB_TOKEN` | GitLab Personal Access Token |
+| Variable | Purpose |
+| ----------------- | ---------------------------- |
+| `KF_GITHUB_TOKEN` | GitHub Personal Access Token |
+| `KF_GITLAB_TOKEN` | GitLab Personal Access Token |
Set them temporarily per command:
+
```bash
KF_GITLAB_TOKEN="glpat-…" kingfisher scan --gitlab-group my-group
```
+
Or export for the session:
+
```bash
export KF_GITLAB_TOKEN="glpat-…"
```
-*If no token is provided Kingfisher still works for public repositories.*
+_If no token is provided Kingfisher still works for public repositories._
---
+
## Exit Codes
-| Code | Meaning |
-|------|-------------------------------------|
-| 0 | No findings |
-| 200 | Findings discovered |
-| 205 | Validated findings discovered |
+| Code | Meaning |
+| ---- | ----------------------------- |
+| 0 | No findings |
+| 200 | Findings discovered |
+| 205 | Validated findings discovered |
---
### Update Checks
+
Kingfisher checks for newer releases on GitHub each time it starts and exits, printing whether a new version is available. Use `--self-update` to automatically download and replace the binary when an update is found. Add `--no-update-check` to disable these checks entirely.
---
-
### List Builtin Rules
+
```bash
kingfisher rules list
```
+
### To scan using **only** your own `my_rules.yaml` you could run:
+
```bash
kingfisher scan \
--load-builtins=false \
@@ -224,6 +251,7 @@ kingfisher scan \
```
## Other Examples
+
```bash
# Check custom rules - this ensures all regular expressions compile, and can match the rule's `examples` in the YML file
kingfisher rules check --rules-path ./my_rules.yml
@@ -235,6 +263,7 @@ kingfisher github repos list --organization my-org
```
## Notable Scan Options
+
- `--no-dedup`: Report every occurrence of a finding (disable the default de-duplicate behavior)
- `--confidence `: (low|medium|high)
- `--min-entropy `: Override default threshold
@@ -245,15 +274,15 @@ kingfisher github repos list --organization my-org
## Finding Fingerprint
-The document below details the four-field formula (rule SHA-1, origin label, start & end offsets) hashed with XXH3-64 to create Kingfisher’s 64-bit finding fingerprint, and explains how this ID powers safe deduplication; plus how `--no-dedup` can be used shows every raw match.
+The document below details the four-field formula (rule SHA-1, origin label, start & end offsets) hashed with XXH3-64 to create Kingfisher’s 64-bit finding fingerprint, and explains how this ID powers safe deduplication; plus how `--no-dedup` can be used to show every raw match.
See ([docs/FINGERPRINT.md](docs/FINGERPRINT.md))
-
## Rule Performance Profiling
+
Use `--rule-stats` to collect timing information for every rule. After scanning, the summary prints a **Rule Performance Stats** section showing how many matches each rule produced along with its slowest and average match times. Useful when creating rules or debugging rules.
-
## CLI Options
+
```bash
kingfisher scan --help
```
@@ -265,36 +294,39 @@ By integrating Kingfisher into your development lifecycle, you can:
- **Prevent Costly Breaches**
Early detection of embedded credentials avoids expensive incident response, legal fees, and reputation damage
- **Automate Compliance**
- Enforce secret‑scanning policies across GitOps, CI/CD, and pull requests to help satisfy SOC 2, PCI‑DSS, GDPR, and other standards
+ Enforce secret‑scanning policies across GitOps, CI/CD, and pull requests to help satisfy SOC 2, PCI‑DSS, GDPR, and other standards
- **Reduce Noise, Focus on Real Threats**
Validation logic filters out false positives and highlights only active, valid secrets (`--only-valid`)
- **Accelerate Dev Workflows**
Run in parallel across dozens of languages, integrate with GitHub Actions or any pipeline, and shift security left to minimize delays
-
## The Risk of Leaked Secrets
Embedding credentials in code repositories is a pervasive, ever‑present risk that leads directly to data breaches:
1. **Uber (2016)**
- - *Incident*: Attackers stole GitHub credentials, retrieved an AWS key from a developer’s private repo, and accessed data on 57 million riders and 600 000 drivers.
- - *Sources*: [BBC News](https://www.bbc.com/news/technology-42075306), [Ars Technica](https://arstechnica.com/tech-policy/2017/11/report-uber-paid-hackers-100000-to-keep-2016-data-breach-quiet/)
+
+ - _Incident_: Attackers stole GitHub credentials, retrieved an AWS key from a developer’s private repo, and accessed data on 57 million riders and 600 000 drivers.
+ - _Sources_: [BBC News](https://www.bbc.com/news/technology-42075306), [Ars Technica](https://arstechnica.com/tech-policy/2017/11/report-uber-paid-hackers-100000-to-keep-2016-data-breach-quiet/)
2. **AWS**
- - *Incident*: An AWS engineer accidentally published log files and CloudFormation templates containing AWS key pairs (including “rootkey.csv”) to a public GitHub repo.
- - *Sources*: [The Register](https://www.theregister.com/2020/01/23/aws_engineer_credentials_github/), [UpGuard](https://www.upguard.com/breaches/identity-and-access-misstep-how-an-amazon-engineer-exposed-credentials-and-more)
+
+ - _Incident_: An AWS engineer accidentally published log files and CloudFormation templates containing AWS key pairs (including “rootkey.csv”) to a public GitHub repo.
+ - _Sources_: [The Register](https://www.theregister.com/2020/01/23/aws_engineer_credentials_github/), [UpGuard](https://www.upguard.com/breaches/identity-and-access-misstep-how-an-amazon-engineer-exposed-credentials-and-more)
3. **Infosys**
- - *Incident*: Infosys published an internal PyPI package embedding a FullAdminAccess AWS key for a Johns Hopkins data bucket; the key remained active for over a year.
- - *Sources*: [The Stack](https://www.thestack.technology/infosys-leak-aws-key-exposed-on-pypi/), [Tom Forbes Blog](https://tomforb.es/blog/infosys-leak/)
+
+ - _Incident_: Infosys published an internal PyPI package embedding a FullAdminAccess AWS key for a Johns Hopkins data bucket; the key remained active for over a year.
+ - _Sources_: [The Stack](https://www.thestack.technology/infosys-leak-aws-key-exposed-on-pypi/), [Tom Forbes Blog](https://tomforb.es/blog/infosys-leak/)
4. **Microsoft**
- - *Incident*: Microsoft’s AI research GitHub repo included an overly permissive Azure SAS token, exposing 38 TB of private data (workstation backups, 30,000+ Teams messages).
- - *Sources*: [Wiz Blog](https://www.wiz.io/blog/38-terabytes-of-private-data-accidentally-exposed-by-microsoft-ai-researchers), [TechCrunch](https://techcrunch.com/2023/09/18/microsoft-ai-researchers-accidentally-exposed-terabytes-of-internal-sensitive-data/)
+
+ - _Incident_: Microsoft’s AI research GitHub repo included an overly permissive Azure SAS token, exposing 38 TB of private data (workstation backups, 30,000+ Teams messages).
+ - _Sources_: [Wiz Blog](https://www.wiz.io/blog/38-terabytes-of-private-data-accidentally-exposed-by-microsoft-ai-researchers), [TechCrunch](https://techcrunch.com/2023/09/18/microsoft-ai-researchers-accidentally-exposed-terabytes-of-internal-sensitive-data/)
5. **GitHub**
- - *Incident*: GitHub discovered its RSA SSH host private key was briefly exposed in a public repository and rotated it out of caution.
- - *Sources*: [GitHub Blog](https://github.blog/news-insights/company-news/we-updated-our-rsa-ssh-host-key/)
+ - _Incident_: GitHub discovered its RSA SSH host private key was briefly exposed in a public repository and rotated it out of caution.
+ - _Sources_: [GitHub Blog](https://github.blog/news-insights/company-news/we-updated-our-rsa-ssh-host-key/)
Left unchecked, leaked secrets can lead to unauthorized access, pivoting within your environment, regulatory fines, and brand‑damaging incident response costs.
@@ -304,14 +336,11 @@ See ([docs/COMPARISON.md](docs/COMPARISON.md))
# Roadmap
- - More rules
- - Auto-updater
- - Packages for Linux (deb, rpm)
- - Please file a [feature request](https://github.com/mongodb/kingfisher/issues) if you have specific features you'd like added
-
+- More rules
+- Auto-updater
+- Packages for Linux (deb, rpm)
+- Please file a [feature request](https://github.com/mongodb/kingfisher/issues) if you have specific features you'd like added
# License
[Apache2 License](LICENSE)
-
-
From 26b5b7918fa87cc88dc0ae75ba365fba62863704 Mon Sep 17 00:00:00 2001
From: Mick Grove
Date: Sat, 28 Jun 2025 08:34:15 -0700
Subject: [PATCH 2/9] Added rules for sonarcloud, sonarqube, sourcegraph
---
README.md | 4 +-
data/rules/netlify.yml | 8 ++--
data/rules/sonarcloud.yml | 35 ++++++++++++++++
data/rules/sonarqube.yml | 58 ++++++++++++++++++++++++++
data/rules/sourcegraph.yml | 85 ++++++++++++++++++++++++++++++++++++++
src/matcher.rs | 2 -
6 files changed, 183 insertions(+), 9 deletions(-)
create mode 100644 data/rules/sonarcloud.yml
create mode 100644 data/rules/sonarqube.yml
create mode 100644 data/rules/sourcegraph.yml
diff --git a/README.md b/README.md
index 3af9c20..8f7af32 100644
--- a/README.md
+++ b/README.md
@@ -9,10 +9,10 @@ Kingfisher is a blazingly fast secret‑scanning and validation tool built in Ru
-Kingfisher originated as a fork of **[Nosey Parker](https://github.com/praetorian-inc/noseyparker)** by Praetorian Security, Inc, and is built atop the incredible work contributed by the Nosey Parker community.
+Kingfisher originated as a fork of **[Nosey Parker](https://github.com/praetorian-inc/noseyparker)** by Praetorian Security, Inc, and is built atop their incredible work and the work contributed by the Nosey Parker community.
-- **MongoDB Blog**: [Introducing Kingfisher: Real-Time Secret Detection and Validation](https://www.mongodb.com/blog/post/product-release-announcements/introducing-kingfisher-real-time-secret-detection-validation)
+**MongoDB Blog**: [Introducing Kingfisher: Real-Time Secret Detection and Validation](https://www.mongodb.com/blog/post/product-release-announcements/introducing-kingfisher-real-time-secret-detection-validation)
## Key Features
diff --git a/data/rules/netlify.yml b/data/rules/netlify.yml
index eaf8399..0a29d25 100644
--- a/data/rules/netlify.yml
+++ b/data/rules/netlify.yml
@@ -14,8 +14,6 @@ rules:
examples:
- netlify_token=3cdfad7b885a6daceff3fb820389115750b373763fb30b10ca0382648b55872d
- netlify_secret=7a9ef2c84d6b3e5f1c8a0b9d2e4f6a8c7b3d5e9f2a1c8b4d6e3f5a9c7b2d8e4
- references:
- - https://howtorotate.com/docs/tutorials/netlify/
validation:
type: Http
content:
@@ -39,15 +37,15 @@ rules:
(?:SECRET|PRIVATE|ACCESS|KEY|TOKEN)
(?:.|[\n\r]){0,32}?
\b
- ([A-Z0-9_-]{43,45})
+ (
+ [A-Z0-9_-]{43,45}
+ )
\b
min_entropy: 3.5
confidence: medium
examples:
- netlify_token=G5yT54abRasekrOpe7SaArsowiuHTeR45sfEhsH-K1L2
- netlify_key=H7xZ98cdWbsemqNpv8UaXtsnyjKgVeQ34rsDkpM-N5P6
- references:
- - https://howtorotate.com/docs/tutorials/netlify/
validation:
type: Http
content:
diff --git a/data/rules/sonarcloud.yml b/data/rules/sonarcloud.yml
new file mode 100644
index 0000000..f0b874d
--- /dev/null
+++ b/data/rules/sonarcloud.yml
@@ -0,0 +1,35 @@
+rules:
+ - name: SonarCloud API Token
+ id: kingfisher.sonarcloud.1
+ pattern: |
+ (?xi)
+ \b
+ sonar
+ (?:.|[\n\r]){0,32}?
+ (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN)
+ (?:.|[\n\r]){0,32}?
+ \b
+ (
+ [0-9a-z]{40}
+ )
+ \b
+ min_entropy: 2.5
+ examples:
+ - sonar_api_token=abcdef0123456789abcdef0123456789abcdef23
+ validation:
+ type: Http
+ content:
+ request:
+ headers:
+ Authorization: "Basic {{ TOKEN | append: ':' | b64enc }}"
+ Accept: application/json
+ method: GET
+ url: https://sonarcloud.io/api/user_tokens/search
+ response_matcher:
+ - report_response: true
+ - type: StatusMatch
+ status: [200]
+ - type: WordMatch
+ match_all_words: true
+ words:
+ - '"tokens":'
diff --git a/data/rules/sonarqube.yml b/data/rules/sonarqube.yml
new file mode 100644
index 0000000..ceddbee
--- /dev/null
+++ b/data/rules/sonarqube.yml
@@ -0,0 +1,58 @@
+rules:
+ - name: SonarQube API Key
+ id: kingfisher.sonarqube.1
+ pattern: |
+ (?xi)
+ \b
+ (
+ (?:sq[pua])
+ _[a-z0-9]{40}
+ )
+ min_entropy: 3.5
+ examples:
+ - sonar.login=sqp_4b78f8494075e310d62dfdcaeb14be2c78fca2fc
+ - sonar.login=squ_4b78f8494075e310d62dfdcaeb14be2c78fca2fc
+ validation:
+ type: Http
+ content:
+ request:
+ headers:
+ Authorization: "Basic {{ TOKEN | append: ':' | b64enc }}"
+ method: GET
+ response_matcher:
+ - report_response: true
+ - status:
+ - 200
+ type: StatusMatch
+ url: '{{ SONARHOST }}/api/user_tokens/search'
+ depends_on_rule:
+ - rule_id: kingfisher.sonarqube.2
+ variable: SONARHOST
+
+ - name: SonarQube Host
+ id: kingfisher.sonarqube.2
+ pattern: |
+ (?xi)
+ sonar.{0,8}host
+ (?:.|[\n\r]){0,64}?
+ \b
+ (
+ https?://.*?:\d{2,6}
+ )
+ \b
+ min_entropy: 3.5
+ visible: false
+ examples:
+ - sonar.host=https://sonar.internal.company.com:9000
+
+ - name: SonarQube Token
+ id: kingfisher.sonarqube.3
+ pattern: '(?i)sonar.{0,5}login.{0,5}\s*\b([a-f0-9]{40})\b'
+ min_entropy: 3.3
+ confidence: medium
+ examples:
+ - 'sonar.host.url=https://sonarcloud.io -Dsonar.login=5524bf449ca45fcace54698371466398321f3a82'
+ - "sonar.login', '826de5590c75919a8317fdface58206eebe7ebbc"
+ - '$sonarLogin = "4924be8f51f3e738c97db2c4ace51db7e938f28b"'
+ references:
+ - https://docs.sonarqube.org/latest/user-guide/user-token/
\ No newline at end of file
diff --git a/data/rules/sourcegraph.yml b/data/rules/sourcegraph.yml
new file mode 100644
index 0000000..965d99a
--- /dev/null
+++ b/data/rules/sourcegraph.yml
@@ -0,0 +1,85 @@
+rules:
+ - name: Sourcegraph Access Token
+ id: kingfisher.sourcegraph.1
+ pattern: |
+ (?xi)
+ \b
+ sgp_(?:[a-f0-9]{16}_local_)?[a-f0-9]{40}
+ \b
+ min_entropy: 3.3
+ examples:
+ - sgp_210f1131b08e93adcfc3f05faa2d768ff883a61f
+ validation:
+ type: Http
+ content:
+ request:
+ method: POST
+ url: https://sourcegraph.com/.api/graphql
+ headers:
+ Authorization: "token {{ TOKEN }}"
+ Content-Type: application/json
+ body: |
+ { "query": "query ValidateToken { site { id } }" }
+ response_matcher:
+ - report_response: true
+ - type: StatusMatch
+ status: [200]
+ - type: WordMatch
+ words: ['"site":{']
+ match_all_words: true
+
+ - name: Sourcegraph _Legacy_ API Key
+ id: kingfisher.sourcegraph.2
+ pattern: |
+ (?xi)
+ \b
+ (?:sgp_(?:[a-f0-9]{16}_local_)?[a-f0-9]{40}|[a-f0-9]{40})
+ \b
+ min_entropy: 3.5
+ confidence: medium
+ examples:
+ - sgp_abcdef1234567890_local_abcdef12345678901234567890abcdef12345678
+ validation:
+ type: Http
+ content:
+ request:
+ method: POST
+ url: https://sourcegraph.com/.api/graphql
+ headers:
+ Authorization: "token {{ TOKEN }}"
+ Content-Type: application/json
+ body: |
+ { "query": "query ValidateToken { site { id } }" }
+ response_matcher:
+ - report_response: true
+ - type: StatusMatch
+ status: [200]
+ - type: WordMatch
+ words: ['"site":{']
+
+ - name: Sourcegraph Cody Gateway Key
+ id: kingfisher.sourcegraph.3
+ pattern: |
+ (?xi)
+ \b
+ slk_[a-f0-9]{64}
+ \b
+ min_entropy: 3.5
+ confidence: medium
+ examples:
+ - slk_27b0a1f1926e7376dd8bdfcb0ade3c397c462b6e68c854a5521a17dd2b704ce6
+ validation:
+ type: Http
+ content:
+ request:
+ method: GET
+ url: https://cody-gateway.sourcegraph.com/v1/limits
+ headers:
+ Authorization: "Bearer {{ TOKEN }}"
+ response_matcher:
+ - report_response: true
+ - type: StatusMatch
+ status: [200]
+ - type: WordMatch
+ words: ['"token"', '"limit"']
+ match_all_words: true
diff --git a/src/matcher.rs b/src/matcher.rs
index d7ae76d..d90c5ef 100644
--- a/src/matcher.rs
+++ b/src/matcher.rs
@@ -1,6 +1,5 @@
use std::{
borrow::Cow,
- // collections::{HashMap, HashSet},
hash::{Hash, Hasher},
io::Write,
str,
@@ -60,7 +59,6 @@ pub struct OwnedBlobMatch {
pub blob_id: BlobId,
/// The unique content-based identifier of this match
pub finding_fingerprint: u64,
- // pub matching_input: Vec,
pub matching_input_offset_span: OffsetSpan,
pub captures: SerializableCaptures,
pub validation_response_body: String,
From 0a5cabbb5113de6d2632c3fc2a590be14afc985c Mon Sep 17 00:00:00 2001
From: Mick Grove
Date: Sat, 28 Jun 2025 08:44:36 -0700
Subject: [PATCH 3/9] Added rules for shopify, truenas
---
data/rules/shopify.yml | 40 ++++++++++++++++++++++++++++++++++
data/rules/truenas.yml | 49 ++++++++++++++++++++++++++++++++++++++++++
2 files changed, 89 insertions(+)
create mode 100644 data/rules/shopify.yml
create mode 100644 data/rules/truenas.yml
diff --git a/data/rules/shopify.yml b/data/rules/shopify.yml
new file mode 100644
index 0000000..da340e5
--- /dev/null
+++ b/data/rules/shopify.yml
@@ -0,0 +1,40 @@
+rules:
+ - name: Shopify access token
+ id: kingfisher.shopify.1
+ pattern: |
+ (?xi)
+ \b
+ (
+ (?:shpat|shpca|shppa|shpss)_[a-f0-9]{30,34}
+ )
+ \b
+ min_entropy: 3.5
+ examples:
+ - 'shopify_app_secret: "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"'
+ validation:
+ type: Http
+ content:
+ request:
+ headers:
+ X-Shopify-Access-Token: '{{ TOKEN }}'
+ method: GET
+ response_matcher:
+ - report_response: true
+ - type: StatusMatch
+ status:
+ - 200
+ - type: WordMatch
+ match_all_words: true
+ words: ['"shop":']
+ url: https://{{ DOMAIN }}/admin/api/2024-10/shop.json
+ depends_on_rule:
+ - rule_id: "kingfisher.shopify.2"
+ variable: DOMAIN
+
+ - name: Shopify Domain
+ id: kingfisher.shopify.2
+ pattern: (?xi)([a-z0-9-]+\.myshopify\.com)
+ min_entropy: 3.0
+ visible: false
+ examples:
+ - example.myshopify.com
\ No newline at end of file
diff --git a/data/rules/truenas.yml b/data/rules/truenas.yml
new file mode 100644
index 0000000..9e0067a
--- /dev/null
+++ b/data/rules/truenas.yml
@@ -0,0 +1,49 @@
+rules:
+ - name: TrueNAS API Key (WebSocket)
+ id: kingfisher.truenas.1
+ pattern: |
+ (?x)
+ "params"\s*:\s*\[\s*"
+ (\d+-[a-zA-Z0-9]{64})
+ "\s*\]
+ min_entropy: 3.3
+ confidence: medium
+ examples:
+ - '{"id":"3286a508-a6ca-278a-c078-85b2b515d8d2", "msg":"method", "method":"auth.login_with_api_key", "params":["8-Lp22ov7halMBLUpG97Wg4y7fibQi3CW19VJiZcCu746zgCs0mdDdTCoOcpgEucgu"]}'
+ - '{"id":"677d9914-f598-f497-e77e-2a3aadbb822e", "msg":"method", "method":"auth.login_with_api_key", "params" : ["9-hTSZDBPyg0PjRZvWb8omoxJ7X2gAjRGmiPKql9ENGIUP9OPtEAzz5f6g9YIMVbZT"]}'
+ - '{"id":"2755dad4-cc12-94bb-a894-ba0f85c3fdbf", "msg":"method", "method":"auth.login_with_api_key", "params" : [ "10-6LZBVhNq8zze0rzXJptfSWDBoskWuThnQb3fUVw4sVNgJ7GKT3ITVIovhwPf34oL" ]}'
+ - |
+ {
+ "id": "2755dad4-cc12-94bb-a894-ba0f85c3fdbf",
+ "msg": "method",
+ "method": "auth.login_with_api_key",
+ "params": [
+ "10-6LZBVhNq8zze0rzXJptfSWDBoskWuThnQb3fUVw4sVNgJ7GKT3ITVIovhwPf34oL"
+ ]
+ }
+ references:
+ - https://www.truenas.com/docs/api/core_websocket_api.html
+ - https://www.truenas.com/docs/api/scale_rest_api.html
+ - https://www.truenas.com/docs/scale/scaletutorials/toptoolbar/managingapikeys/
+ - https://www.truenas.com/docs/scale/scaleclireference/auth/cliapikey/
+ - https://www.truenas.com/docs/scale/api/
+ - https://www.truenas.com/community/threads/api-examples-in-perl-python.108053/
+
+ - name: TrueNAS API Key (REST API)
+ id: kingfisher.truenas.2
+ pattern: |
+ (?x)
+ Bearer\s*
+ (\d+-[a-zA-Z0-9]{64})
+ \b
+ min_entropy: 3.3
+ confidence: medium
+ examples:
+ - 'curl -X POST "http://192.168.0.30/api/v2.0/device/get_info" -H "Content-Type: application/json" -H "Authorization: Bearer 8-Lp22ov7halMBLUpG97Wg4y7fibQi3CW19VJiZcCu746zgCs0mdDdTCoOcpgEucgu" -d "\"SERIAL\""'
+ references:
+ - https://www.truenas.com/docs/api/core_websocket_api.html
+ - https://www.truenas.com/docs/api/scale_rest_api.html
+ - https://www.truenas.com/docs/scale/scaletutorials/toptoolbar/managingapikeys/
+ - https://www.truenas.com/docs/scale/scaleclireference/auth/cliapikey/
+ - https://www.truenas.com/docs/scale/api/
+ - https://www.truenas.com/community/threads/api-examples-in-perl-python.108053/
\ No newline at end of file
From 1bf87935c862e186d1f3b2c0115f60c4ccf57d0b Mon Sep 17 00:00:00 2001
From: Mick Grove
Date: Sat, 28 Jun 2025 09:05:19 -0700
Subject: [PATCH 4/9] added rules for nasa, teamcity
---
CHANGELOG.md | 4 +++
Cargo.toml | 2 +-
data/rules/nasa.yml | 35 ++++++++++++++++++
data/rules/sendgrid.yml | 38 ++++++++++++++++++++
data/rules/square.yml | 80 +++++++++++++++++++++++++++++++++++++++++
data/rules/teamcity.yml | 18 ++++++++++
6 files changed, 176 insertions(+), 1 deletion(-)
create mode 100644 data/rules/nasa.yml
create mode 100644 data/rules/sendgrid.yml
create mode 100644 data/rules/square.yml
create mode 100644 data/rules/teamcity.yml
diff --git a/CHANGELOG.md b/CHANGELOG.md
index a9a31a6..05e4d68 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,10 @@
All notable changes to this project will be documented in this file.
+## [1.17.0]
+- Updated README to give proper attribution to Nosey Parker!
+- Added rules for sonarcloud, sonarqube, sourcegraph, shopify, truenas, squaare, sendgrid
+
## [1.16.0]
- Fix: HTML detection now requires both HTML content-type and "'
+ references:
+ - https://www.jetbrains.com/help/teamcity/rest/teamcity-rest-api-documentation.html
From 28af26b23aaa9c60c9ab446b9d45d9099c0ad67a Mon Sep 17 00:00:00 2001
From: Mick Grove
Date: Sat, 28 Jun 2025 09:16:42 -0700
Subject: [PATCH 5/9] =?UTF-8?q?Introduced=20=20flag=20=E2=80=93=20skip=20f?=
=?UTF-8?q?iles/dirs=20whose=20path=20resembles=20tests=20(,=20,=20,=20,?=
=?UTF-8?q?=20),=20reducing=20noise.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
CHANGELOG.md | 2 +-
README.md | 1 +
data/rules/nasa.yml | 14 +-
src/cli/commands/scan.rs | 4 +
src/main.rs | 1 +
src/reporter/json_format.rs | 1 +
src/reporter/pretty_format.rs | 1 +
src/update.rs | 37 ++----
src/util.rs | 62 ++++++++-
src/validation.rs | 217 ++++++++++++++-----------------
src/validation/httpvalidation.rs | 13 +-
tests/int_dedup.rs | 1 +
tests/int_github.rs | 1 +
tests/int_gitlab.rs | 1 +
tests/int_validation_cache.rs | 1 +
tests/int_vulnerable_files.rs | 2 +
tests/smoke_check_rules.rs | 2 -
17 files changed, 201 insertions(+), 160 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 05e4d68..3c2c6ab 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,7 +5,7 @@ All notable changes to this project will be documented in this file.
## [1.17.0]
- Updated README to give proper attribution to Nosey Parker!
- Added rules for sonarcloud, sonarqube, sourcegraph, shopify, truenas, squaare, sendgrid
-
+- Introduced `--ignore-tests` flag – skip files/dirs whose path resembles tests (`test`, `spec`, `fixture`, `example`, `sample`), reducing noise.
## [1.16.0]
- Fix: HTML detection now requires both HTML content-type and "`: Specifies how deep nested archives should be extracted and scanned (default: 2)
- `--redact`: Replaces discovered secrets with a one-way hash for secure output
+- `--ignore-tests`: Skip files or directories whose path component contains _test_, _spec_, _fixture_, _example_, or _sample_ (case-insensitive)
## Finding Fingerprint
diff --git a/data/rules/nasa.yml b/data/rules/nasa.yml
index c7b56d6..efbcc42 100644
--- a/data/rules/nasa.yml
+++ b/data/rules/nasa.yml
@@ -26,10 +26,10 @@ rules:
url: https://api.nasa.gov/planetary/apod?date=1995-06-16&api_key={{ TOKEN }}
headers:
Accept: application/json
- response_matcher:
- - report_response: true
- - type: StatusMatch
- status: [200]
- - type: WordMatch
- words:
- - '"url":'
+ response_matcher:
+ - report_response: true
+ - type: StatusMatch
+ status: [200]
+ - type: WordMatch
+ words:
+ - '"url":'
diff --git a/src/cli/commands/scan.rs b/src/cli/commands/scan.rs
index 9d3fc1f..36726e0 100644
--- a/src/cli/commands/scan.rs
+++ b/src/cli/commands/scan.rs
@@ -88,6 +88,10 @@ pub struct ScanArgs {
#[arg(long, default_value_t = false)]
pub no_dedup: bool,
+ /// Ignore matches that appear to come from test files
+ #[arg(long, default_value_t = false)]
+ pub ignore_tests: bool,
+
/// Redact findings values using a secure hash
#[arg(long, short = 'r', default_value_t = false)]
pub redact: bool,
diff --git a/src/main.rs b/src/main.rs
index 0a1e07b..280422c 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -289,6 +289,7 @@ fn create_default_scan_args() -> cli::commands::scan::ScanArgs {
redact: false,
git_repo_timeout: 1800,
no_dedup: false,
+ ignore_tests: false,
snippet_length: 256,
output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
}
diff --git a/src/reporter/json_format.rs b/src/reporter/json_format.rs
index 9ccbc29..ec45e83 100644
--- a/src/reporter/json_format.rs
+++ b/src/reporter/json_format.rs
@@ -389,6 +389,7 @@ mod tests {
cli::commands::scan::ScanArgs {
num_jobs: 1,
no_dedup: false,
+ ignore_tests: false,
rules: RuleSpecifierArgs {
rules_path: Vec::new(),
rule: vec!["all".into()],
diff --git a/src/reporter/pretty_format.rs b/src/reporter/pretty_format.rs
index 6d6f398..740f82a 100644
--- a/src/reporter/pretty_format.rs
+++ b/src/reporter/pretty_format.rs
@@ -312,6 +312,7 @@ fn test_pretty_format_with_nan_entropy_panics() {
// core execution / performance
num_jobs: 1,
no_dedup: false,
+ ignore_tests: false,
// rule selection
rules: RuleSpecifierArgs {
diff --git a/src/update.rs b/src/update.rs
index 4758ab1..bd0f212 100644
--- a/src/update.rs
+++ b/src/update.rs
@@ -7,8 +7,8 @@
//
// Version handling logic covers three scenarios:
// 1. Running version == latest release → "up to date".
-// 2. Running version > latest release → print a notice that the binary is
-// **newer** than anything on GitHub (e.g. a dev build).
+// 2. Running version > latest release → print a notice that the binary is **newer** than
+// anything on GitHub (e.g. a dev build).
// 3. Latest release > running version → offer to self‑update.
//
// All informational messages are printed with the
@@ -25,10 +25,7 @@ use self_update::{backends::github::Update, cargo_crate_version, errors::Error a
use semver::Version;
use tracing::{error, info, warn};
-use crate::{
- cli::global::GlobalArgs,
- reporter::styles::Styles,
-};
+use crate::{cli::global::GlobalArgs, reporter::styles::Styles};
/// Return `true` when the canonical executable path lives inside a Homebrew Cellar.
/// Works for Intel macOS (/usr/local/Cellar), Apple‑Silicon macOS (/opt/homebrew/Cellar)
@@ -38,16 +35,13 @@ fn installed_via_homebrew() -> bool {
std::env::current_exe().ok().and_then(|p| fs::canonicalize(p).ok())
}
- canonical_exe()
- .map(|p| p.components().any(|c| c.as_os_str() == "Cellar"))
- .unwrap_or(false)
+ canonical_exe().map(|p| p.components().any(|c| c.as_os_str() == "Cellar")).unwrap_or(false)
}
/// Check GitHub for a newer Kingfisher release and optionally self‑update.
///
/// * `base_url` lets tests point at a mock server.
-/// * Self‑update is skipped when the user disabled it **or** the binary is a
-/// Homebrew install.
+/// * Self‑update is skipped when the user disabled it **or** the binary is a Homebrew install.
pub fn check_for_update(global_args: &GlobalArgs, base_url: Option<&str>) -> Option {
if global_args.no_update_check {
return None;
@@ -61,14 +55,13 @@ pub fn check_for_update(global_args: &GlobalArgs, base_url: Option<&str>) -> Opt
if is_brew {
info!(
"{}",
- styles
- .style_finding_active_heading
- .apply_to("Homebrew install detected – will notify about updates but not self‑update")
+ styles.style_finding_active_heading.apply_to(
+ "Homebrew install detected – will notify about updates but not self‑update"
+ )
);
}
- info!(
- "{}","Checking for updates…");
+ info!("{}", "Checking for updates…");
let mut builder = Update::configure();
builder
@@ -124,15 +117,11 @@ pub fn check_for_update(global_args: &GlobalArgs, base_url: Option<&str>) -> Opt
// Try semantic version comparison. If parsing fails, fall back to the
// self‑update code‑path (which will treat the strings lexicographically).
- if let (Ok(curr), Ok(latest)) = (
- Version::parse(running_v),
- Version::parse(&release.version),
- ) {
+ if let (Ok(curr), Ok(latest)) = (Version::parse(running_v), Version::parse(&release.version)) {
// ───────── Case 2: running > latest (dev build) ─────────
if curr > latest {
- let plain = format!(
- "Running Kingfisher {curr} which is newer than latest released {latest}"
- );
+ let plain =
+ format!("Running Kingfisher {curr} which is newer than latest released {latest}");
info!("{}", styles.style_finding_active_heading.apply_to(&plain));
return Some(plain);
}
@@ -171,7 +160,7 @@ pub fn check_for_update(global_args: &GlobalArgs, base_url: Option<&str>) -> Opt
"{}",
styles
.style_finding_active_heading
- .apply_to("Run `brew upgrade kingfisher` to install the new version.")
+ .apply_to("Run `brew upgrade kingfisher` to install the new version.")
);
}
diff --git a/src/util.rs b/src/util.rs
index 3edf138..153e1a1 100644
--- a/src/util.rs
+++ b/src/util.rs
@@ -108,6 +108,27 @@ pub fn is_base64(input: &str) -> bool {
.bytes()
.all(|b| matches!(b, b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'+' | b'/' | b'='))
}
+
+/// Heuristic check whether a path points to test files or directories.
+///
+/// Looks for common substrings like "test", "spec", "fixture", "example", or
+/// "sample" in any path component. Case-insensitive.
+pub fn is_test_like_path(path: &Path) -> bool {
+ path.components().any(|c| {
+ if let std::path::Component::Normal(os) = c {
+ if let Some(name) = os.to_str() {
+ let name = name.to_ascii_lowercase();
+ return name.contains("test")
+ || name.contains("spec")
+ || name.contains("fixture")
+ || name.contains("example")
+ || name.contains("sample");
+ }
+ }
+ false
+ })
+}
+
#[cfg(test)]
mod tests {
use std::{
@@ -115,7 +136,46 @@ mod tests {
path::PathBuf,
};
- use super::*;
+ use super::{is_test_like_path, *};
+
+ /// Paths that **should** be classified as test-like.
+ #[test]
+ fn test_is_test_like_path_positive() {
+ let positives = [
+ "src/tests/helpers.rs",
+ "/project/spec/controllers/user_spec.rb",
+ "C:\\repo\\fixtures\\config.json",
+ "examples/hello_world/main.go",
+ "samples/data/sample_input.txt",
+ ];
+
+ for p in positives {
+ assert!(
+ is_test_like_path(Path::new(p)),
+ "Path {p:?} was expected to be test-like but was not"
+ );
+ }
+ }
+
+ /// Paths that **should not** be classified as test-like.
+ #[test]
+ fn test_is_test_like_path_negative() {
+ let negatives = [
+ "src/main.rs",
+ "/opt/service/config/production.yml",
+ "C:\\Program Files\\app\\README.md",
+ "docs/architecture/overview.md",
+ "assets/images/logo.png",
+ ];
+
+ for p in negatives {
+ assert!(
+ !is_test_like_path(Path::new(p)),
+ "Path {p:?} was incorrectly classified as test-like"
+ );
+ }
+ }
+
#[test]
fn test_counted_display_regular() {
let single = Counted::regular(1, "rule");
diff --git a/src/validation.rs b/src/validation.rs
index b45ff3b..4a056f3 100644
--- a/src/validation.rs
+++ b/src/validation.rs
@@ -990,131 +990,116 @@ rules:
Ok(())
}
-// // ────────────────────────────────────────────────────────────────
-// // Slack Webhook – end-to-end validation test
-// // ────────────────────────────────────────────────────────────────
-// #[tokio::test]
-// async fn test_actual_slack_webhook_validation() -> anyhow::Result<()> {
-// use std::sync::Arc;
+ // // ────────────────────────────────────────────────────────────────
+ // // Slack Webhook – end-to-end validation test
+ // // ────────────────────────────────────────────────────────────────
+ // #[tokio::test]
+ // async fn test_actual_slack_webhook_validation() -> anyhow::Result<()> {
+ // use std::sync::Arc;
-// use crossbeam_skiplist::SkipMap;
-// use http::StatusCode;
-// use rustc_hash::FxHashMap;
+ // use crossbeam_skiplist::SkipMap;
+ // use http::StatusCode;
+ // use rustc_hash::FxHashMap;
-// use crate::{
-// blob::BlobId,
-// liquid_filters::register_all,
-// location::OffsetSpan,
-// matcher::{OwnedBlobMatch, SerializableCapture, SerializableCaptures},
-// rules::{
-// rule::{Confidence, Rule},
-// Rules,
-// },
-// validation::{validate_single_match, Cache},
-// };
+ // use crate::{
+ // blob::BlobId,
+ // liquid_filters::register_all,
+ // location::OffsetSpan,
+ // matcher::{OwnedBlobMatch, SerializableCapture, SerializableCaptures},
+ // rules::{
+ // rule::{Confidence, Rule},
+ // Rules,
+ // },
+ // validation::{validate_single_match, Cache},
+ // };
-// // 1️⃣ YAML snippet with the **exact** Slack rule
-// let slack_yaml = r#"
-// rules:
-// - name: Slack Webhook
-// id: kingfisher.slack.4
-// pattern: |
-// (?xi)
-// \b
-// (
-// https://hooks\.slack\.com/services/
-// T[a-z0-9_-]{8,12}/
-// B[a-z0-9_-]{8,12}/
-// [a-z0-9_-]{20,30}
-// )
-// \b
-// min_entropy: 3.3
-// confidence: medium
-// examples:
-// - https://hooks.slack.com/services/TY40v9sZ9/BxIqhIXIi/NGUyXK6nK7HMAqd0ASzXluoV
-// - https://hooks.slack.com/services/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
-// validation:
-// type: Http
-// content:
-// request:
-// headers:
-// Content-Type: application/json
-// method: POST
-// response_matcher:
-// - report_response: true
-// - type: WordMatch
-// words:
-// - invalid_payload
-// - type: WordMatch
-// words:
-// - "invalid_token"
-// negative: true
-// url: "{{ TOKEN }}"
-// "#;
+ // // 1️⃣ YAML snippet with the **exact** Slack rule
+ // let slack_yaml = r#"
+ // rules:
+ // - name: Slack Webhook id: kingfisher.slack.4 pattern: | (?xi) \b ( https://hooks\.slack\.com/services/
+ // T[a-z0-9_-]{8,12}/ B[a-z0-9_-]{8,12}/ [a-z0-9_-]{20,30} ) \b min_entropy: 3.3 confidence:
+ // medium examples:
+ // - https://hooks.slack.com/services/TY40v9sZ9/BxIqhIXIi/NGUyXK6nK7HMAqd0ASzXluoV
+ // - https://hooks.slack.com/services/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
+ // validation:
+ // type: Http
+ // content:
+ // request:
+ // headers:
+ // Content-Type: application/json
+ // method: POST
+ // response_matcher:
+ // - report_response: true
+ // - type: WordMatch words:
+ // - invalid_payload
+ // - type: WordMatch words:
+ // - "invalid_token"
+ // negative: true
+ // url: "{{ TOKEN }}"
+ // "#;
-// // 2️⃣ Load that YAML into a Rules object
-// let data = vec![(std::path::Path::new("slack_test.yaml"), slack_yaml.as_bytes())];
-// let rules = Rules::from_paths_and_contents(data, Confidence::Low)?;
+ // // 2️⃣ Load that YAML into a Rules object
+ // let data = vec![(std::path::Path::new("slack_test.yaml"), slack_yaml.as_bytes())];
+ // let rules = Rules::from_paths_and_contents(data, Confidence::Low)?;
-// // 3️⃣ Pull the rule syntax & wrap into a Rule
-// let slack_rule_syntax = rules
-// .rules
-// .iter()
-// .find(|r| r.id == "kingfisher.slack.4")
-// .expect("Slack rule not found")
-// .clone();
-// let slack_rule = Rule::new(slack_rule_syntax);
+ // // 3️⃣ Pull the rule syntax & wrap into a Rule
+ // let slack_rule_syntax = rules
+ // .rules
+ // .iter()
+ // .find(|r| r.id == "kingfisher.slack.4")
+ // .expect("Slack rule not found")
+ // .clone();
+ // let slack_rule = Rule::new(slack_rule_syntax);
-// // 4️⃣ Provide a real-looking webhook URL (use one of the examples)
-// let token = "ENTER YOUR SLACK WEBHOOK URL HERE";
+ // // 4️⃣ Provide a real-looking webhook URL (use one of the examples)
+ // let token = "ENTER YOUR SLACK WEBHOOK URL HERE";
-// // 5️⃣ Build OwnedBlobMatch stub
-// let blob_id = BlobId::new(&token.as_bytes());
-// let mut owned_blob_match = OwnedBlobMatch {
-// rule: slack_rule.into(),
-// blob_id,
-// finding_fingerprint: 0,
-// matching_input_offset_span: OffsetSpan { start: 0, end: token.len() },
-// captures: SerializableCaptures {
-// captures: vec![SerializableCapture {
-// name: Some("TOKEN".to_string()),
-// match_number: -1,
-// start: 0,
-// end: token.len(),
-// value: token.into(),
-// }],
-// },
-// validation_response_body: String::new(),
-// validation_response_status: StatusCode::OK,
-// validation_success: false,
-// calculated_entropy: 5.0,
-// };
+ // // 5️⃣ Build OwnedBlobMatch stub
+ // let blob_id = BlobId::new(&token.as_bytes());
+ // let mut owned_blob_match = OwnedBlobMatch {
+ // rule: slack_rule.into(),
+ // blob_id,
+ // finding_fingerprint: 0,
+ // matching_input_offset_span: OffsetSpan { start: 0, end: token.len() },
+ // captures: SerializableCaptures {
+ // captures: vec![SerializableCapture {
+ // name: Some("TOKEN".to_string()),
+ // match_number: -1,
+ // start: 0,
+ // end: token.len(),
+ // value: token.into(),
+ // }],
+ // },
+ // validation_response_body: String::new(),
+ // validation_response_status: StatusCode::OK,
+ // validation_success: false,
+ // calculated_entropy: 5.0,
+ // };
-// // 6️⃣ Prepare helpers and run validation
-// let parser = register_all(liquid::ParserBuilder::with_stdlib()).build()?;
-// let client = reqwest::Client::new();
-// let cache: Cache = Arc::new(SkipMap::new());
-// let dependent_vars = FxHashMap::default();
-// let missing_deps = FxHashMap::default();
+ // // 6️⃣ Prepare helpers and run validation
+ // let parser = register_all(liquid::ParserBuilder::with_stdlib()).build()?;
+ // let client = reqwest::Client::new();
+ // let cache: Cache = Arc::new(SkipMap::new());
+ // let dependent_vars = FxHashMap::default();
+ // let missing_deps = FxHashMap::default();
-// validate_single_match(
-// &mut owned_blob_match,
-// &parser,
-// &client,
-// &dependent_vars,
-// &missing_deps,
-// &cache,
-// )
-// .await;
+ // validate_single_match(
+ // &mut owned_blob_match,
+ // &parser,
+ // &client,
+ // &dependent_vars,
+ // &missing_deps,
+ // &cache,
+ // )
+ // .await;
-// // 7️⃣ Inspect outcome (true ⇒ credential considered ACTIVE)
-// assert!(
-// owned_blob_match.validation_success,
-// "Slack webhook should be reported ACTIVE; body was {:?}",
-// owned_blob_match.validation_response_body
-// );
-
-// Ok(())
-// }
+ // // 7️⃣ Inspect outcome (true ⇒ credential considered ACTIVE)
+ // assert!(
+ // owned_blob_match.validation_success,
+ // "Slack webhook should be reported ACTIVE; body was {:?}",
+ // owned_blob_match.validation_response_body
+ // );
+ // Ok(())
+ // }
}
diff --git a/src/validation/httpvalidation.rs b/src/validation/httpvalidation.rs
index d2e9290..03fc10f 100644
--- a/src/validation/httpvalidation.rs
+++ b/src/validation/httpvalidation.rs
@@ -269,7 +269,6 @@ pub async fn retry_request(
}
/// Return `true` when the body is very likely HTML.
-///
fn body_looks_like_html(body: &str, headers: &HeaderMap) -> bool {
// ---- 1. header heuristic ---------------------------------------------
let header_says_html = headers
@@ -289,7 +288,6 @@ fn body_looks_like_html(body: &str, headers: &HeaderMap) -> bool {
header_says_html && body_looks_htmlish
}
-
/// Validate the response by checking word and status matchers.
pub fn validate_response(
matchers: &[ResponseMatcher],
@@ -298,7 +296,8 @@ pub fn validate_response(
headers: &HeaderMap,
html_allowed: bool,
) -> bool {
- // Since match_all_types is always true here, we simply require all word and status conditions to hold.
+ // Since match_all_types is always true here, we simply require all word and status conditions
+ // to hold.
let word_ok = matchers
.iter()
.filter_map(|m| {
@@ -490,7 +489,7 @@ mod tests {
r#type: "word-match".to_string(),
words: vec!["invalid_token".to_string()],
match_all_words: false,
- negative: true, // body must *not* contain “invalid_token”
+ negative: true, // body must *not* contain “invalid_token”
},
];
@@ -498,10 +497,7 @@ mod tests {
let body = "invalid_payload";
let status = StatusCode::BAD_REQUEST; // 400
let mut headers = HeaderMap::new();
- headers.insert(
- header::CONTENT_TYPE,
- HeaderValue::from_static("text/plain"),
- );
+ headers.insert(header::CONTENT_TYPE, HeaderValue::from_static("text/plain"));
// 3️⃣ Call validate_response with html_allowed = false
let ok = validate_response(&matchers, body, &status, &headers, false);
@@ -509,5 +505,4 @@ mod tests {
// 4️⃣ It *should* be valid (true) because all matcher conditions hold
assert!(ok, "Slack webhook response should be considered ACTIVE");
}
-
}
diff --git a/tests/int_dedup.rs b/tests/int_dedup.rs
index 739765a..94206d0 100644
--- a/tests/int_dedup.rs
+++ b/tests/int_dedup.rs
@@ -101,6 +101,7 @@ rules:
git_repo_timeout: 1800, // 30 minutes
output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
no_dedup,
+ ignore_tests: false,
snippet_length: 64,
};
diff --git a/tests/int_github.rs b/tests/int_github.rs
index 19763ff..e541812 100644
--- a/tests/int_github.rs
+++ b/tests/int_github.rs
@@ -88,6 +88,7 @@ fn test_github_remote_scan() -> Result<()> {
git_repo_timeout: 1800, // 30 minutes
output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
no_dedup: true,
+ ignore_tests: false,
snippet_length: 256,
};
// Create global arguments
diff --git a/tests/int_gitlab.rs b/tests/int_gitlab.rs
index fa13f05..ea0acaf 100644
--- a/tests/int_gitlab.rs
+++ b/tests/int_gitlab.rs
@@ -85,6 +85,7 @@ fn test_gitlab_remote_scan() -> Result<()> {
git_repo_timeout: 1800, // 30 minutes
output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
no_dedup: true,
+ ignore_tests: false,
snippet_length: 256,
};
diff --git a/tests/int_validation_cache.rs b/tests/int_validation_cache.rs
index 4b9af83..7f9ca80 100644
--- a/tests/int_validation_cache.rs
+++ b/tests/int_validation_cache.rs
@@ -144,6 +144,7 @@ async fn test_validation_cache_and_depvars() -> Result<()> {
git_repo_timeout: 1800, // 30 minutes
output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
no_dedup: true, // keep duplicates so the cache is stressed
+ ignore_tests: false,
snippet_length: 128,
};
diff --git a/tests/int_vulnerable_files.rs b/tests/int_vulnerable_files.rs
index 6ff3672..20ad438 100644
--- a/tests/int_vulnerable_files.rs
+++ b/tests/int_vulnerable_files.rs
@@ -87,6 +87,7 @@ impl TestContext {
git_repo_timeout: 1800, // 30 minutes
output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
no_dedup: true,
+ ignore_tests: false,
snippet_length: 256,
};
@@ -150,6 +151,7 @@ impl TestContext {
git_repo_timeout: 1800, // 30 minutes
output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
no_dedup: true,
+ ignore_tests: false,
snippet_length: 256,
};
diff --git a/tests/smoke_check_rules.rs b/tests/smoke_check_rules.rs
index d220285..7dff499 100644
--- a/tests/smoke_check_rules.rs
+++ b/tests/smoke_check_rules.rs
@@ -4,10 +4,8 @@ use std::process::Command;
use assert_cmd::prelude::*;
use predicates::prelude::*;
-
#[test]
fn check_rules() -> anyhow::Result<()> {
-
// ── run kingfisher ────────────────────────────────────────────────
Command::cargo_bin("kingfisher")?
.args([
From 441595689c671236977596c5288c8b2065936531 Mon Sep 17 00:00:00 2001
From: Mick Grove
Date: Sat, 28 Jun 2025 09:18:28 -0700
Subject: [PATCH 6/9] =?UTF-8?q?Introduced=20`--ignore-tests`=20flag=20?=
 =?UTF-8?q?=E2=80=93=20skip=20files/dirs=20whose=20path=20resembles=20?=
 =?UTF-8?q?tests=20(`test`,=20`spec`,=20`fixture`,=20`example`,=20?=
 =?UTF-8?q?`sample`),=20reducing=20noise.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
README.md | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/README.md b/README.md
index 75fa268..96e91f3 100644
--- a/README.md
+++ b/README.md
@@ -101,7 +101,7 @@ kingfisher scan ~/src/myrepo --no-validate
### Display only secrets confirmed active by third‑party APIs
```bash
-kingfisher scan ./service --only-valid
+kingfisher scan /path/to/repo --only-valid
```
### Output JSON and capture to a file
@@ -113,7 +113,7 @@ kingfisher scan . --format json | tee kingfisher.json
### Output SARIF directly to disk
```bash
-kingfisher scan . --format sarif --output findings.sarif
+kingfisher scan /path/to/repo --format sarif --output findings.sarif
```
### Pipe any text directly into Kingfisher by passing `-`
@@ -137,6 +137,12 @@ kingfisher scan /path/to/repo --rule kingfisher.aws
kingfisher scan /path/to/repo --rule-stats
```
+### Scan while ignoring likely test files
+```bash
+# Skip files and directories whose path looks test-related (test, spec, fixture, example, sample)
+kingfisher scan ./my-project --ignore-tests
+```
+
---
## Scanning GitHub
From 488635e9c6e902e67195c05d91572bc7a196ee6a Mon Sep 17 00:00:00 2001
From: Mick Grove
Date: Sat, 28 Jun 2025 09:20:44 -0700
Subject: [PATCH 7/9] updated test
---
src/util.rs | 1 +
1 file changed, 1 insertion(+)
diff --git a/src/util.rs b/src/util.rs
index 153e1a1..0113df4 100644
--- a/src/util.rs
+++ b/src/util.rs
@@ -146,6 +146,7 @@ mod tests {
"/project/spec/controllers/user_spec.rb",
"C:\\repo\\fixtures\\config.json",
"examples/hello_world/main.go",
+ "/home/user/scripts/local-testCert.pem",
"samples/data/sample_input.txt",
];
From 331c04823909edb5e64cc224a787db3d1983b6e3 Mon Sep 17 00:00:00 2001
From: Mick Grove
Date: Sat, 28 Jun 2025 09:22:13 -0700
Subject: [PATCH 8/9] updated test
---
CHANGELOG.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3c2c6ab..c583719 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,7 +4,7 @@ All notable changes to this project will be documented in this file.
## [1.17.0]
- Updated README to give proper attribution to Nosey Parker!
-- Added rules for sonarcloud, sonarqube, sourcegraph, shopify, truenas, squaare, sendgrid
+- Added rules for sonarcloud, sonarqube, sourcegraph, shopify, truenas, square, sendgrid, nasa, teamcity
- Introduced `--ignore-tests` flag – skip files/dirs whose path resembles tests (`test`, `spec`, `fixture`, `example`, `sample`), reducing noise.
## [1.16.0]
- Fix: HTML detection now requires both HTML content-type and "
Date: Sat, 28 Jun 2025 09:30:22 -0700
Subject: [PATCH 9/9] updated test
---
src/scanner/enumerate.rs | 23 ++++++++++++++++++-----
1 file changed, 18 insertions(+), 5 deletions(-)
diff --git a/src/scanner/enumerate.rs b/src/scanner/enumerate.rs
index d58b50c..d9ca0f8 100644
--- a/src/scanner/enumerate.rs
+++ b/src/scanner/enumerate.rs
@@ -37,6 +37,7 @@ use crate::{
util::is_compressed_file,
},
scanner_pool::ScannerPool,
+ util::is_test_like_path,
EnumeratorConfig, EnumeratorFileResult, FileResult, FilesystemEnumerator, FoundInput,
GitRepoEnumerator, GitRepoResult, GitRepoWithMetadataEnumerator, PathBuf,
};
@@ -188,11 +189,23 @@ pub fn enumerate_filesystem_inputs(
Ok(Some((origin_set, blob_metadata, vec_of_matches))) => {
for (_, single_match) in vec_of_matches {
// Send each match
- send_ds.send((
- Arc::new(origin_set.clone()),
- Arc::new(blob_metadata.clone()),
- single_match,
- ))?;
+ let is_test = if args.ignore_tests {
+ origin_set
+ .iter()
+ .filter_map(|o| o.full_path())
+ .any(|p| is_test_like_path(&p))
+ } else {
+ false
+ };
+
+ if !is_test {
+ // Send each match
+ send_ds.send((
+ Arc::new(origin_set.clone()),
+ Arc::new(blob_metadata.clone()),
+ single_match,
+ ))?;
+ }
}
}
Err(e) => {