diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 4debe1d..f2940cf 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -119,7 +119,7 @@ jobs: macos-x64: name: macOS x64 - runs-on: macos-13 + runs-on: macos-15-intel steps: - uses: actions/checkout@v4 diff --git a/.gitignore b/.gitignore index 8cd8f36..e552e7c 100644 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,8 @@ custom.py logs/* *.patch +*.orig +*.rej ### macOS ### # General diff --git a/CHANGELOG.md b/CHANGELOG.md index 7e23102..8f41796 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,18 @@ All notable changes to this project will be documented in this file. +## [v1.62.0] +- Added `pattern_requirements` checks to rules, providing lightweight post-regex character-class validation without lookarounds. See docs/RULES.md for detail +- Added an `ignore_if_contains` option to `pattern_requirements` to drop matches containing case-insensitive placeholder words, with tests covering the new behavior. +- Updated rules to adopt the new `pattern_requirements` support. +- Added checksum comparisons to `pattern_requirements`, new `suffix`, `crc32`, and `base62` Liquid filters, and verbose logging so mismatched checksums are skipped with context rather than reported as findings. +- Split GitHub token detections into fine-grained/fixed-format variants and enforce checksum validation for modern GitHub token families (PAT, OAuth, App, refresh) while preserving legacy coverage. +- Added a rule for Zuplo tokens. +- Added checksum calculation for Confluent, GitHub, and Zuplo tokens, which can drastically reduce false positive reports. +- Improved OpsGenie validation. +- Automatically enable `--no-dedup` when `--manage-baseline` is supplied so baseline management keeps every finding. +- This release is focused on further improving detection accuracy, before even attempting to validate findings. 
+ ## [v1.61.0] - Fixed local filesystem scans to keep `open_path_as_is` enabled when opening Git repositories and only disable it for diff-based scans. - Created Linux and Windows specific installer script diff --git a/Cargo.toml b/Cargo.toml index d85f76f..44afc1d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ publish = false [package] name = "kingfisher" -version = "1.61.0" +version = "1.62.0" description = "MongoDB's blazingly fast and accurate secret scanning and validation tool" edition.workspace = true rust-version.workspace = true @@ -75,9 +75,9 @@ include_dir = { version = "0.7", features = ["glob"] } strum = { version = "0.26", features = ["derive"] } sysinfo = "0.31.4" reqwest = { version = "0.12", default-features = false, features = [ - "json", - "gzip", - "brotli", + "json", + "gzip", + "brotli", "deflate", "stream", "rustls-tls", @@ -196,6 +196,7 @@ gcloud-storage = { version = "1.1.1", default-features = false, features = [ "auth", ] } tokei = "12.1.2" +crc32fast = "1.4.0" [target.'cfg(not(windows))'.dependencies] sha1 = { version = "0.10.6", features = ["asm"] } diff --git a/README.md b/README.md index 29a11e8..81b38b1 100644 --- a/README.md +++ b/README.md @@ -36,6 +36,7 @@ For a look at how Kingfisher has grown from its early foundations into today's f - **Broad AI SaaS coverage**: finds and validates tokens for OpenAI, Anthropic, Google Gemini, Cohere, Mistral, Stability AI, Replicate, xAI (Grok), Ollama, Langchain, Perplexity, Weights & Biases, Cerebras, Friendli, Fireworks.ai, NVIDIA NIM, Together.ai, Zhipu, and many more - **Compressed Files**: Supports extracting and scanning compressed files for secrets - **Baseline management**: generate and track baselines to suppress known secrets ([docs/BASELINE.md](/docs/BASELINE.md)) +- **Checksum-aware detection**: verifies tokens with built-in checksums (e.g., GitHub, Confluent, Zuplo) β€” no API calls required **Learn more:** [Introducing Kingfisher: Real‑Time Secret Detection and 
Validation](https://www.mongodb.com/blog/post/product-release-announcements/introducing-kingfisher-real-time-secret-detection-validation) @@ -67,6 +68,8 @@ See ([docs/COMPARISON.md](docs/COMPARISON.md)) - [ Run Kingfisher in Docker](#-run-kingfisher-in-docker) - [πŸ” Detection Rules at a Glance](#-detection-rules-at-a-glance) - [πŸ“ Write Custom Rules!](#-write-custom-rules) + - [Pattern requirements and placeholder filtering](#pattern-requirements-and-placeholder-filtering) + - [πŸ” Checksum Intelligence (New!)](#-checksum-intelligence-new) - [πŸŽ‰ Usage](#-usage) - [Basic Examples](#basic-examples) - [Scan with secret validation](#scan-with-secret-validation) @@ -323,8 +326,45 @@ However, you may want to add your own custom rules, or modify a detection to bet First, review [docs/RULES.md](/docs/RULES.md) to learn how to create custom Kingfisher rules. +### Pattern requirements and placeholder filtering + +Every rule can declare optional `pattern_requirements` to enforce additional character checks after a regex matches. Each field +is independent: + +- `min_digits`, `min_uppercase`, `min_lowercase`, and `min_special_chars` enforce complexity thresholds. +- `special_chars` lets you override the set of characters counted as "special" when `min_special_chars` is used. +- `ignore_if_contains` lists case-insensitive substrings that should cause a match to be discarded (for example, to drop + `test`, `demo`, or `localhost` values). +- `checksum` lets you compare an extracted portion of the match against a Liquid-rendered expectation. Provide `actual.template` + and `expected` Liquid snippets (with access to `{{ MATCH }}`, `{{ FULL_MATCH }}`, and any named capture as both its original + case and uppercase alias) and Kingfisher will skip the finding when the rendered values differ. Optional keys such as + `requires_capture` and `skip_if_missing` help you guard against legacy formats while onboarding the checksum-aware variant. 
+ +When a match is skipped because of `ignore_if_contains` or a checksum mismatch, Kingfisher logs the event at the `DEBUG` level alongside the rule that was evaluated. If you need to keep those matches for a particular scan, pass `--no-ignore-if-contains` to `kingfisher scan` to disable the substring filter without editing any rule files. Verbose mode (`-v`) will also show you the +checksum mismatch lengths so you can confirm why a finding was suppressed. + Once you've done that, you can provide your custom rules (defined in a YAML file) and provide it to Kingfisher at runtime --- no recompiling required! +### 🔐 Checksum Intelligence (New!) + +Modern API tokens increasingly include **built-in checksums**, short internal digests that make each credential self-verifiable. (For background, see [GitHub’s write-up on their newer token formats](https://github.blog/engineering/platform-security/behind-githubs-new-authentication-token-formats/) and why checksums slash false positives.) + +Kingfisher supports **checksum-aware matching** in rules, enabling **offline structural verification** of credentials *without* calling third-party APIs. + +By validating each token’s internal checksum (for tokens that support checksums), Kingfisher eliminates nearly all false positives—automatically skipping structurally invalid or fake tokens before validation ever runs. 
+ +**Why this matters** +- ✅ **Offline verification** — no API call required +- 🧠 **Industry-aligned** — compatible with prefix + checksum token designs (e.g., modern PATs) +- ⚡ **Lower false positives** — invalid tokens are filtered out by structure alone + +**Learn more**: implementation details and templating are documented in **[docs/RULES.md](docs/RULES.md)** + +--- + + +- **Checksum-aware detection**: verifies tokens with embedded checksums (offline) to cut false positives — see [docs/RULES.md](docs/RULES.md) + # 🎉 Usage ## Basic Examples @@ -1083,6 +1123,8 @@ kingfisher scan /path/to/code \ --baseline-file ./baseline-file.yml ``` +`--manage-baseline` automatically enables `--no-dedup` so the baseline captures every individual occurrence. + Use the same YAML file with the `--baseline-file` option on future scans to hide all recorded findings: ```bash @@ -1159,13 +1201,15 @@ leaves the default unchanged. - `--redact`: Replaces discovered secrets with a one-way hash for secure output - `--exclude `: Skip any file or directory whose path matches this glob pattern (repeatable, uses gitignore-style syntax, case sensitive) - `--baseline-file `: Ignore matches listed in a baseline YAML file -- `--manage-baseline`: Create or update the baseline file with current findings +- `--manage-baseline`: Create or update the baseline file with current findings (automatically enables `--no-dedup`) - `--skip-regex `: Ignore findings whose text matches this regex (repeatable) - `--skip-word `: Ignore findings containing this case-insensitive word (repeatable) - `--skip-aws-account `: Skip live AWS validation for findings tied to the specified AWS account number (repeatable, accepts comma-separated lists) - `--skip-aws-account-file `: Load AWS account numbers to skip from a file (one account per line; `#` comments allowed) - `--ignore-comment `: Honor additional inline directives from other scanners (repeatable; e.g. 
`--ignore-comment "gitleaks:allow"`) - `--no-ignore`: Disable inline directives entirely so every match is reported +- `--no-ignore-if-contains`: Ignore the `ignore_if_contains` filter in rules so placeholder words still produce findings + ## Understanding `--confidence` The `--confidence` flag sets a minimum confidence threshold, not an exact match. diff --git a/data/rules/adobe.yml b/data/rules/adobe.yml index 03b54d0..3eb79a2 100644 --- a/data/rules/adobe.yml +++ b/data/rules/adobe.yml @@ -6,11 +6,13 @@ rules: \b adobe (?:.|[\n\r]){0,32}? - \b + \b ( [A-F0-9]{32} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.5 examples: - adobeKey = 1a2b3c4d5e6f7890abcdef1234567890 @@ -60,7 +62,7 @@ rules: ( p8e-[A-Z0-9-]{32} ) - (?:[^A-Z0-9-]|$) + (?:[^A-Z0-9-]) min_entropy: 3.5 examples: - | diff --git a/data/rules/age.yml b/data/rules/age.yml index f296a26..e024a63 100644 --- a/data/rules/age.yml +++ b/data/rules/age.yml @@ -3,11 +3,14 @@ rules: id: kingfisher.age.1 pattern: | (?xi) - \b ( age1[0-9a-z]{58} ) \b + pattern_requirements: + min_digits: 2 + min_uppercase: 1 + min_lowercase: 1 min_entropy: 3.3 confidence: medium examples: @@ -21,11 +24,9 @@ rules: id: kingfisher.age.2 pattern: | (?xi) - \b ( AGE-SECRET-KEY-1[0-9A-Z]{58} ) - \b min_entropy: 3.3 confidence: medium examples: diff --git a/data/rules/ai21.yml b/data/rules/ai21.yml index 7a400eb..899e91c 100644 --- a/data/rules/ai21.yml +++ b/data/rules/ai21.yml @@ -19,6 +19,10 @@ rules: [0-9a-f]{12} ) \b + pattern_requirements: + min_digits: 2 + min_uppercase: 1 + min_lowercase: 1 min_entropy: 3.2 confidence: medium examples: diff --git a/data/rules/airbrake.yml b/data/rules/airbrake.yml index d54d261..8a25076 100644 --- a/data/rules/airbrake.yml +++ b/data/rules/airbrake.yml @@ -9,7 +9,11 @@ rules: ( [A-Z0-9-]{40} ) - \b + \b + pattern_requirements: + min_digits: 2 + min_uppercase: 1 + min_lowercase: 1 min_entropy: 4.5 confidence: medium examples: diff --git a/data/rules/airtable.yml 
b/data/rules/airtable.yml index 7b8b037..abb100d 100644 --- a/data/rules/airtable.yml +++ b/data/rules/airtable.yml @@ -10,7 +10,11 @@ rules: \. [a-z0-9]{62,66} ) - \b + \b + pattern_requirements: + min_digits: 2 + min_uppercase: 1 + min_lowercase: 1 min_entropy: 3.3 confidence: medium examples: @@ -37,7 +41,6 @@ rules: id: kingfisher.airtable.2 pattern: | (?xi) - \b ( [A-Z0-9]+\.v1\.[A-Z0-9_-]+\.[a-f0-9]+ ) diff --git a/data/rules/aiven.yml b/data/rules/aiven.yml index 09a7269..0b98079 100644 --- a/data/rules/aiven.yml +++ b/data/rules/aiven.yml @@ -9,6 +9,11 @@ rules: ( [a-z0-9/+=]{372} ) + (?:[^A-Za-z0-9/+=]) + pattern_requirements: + min_digits: 2 + min_uppercase: 1 + min_lowercase: 1 min_entropy: 3.3 confidence: medium examples: diff --git a/data/rules/algolia.yml b/data/rules/algolia.yml index dd65cf7..150cda9 100644 --- a/data/rules/algolia.yml +++ b/data/rules/algolia.yml @@ -5,11 +5,13 @@ rules: (?xi) algolia (?:.|[\n\r]){0,32}? - \b ( [a-z0-9]{32} ) \b + pattern_requirements: + min_digits: 2 + min_lowercase: 1 min_entropy: 3.5 confidence: medium examples: @@ -38,11 +40,12 @@ rules: (?xi) algolia (?:.|[\n\r]){0,16}? - \b ( [A-Z0-9]{10} ) - \b + \b + pattern_requirements: + min_digits: 2 min_entropy: 2.0 visible: false confidence: medium diff --git a/data/rules/alibaba.yml b/data/rules/alibaba.yml index b4807ec..990ab91 100644 --- a/data/rules/alibaba.yml +++ b/data/rules/alibaba.yml @@ -3,11 +3,14 @@ rules: id: kingfisher.alibabacloud.1 pattern: | (?xi) - \b ( LTAI[a-z0-9]{17,21} ) \b + pattern_requirements: + min_digits: 2 + min_uppercase: 1 + min_lowercase: 1 min_entropy: 4.0 confidence: medium visible: false @@ -21,11 +24,9 @@ rules: \b alibaba (?:.|[\n\r]){0,32}? 
- \b ( [a-z0-9]{30} ) - \b min_entropy: 4.2 confidence: medium examples: diff --git a/data/rules/anthropic.yml b/data/rules/anthropic.yml index 4f51c88..8907056 100644 --- a/data/rules/anthropic.yml +++ b/data/rules/anthropic.yml @@ -10,15 +10,16 @@ rules: - [\w\-]{93} AA - ) - \b + ) + \b + pattern_requirements: + min_digits: 2 + min_uppercase: 1 + min_lowercase: 1 min_entropy: 3.3 confidence: medium examples: - sk-ant-api668-Clm512odot9WDD7itfUU9R880nefA1EtYZDbpE-C9b0XQEWpqFKf9DQUo03vOfXl16oSmyar1CLF1SzV3YzpZJ6bahcpLAA - categories: - - api - - secret references: - https://docs.anthropic.com/claude/reference/authentication validation: diff --git a/data/rules/anypoint.yml b/data/rules/anypoint.yml index 4b3b8ab..d7d420b 100644 --- a/data/rules/anypoint.yml +++ b/data/rules/anypoint.yml @@ -18,8 +18,12 @@ rules: [0-9a-z]{4} - [0-9a-z]{12} - ) - \b + ) + \b + pattern_requirements: + min_digits: 2 + min_uppercase: 1 + min_lowercase: 1 min_entropy: 3.5 confidence: medium examples: diff --git a/data/rules/apify.yml b/data/rules/apify.yml index 78c99dd..1cf959c 100644 --- a/data/rules/apify.yml +++ b/data/rules/apify.yml @@ -3,11 +3,14 @@ rules: id: kingfisher.apify.1 pattern: | (?xi) - \b ( apify_api_[A-Z0-9]{34,38} ) \b + pattern_requirements: + min_digits: 2 + min_uppercase: 1 + min_lowercase: 1 confidence: medium min_entropy: 3.5 validation: diff --git a/data/rules/artifactory.yml b/data/rules/artifactory.yml index f53174e..ff172e3 100644 --- a/data/rules/artifactory.yml +++ b/data/rules/artifactory.yml @@ -8,6 +8,10 @@ rules: AKC[A-Z0-9]{64,74} ) \b + pattern_requirements: + min_digits: 2 + min_uppercase: 1 + min_lowercase: 1 min_entropy: 3.5 confidence: medium examples: diff --git a/data/rules/asana.yml b/data/rules/asana.yml index 85e30c5..2d43c31 100644 --- a/data/rules/asana.yml +++ b/data/rules/asana.yml @@ -10,7 +10,9 @@ rules: ( [0-9]{16} ) - \b + \b + pattern_requirements: + min_digits: 2 min_entropy: 3.5 confidence: medium examples: @@ -30,7 +32,11 @@ 
rules: ( [a-z0-9]{30,40} ) - \b + \b + pattern_requirements: + min_digits: 2 + min_uppercase: 1 + min_lowercase: 1 min_entropy: 3.5 confidence: medium examples: @@ -54,7 +60,9 @@ rules: [a-z0-9]{32,64} )? ) - \b + \b + pattern_requirements: + min_digits: 4 min_entropy: 3.5 confidence: medium examples: diff --git a/data/rules/assemblyai.yml b/data/rules/assemblyai.yml index 3c81111..38136d0 100644 --- a/data/rules/assemblyai.yml +++ b/data/rules/assemblyai.yml @@ -11,6 +11,9 @@ rules: [0-9a-z]{32} ) \b + pattern_requirements: + min_digits: 2 + min_lowercase: 1 min_entropy: 3.0 confidence: medium examples: diff --git a/data/rules/atlassian.yml b/data/rules/atlassian.yml index 32dccb2..5d198ee 100644 --- a/data/rules/atlassian.yml +++ b/data/rules/atlassian.yml @@ -10,7 +10,10 @@ rules: ( [a-z0-9]{24} ) - \b + \b + pattern_requirements: + min_digits: 2 + min_lowercase: 1 min_entropy: 3.5 confidence: medium examples: diff --git a/data/rules/auth0.yml b/data/rules/auth0.yml index 39a0b43..f77497f 100644 --- a/data/rules/auth0.yml +++ b/data/rules/auth0.yml @@ -10,7 +10,10 @@ rules: ( [a-z0-9_-]{32,60} ) - \b + \b + pattern_requirements: + min_digits: 2 + min_lowercase: 1 min_entropy: 3.5 confidence: medium visible: false diff --git a/data/rules/authress.yml b/data/rules/authress.yml index 163d979..1df4922 100644 --- a/data/rules/authress.yml +++ b/data/rules/authress.yml @@ -6,6 +6,10 @@ rules: ( (?:sc|ext|scauth|authress)_[a-z0-9]{5,30}\.[a-z0-9]{4,6}\.acc[_-][a-z0-9-]{10,32}\.[a-z0-9+/_=-]{30,120} ) + \b + pattern_requirements: + min_digits: 2 + min_lowercase: 1 confidence: medium min_entropy: 4.0 validation: diff --git a/data/rules/aws.yml b/data/rules/aws.yml index f5fcf4a..6cd938a 100644 --- a/data/rules/aws.yml +++ b/data/rules/aws.yml @@ -3,12 +3,14 @@ rules: id: kingfisher.aws.1 pattern: | (?xi) - \b + \b ( (?:A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA) [2-7A-Z]{16} ) - \b + \b + pattern_requirements: + min_digits: 2 min_entropy: 3.2 visible: false 
confidence: medium @@ -37,6 +39,8 @@ rules: ([A-Za-z0-9/+]{40}) \b ) + pattern_requirements: + min_digits: 2 min_entropy: 4.5 confidence: medium examples: @@ -60,6 +64,8 @@ rules: - name: AWS Session Token id: kingfisher.aws.4 pattern: '(?i)(?:aws.?session|aws.?session.?token|aws.?token)["''`]?\s{0,30}(?::|=>|=)\s{0,30}["''`]?([a-z0-9/+=]{16,200})[^a-z0-9/+=]' + pattern_requirements: + min_digits: 2 min_entropy: 3.3 confidence: medium examples: diff --git a/data/rules/azure.yml b/data/rules/azure.yml index 94a9902..1a1d650 100644 --- a/data/rules/azure.yml +++ b/data/rules/azure.yml @@ -90,6 +90,8 @@ rules: [A-Z0-9+/]{42}\+ACR[A-Z0-9]{6} ) \b + pattern_requirements: + min_digits: 2 confidence: medium min_entropy: 4.0 validation: diff --git a/data/rules/azuredevops.yml b/data/rules/azuredevops.yml index a607bc9..90fa4e8 100644 --- a/data/rules/azuredevops.yml +++ b/data/rules/azuredevops.yml @@ -21,9 +21,11 @@ rules: (?xi) \b ( - [a-z0-9]{75,76}AZDO[a-z0-9]{4,5} + [a-z0-9]{76}AZDO[a-z0-9]{4,5} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3 confidence: medium examples: diff --git a/data/rules/azureopenai.yml b/data/rules/azureopenai.yml index 87e8127..51cba8c 100644 --- a/data/rules/azureopenai.yml +++ b/data/rules/azureopenai.yml @@ -14,6 +14,9 @@ rules: [a-f0-9]{32} ) \b + pattern_requirements: + min_digits: 2 + min_lowercase: 2 min_entropy: 3.5 confidence: medium examples: diff --git a/data/rules/azuresearchquery.yml b/data/rules/azuresearchquery.yml index 45b84b3..6e37d82 100644 --- a/data/rules/azuresearchquery.yml +++ b/data/rules/azuresearchquery.yml @@ -12,6 +12,10 @@ rules: [0-9A-Z]{52} ) \b + pattern_requirements: + min_digits: 2 + min_uppercase: 1 + min_lowercase: 2 min_entropy: 3.3 confidence: medium examples: diff --git a/data/rules/azurestorage.yml b/data/rules/azurestorage.yml index aea15a9..1fe3c44 100644 --- a/data/rules/azurestorage.yml +++ b/data/rules/azurestorage.yml @@ -33,13 +33,25 @@ rules: (?:.|[\n\r]){0,128}? 
(?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) (?:.|[\n\r]){0,128}? + ["':\s=}\]\)] ( - [A-Z0-9+\\/-]{86,88}={0,2} + (?: + [A-Z0-9+\-]{86,88}={1,2} + ) + | + (?: + [A-Z0-9+\-]{86,88}\b + ) ) + pattern_requirements: + min_digits: 2 + min_uppercase: 2 + min_lowercase: 2 min_entropy: 4.0 confidence: medium examples: - - Azure AccountKey=Xy9aB8cD7eF6gH5iJ4kL3mN2oP1qR0sT9uV8wX7yZ6aB5cD4eF3gH2iJ1kL0mN9oP8qR7sT6uV5wX4yZ3aB2cD1eF0gH9iJ8kL7mN6oP5q==\ + - Azure AccountKey=Xy9aB8cD7eF6gH5iJ4kL3mN2oP1qR0sT9uV8wX7yZ6aB5cD4eF3gH2iJ1kL0mN9oP8qR7sT6uV5wX4yZ3aB2cD1q + - Azure AccountKey=Ky7aC1cD7eF6gH5iJ4kL3mN2oP1qR0sT9uV8wX7yZ6aB5cD4eF3gH2iJ1kL0mN9oP8qR7sT6uV5wX4yZ3aB2cD1g==\ validation: type: AzureStorage depends_on_rule: diff --git a/data/rules/baremetrics.yml b/data/rules/baremetrics.yml index ce0f37a..2844cd3 100644 --- a/data/rules/baremetrics.yml +++ b/data/rules/baremetrics.yml @@ -10,7 +10,9 @@ rules: ( [a-z0-9_-]{25} ) - \b + \b + pattern_requirements: + min_digits: 2 min_entropy: 3.3 confidence: medium references: diff --git a/data/rules/baseten.yml b/data/rules/baseten.yml index 8773d6c..b3ba12c 100644 --- a/data/rules/baseten.yml +++ b/data/rules/baseten.yml @@ -13,6 +13,10 @@ rules: [A-Za-z0-9]{32} ) \b + pattern_requirements: + min_digits: 2 + min_uppercase: 1 + min_lowercase: 1 min_entropy: 3.4 confidence: medium examples: diff --git a/data/rules/beamer.yml b/data/rules/beamer.yml index f051ff2..bbc2029 100644 --- a/data/rules/beamer.yml +++ b/data/rules/beamer.yml @@ -9,7 +9,12 @@ rules: \b ( b_[A-Z0-9=_\\/\\\-+]{44} - ) + ) + \b + pattern_requirements: + min_digits: 2 + min_uppercase: 1 + min_lowercase: 1 min_entropy: 3.0 confidence: medium examples: diff --git a/data/rules/bitbucket.yml b/data/rules/bitbucket.yml index ad7e74a..37912de 100644 --- a/data/rules/bitbucket.yml +++ b/data/rules/bitbucket.yml @@ -8,9 +8,10 @@ rules: (?:.|[\n\r]){0,16}? (?:client|id) (?:.|[\n\r]){0,16}? 
- \b ([a-z0-9]{30,40}) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.5 confidence: medium examples: @@ -43,11 +44,14 @@ rules: ( [a-z0-9+_\-+]{44} ) + \b + pattern_requirements: + min_digits: 2 min_entropy: 3.5 confidence: medium examples: - bitbucket_key=HedmnK9h6KD_eh9KK8FlI9ahUc8WfaNZ4gulbrtN2ouV - - bitbucket_secret=kd8j2h4jf9s8mf6l4k9j2h4jf9s8mf6l4k9j2h4jf9s8mf6l + - bitbucket_secret=kd8j2h4jf9s8mf6l4k9j2h4jf9s8mf6l4k9j2h4jf9s8 validation: type: Http content: diff --git a/data/rules/bitly.yml b/data/rules/bitly.yml index 3f4ca02..df79eb3 100644 --- a/data/rules/bitly.yml +++ b/data/rules/bitly.yml @@ -8,11 +8,13 @@ rules: (?:.|[\n\r]){0,32}? (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) (?:.|[\n\r]){0,32}? - \b ( [a-f0-9]{40} ) \b + pattern_requirements: + min_digits: 2 + min_lowercase: 2 confidence: medium min_entropy: 3.0 validation: diff --git a/data/rules/blynk.yml b/data/rules/blynk.yml index 4af12c6..1f58e85 100644 --- a/data/rules/blynk.yml +++ b/data/rules/blynk.yml @@ -6,6 +6,10 @@ rules: https://(?:fra1\.|lon1\.|ny3\.|sgp1\.|blr1\.)*blynk\.cloud/external/api/[A-Z0-9/]*\?token= ([A-Z0-9_\-]{32}) & + pattern_requirements: + min_digits: 2 + min_uppercase: 1 + min_lowercase: 1 min_entropy: 3.3 confidence: medium examples: @@ -21,6 +25,10 @@ rules: -H\s*"Authorization:\s*Bearer\s* ([A-Z0-9_\-]{40}) " + pattern_requirements: + min_digits: 2 + min_uppercase: 1 + min_lowercase: 1 min_entropy: 3.3 confidence: medium examples: @@ -35,6 +43,10 @@ rules: -H\s*"Authorization:\s*Bearer\s* ([A-Z0-9_\-]{40}) "[\s\\]*https://(?:fra1\.|lon1\.|ny3\.|sgp1\.|blr1\.)*blynk\.cloud/api + pattern_requirements: + min_digits: 2 + min_uppercase: 1 + min_lowercase: 1 min_entropy: 3.3 confidence: medium examples: @@ -50,6 +62,10 @@ rules: (oa2-client-id_[A-Z0-9_\-]{32}) (?: : | &client_secret= ) ([A-Z0-9_\-]{40}) + pattern_requirements: + min_digits: 2 + min_uppercase: 1 + min_lowercase: 1 min_entropy: 3.3 confidence: medium examples: @@ -66,6 +82,10 @@ rules: 
(oa2-client-id_[A-Z0-9_\-]{32}) :([A-Z0-9_\-]{40}) [\s\\]*https://(fra1\.|lon1\.|ny3\.|sgp1\.|blr1\.)*blynk\.cloud/oauth2 + pattern_requirements: + min_digits: 2 + min_uppercase: 1 + min_lowercase: 1 min_entropy: 3.3 confidence: medium examples: diff --git a/data/rules/buildkite.yml b/data/rules/buildkite.yml index 3728e98..045fa94 100644 --- a/data/rules/buildkite.yml +++ b/data/rules/buildkite.yml @@ -3,11 +3,13 @@ rules: id: kingfisher.buildkite.1 pattern: | (?xi) - \b ( bkua_[a-z0-9]{40} ) \b + pattern_requirements: + min_digits: 2 + min_lowercase: 1 min_entropy: 3.5 confidence: medium examples: diff --git a/data/rules/cerebras.yml b/data/rules/cerebras.yml index 73760a6..044506e 100644 --- a/data/rules/cerebras.yml +++ b/data/rules/cerebras.yml @@ -3,11 +3,13 @@ rules: id: kingfisher.cerebras.1 pattern: | (?xi) - \b ( csk-[a-z0-9]{48} ) \b + pattern_requirements: + min_digits: 2 + min_lowercase: 2 confidence: medium min_entropy: 3.0 validation: diff --git a/data/rules/circleci.yml b/data/rules/circleci.yml index f3f2d2a..da20a80 100644 --- a/data/rules/circleci.yml +++ b/data/rules/circleci.yml @@ -20,7 +20,9 @@ rules: _ [a-z0-9]{40} ) - \b + \b + pattern_requirements: + min_digits: 2 min_entropy: 3.5 confidence: medium examples: @@ -56,7 +58,10 @@ rules: ( [a-f0-9]{40} ) - \b + \b + pattern_requirements: + min_digits: 2 + min_lowercase: 2 min_entropy: 3.3 confidence: medium examples: diff --git a/data/rules/ciscomeraki.yml b/data/rules/ciscomeraki.yml index a8cc4f1..03f9d3a 100644 --- a/data/rules/ciscomeraki.yml +++ b/data/rules/ciscomeraki.yml @@ -5,11 +5,12 @@ rules: (?xi) meraki (?:.|[\n\r]){0,32}? 
- \b ( [0-9a-f]{40} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.3 confidence: medium examples: diff --git a/data/rules/clarifai.yml b/data/rules/clarifai.yml index d72c9f2..0942f19 100644 --- a/data/rules/clarifai.yml +++ b/data/rules/clarifai.yml @@ -11,6 +11,8 @@ rules: [0-9a-f]{32,36} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.0 confidence: medium examples: diff --git a/data/rules/clearbit.yml b/data/rules/clearbit.yml index caa9e7a..02e4469 100644 --- a/data/rules/clearbit.yml +++ b/data/rules/clearbit.yml @@ -13,6 +13,8 @@ rules: [0-9a-z_]{35} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.5 confidence: medium examples: diff --git a/data/rules/clickhouse.yml b/data/rules/clickhouse.yml index 57317b9..9f22697 100644 --- a/data/rules/clickhouse.yml +++ b/data/rules/clickhouse.yml @@ -8,6 +8,8 @@ rules: 4b1d[a-z0-9]{38} ) \b + pattern_requirements: + min_digits: 2 confidence: medium min_entropy: 3.5 examples: @@ -43,11 +45,11 @@ rules: (?:.|[\n\r]){0,16}? (?:ID|USER) (?:.|[\n\r]){0,16}? - \b ( [a-z0-9]{20} ) - \b + pattern_requirements: + min_digits: 2 confidence: medium visible: false min_entropy: 3.0 diff --git a/data/rules/clojars.yml b/data/rules/clojars.yml index 58fb19d..5dfaf53 100644 --- a/data/rules/clojars.yml +++ b/data/rules/clojars.yml @@ -10,9 +10,11 @@ rules: (?:.|[\n\r]){0,16}? 
\b ( - [a-zA-Z0-9_-]{3,} + [a-z0-9_-]{3,} ) \b + pattern_requirements: + min_digits: 2 confidence: medium min_entropy: 1.5 visible: false @@ -28,6 +30,8 @@ rules: CLOJARS_[a-z0-9]{60} ) \b + pattern_requirements: + min_digits: 2 confidence: medium min_entropy: 4.0 examples: diff --git a/data/rules/cloudflare.yml b/data/rules/cloudflare.yml index 776c1bd..aaf9d34 100644 --- a/data/rules/cloudflare.yml +++ b/data/rules/cloudflare.yml @@ -12,7 +12,11 @@ rules: ( [a-z0-9_-]{38,42} ) - \b + \b + pattern_requirements: + min_digits: 2 + min_uppercase: 1 + min_lowercase: 1 min_entropy: 3.5 confidence: medium examples: @@ -47,8 +51,9 @@ rules: ( v1\.0-[a-z0-9._-]{160,} ) - ["'`]? - \b + \b + pattern_requirements: + min_digits: 2 min_entropy: 4.5 confidence: medium examples: @@ -58,9 +63,6 @@ rules: references: - https://developers.cloudflare.com/api/keys/ - https://developers.cloudflare.com/fundamentals/api/get-started/keys/ - categories: - - api - - secret validation: type: Http content: diff --git a/data/rules/cloudsight.yml b/data/rules/cloudsight.yml index b06e922..f3bd7f6 100644 --- a/data/rules/cloudsight.yml +++ b/data/rules/cloudsight.yml @@ -11,8 +11,10 @@ rules: \b ( [a-z0-9]{20,24} - ) - \b + ) + \b + pattern_requirements: + min_digits: 2 min_entropy: 3.5 confidence: medium examples: diff --git a/data/rules/codacy.yml b/data/rules/codacy.yml index 121dbf4..e0a68d5 100644 --- a/data/rules/codacy.yml +++ b/data/rules/codacy.yml @@ -12,6 +12,9 @@ rules: ( [0-9A-Z]{20,24} ) + \b + pattern_requirements: + min_digits: 2 min_entropy: 3.5 confidence: medium examples: diff --git a/data/rules/codeclimate.yml b/data/rules/codeclimate.yml index 677cbc5..aeaa805 100644 --- a/data/rules/codeclimate.yml +++ b/data/rules/codeclimate.yml @@ -5,10 +5,13 @@ rules: (?xi) (?: CODECLIMATE| CC_TEST_REPORTER_ID) (?:.|[\n\r]){0,64}? 
+ \b ( [a-f0-9]{64} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.3 confidence: medium examples: diff --git a/data/rules/codecov.yml b/data/rules/codecov.yml index 1bc5e7a..a2ab1e7 100644 --- a/data/rules/codecov.yml +++ b/data/rules/codecov.yml @@ -8,10 +8,13 @@ rules: (?:.|[\n\r]){0,32}? (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) (?:.|[\n\r]){0,32}? + \b ( [A-Z0-9-]{36} ) \b + pattern_requirements: + min_digits: 2 confidence: medium min_entropy: 3.5 validation: diff --git a/data/rules/cohere.yml b/data/rules/cohere.yml index a362570..f8136f4 100644 --- a/data/rules/cohere.yml +++ b/data/rules/cohere.yml @@ -6,10 +6,13 @@ rules: \b cohere (?:.|[\n\r]){0,16}? + \b ( [A-Z0-9]{40} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.5 confidence: medium examples: diff --git a/data/rules/coinbase.yml b/data/rules/coinbase.yml index c5a5763..159dc90 100644 --- a/data/rules/coinbase.yml +++ b/data/rules/coinbase.yml @@ -10,9 +10,11 @@ rules: (?:.|[\n\r]){0,16}? \b ( - [a-zA-Z-0-9]{32} + [a-z-0-9]{32} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.5 examples: - coinbase_token = 32iAkQCcHHYxXGx20VogBZoj27PC1ouI diff --git a/data/rules/confluent.yml b/data/rules/confluent.yml index 7deda55..49e9b69 100644 --- a/data/rules/confluent.yml +++ b/data/rules/confluent.yml @@ -10,6 +10,8 @@ rules: [A-Z0-9]{16} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3 confidence: medium visible: false @@ -33,8 +35,45 @@ rules: min_entropy: 3.3 confidence: medium examples: - - confluent secret=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890ab - - kafka_token=ABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890abcdefghijklmnopqrstuvwxyzABCD + - confluent secret=cbadefghijklmnopqrstuvwxyzcbaDEFGHIJKLMNOPQRSTUVWXYZ3214567890ab + - kafka_token=cbaDEFGHIJKLMNOPQRSTUVWXYZ3214567890cbadefghijklmnopqrstuvwxyzAB + references: + - https://docs.confluent.io/cloud/current/api.html#tag/API-Keys-(iamv2)/operation/getIamV2ApiKey + validation: + type: Http + content: + 
request: + headers: + Authorization: 'Basic {{ CLIENTID | append: ":" | append: TOKEN | b64enc }}' + method: GET + response_matcher: + - report_response: true + - status: + - 200 + type: StatusMatch + url: https://api.confluent.cloud/iam/v2/api-keys/{{ CLIENTID }} + depends_on_rule: + - rule_id: "kingfisher.confluent.1" + variable: CLIENTID + - name: Confluent API Secret - Updated Format + id: kingfisher.confluent.3 + pattern: | + (?xi) + \b + ( + cflt(?P<body>[A-Za-z0-9\+/]{54})(?P<checksum>[A-Za-z0-9\+/]{6}) + ) + pattern_requirements: + checksum: + actual: + template: "{{ MATCH | suffix: 6 }}" + requires_capture: checksum + expected: "{{ BODY | crc32_le_b64: 6 }}" + skip_if_missing: true + min_entropy: 3.3 + confidence: medium + examples: + - confluent secret=cfltqPLd2lLPAtWtHGNhN32WlZxoEj30pcg8mzaPlPJ937JlMa7n9YCRLooqgifw references: - https://docs.confluent.io/cloud/current/api.html#tag/API-Keys-(iamv2)/operation/getIamV2ApiKey validation: diff --git a/data/rules/contentful.yml b/data/rules/contentful.yml index 3846abf..3c91b77 100644 --- a/data/rules/contentful.yml +++ b/data/rules/contentful.yml @@ -10,9 +10,12 @@ rules: (?:.|[\n\r]){0,32}? \b ( - [a-zA-Z0-9_-]{43,45} + [A-Z0-9_-]{43,45} ) \b + pattern_requirements: + min_digits: 2 + min_special_chars: 1 confidence: medium min_entropy: 4.0 validation: @@ -42,11 +45,11 @@ rules: id: kingfisher.contentful.2 pattern: | (?xi) - \b ( CFPAT-[A-Z0-9_-]{43} ) - \b + pattern_requirements: + min_digits: 2 confidence: medium min_entropy: 3.5 validation: diff --git a/data/rules/coze.yml b/data/rules/coze.yml index df3692a..c9783f3 100644 --- a/data/rules/coze.yml +++ b/data/rules/coze.yml @@ -1,7 +1,17 @@ rules: - name: Coze Personal Access Token id: kingfisher.coze.1 - pattern: '(?i)\b(pat_[a-zA-Z0-9]{64})\b' + pattern: | + (?xi) + coze + (?:.|[\n\r]){0,32}? 
+ \b + ( + pat_[A-Z0-9]{64} + ) + \b + pattern_requirements: + min_digits: 2 confidence: medium min_entropy: 5.0 validation: @@ -26,6 +36,6 @@ rules: - https://www.coze.com/docs/developer_guides/coze_api_overview - https://www.coze.com/docs/developer_guides/retrieve_files examples: - - "pat_DlOG7fNcVfmw8cYhPWNcdfwrjjzwDr9EkV8EBjzHdgRWU2DzqHC1pPe0x590NN5f" - - "pat_93QiTdIvZGuRCFcfGTQJJ1VIYZ9dNHanX88wKoMojwMk3tX5tKqfFtxUp0ux8CjI" - - "pat_WvUTLYq5yZyaqegkyLSxXJMjXAJotjYEuC1sqT8daFlfwM3BiaRVJIZsER42DnhV" + - "key_coze = pat_DlOG7fNcVfmw8cYhPWNcdfwrjjzwDr9EkV8EBjzHdgRWU2DzqHC1pPe0x590NN5f" + - "coze_token = pat_93QiTdIvZGuRCFcfGTQJJ1VIYZ9dNHanX88wKoMojwMk3tX5tKqfFtxUp0ux8CjI" + - "coze-key: pat_WvUTLYq5yZyaqegkyLSxXJMjXAJotjYEuC1sqT8daFlfwM3BiaRVJIZsER42DnhV" diff --git a/data/rules/crates.io.yml b/data/rules/crates.io.yml index 7f6b8f2..4aa7ef6 100644 --- a/data/rules/crates.io.yml +++ b/data/rules/crates.io.yml @@ -8,6 +8,8 @@ rules: cio[A-Z0-9]{32} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.3 confidence: medium examples: diff --git a/data/rules/databricks.yml b/data/rules/databricks.yml index 9cec5c3..294cb20 100644 --- a/data/rules/databricks.yml +++ b/data/rules/databricks.yml @@ -2,12 +2,17 @@ rules: - name: Databricks API token id: kingfisher.databricks.1 pattern: | - (?xi) + (?xi) + \b ( dapi [a-f0-9]{32} ) - \b + \b + pattern_requirements: + min_digits: 2 + min_uppercase: 1 + min_lowercase: 1 min_entropy: 3.3 confidence: medium examples: diff --git a/data/rules/datadog.yml b/data/rules/datadog.yml index 79ff171..6958bf8 100644 --- a/data/rules/datadog.yml +++ b/data/rules/datadog.yml @@ -13,6 +13,8 @@ rules: [a-z0-9]{32} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.3 confidence: medium examples: @@ -47,11 +49,13 @@ rules: (?:.|[\n\r]){0,64}? (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) (?:.|[\n\r]){0,16}? 
- \b ( [a-z0-9]{40} ) - \b + pattern_requirements: + min_digits: 2 + min_uppercase: 1 + min_lowercase: 1 min_entropy: 3.3 confidence: medium examples: diff --git a/data/rules/deepgram.yml b/data/rules/deepgram.yml index 850afa9..5a474f6 100644 --- a/data/rules/deepgram.yml +++ b/data/rules/deepgram.yml @@ -6,11 +6,13 @@ rules: \b deepgram (?:.|[\n\r]){0,32}? - \b + \b ( [0-9a-f]{40} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.5 confidence: medium examples: diff --git a/data/rules/deepseek.yml b/data/rules/deepseek.yml index f303c48..233ae52 100644 --- a/data/rules/deepseek.yml +++ b/data/rules/deepseek.yml @@ -6,8 +6,10 @@ rules: \b ( sk-[a-f0-9]{32} - ) + ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.7 confidence: medium examples: diff --git a/data/rules/dependency_track.yml b/data/rules/dependency_track.yml index e41aa32..10f397c 100644 --- a/data/rules/dependency_track.yml +++ b/data/rules/dependency_track.yml @@ -8,6 +8,8 @@ rules: odt_[A-Z0-9]{32,255} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.3 confidence: medium examples: diff --git a/data/rules/diffbot.yml b/data/rules/diffbot.yml index 231109b..01c4114 100644 --- a/data/rules/diffbot.yml +++ b/data/rules/diffbot.yml @@ -11,6 +11,8 @@ rules: [0-9a-z]{32} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.0 examples: - diffbot_key = a7424adbafc4624e61482d0f60e43016 diff --git a/data/rules/digitalocean.yml b/data/rules/digitalocean.yml index b02bb52..b6ca932 100644 --- a/data/rules/digitalocean.yml +++ b/data/rules/digitalocean.yml @@ -9,6 +9,8 @@ rules: [a-f0-9]{64} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.3 confidence: medium examples: @@ -33,12 +35,10 @@ rules: id: kingfisher.digitalocean.2 pattern: | (?x) - \b ( dor_v1_ [a-f0-9]{64} ) - \b min_entropy: 3.3 confidence: medium examples: diff --git a/data/rules/discord.yml b/data/rules/discord.yml index 6b8809b..f350c29 100644 --- a/data/rules/discord.yml +++ b/data/rules/discord.yml 
@@ -10,6 +10,9 @@ rules: ( [0-9a-z_\-]{68} ) + \b + pattern_requirements: + min_digits: 2 min_entropy: 3.5 confidence: medium examples: @@ -32,7 +35,6 @@ rules: id: kingfisher.discord.2 pattern: | (?xi) - \b ( [MNO][A-Z0-9_-]{23}\.[A-Z0-9_-]{6}\.[A-Z0-9_-]{27} ) @@ -61,14 +63,11 @@ rules: id: kingfisher.discord.3 pattern: | (?xi) - \b (?:discord|botid|bot_id) (?:.|[\n\r]){0,64}? - \b ( \d{17,19} ) - \b min_entropy: 3.5 visible: false confidence: medium diff --git a/data/rules/django.yml b/data/rules/django.yml index c335542..c9efa82 100644 --- a/data/rules/django.yml +++ b/data/rules/django.yml @@ -5,7 +5,6 @@ rules: (?x) [DJANGO]\w{0,8}SECRET_KEY .{1,16}? - \b ( [A-Za-z0-9*!$@\#&_%^-]{45,55} ) diff --git a/data/rules/docker.yml b/data/rules/docker.yml index 1d37ab8..1503cd4 100644 --- a/data/rules/docker.yml +++ b/data/rules/docker.yml @@ -12,6 +12,8 @@ rules: \} [^}]*? \} + pattern_requirements: + min_digits: 2 min_entropy: 2.0 confidence: medium examples: diff --git a/data/rules/dockerhub.yml b/data/rules/dockerhub.yml index e26d108..45c740b 100644 --- a/data/rules/dockerhub.yml +++ b/data/rules/dockerhub.yml @@ -8,6 +8,8 @@ rules: dckr_pat_[A-Z0-9_-]{27} ) (?: $ | [^A-Z0-9_-] ) + pattern_requirements: + min_digits: 2 min_entropy: 3.3 confidence: medium examples: diff --git a/data/rules/doppler.yml b/data/rules/doppler.yml index c8c302a..fde9282 100644 --- a/data/rules/doppler.yml +++ b/data/rules/doppler.yml @@ -4,8 +4,12 @@ rules: pattern: | (?xi) \b - (dp\.ct\.[A-Z0-9]{40,44}) + ( + dp\.ct\.[A-Z0-9]{40,44} + ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.3 confidence: medium examples: diff --git a/data/rules/droneci.yml b/data/rules/droneci.yml index 8e26b66..e6a4bd1 100644 --- a/data/rules/droneci.yml +++ b/data/rules/droneci.yml @@ -15,6 +15,8 @@ rules: [a-f0-9]{32,64} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.5 confidence: medium examples: diff --git a/data/rules/dropbox.yml b/data/rules/dropbox.yml index d7d4766..d21eb80 
100644 --- a/data/rules/dropbox.yml +++ b/data/rules/dropbox.yml @@ -4,11 +4,12 @@ rules: pattern: | (?xi) \b - sl\. ( - [A-Z0-9\-\_]{130,152} + sl\.[A-Z0-9\-\_]{130,152} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.3 confidence: medium examples: diff --git a/data/rules/duffel.yml b/data/rules/duffel.yml index 9b52965..da0a928 100644 --- a/data/rules/duffel.yml +++ b/data/rules/duffel.yml @@ -8,6 +8,8 @@ rules: duffel_(?:test|live)_[a-z0-9_\-=]{43} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.2 confidence: medium examples: diff --git a/data/rules/dynatrace.yml b/data/rules/dynatrace.yml index af8a415..29226f4 100644 --- a/data/rules/dynatrace.yml +++ b/data/rules/dynatrace.yml @@ -12,6 +12,8 @@ rules: [A-Z0-9]{64} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.3 confidence: medium examples: diff --git a/data/rules/easypost.yml b/data/rules/easypost.yml index 757a70a..0c85fb6 100644 --- a/data/rules/easypost.yml +++ b/data/rules/easypost.yml @@ -9,6 +9,8 @@ rules: [A-Za-z0-9]{54} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.5 confidence: medium examples: diff --git a/data/rules/elevenlabs.yml b/data/rules/elevenlabs.yml index ade342c..9378a34 100644 --- a/data/rules/elevenlabs.yml +++ b/data/rules/elevenlabs.yml @@ -2,13 +2,15 @@ rules: - name: ElevenLabs API Key id: kingfisher.elevenlabs.1 pattern: | - (?xi) - \b + (?xi) + \b ( sk_ [0-9a-f]{48} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.5 confidence: medium examples: diff --git a/data/rules/facebook.yml b/data/rules/facebook.yml index d3ef582..841cda9 100644 --- a/data/rules/facebook.yml +++ b/data/rules/facebook.yml @@ -11,8 +11,10 @@ rules: \b ( \d{15} - ) - \b + ) + \b + pattern_requirements: + min_digits: 15 min_entropy: 2.0 visible: false confidence: medium @@ -37,6 +39,8 @@ rules: - " var fbApiKey = '0278fc1adf6dc1d82a156f306ce2c5cc';" - ' fbApiKey: "171e84fd57f430fc59afa8fad3dbda2a",' - '"facebook appSecret = 
"ce3f9f0362bbe5ab01dfc8ee565e4372"' + pattern_requirements: + min_digits: 2 validation: type: Http content: @@ -69,9 +73,12 @@ rules: (?:access_token|access[\s-]token) (?:.|[\n\r]){0,32}? )? + ( + EAACEdEose0cBA[A-Z0-9]{20,} + ) \b - (EAACEdEose0cBA[A-Z0-9]{20,}) - \b + pattern_requirements: + min_digits: 2 min_entropy: 3.3 confidence: medium examples: diff --git a/data/rules/fastly.yml b/data/rules/fastly.yml index 77618a6..07cadce 100644 --- a/data/rules/fastly.yml +++ b/data/rules/fastly.yml @@ -13,6 +13,8 @@ rules: [a-z0-9_-]{32} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.5 confidence: medium examples: diff --git a/data/rules/figma.yml b/data/rules/figma.yml index 7cc6f00..b5faf90 100644 --- a/data/rules/figma.yml +++ b/data/rules/figma.yml @@ -8,6 +8,8 @@ rules: figd_[A-Z0-9_-]{38,42} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.5 confidence: medium examples: @@ -36,14 +38,14 @@ rules: (?xi) figma (?:.|[\n\r]){0,32}? - \b ( [0-9A-F]{4} -[0-9A-F]{8} (?:-[0-9A-F]{4}){3} -[0-9A-F]{12} ) - \b + pattern_requirements: + min_digits: 2 examples: - "--header='X-Figma-Token: 1394-0ca7a5be-8e22-40ee-8c40-778d41ab2313'" references: diff --git a/data/rules/fileio.yml b/data/rules/fileio.yml index 7bf255e..804fcd4 100644 --- a/data/rules/fileio.yml +++ b/data/rules/fileio.yml @@ -14,6 +14,8 @@ rules: \.[A-Z0-9]{20} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.3 confidence: medium examples: diff --git a/data/rules/finicity.yml b/data/rules/finicity.yml index 5273fbf..4a29310 100644 --- a/data/rules/finicity.yml +++ b/data/rules/finicity.yml @@ -13,6 +13,8 @@ rules: [a-f0-9]{32} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.5 confidence: medium examples: diff --git a/data/rules/finnhub.yml b/data/rules/finnhub.yml index 01e8c0a..3d71fb6 100644 --- a/data/rules/finnhub.yml +++ b/data/rules/finnhub.yml @@ -11,6 +11,8 @@ rules: [a-z0-9]{20} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.0 confidence: 
medium examples: diff --git a/data/rules/firecrawl.yml b/data/rules/firecrawl.yml index fc729c6..80665b3 100644 --- a/data/rules/firecrawl.yml +++ b/data/rules/firecrawl.yml @@ -8,6 +8,8 @@ rules: fc-[a-f0-9]{32} ) \b + pattern_requirements: + min_digits: 2 confidence: medium min_entropy: 3.0 validation: diff --git a/data/rules/fireworksai.yml b/data/rules/fireworksai.yml index 0933441..f96d251 100644 --- a/data/rules/fireworksai.yml +++ b/data/rules/fireworksai.yml @@ -8,6 +8,8 @@ rules: fw_[A-Z0-9]{24} ) \b + pattern_requirements: + min_digits: 2 confidence: medium min_entropy: 3.5 validation: diff --git a/data/rules/flickr.yml b/data/rules/flickr.yml index d789348..fb104eb 100644 --- a/data/rules/flickr.yml +++ b/data/rules/flickr.yml @@ -13,6 +13,8 @@ rules: [a-f0-9]{32} ) \b + pattern_requirements: + min_digits: 2 confidence: medium min_entropy: 3.0 validation: @@ -43,11 +45,11 @@ rules: (?:.|[\n\r]){0,32}? (?:OAUTH|ACCESS|TOKEN)? (?:.|[\n\r]){0,32}? - \b ( [a-f0-9]{32} ) - \b + pattern_requirements: + min_digits: 2 confidence: medium min_entropy: 3.0 validation: diff --git a/data/rules/flyio.yml b/data/rules/flyio.yml index 98b3a6a..4764279 100644 --- a/data/rules/flyio.yml +++ b/data/rules/flyio.yml @@ -8,6 +8,8 @@ rules: FlyV1\s[A-Za-z0-9=_\-,/+]{100,} ) \b + pattern_requirements: + min_digits: 2 confidence: medium min_entropy: 4.0 validation: diff --git a/data/rules/frame.io.yml b/data/rules/frame.io.yml index a5ef94b..035aefc 100644 --- a/data/rules/frame.io.yml +++ b/data/rules/frame.io.yml @@ -7,7 +7,11 @@ rules: ( fio-u-(?:[A-Z0-9_-]{16}){4} ) - \b + \b + pattern_requirements: + min_digits: 2 + min_uppercase: 1 + min_lowercase: 1 min_entropy: 3.5 confidence: medium examples: diff --git a/data/rules/frameio.yml b/data/rules/frameio.yml index 1910965..cfbfe10 100644 --- a/data/rules/frameio.yml +++ b/data/rules/frameio.yml @@ -7,6 +7,9 @@ rules: ( fio-u-[a-z0-9\-_=]{64} ) + \b + pattern_requirements: + min_digits: 2 min_entropy: 3.3 confidence: medium 
examples: diff --git a/data/rules/freshbooks.yml b/data/rules/freshbooks.yml index 95fadce..d63c14a 100644 --- a/data/rules/freshbooks.yml +++ b/data/rules/freshbooks.yml @@ -11,6 +11,8 @@ rules: [a-z0-9]{64} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.5 confidence: medium examples: diff --git a/data/rules/friendli.yml b/data/rules/friendli.yml index ec5f3ec..6881ffc 100644 --- a/data/rules/friendli.yml +++ b/data/rules/friendli.yml @@ -8,6 +8,8 @@ rules: flp_[A-Z0-9]{46} ) \b + pattern_requirements: + min_digits: 2 confidence: medium min_entropy: 3.0 validation: diff --git a/data/rules/gcp.yml b/data/rules/gcp.yml index e23acb4..30d4d1d 100644 --- a/data/rules/gcp.yml +++ b/data/rules/gcp.yml @@ -15,6 +15,8 @@ rules: "auth_provider_x509_cert_url":\s*".+?" (?:[^{}]|\{(?:[^{}]|\{[^{}]*\})*\})* \} + pattern_requirements: + min_digits: 2 min_entropy: 4.5 confidence: high examples: @@ -48,9 +50,13 @@ rules: [=:] \s{0,8} ["']? - ([0-9a-z]{35,40}) + ( + [0-9a-z]{35,40} + ) ["']? - \b + \b + pattern_requirements: + min_digits: 2 min_entropy: 3.5 confidence: medium examples: diff --git a/data/rules/generic.yml b/data/rules/generic.yml index be42e5c..75d8fc5 100644 --- a/data/rules/generic.yml +++ b/data/rules/generic.yml @@ -5,9 +5,12 @@ rules: (?xi) secret .{0,20} + ( + [0-9a-z]{32,64} + ) \b - ([0-9a-z]{32,64}) - \b + pattern_requirements: + min_digits: 2 min_entropy: 3.3 confidence: low examples: @@ -18,9 +21,12 @@ rules: (?xi) (?: api_key | apikey | access_key | accesskey ) (?:.|[\n\r]){0,8}? + ( + [0-9a-z][0-9a-z\-._/+]{30,62}[0-9a-z] + ) \b - ([0-9a-z][0-9a-z\-._/+]{30,62}[0-9a-z]) - \b + pattern_requirements: + min_digits: 2 min_entropy: 3.3 confidence: low examples: @@ -31,7 +37,6 @@ rules: pattern: | (?xi) (?: username | user) - \b (?:.|[\n\r]){0,16}? (?: password | pass ) (?:.|[\n\r]){0,16}? @@ -66,7 +71,6 @@ rules: pattern: | (?xi) (?: username | user) - \b (?:.|[\n\r]){0,16}? (?: password | pass ) (?:.|[\n\r]){0,16}? 
@@ -89,7 +93,6 @@ rules: pattern: | (?xi) password - \b (?:.|[\n\r]){0,16}? ["'] ([^$<%@.,\s'"(){}&/\#\-][^\s'"(){}/]{4,}) (?# password ) @@ -130,7 +133,6 @@ rules: id: kingfisher.generic.6 pattern: | (?xi) - \b ( blink\d{3,6} |correcthorsebatterystaple\d{0,6} @@ -144,7 +146,6 @@ rules: |qwerty\d{3,6} |trustno\d{1,6} ) - \b min_entropy: 1.0 confidence: low examples: @@ -158,7 +159,6 @@ rules: pattern: | (?xi) (?: db_user | db_USERNAME | db_name) - \b (?:.|[\n\r]){0,8}? ["'] ([^"']{5,40}) ["'] (?:.|[\n\r]){0,32}? @@ -197,7 +197,6 @@ rules: id: kingfisher.generic.9 pattern: | (?xi) - \b ( (?P [a-z0-9._-]+ \+ [a-z0-9._-]+ ) : diff --git a/data/rules/gitalk.yml b/data/rules/gitalk.yml index 94574a4..d052855 100644 --- a/data/rules/gitalk.yml +++ b/data/rules/gitalk.yml @@ -7,6 +7,8 @@ rules: new \s+ Gitalk \s* \( \s* \{ \s* clientID: \s* '([a-f0-9]{20})', \s* clientSecret: \s* '([a-f0-9]{40})', + pattern_requirements: + min_digits: 2 confidence: medium min_entropy: 3.0 references: diff --git a/data/rules/github.yml b/data/rules/github.yml index 971f10d..3aa8d7d 100644 --- a/data/rules/github.yml +++ b/data/rules/github.yml @@ -1,23 +1,58 @@ rules: - - name: GitHub Personal Access Token + - name: GitHub Personal Access Token - fine-grained permissions id: kingfisher.github.1 + pattern: | + (?xi) + ( + github_pat_ + [A-Z0-9_+]{82,84} + ) + \b + pattern_requirements: + min_digits: 2 + min_lowercase: 2 + min_entropy: 3.5 + examples: + - "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" + references: + - https://docs.github.com/en/rest/users?apiVersion=2022-11-28 + validation: + type: Http + content: + request: + method: POST + url: https://api.github.com/graphql + headers: + Authorization: token {{ TOKEN }} + Accept: application/vnd.github+json + Content-Type: application/json + body: | + { + "query": "{ viewer { login } }" + } + response_matcher: + - report_response: true + - match_all_words: true + type: WordMatch + words: + 
- '"login"' + - name: GitHub Personal Access Token + id: kingfisher.github.2 pattern: | (?xi) \b - ( - (?: # for token prefixes - ghp| # Personal Access Token - gho| # OAuth Token - ghu| # GitHub App User-to-Server Token - ghs| # GitHub App Server-to-Server Token - ghr| # Refresh Token - github_pat # Alternative format for Personal Access Token - )_ - (?: # for token body - [a-z0-9_]{35,235} # 35 to 235 lowercase alphanumeric characters or underscores - ) - ) - \b + ( + ghp_(?P<body>[A-Z0-9]{30})(?P<checksum>[A-Z0-9]{6}) + ) + pattern_requirements: + min_digits: 2 + min_lowercase: 2 + checksum: + actual: + template: "{{ MATCH | suffix: 6 }}" + requires_capture: checksum + expected: "{{ BODY | crc32 | base62: 6 }}" + skip_if_missing: true min_entropy: 3.5 examples: - "GITHUB_KEY=ghp_XIxB7KMNdAr3zqWtQqhE94qglHqOzn1D1stg" @@ -25,8 +60,6 @@ rules: - | ## git developer settings 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 - - "oauth_token: gho_fq75OMU7UVbS9pTZmoCCzJT6TM5d1w099FgG" - - "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" references: - https://docs.github.com/en/rest/users?apiVersion=2022-11-28 validation: @@ -50,15 +83,21 @@ rules: words: - '"login"' - name: GitHub OAuth Access Token - id: kingfisher.github.2 + id: kingfisher.github.3 pattern: | (?xi) \b ( - gho_ - [A-Z0-9]{36} - ) - \b + gho_(?P<body>[A-Z0-9]{30})(?P<checksum>[A-Z0-9]{6}) + ) + pattern_requirements: + min_digits: 2 + checksum: + actual: + template: "{{ MATCH | suffix: 6 }}" + requires_capture: checksum + expected: "{{ BODY | crc32 | base62: 6 }}" + skip_if_missing: true min_entropy: 3.5 confidence: medium examples: @@ -86,17 +125,49 @@ rules: type: WordMatch words: - '"login"' - - name: GitHub App Token - id: kingfisher.github.3 + - name: GitHub App User-to-Server Token + id: kingfisher.github.4 pattern: | (?xi) - \b ( - (?:ghu|ghs)_[A-Z0-9]{36} + ghu_(?P<body>[A-Z0-9]{30})(?P<checksum>[A-Z0-9]{6}) ) - \b examples: - ' "token": "ghu_16C7e42F292c69C2E7C10c838347Ae178B4a",' + - | + Example usage:
+ git clone http://ghu_RguXIkihJjwHAP6eXEYxaPNvywurTr5IOAbg@github.com/username/repo.git + references: + - https://docs.github.com/en/rest/users?apiVersion=2022-11-28 + validation: + type: Http + content: + request: + method: POST + url: https://api.github.com/graphql + headers: + Authorization: token {{ TOKEN }} + Accept: application/vnd.github+json + Content-Type: application/json + body: | + { + "query": "{ viewer { login } }" + } + response_matcher: + - report_response: true + - match_all_words: true + type: WordMatch + words: + - '"login"' + - name: GitHub App Server-to-Server Token + id: kingfisher.github.5 + pattern: | + (?xi) + ( + ghs_(?P<body>[A-Z0-9]{30})(?P<checksum>[A-Z0-9]{6}) + ) + examples: + - ' "token": "ghs_16C7e42F292c69C2E7C10c838347Ae178B4a",' - | Example usage: git clone http://ghs_RguXIkihJjwHAP6eXEYxaPNvywurTr5IOAbg@github.com/username/repo.git @@ -123,14 +194,12 @@ rules: words: - '"login"' - name: GitHub Refresh Token - id: kingfisher.github.4 + id: kingfisher.github.6 pattern: | (?xi) - \b ( - ghr_[A-Z0-9]{76} + ghr_(?P<body>[A-Z0-9]{30})(?P<checksum>[A-Z0-9]{6}) ) - \b examples: - ' "refresh_token": "ghr_1B4a2e77838347a7E420ce178F2E7c6912E169246c3CE1ccbF66C46812d16D5B1A9Dc86A1498",' references: @@ -156,7 +225,7 @@ rules: words: - '"login"' - name: GitHub Client ID - id: kingfisher.github.5 + id: kingfisher.github.7 pattern: | (?xi) (?:github) @@ -172,16 +241,18 @@ rules: GITHUB_CLIENT_ID=ac58d6da7d7a84c039b7 GITHUB_SECRET=37d02377a3e9d849e18704c3ec883f9c5787d857 - name: GitHub Secret Key - id: kingfisher.github.6 + id: kingfisher.github.8 pattern: | (?xi) github - .? - (?: api | app | application | client | consumer | customer | secret | key ) - .? - (?: key | oauth | sec | secret )? - .{0,2} \s{0,20} .{0,2} \s{0,20} .{0,2} - \b ([a-z0-9]{40}) \b + (?:.|[\n\r]){0,32}? + (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) + (?:.|[\n\r]){0,32}?
+ \b + ( + [a-z0-9]{40} + ) + \b depends_on_rule: - rule_id: "kingfisher.github.5" variable: GITHUB_CLIENT_ID @@ -206,34 +277,3 @@ rules: - | GITHUB_CLIENT_ID=ac58d6da7d7a84c039b7 GITHUB_SECRET=37d02377a3e9d849e18704c3ec883f9c5787d857 - - name: GitHub Personal Access Token (fine-grained permissions) - id: kingfisher.github.7 - pattern: | - (?xi) - \b - ( - github_pat_[0-9A-Z_]{82} - ) - \b - examples: - - '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' - validation: - type: Http - content: - request: - method: POST - url: https://api.github.com/graphql - headers: - Authorization: token {{ TOKEN }} - Accept: application/vnd.github+json - Content-Type: application/json - body: | - { - "query": "{ viewer { login } }" - } - response_matcher: - - report_response: true - - match_all_words: true - type: WordMatch - words: - - '"login"' \ No newline at end of file diff --git a/data/rules/gitlab.yml b/data/rules/gitlab.yml index 1cdf48c..96a98bc 100644 --- a/data/rules/gitlab.yml +++ b/data/rules/gitlab.yml @@ -2,12 +2,15 @@ rules: - name: GitLab Private Token id: kingfisher.gitlab.1 pattern: | - (?xi) - \b + (?xi) + \b ( glpat- [0-9A-Z_-]{20} ) + \b + pattern_requirements: + min_digits: 2 min_entropy: 3.5 confidence: medium examples: @@ -42,6 +45,8 @@ rules: GR1348941[0-9A-Z_-]{20} ) \b + pattern_requirements: + min_digits: 2 examples: - | sudo gitlab-runner register \ @@ -85,7 +90,8 @@ rules: ( glptt-[0-9a-f]{40} ) - \b + pattern_requirements: + min_digits: 2 examples: - | curl \ @@ -117,11 +123,14 @@ rules: - name: GitLab Private Token - Updated Format id: kingfisher.gitlab.4 pattern: | - (?x) + (?x) \b ( glpat-[A-Za-z0-9_-]{36,38}\.01\.[a-z0-9]{9} ) + \b + pattern_requirements: + min_digits: 2 min_entropy: 3.5 confidence: medium examples: diff --git a/data/rules/gitter.yml b/data/rules/gitter.yml index e2b261c..3da37a3 100644 --- a/data/rules/gitter.yml +++ b/data/rules/gitter.yml @@ -11,6 +11,8 @@ rules: [a-z0-9_-]{40} ) 
\b + pattern_requirements: + min_digits: 2 min_entropy: 3.2 confidence: medium examples: diff --git a/data/rules/gocardless.yml b/data/rules/gocardless.yml index e261372..525b20b 100644 --- a/data/rules/gocardless.yml +++ b/data/rules/gocardless.yml @@ -13,7 +13,11 @@ rules: (?:[A-Z0-9=_-]{8}){3} [A-Z0-9=_-]{0,2} ) - \b + \b + pattern_requirements: + min_digits: 2 + min_uppercase: 1 + min_lowercase: 1 min_entropy: 3.5 confidence: medium examples: diff --git a/data/rules/google.yml b/data/rules/google.yml index d7d4aed..51d5103 100644 --- a/data/rules/google.yml +++ b/data/rules/google.yml @@ -15,9 +15,12 @@ rules: id: kingfisher.google.2 pattern: | (?xi) - \b (GOCSPX-[A-Z0-9_-]{28}) (?:[^A-Z0-9_-] | $) + pattern_requirements: + min_digits: 2 + min_uppercase: 1 + min_lowercase: 1 min_entropy: 3.3 confidence: medium examples: @@ -28,9 +31,14 @@ rules: pattern: | (?xi) client.?secret .{0,10} - \b - ([a-z0-9_-]{24}) + ( + [a-z0-9_-]{24} + ) (?: [^a-z0-9_-] |$) + pattern_requirements: + min_digits: 4 + min_uppercase: 3 + min_lowercase: 3 min_entropy: 3.3 confidence: medium examples: @@ -42,9 +50,12 @@ rules: id: kingfisher.google.4 pattern: | (?xi) - \b (ya29\.[0-9A-Z_-]{20,1024}) - (?: [^0-9A-Z_-]|$) + (?: [^0-9A-Z_-]) + pattern_requirements: + min_digits: 2 + min_uppercase: 1 + min_lowercase: 1 min_entropy: 3.3 confidence: medium examples: @@ -66,12 +77,10 @@ rules: id: kingfisher.google.6 pattern: | (?xi) - \b ([0-9]+-[a-z0-9_]{32}\.apps\.googleusercontent\.com) (?: (?s).{0,40} ) - \b (?: (GOCSPX-[A-Z0-9_-]{28}) | @@ -80,6 +89,10 @@ rules: ) ) (?:[^A-Z0-9_-] | $) + pattern_requirements: + min_digits: 2 + min_uppercase: 1 + min_lowercase: 1 min_entropy: 3.3 confidence: medium examples: @@ -98,11 +111,16 @@ rules: id: kingfisher.google.7 pattern: | (?xi) + \b ( AIza [A-Za-z0-9_-]{35} ) \b + pattern_requirements: + min_digits: 2 + min_uppercase: 1 + min_lowercase: 1 min_entropy: 3.5 confidence: medium examples: diff --git a/data/rules/grafana.yml 
b/data/rules/grafana.yml index 4cb5461..a7c9726 100644 --- a/data/rules/grafana.yml +++ b/data/rules/grafana.yml @@ -8,6 +8,8 @@ rules: eyJrIjoi[a-z0-9]{60,100} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.3 confidence: medium examples: @@ -20,12 +22,15 @@ rules: id: kingfisher.grafana.2 pattern: | (?xi) - \b + \b ( glc_ [a-z0-9+/]{40,150} ={0,2} ) + pattern_requirements: + min_digits: 2 + min_lowercase: 2 min_entropy: 3.3 confidence: medium examples: @@ -56,6 +61,8 @@ rules: glsa_[A-Z0-9]{32}_[A-F0-9]{8} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.3 confidence: medium examples: diff --git a/data/rules/groq.yml b/data/rules/groq.yml index b232ddd..deab409 100644 --- a/data/rules/groq.yml +++ b/data/rules/groq.yml @@ -8,6 +8,8 @@ rules: gsk_[a-zA-Z0-9]{52} ) \b + pattern_requirements: + min_digits: 2 confidence: medium min_entropy: 4.5 validation: diff --git a/data/rules/hashes.yml b/data/rules/hashes.yml index 60b0f4a..bf6f5af 100644 --- a/data/rules/hashes.yml +++ b/data/rules/hashes.yml @@ -7,6 +7,8 @@ rules: - https://unix.stackexchange.com/a/511017 - https://hashcat.net/wiki/doku.php?id=example_hashes - https://passwordvillage.org/salted.html#md5crypt + pattern_requirements: + min_digits: 2 min_entropy: 3.3 confidence: medium examples: # generated with `openssl passwd -1 -salt 'OKgLCmVl' 'a'` @@ -38,6 +40,8 @@ rules: \$ [./A-Za-z0-9]{8,16} \$ [./A-Za-z0-9]{43} ) + pattern_requirements: + min_digits: 2 references: - https://en.wikipedia.org/wiki/Crypt_(C)#Key_derivation_functions_supported_by_crypt - https://hashcat.net/wiki/doku.php?id=example_hashes @@ -58,6 +62,8 @@ rules: \$ [./A-Za-z0-9]{8,16} \$ [./A-Za-z0-9]{86} ) + pattern_requirements: + min_digits: 2 references: - https://en.wikipedia.org/wiki/Crypt_(C)#Key_derivation_functions_supported_by_crypt - https://hashcat.net/wiki/doku.php?id=example_hashes @@ -76,6 +82,8 @@ rules: \$ [./A-Za-z0-9]{8,16} \$ [./A-Za-z0-9]{43} ) + pattern_requirements: + min_digits: 2 references: - 
https://en.wikipedia.org/wiki/Crypt_(C)#Key_derivation_functions_supported_by_crypt - https://hashcat.net/wiki/doku.php?id=example_hashes @@ -96,7 +104,8 @@ rules: [0-9a-f]{32} \$ [0-9a-f]{64,} ) - \b + pattern_requirements: + min_digits: 2 references: - https://hashcat.net/wiki/doku.php?id=example_hashes min_entropy: 3.3 diff --git a/data/rules/hashicorp.yml b/data/rules/hashicorp.yml index 20adfc1..b6a0de1 100644 --- a/data/rules/hashicorp.yml +++ b/data/rules/hashicorp.yml @@ -5,16 +5,20 @@ rules: pattern: | (?x) (?i: hashicorp | vault | token | key | secret ) - ["':=\ ]{0,5} + (?:.|[\n\r]){0,32}? \b - (s\.[A-Za-z0-9_-]{24,128}) - (?: [^A-Za-z0-9_-] | $ ) + ( + s\.[A-Za-z0-9_-]{24,128} + ) + \b + pattern_requirements: + min_digits: 2 confidence: medium min_entropy: 3.0 examples: - 'VAULT_CLIENT_TOKEN="s.Z4bTMtngfLeQ18AqVoBBkUAOD1"' - - '`-vaultToken s.CAESIP2jTxc9S3K7Z6CtcFWQv7-044m_oSsxkingfisher.0H3nF89l3GiYKHGh3cy5sQmlIZVNyTWJNcDRsYWJpQjlhYjVlb2cQh6PL8wEYAg"`' + - 'vaultToken="s.CAESIP2jTxc9S3K7Z6CtcFWQv7-044m_oS.0H3nF89l3GiYKHGh3cy5sQmlIZVNyTWJNcDRsYWJpQjlhYjVlb2cQh6PL8wEYAg"`' references: - https://developer.hashicorp.com/vault/docs/concepts/tokens @@ -26,9 +30,10 @@ rules: (?x) (?i: hashicorp | vault | token | key | secret ) ["':=\ ]{0,5} - \b (b\.[A-Za-z0-9_-]{24,500}) (?: [^A-Za-z0-9_-] | $ ) + pattern_requirements: + min_digits: 2 examples: - 'VAULT_CLIENT_TOKEN="b.Z4bTMtngfLeQ18AqVoBBkUAOD1"' @@ -44,9 +49,10 @@ rules: (?x) (?i: hashicorp | vault | token | key | secret ) ["':=\ ]{0,5} - \b (r\.[A-Za-z0-9_-]{24,500}) (?: [^A-Za-z0-9_-] | $ ) + pattern_requirements: + min_digits: 2 examples: - 'VAULT_CLIENT_TOKEN="r.Z4bTMtngfLeQ18AqVoBBkUAOD1"' @@ -62,9 +68,10 @@ rules: pattern: | (?x) - \b (hvs\.[A-Za-z0-9]{24,130}) (?: [^A-Za-z0-9_-] | $ ) + pattern_requirements: + min_digits: 2 examples: - "apikey: hvs.JGbZZaCkOSgsZ56uhGlTK2zyC1j2mwhy0VLp4" @@ -79,9 +86,10 @@ rules: pattern: | (?x) - \b (hvb\.[A-Za-z0-9_-]{24,500}) (?: [^A-Za-z0-9_-] | $ ) + 
pattern_requirements: + min_digits: 2 examples: - "apikey: hvb.JGbZZaCkOSgsZ56uhGlTK2zyC1j2mwhy0VLp4" @@ -98,9 +106,10 @@ rules: pattern: | (?x) - \b (hvr\.[A-Za-z0-9]{24,130}) (?: [^A-Za-z0-9_-] | $ ) + pattern_requirements: + min_digits: 2 examples: - "apikey: hvr.JGbZZaCkOSgsZ56uhGlTK2zyC1j2mwhy0VLp4" @@ -119,9 +128,10 @@ rules: (?i: unseal ) \b .{1,10} - \b ([a-zA-Z0-9+/]{44}) (?: [^a-zA-Z0-9+/] | $ ) + pattern_requirements: + min_digits: 2 examples: - "Unseal Key 2: 0tZn+7QQCxphpHwTm7/dC3LpP5JGIbYl3PK8Sy81R+P2" diff --git a/data/rules/heroku.yml b/data/rules/heroku.yml index b06d58e..96e94e4 100644 --- a/data/rules/heroku.yml +++ b/data/rules/heroku.yml @@ -5,13 +5,15 @@ rules: (?xi) \b heroku - (?:.|[\n\r]){0,32}? + (?:.|[\n\r]){0,32}? \b ( [0-9a-f]{8}-[0-9a-f]{4}- [0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.0 confidence: medium examples: @@ -35,10 +37,11 @@ rules: id: kingfisher.heroku.2 pattern: | (?xi) - \b ( HRKU-[A-Z0-9_]{60} ) + pattern_requirements: + min_digits: 2 confidence: medium min_entropy: 4.0 validation: diff --git a/data/rules/hubspot.yml b/data/rules/hubspot.yml index 2840203..5cb5372 100644 --- a/data/rules/hubspot.yml +++ b/data/rules/hubspot.yml @@ -8,6 +8,8 @@ rules: pat-[a-z0-9]{2,3}-[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12} ) \b + pattern_requirements: + min_digits: 2 confidence: medium min_entropy: 3.0 validation: diff --git a/data/rules/huggingface.yml b/data/rules/huggingface.yml index d103438..c70b993 100644 --- a/data/rules/huggingface.yml +++ b/data/rules/huggingface.yml @@ -3,7 +3,6 @@ rules: id: kingfisher.huggingface.1 pattern: | (?xi) - \b (?: ( (?:api_org|hf)_ @@ -11,6 +10,8 @@ rules: ) ) \b + pattern_requirements: + min_digits: 2 references: - https://huggingface.co/docs/hub/security-tokens min_entropy: 3.3 diff --git a/data/rules/ibm.yml b/data/rules/ibm.yml index 55a33be..390d927 100644 --- a/data/rules/ibm.yml +++ b/data/rules/ibm.yml @@ -8,17 +8,17 
@@ rules: (?:.|[\n\r]){0,32}? (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) (?:.|[\n\r]){0,32}? - \b ( [0-9A-Z_-]{42,44} ) + \b + pattern_requirements: + min_digits: 2 min_entropy: 3.5 confidence: medium - examples: - ibmcloud_apikey = abcdef0123_56789abcdef0123456789abcdef01234 - ibm_platform_key="f-_RrJDVnuVh07HNTcmnQx_b6CbcQsxmEarVm9P_RWtF" - references: - https://cloud.ibm.com/docs/account?topic=account-userapikey - https://cloud.ibm.com/apidocs/iam-identity-token-api diff --git a/data/rules/infracost.yml b/data/rules/infracost.yml index 598b6c3..42dc8d6 100644 --- a/data/rules/infracost.yml +++ b/data/rules/infracost.yml @@ -8,6 +8,8 @@ rules: ico-[a-z0-9]{32} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.3 confidence: medium examples: diff --git a/data/rules/intercom.yml b/data/rules/intercom.yml index e521759..0b408ae 100644 --- a/data/rules/intercom.yml +++ b/data/rules/intercom.yml @@ -7,15 +7,18 @@ rules: (?:.|[\n\r]){0,16}? (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) (?:.|[\n\r]){0,16}? 
+ \b ( [0-9A-Z+/]{59}= ) + pattern_requirements: + min_digits: 2 min_entropy: 3.5 confidence: medium examples: - "intercom_access_token: dG9rOvI0NmJlMTA5XzQwM2NfNDVlM184MjQzXzkwMDnmOTE1NGIyONoxOjA=" - - ic_token = "g1ZsclJXTjNfc1pBSzJDemE0eFVDU0U5c25CeDN4Vm9hQ2Zac0hXemZHNGVDPQ==" + - ic_token = "g1ZsclJXTjNfc1pBSzJDemE0eFVDU0U5c25CeDN4Vm9hQ2Zac0hXemZHNPQ==" references: - https://developers.intercom.com/docs/build-an-integration/learn-more/rest-apis diff --git a/data/rules/ionic.yml b/data/rules/ionic.yml index 1130a38..b37b83b 100644 --- a/data/rules/ionic.yml +++ b/data/rules/ionic.yml @@ -7,8 +7,10 @@ rules: ( ion_ [a-z0-9]{42} - ) - \b + ) + \b + pattern_requirements: + min_digits: 2 min_entropy: 3.5 confidence: medium examples: diff --git a/data/rules/ipstack.yml b/data/rules/ipstack.yml index a70e43e..1dbb576 100644 --- a/data/rules/ipstack.yml +++ b/data/rules/ipstack.yml @@ -13,6 +13,8 @@ rules: (?:[0-9a-f]{16}){2} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.0 confidence: medium examples: diff --git a/data/rules/jenkins.yml b/data/rules/jenkins.yml index f8fbb77..36a1d49 100644 --- a/data/rules/jenkins.yml +++ b/data/rules/jenkins.yml @@ -2,7 +2,8 @@ rules: - name: Jenkins Token or Crumb id: kingfisher.jenkins.1 pattern: '(?i)jenkins.{0,10}(?:crumb)?.{0,10}\b([0-9a-f]{32,36})\b' - categories: [api, fuzzy, secret] + pattern_requirements: + min_digits: 2 min_entropy: 3.3 confidence: medium examples: @@ -17,8 +18,6 @@ rules: export JENKINS=jenkins-cicd.apps.sno.openshiftlabs.net - | sh "curl -X POST 'http://jenkins.lsfusion.luxsoft.by/job/${Paths.updateParentVersionsJob}/build' --user ${USERPASS} -H 'Jenkins-Crumb:440561953171ba44ace9740562d172bb'" - negative_examples: - - '1. 
~~Does not play well with [Build Token Root Plugin](https://wiki.jenkins-ci.org/display/JENKINS/Build+Token+Root+Plugin) URL formats.~~ (added with [this commit](https://github.com/morficus/Parameterized-Remote-Trigger-Plugin/commit/f687dbe75d1c4f39f7e14b68220890384d7c5674) )' references: - https://www.jenkins.io/blog/2018/07/02/new-api-token-system/ - https://www.jenkins.io/doc/book/security/csrf-protection/ \ No newline at end of file diff --git a/data/rules/jina.yml b/data/rules/jina.yml index efe5b10..807f5a7 100644 --- a/data/rules/jina.yml +++ b/data/rules/jina.yml @@ -4,8 +4,12 @@ rules: pattern: | (?x) \b - (jina_[a-zA-Z0-9]{60}) + ( + jina_[a-zA-Z0-9]{60} + ) \b + pattern_requirements: + min_digits: 2 confidence: medium min_entropy: 3.0 examples: diff --git a/data/rules/jira.yml b/data/rules/jira.yml index a555d85..2be3a53 100644 --- a/data/rules/jira.yml +++ b/data/rules/jira.yml @@ -2,11 +2,14 @@ rules: - name: Jira Domain id: kingfisher.jira.1 pattern: | - (?xi) + (?xi) + \b ( [a-z][a-z0-9-]{5,24}\.atlassian\.net ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.5 visible: false confidence: medium @@ -26,8 +29,12 @@ rules: \b ( [a-z0-9-]{24} - ) - \b + ) + \b + pattern_requirements: + min_digits: 2 + min_uppercase: 1 + min_lowercase: 1 min_entropy: 3.3 confidence: medium examples: diff --git a/data/rules/jwt.yml b/data/rules/jwt.yml index b237b4c..cbd5b46 100644 --- a/data/rules/jwt.yml +++ b/data/rules/jwt.yml @@ -3,7 +3,6 @@ rules: id: kingfisher.jwt.1 pattern: | (?x) - \b ( (?:ey|ewogIC)[A-Za-z0-9_-]{12,} (?# header ) \. @@ -11,7 +10,9 @@ rules: \. 
[A-Za-z0-9_-]{12,} (?# signature ) ) - (?:[^A-Z0-9_-]|$) (?# this instead of a \b anchor because that doesn't play nicely with `-` ) + (?:[^A-Z0-9_-]) + pattern_requirements: + min_digits: 4 min_entropy: 3.3 confidence: medium examples: diff --git a/data/rules/kagi.yml b/data/rules/kagi.yml index 17732af..b1d311c 100644 --- a/data/rules/kagi.yml +++ b/data/rules/kagi.yml @@ -3,12 +3,12 @@ rules: id: kingfisher.kagi.1 pattern: | (?x)(?s) - \b (?: kagi | KAGI ) .{0,100} - \b ( [a-zA-Z0-9_-]{11}\.[a-zA-Z0-9_-]{43} ) (?: $ | [^a-zA-Z0-9_-] ) + pattern_requirements: + min_digits: 2 confidence: medium min_entropy: 3.0 references: diff --git a/data/rules/kickbox.yml b/data/rules/kickbox.yml index 35a4e38..897d25d 100644 --- a/data/rules/kickbox.yml +++ b/data/rules/kickbox.yml @@ -13,6 +13,8 @@ rules: [A-Z0-9_]+[A-Z0-9]{64} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.5 confidence: medium examples: diff --git a/data/rules/langchain.yml b/data/rules/langchain.yml index cd853e2..6f4da05 100644 --- a/data/rules/langchain.yml +++ b/data/rules/langchain.yml @@ -8,6 +8,8 @@ rules: lsv2_(?:pt)_[0-9a-f]{32}_[0-9a-f]{10} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 4.0 examples: - "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" @@ -34,6 +36,8 @@ rules: lsv2_sk_[0-9a-f]{32}_[0-9a-f]{10} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 4.0 examples: - "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" diff --git a/data/rules/launchdarkly.yml b/data/rules/launchdarkly.yml index f6d15d5..26a27d3 100644 --- a/data/rules/launchdarkly.yml +++ b/data/rules/launchdarkly.yml @@ -9,6 +9,8 @@ rules: ( [a-z0-9_\-=]{40} ) + pattern_requirements: + min_digits: 2 min_entropy: 3.2 confidence: medium examples: diff --git a/data/rules/line.yml b/data/rules/line.yml index c3b43dd..fc6ec4e 100644 --- a/data/rules/line.yml +++ b/data/rules/line.yml @@ -12,6 +12,8 @@ rules: ( (?:[0-9A-Z+/]{57}){3}=? 
) + pattern_requirements: + min_digits: 2 min_entropy: 3.5 confidence: medium examples: diff --git a/data/rules/linear.yml b/data/rules/linear.yml index fdaa30d..3b73105 100644 --- a/data/rules/linear.yml +++ b/data/rules/linear.yml @@ -7,8 +7,10 @@ rules: ( lin_api_ (?:[0-9A-Z]{8}){5} - ) - \b + ) + \b + pattern_requirements: + min_digits: 2 min_entropy: 3.5 confidence: medium examples: diff --git a/data/rules/linkedin.yml b/data/rules/linkedin.yml index 7663c28..cd293b7 100644 --- a/data/rules/linkedin.yml +++ b/data/rules/linkedin.yml @@ -10,6 +10,8 @@ rules: (?: id | identifier | key ) .{0,2} \s{0,20} .{0,2} \s{0,20} .{0,2} \b ([a-z0-9]{12,14}) \b + pattern_requirements: + min_digits: 2 references: - https://docs.microsoft.com/en-us/linkedin/shared/api-guide/best-practices/secure-applications min_entropy: 2.5 @@ -44,6 +46,8 @@ rules: (?: key | oauth | sec | secret )? .{0,2} \s{0,20} .{0,2} \s{0,20} .{0,2} \b ([a-z0-9]{16}) \b + pattern_requirements: + min_digits: 2 references: - https://docs.microsoft.com/en-us/linkedin/shared/api-guide/best-practices/secure-applications min_entropy: 3.3 diff --git a/data/rules/lob.yml b/data/rules/lob.yml index 152c445..deb8800 100644 --- a/data/rules/lob.yml +++ b/data/rules/lob.yml @@ -10,6 +10,8 @@ rules: (?:live|test)_[a-f0-9]{35} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.5 confidence: medium examples: @@ -42,6 +44,8 @@ rules: (?:test|live)_pub_[a-f0-9]{31} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.0 confidence: medium examples: diff --git a/data/rules/mailchimp.yml b/data/rules/mailchimp.yml index ffa5d3a..c3f6962 100644 --- a/data/rules/mailchimp.yml +++ b/data/rules/mailchimp.yml @@ -13,6 +13,9 @@ rules: (?:[0-9a-f]{8}){4} -us\d{1,2} ) + \b + pattern_requirements: + min_digits: 2 min_entropy: 3.5 confidence: medium examples: diff --git a/data/rules/mailgun.yml b/data/rules/mailgun.yml index 06a02c2..5181f3b 100644 --- a/data/rules/mailgun.yml +++ b/data/rules/mailgun.yml @@ -11,7 
+11,11 @@ rules: \b ( (?:[0-9A-Z-]{24}){3} - ) + ) + pattern_requirements: + min_digits: 2 + min_uppercase: 1 + min_lowercase: 1 min_entropy: 3.5 confidence: medium examples: @@ -41,6 +45,8 @@ rules: key-(?:[0-9a-f]{8}){4} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.5 confidence: medium examples: diff --git a/data/rules/mandrill.yml b/data/rules/mandrill.yml index 4abd63d..857fbeb 100644 --- a/data/rules/mandrill.yml +++ b/data/rules/mandrill.yml @@ -11,7 +11,11 @@ rules: \b ( (?:[0-9A-Z_-]{11}){2} - ) + ) + pattern_requirements: + min_digits: 2 + min_uppercase: 1 + min_lowercase: 1 min_entropy: 3.5 confidence: medium examples: diff --git a/data/rules/mapbox.yml b/data/rules/mapbox.yml index d0253d6..313eb9e 100644 --- a/data/rules/mapbox.yml +++ b/data/rules/mapbox.yml @@ -1,7 +1,9 @@ rules: - name: Mapbox Public Access Token id: kingfisher.mapbox.1 - pattern: '(?i)(?s)mapbox.{0,30}(pk\.[a-z0-9\-+/=]{32,128}\.[a-z0-9\-+/=]{20,30})(?:[^a-z0-9\-+/=]|$)' + pattern: '(?i)(?s)mapbox.{0,30}(pk\.[a-z0-9\-+/=]{32,128}\.[a-z0-9\-+/=]{20,30})\b' + pattern_requirements: + min_digits: 2 min_entropy: 3.3 confidence: medium examples: @@ -26,7 +28,15 @@ rules: - name: Mapbox Secret Access Token id: kingfisher.mapbox.2 - pattern: '(?i)(?s)mapbox.{0,30}(sk\.[a-z0-9\-+/=]{32,128}\.[a-z0-9\-+/=]{20,30})(?:[^a-z0-9\-+/=]|$)' + pattern: | + (?xi)(?s) + mapbox.{0,30} + ( + sk\.[a-z0-9\-+/=]{32,128}\.[a-z0-9\-+/=]{20,30} + ) + \b + pattern_requirements: + min_digits: 2 min_entropy: 3.3 confidence: medium examples: @@ -50,7 +60,9 @@ rules: - name: Mapbox Temporary Access Token id: kingfisher.mapbox.3 - pattern: '(?i)(?s)mapbox.{0,30}(tk\.[a-z0-9\-+/=]{32,128}\.[a-z0-9\-+/=]{20,30})(?:[^a-z0-9\-+/=]|$)' + pattern: '(?i)(?s)mapbox.{0,30}(tk\.[a-z0-9\-+/=]{32,128}\.[a-z0-9\-+/=]{20,30})\b' + pattern_requirements: + min_digits: 2 min_entropy: 3.3 confidence: medium examples: diff --git a/data/rules/mattermost.yml b/data/rules/mattermost.yml index 564adb6..ef69c78 100644 --- 
a/data/rules/mattermost.yml +++ b/data/rules/mattermost.yml @@ -34,6 +34,8 @@ rules: [A-Z0-9]{26} ) \b + pattern_requirements: + min_digits: 2 confidence: medium min_entropy: 4.0 examples: diff --git a/data/rules/maxmind.yml b/data/rules/maxmind.yml index c0839b8..9952118 100644 --- a/data/rules/maxmind.yml +++ b/data/rules/maxmind.yml @@ -8,6 +8,8 @@ rules: [a-z0-9]{6}_[a-z0-9]{29}_mmk ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.8 confidence: medium examples: @@ -42,11 +44,9 @@ rules: (?:.|[\n\r]){0,10}? (?:id|number) (?:.|[\n\r]){0,16}? - \b ( \d{4,8} ) - \b min_entropy: 2.0 confidence: medium visible: false diff --git a/data/rules/messagebird.yml b/data/rules/messagebird.yml index 8ae1a58..4b89531 100644 --- a/data/rules/messagebird.yml +++ b/data/rules/messagebird.yml @@ -6,10 +6,13 @@ rules: \b message[_-]?bird (?:.|[\n\r]){0,32}? + \b ( [a-z0-9]{25} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.4 confidence: medium examples: diff --git a/data/rules/microsoft_teams.yml b/data/rules/microsoft_teams.yml index 37e4030..1f7ac49 100644 --- a/data/rules/microsoft_teams.yml +++ b/data/rules/microsoft_teams.yml @@ -26,6 +26,8 @@ rules: [0-9a-f]{4}- [0-9a-f]{12} ) + pattern_requirements: + min_digits: 2 min_entropy: 3.3 confidence: medium examples: diff --git a/data/rules/microsoftteamswebhook.yml b/data/rules/microsoftteamswebhook.yml index 4da1c1d..952f1b9 100644 --- a/data/rules/microsoftteamswebhook.yml +++ b/data/rules/microsoftteamswebhook.yml @@ -3,17 +3,23 @@ rules: id: kingfisher.microsoftteamswebhook.1 pattern: | (?xi) - https://[A-Z0-9]+\.webhook\.office\.com/webhookb2 - / - [A-Z0-9]{8}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{12} - @ - [A-Z0-9]{8}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{12} - / - IncomingWebhook - / - [A-Z0-9]{32} - / - [A-Z0-9]{8}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{12} + \b + ( + https://[A-Z0-9]+\.webhook\.office\.com/webhookb2 + / + 
[A-Z0-9]{8}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{12} + @ + [A-Z0-9]{8}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{12} + / + IncomingWebhook + / + [A-Z0-9]{32} + / + [A-Z0-9]{8}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{12} + ) + \b + pattern_requirements: + min_digits: 2 min_entropy: 3.3 confidence: medium examples: diff --git a/data/rules/mistral.yml b/data/rules/mistral.yml index 07b1af8..3e451da 100644 --- a/data/rules/mistral.yml +++ b/data/rules/mistral.yml @@ -13,6 +13,8 @@ rules: [A-Z0-9]{32} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.0 confidence: medium examples: diff --git a/data/rules/mongodb.yml b/data/rules/mongodb.yml index dc02e94..28b4031 100644 --- a/data/rules/mongodb.yml +++ b/data/rules/mongodb.yml @@ -11,6 +11,7 @@ rules: .{0,1000}? (?:private|priv|secret|auth|pass|key) (?:.|[\n\r]){0,32}? + \b ( [a-fA-F0-9]{8} - @@ -21,7 +22,12 @@ rules: [a-fA-F0-9]{4} - [a-fA-F0-9]{12} - ) + ) + \b + pattern_requirements: + min_digits: 2 + min_uppercase: 1 + min_lowercase: 1 min_entropy: 3.7 examples: - ATLAS_PRIVATE_KEY=4b18315e-6b7d-4337-b449-5d38f5a189ec @@ -90,7 +96,6 @@ rules: ( mdb_sa_sk_[0-9A-Z_-]{6}[0-9A-Z]{34} ) - \b min_entropy: 3.5 examples: - mdb_sa_sk_BdIX_jLzut2WTgglKzKvSgWMDDj5hEoTqdwOyLOL diff --git a/data/rules/nasa.yml b/data/rules/nasa.yml index efbcc42..1509b2e 100644 --- a/data/rules/nasa.yml +++ b/data/rules/nasa.yml @@ -8,10 +8,13 @@ rules: (?:.|[\n\r]){0,32}? (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) (?:.|[\n\r]){0,32}? + \b ( [A-Z0-9]{40} ) \b + pattern_requirements: + min_digits: 2 examples: - | get('https://api.nasa.gov/planetary/earth/imagery?api_key=fWfSMcDzyHfMuH8BW6jiIUBYaj0hKRyKBRTBqgEQ') diff --git a/data/rules/netlify.yml b/data/rules/netlify.yml index 0a29d25..7fae1eb 100644 --- a/data/rules/netlify.yml +++ b/data/rules/netlify.yml @@ -8,8 +8,12 @@ rules: (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) (?:.|[\n\r]){0,32}? 
\b - ([a-f0-9]{60,64}) + ( + [a-f0-9]{60,64} + ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.3 examples: - netlify_token=3cdfad7b885a6daceff3fb820389115750b373763fb30b10ca0382648b55872d @@ -41,6 +45,8 @@ rules: [A-Z0-9_-]{43,45} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.5 confidence: medium examples: diff --git a/data/rules/newrelic.yml b/data/rules/newrelic.yml index d9a582c..c208aa0 100644 --- a/data/rules/newrelic.yml +++ b/data/rules/newrelic.yml @@ -14,6 +14,9 @@ rules: - [A-Z0-9_.]{42} ) + \b + pattern_requirements: + min_digits: 2 min_entropy: 3.3 confidence: medium examples: diff --git a/data/rules/ngrok.yml b/data/rules/ngrok.yml index 56f1b62..be45f08 100644 --- a/data/rules/ngrok.yml +++ b/data/rules/ngrok.yml @@ -7,8 +7,11 @@ rules: (?:.|[\\n\r]){0,32}? (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) (?:.|[\n\r]){0,32}? - \b - (?:[a-z0-9]{25,30}_\d[a-z0-9]{20}|(?:cr_|ak_)[a-z0-9]{25,30}) + ( + (?:[a-z0-9]{25,30}_\d[a-z0-9]{20} + | + (?:cr_|ak_)[a-z0-9]{25,30}) + ) \b min_entropy: 4 examples: diff --git a/data/rules/notion.yml b/data/rules/notion.yml index 642128e..2682c8c 100644 --- a/data/rules/notion.yml +++ b/data/rules/notion.yml @@ -10,6 +10,8 @@ rules: secret_[A-Z0-9]{43} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 4.0 confidence: medium examples: @@ -38,11 +40,9 @@ rules: (?xi) notion (?:.|[\\n\r]){0,32}? - \b ( ntn_[A-Z0-9]{40,55} ) - \b min_entropy: 4.0 confidence: medium references: @@ -74,11 +74,9 @@ rules: (?xi) notion (?:.|[\\n\r]){0,32}? 
- \b ( nrt_[A-Z0-9_]{40,55} ) - \b min_entropy: 3.5 confidence: medium examples: diff --git a/data/rules/npm.yml b/data/rules/npm.yml index 5d2e8c8..6132d80 100644 --- a/data/rules/npm.yml +++ b/data/rules/npm.yml @@ -5,9 +5,17 @@ rules: (?xi) \b ( - npm_[A-Z0-9]{36} + npm_(?P<body>[A-Za-z0-9]{30})(?P<checksum>[A-Za-z0-9]{6}) ) \b + pattern_requirements: + min_digits: 2 + checksum: + actual: + template: "{{ MATCH | suffix: 6 }}" + requires_capture: checksum + expected: "{{ BODY | crc32 | base62: 6 }}" + skip_if_missing: true references: - https://docs.npmjs.com/about-access-tokens - https://github.com/github/roadmap/issues/557 @@ -15,7 +23,7 @@ rules: min_entropy: 3.3 confidence: medium examples: - - 'npm_TCllNwh2WLQlMWVhybM1iQrsTj6rMQ0BOh6d' + - "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" validation: type: Http content: @@ -35,7 +43,6 @@ rules: id: kingfisher.npm.2 pattern: | (?xi) - \b (?:_authToken|NPM_TOKEN) (?:.|[\n\r]){0,16}? ( diff --git a/data/rules/nuget.yml b/data/rules/nuget.yml index 663a415..3ba909e 100644 --- a/data/rules/nuget.yml +++ b/data/rules/nuget.yml @@ -8,6 +8,8 @@ rules: oy2[a-z0-9]{43} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.3 confidence: medium examples: @@ -40,11 +42,9 @@ rules: (?:.|[\n\r]){0,32}? (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) (?:.|[\n\r]){0,32}? 
- \b ( [a-z0-9]{46} ) - \b min_entropy: 3.5 confidence: medium examples: diff --git a/data/rules/nvidia.yml b/data/rules/nvidia.yml index 1dc7b31..3143a80 100644 --- a/data/rules/nvidia.yml +++ b/data/rules/nvidia.yml @@ -8,6 +8,8 @@ rules: nvapi-[A-Z0-9_-]{60,70} ) \b + pattern_requirements: + min_digits: 2 confidence: medium min_entropy: 3.5 examples: diff --git a/data/rules/nytimes.yml b/data/rules/nytimes.yml index 3c0d08a..391dd3f 100644 --- a/data/rules/nytimes.yml +++ b/data/rules/nytimes.yml @@ -10,6 +10,8 @@ rules: [a-z0-9_\-=]{32} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.2 confidence: medium examples: diff --git a/data/rules/odbc.yml b/data/rules/odbc.yml index 6593f92..6f60c6a 100644 --- a/data/rules/odbc.yml +++ b/data/rules/odbc.yml @@ -6,6 +6,10 @@ rules: (?: User | User\ Id | UserId | Uid) \s*=\s* ([^\s;]{3,100}) \s* ; [\ \t]* .{0,10} [\ \t]* (?: Password | Pwd) \s*=\s* ([^\t\ ;]{3,100}) \s* (?: [;] | $) + pattern_requirements: + ignore_if_contains: + - "localhost" + - "127.0.0.1" min_entropy: 3.3 confidence: medium examples: diff --git a/data/rules/okta.yml b/data/rules/okta.yml index 65490b4..16511f4 100644 --- a/data/rules/okta.yml +++ b/data/rules/okta.yml @@ -10,6 +10,9 @@ rules: ( 00[a-z0-9_-]{39}[a-z0-9_] ) + \b + pattern_requirements: + min_digits: 4 min_entropy: 3.3 examples: - okta_api_token=00hqNORUpnTcdFWA5WEM4YwOkw6RXeFw21lFDRKmY1 @@ -44,7 +47,6 @@ rules: id: kingfisher.okta.2 pattern: | (?xi) - \b ( [a-z0-9-]{1,40}\.okta(?:preview|-emea)?\.com ) diff --git a/data/rules/ollama.yml b/data/rules/ollama.yml index a2df4ab..ba686c6 100644 --- a/data/rules/ollama.yml +++ b/data/rules/ollama.yml @@ -10,6 +10,8 @@ rules: ( [a-f0-9]{32}\.[a-zA-Z0-9_-]{24} ) + pattern_requirements: + min_digits: 2 confidence: medium min_entropy: 3.5 validation: diff --git a/data/rules/onepassword.yml b/data/rules/onepassword.yml index b7cc0bc..e7f6183 100644 --- a/data/rules/onepassword.yml +++ b/data/rules/onepassword.yml @@ -6,7 +6,10 @@ 
rules: \b ( ops_eyj[A-Za-z0-9_-]{80,500} - )\b + ) + \b + pattern_requirements: + min_digits: 2 min_entropy: 4.0 confidence: medium examples: @@ -37,11 +40,11 @@ rules: id: kingfisher.1password.2 pattern: | (?xi) - \b ( A[0-9]-[A-Z0-9]{6}-[A-Z0-9]{6}-[A-Z0-9]{5}(?:-[A-Z0-9]{5}){3} ) - \b + pattern_requirements: + min_digits: 2 min_entropy: 3.8 confidence: medium examples: diff --git a/data/rules/openai.yml b/data/rules/openai.yml index d0b01cb..b6f90e0 100644 --- a/data/rules/openai.yml +++ b/data/rules/openai.yml @@ -8,6 +8,8 @@ rules: sk-[A-Z0-9]{48} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.3 confidence: medium examples: @@ -38,6 +40,8 @@ rules: (sk-(?:proj|svcacct|None)-[A-Z0-9_-]{100,}) ) \b + pattern_requirements: + min_digits: 2 min_entropy: 4.0 confidence: medium examples: diff --git a/data/rules/openweathermap.yml b/data/rules/openweathermap.yml index c6b0251..f2b2743 100644 --- a/data/rules/openweathermap.yml +++ b/data/rules/openweathermap.yml @@ -1,13 +1,9 @@ rules: - # --------------------------------------------------------------------- - # 1. OpenWeather Map API Key (detector unchanged, new validation) - # --------------------------------------------------------------------- - name: OpenWeather Map API Key id: kingfisher.openweather.1 pattern: | (?xi) - \b - (?:pyowm|openweather|owm\b) + (?:pyowm|openweather|\bowm\b) (?:.|[\n\r]){0,64}? 
\b ( @@ -17,6 +13,8 @@ rules: [a-z0-9]{32} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.5 confidence: medium examples: diff --git a/data/rules/opsgenie.yml b/data/rules/opsgenie.yml index 012f296..4369d2f 100644 --- a/data/rules/opsgenie.yml +++ b/data/rules/opsgenie.yml @@ -12,20 +12,29 @@ rules: ( [0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12} ) + \b + pattern_requirements: + min_digits: 2 min_entropy: 3.5 examples: - opsgenie_api_key = '12345678-9abc-def0-1234-56789abcdef0' + references: + - https://docs.opsgenie.com/docs/api-overview + - https://support.atlassian.com/security-and-access-policies/docs/send-alerts-to-opsgenie/ + - https://support.atlassian.com/opsgenie/docs/european-service-region/ validation: type: Http content: request: - headers: - Authorization: GenieKey {{ TOKEN }} method: GET url: https://api.opsgenie.com/v2/alerts + headers: + Authorization: "GenieKey {{ TOKEN }}" response_matcher: - report_response: true - - type: WordMatch - words: - - "Could not authenticate" + - type: StatusMatch + status: [401, 403] negative: true + - type: WordMatch + words: ["Could not authenticate", "is not valid"] + negative: true \ No newline at end of file diff --git a/data/rules/pagerdutyapikey.yml b/data/rules/pagerdutyapikey.yml index b0a3c9b..7bdf1f0 100644 --- a/data/rules/pagerdutyapikey.yml +++ b/data/rules/pagerdutyapikey.yml @@ -3,7 +3,6 @@ rules: id: kingfisher.pagerduty.1 pattern: | (?xi) - \b (?: pd[_-]? | pagerduty[_-]? 
| @@ -19,6 +18,8 @@ rules: [a-f0-9]{32} # integration / routing key (32 hex, lower case) ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.5 confidence: medium examples: diff --git a/data/rules/particle.io.yml b/data/rules/particle.io.yml index c4fdfaf..eb8dd6b 100644 --- a/data/rules/particle.io.yml +++ b/data/rules/particle.io.yml @@ -5,9 +5,12 @@ rules: (?xi) https://api\.particle\.io/v1/[A-Z0-9_\-\s/"\\?]* (?:access_token=|Authorization:\s*Bearer\s*) + ( + [A-Z0-9]{40} + ) \b - ([A-Z0-9]{40}) - \b + pattern_requirements: + min_digits: 2 min_entropy: 3.3 confidence: medium examples: @@ -42,7 +45,6 @@ rules: pattern: | (?xi) (?:access_token=|Authorization:\s*Bearer\s*) - \b ([A-Z0-9]{40}) \b [\s"\\]*https://api\.particle\.io/v1 diff --git a/data/rules/pastebin.yml b/data/rules/pastebin.yml index 1ae302e..f19536c 100644 --- a/data/rules/pastebin.yml +++ b/data/rules/pastebin.yml @@ -13,6 +13,8 @@ rules: [A-Z0-9_]{32} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.5 confidence: medium examples: diff --git a/data/rules/paypal.yml b/data/rules/paypal.yml index ef65d2d..47fddb1 100644 --- a/data/rules/paypal.yml +++ b/data/rules/paypal.yml @@ -1,56 +1,59 @@ rules: -- name: PayPal OAuth Client ID - id: kingfisher.paypal.1 - pattern: | - (?xi) - paypal - (?:.|[\n\r]){0,8}? - (?:CLIENT|ID|USER) - (?:.|[\n\r]){0,16}? - \b - ( - A[A-Z0-9_-]{78,99} - ) - \b - min_entropy: 3.5 - visible: false - examples: - - paypal_client_id=AZJ6y8Dpr1TYbqAIdhkPzyhjXoY6mIdhkPzyhjXoY6m8GplL7C3zZ3lPrkTIdhkPzyhjXo_Dx3IdhkPzyhjXoY6m + - name: PayPal OAuth Client ID + id: kingfisher.paypal.1 + pattern: | + (?xi) + paypal + (?:.|[\n\r]){0,8}? + (?:CLIENT|ID|USER) + (?:.|[\n\r]){0,16}? 
+ \b + ( + A[A-Z0-9_-]{78,99} + ) + pattern_requirements: + min_digits: 2 + min_entropy: 3.5 + visible: false + examples: + - paypal_client_id=AZJ6y8Dpr1TYbqAIdhkPzyhjXoY6mIdhkPzyhjXoY6m8GplL7C3zZ3lPrkTIdhkPzyhjXo_Dx3IdhkPzyhjXoY6m -- name: PayPal OAuth Secret - id: kingfisher.paypal.2 - pattern: | - (?xi) - paypal - (?:.|[\n\r]){0,16}? - (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) - (?:.|[\n\r]){0,32}? - \b - ( - [A-Z0-9_.-]{78,120} - ) - \b - min_entropy: 3.5 - examples: - - paypal_secret=EP0uwUsACKVPcbDRaXFYerX2ij6nbsha71cSdynuQWoSt1pIy4qtIs7gJQRmHwKXu5Icv3g1YQZzAywf + - name: PayPal OAuth Secret + id: kingfisher.paypal.2 + pattern: | + (?xi) + paypal + (?:.|[\n\r]){0,16}? + (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) + (?:.|[\n\r]){0,32}? + \b + ( + [A-Z0-9_.-]{78,120} + ) + \b + pattern_requirements: + min_digits: 2 + min_entropy: 3.5 + examples: + - paypal_secret=EP0uwUsACKVPcbDRaXFYerX2ij6nbsha71cSdynuQWoSt1pIy4qtIs7gJQRmHwKXu5Icv3g1YQZzAywf - validation: - type: Http - content: - request: - method: POST - url: https://api-m.paypal.com/v1/oauth2/token - headers: - Accept: application/json - Accept-Language: en_US - Content-Type: application/x-www-form-urlencoded - Authorization: | - Basic {{ CLIENTID | append: ':' | append: TOKEN | b64enc }} - body: grant_type=client_credentials - response_matcher: - - report_response: true - - type: StatusMatch - status: [200] - depends_on_rule: - - rule_id: kingfisher.paypal.1 - variable: CLIENTID + validation: + type: Http + content: + request: + method: POST + url: https://api-m.paypal.com/v1/oauth2/token + headers: + Accept: application/json + Accept-Language: en_US + Content-Type: application/x-www-form-urlencoded + Authorization: | + Basic {{ CLIENTID | append: ':' | append: TOKEN | b64enc }} + body: grant_type=client_credentials + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + depends_on_rule: + - rule_id: kingfisher.paypal.1 + variable: CLIENTID diff --git a/data/rules/pem.yml 
b/data/rules/pem.yml index 00d93c6..0c0d921 100644 --- a/data/rules/pem.yml +++ b/data/rules/pem.yml @@ -8,6 +8,8 @@ rules: ( (?: [a-zA-Z0-9+/=\s"',] | \\r | \\n ) {50,} ) \s* -----END\ .{0,20}\ ?PRIVATE\ KEY\ ?.{0,20}----- + pattern_requirements: + min_digits: 4 min_entropy: 4.5 confidence: high examples: @@ -60,6 +62,8 @@ rules: [a-zA-Z0-9+/=]{50,} ) (?: [^a-zA-Z0-9+/=] | $ ) + pattern_requirements: + min_digits: 4 min_entropy: 4.5 confidence: high examples: diff --git a/data/rules/perplexity.yml b/data/rules/perplexity.yml index 6629372..c9ba476 100644 --- a/data/rules/perplexity.yml +++ b/data/rules/perplexity.yml @@ -8,6 +8,8 @@ rules: pplx-[A-Za-z0-9]{48} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.8 confidence: medium examples: diff --git a/data/rules/planetscale.yml b/data/rules/planetscale.yml index ecbee4f..859531b 100644 --- a/data/rules/planetscale.yml +++ b/data/rules/planetscale.yml @@ -8,6 +8,8 @@ rules: pscale_tkn_[a-z0-9-_]{43} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 4 examples: - pscale_tkn_abcdefghijklmnopqrstuvwxyZ1234567890_ABCDEF @@ -41,11 +43,11 @@ rules: (?:.|[\n\r]){0,16}? (?:USER|ID|NAME) (?:.|[\n\r]){0,16}? 
- \b ( [a-z0-9]{12} ) - \b + pattern_requirements: + min_digits: 2 min_entropy: 3.5 visible: false examples: diff --git a/data/rules/postman.yml b/data/rules/postman.yml index ae7ae7c..8b98b2e 100644 --- a/data/rules/postman.yml +++ b/data/rules/postman.yml @@ -8,6 +8,8 @@ rules: PMAK-[A-Z0-9]{24}-[A-Z0-9]{34} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.3 confidence: medium examples: diff --git a/data/rules/postmark.yml b/data/rules/postmark.yml index 6e31071..f48aa57 100644 --- a/data/rules/postmark.yml +++ b/data/rules/postmark.yml @@ -10,6 +10,8 @@ rules: [a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12} ) \b + pattern_requirements: + min_digits: 2 confidence: medium min_entropy: 3.0 examples: diff --git a/data/rules/prefect.yml b/data/rules/prefect.yml index c04c9cb..beba175 100644 --- a/data/rules/prefect.yml +++ b/data/rules/prefect.yml @@ -8,6 +8,8 @@ rules: pnu_[a-z0-9]{36} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.0 confidence: medium examples: diff --git a/data/rules/privkey.yml b/data/rules/privkey.yml index 7b5e551..a936514 100644 --- a/data/rules/privkey.yml +++ b/data/rules/privkey.yml @@ -22,6 +22,8 @@ rules: PRIVATE\sKEY (\sBLOCK)? ----- + pattern_requirements: + min_digits: 2 min_entropy: 4.5 confidence: high examples: @@ -73,7 +75,11 @@ rules: PRIVATE\sKEY (\sBLOCK)? 
----- - ) + ) + pattern_requirements: + min_digits: 2 + min_uppercase: 1 + min_lowercase: 1 min_entropy: 4.5 confidence: high examples: diff --git a/data/rules/psexec.yml b/data/rules/psexec.yml index 57cfd66..3cca3d3 100644 --- a/data/rules/psexec.yml +++ b/data/rules/psexec.yml @@ -8,7 +8,6 @@ rules: -p \s* (\S+) (?# password ) min_entropy: 3.3 confidence: medium - categories: [fuzzy, secret] examples: - 'cmd.exe /C PSEXEC \\10.0.94.120 -u Administrator -p dev_admin CMD /C ECHO' - 'PSEXEC.EXE \\LocalComputerIPAddress -u DOMAIN\my-user -p mypass CMD' diff --git a/data/rules/pubnub.yml b/data/rules/pubnub.yml index f29310a..f759ed7 100644 --- a/data/rules/pubnub.yml +++ b/data/rules/pubnub.yml @@ -8,6 +8,8 @@ rules: pub-c-[a-z0-9]{8}(?:-[a-z0-9]{4}){3}-[a-z0-9]{12} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.5 examples: - pub-c-12345678-1234-1234-1234-123456789012 @@ -34,6 +36,8 @@ rules: sub-c-[a-z0-9]{8}(?:-[a-z0-9]{4}){3}-[a-z0-9]{12} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.5 confidence: medium examples: diff --git a/data/rules/pulumi.yml b/data/rules/pulumi.yml index 2298d1d..81980a1 100644 --- a/data/rules/pulumi.yml +++ b/data/rules/pulumi.yml @@ -8,6 +8,8 @@ rules: pul-[a-f0-9]{40} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.3 examples: - pul-18e13e3eebebeb94eac318d421ca8ecc5ca78d5f diff --git a/data/rules/pypi.yml b/data/rules/pypi.yml index 85884ed..1b441f9 100644 --- a/data/rules/pypi.yml +++ b/data/rules/pypi.yml @@ -3,11 +3,12 @@ rules: id: kingfisher.pypi.1 pattern: | (?xi) - \b ( pypi-AgEIcHlwaS5vcmc[A-Z0-9_-]{50,} ) - (?:[^A-Z0-9_-]|$) + \b + pattern_requirements: + min_digits: 2 min_entropy: 3.3 confidence: medium examples: diff --git a/data/rules/rabbitmq.yml b/data/rules/rabbitmq.yml index 953f10e..607d5f6 100644 --- a/data/rules/rabbitmq.yml +++ b/data/rules/rabbitmq.yml @@ -3,7 +3,6 @@ rules: id: kingfisher.rabbitmq.1 pattern: | (?xi) - \b (?: amqps? 
) @@ -16,6 +15,8 @@ rules: @ [-.%\w\/:]+ \b + pattern_requirements: + min_special_chars: 1 min_entropy: 3.5 confidence: medium examples: diff --git a/data/rules/react.yml b/data/rules/react.yml index c7d5a40..de22892 100644 --- a/data/rules/react.yml +++ b/data/rules/react.yml @@ -18,9 +18,6 @@ rules: - 'REACT_APP_AUTH_USERNAME=bowie' - ' REACT_APP_AUTH_USERNAME=bowie # some comment' - 'REACT_APP_MAILER_USERNAME=smtp_username # Enter your SMTP email username' - negative_examples: - - 'REACT_APP_FRONTEND_LOGIN_FORGOT_USERNAME=$REACT_APP_MATRIX_BASE_URL/classroom/#/forgot_username' - categories: [fuzzy, identifier] references: - https://create-react-app.dev/docs/adding-custom-environment-variables/ - https://stackoverflow.com/questions/48699820/how-do-i-hide-an-api-key-in-create-react-app @@ -44,10 +41,6 @@ rules: - 'REACT_APP_AUTH_PASSWORD=whiteduke' - ' REACT_APP_AUTH_PASSWORD=whiteduke # some comment' - 'REACT_APP_MAILER_PASSWORD=smtp_password # Enter your SMTP email password' - negative_examples: - - ' const password = process.env.REACT_APP_FIREBASE_DEV_PASSWORD || "not-set"' - - 'REACT_APP_FRONTEND_LOGIN_FORGOT_PASSWORD=$REACT_APP_MATRIX_BASE_URL/classroom/#/forgot_password' - categories: [fuzzy, secret] references: - https://create-react-app.dev/docs/adding-custom-environment-variables/ - https://stackoverflow.com/questions/48699820/how-do-i-hide-an-api-key-in-create-react-app \ No newline at end of file diff --git a/data/rules/readme.yml b/data/rules/readme.yml index 0e415ec..6a4b95d 100644 --- a/data/rules/readme.yml +++ b/data/rules/readme.yml @@ -2,11 +2,14 @@ rules: - name: ReadMe API Key id: kingfisher.readme.1 pattern: | - (?x)(?i) + (?xi) \b ( rdme_(?P[a-z0-9]{70}) ) + \b + pattern_requirements: + min_digits: 2 min_entropy: 3.3 confidence: medium examples: diff --git a/data/rules/recaptcha.yml b/data/rules/recaptcha.yml index e8f62a6..0c40e91 100644 --- a/data/rules/recaptcha.yml +++ b/data/rules/recaptcha.yml @@ -9,6 +9,8 @@ rules: ( 
6l[c-f][a-z0-9_-].{36} ) + pattern_requirements: + min_digits: 3 min_entropy: 3 confidence: medium examples: diff --git a/data/rules/replicate.yml b/data/rules/replicate.yml index b5f1a47..6eb6ffd 100644 --- a/data/rules/replicate.yml +++ b/data/rules/replicate.yml @@ -2,13 +2,15 @@ rules: - name: Replicate API Token id: kingfisher.replicate.1 pattern: | - (?x) - \b + (?x) + \b ( r8_ [A-Za-z0-9]{37} ) \b + pattern_requirements: + min_digits: 3 min_entropy: 3.0 confidence: medium examples: diff --git a/data/rules/rubygems.yml b/data/rules/rubygems.yml index 93ace54..bb9487c 100644 --- a/data/rules/rubygems.yml +++ b/data/rules/rubygems.yml @@ -8,8 +8,10 @@ rules: ( rubygems_ [a-z0-9]{42,52} - ) - \b + ) + \b + pattern_requirements: + min_digits: 2 min_entropy: 3.5 confidence: medium categories: [api, secret] diff --git a/data/rules/runway.yml b/data/rules/runway.yml index 9c816b2..8e899f7 100644 --- a/data/rules/runway.yml +++ b/data/rules/runway.yml @@ -2,13 +2,15 @@ rules: - name: Runway API Key id: kingfisher.runway.1 pattern: | - (?x) + (?x) \b ( key_ [A-Fa-f0-9]{128} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 4.0 confidence: medium examples: diff --git a/data/rules/salesforce.yml b/data/rules/salesforce.yml index d532346..be3b544 100644 --- a/data/rules/salesforce.yml +++ b/data/rules/salesforce.yml @@ -2,7 +2,7 @@ rules: - name: Salesforce Access / Refresh Token id: kingfisher.salesforce.1 pattern: | - (?xi) + (?xi) \b ( 00 @@ -10,6 +10,8 @@ rules: ! [A-Z0-9._-]{90,120} ) + pattern_requirements: + min_digits: 6 min_entropy: 3.3 confidence: medium examples: diff --git a/data/rules/sauce.yml b/data/rules/sauce.yml index 6c8be24..009f33a 100644 --- a/data/rules/sauce.yml +++ b/data/rules/sauce.yml @@ -25,7 +25,6 @@ rules: id: kingfisher.saucelabs.2 pattern: | (?xi) - \b ( (?:api|ondemand)\.(?:us|eu)-(?:west|east|central)-[0-9]\.saucelabs\.com ) @@ -47,11 +46,12 @@ rules: (?:.|[\n\r]){0,32}? (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) (?:.|[\n\r]){0,32}? 
- \b ( [a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12} ) \b + pattern_requirements: + min_digits: 4 confidence: medium min_entropy: 3.0 depends_on_rule: diff --git a/data/rules/scale.yml b/data/rules/scale.yml index b0d9b90..81148f2 100644 --- a/data/rules/scale.yml +++ b/data/rules/scale.yml @@ -4,11 +4,13 @@ rules: pattern: | (?x) \b - ( # capture => TOKEN - live_ # live-mode prefix per docs - [0-9a-f]{32} # 32 lowercase hex chars + ( + live_ + [0-9a-f]{32} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.1 confidence: medium examples: @@ -50,6 +52,8 @@ rules: [0-9a-f]{32} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.1 confidence: medium examples: diff --git a/data/rules/scalingo.yml b/data/rules/scalingo.yml index ea39b5a..15351c4 100644 --- a/data/rules/scalingo.yml +++ b/data/rules/scalingo.yml @@ -7,7 +7,8 @@ rules: ( tk-us-[\w-]{48} ) - \b + pattern_requirements: + min_digits: 2 min_entropy: 3.0 confidence: medium examples: diff --git a/data/rules/segment.yml b/data/rules/segment.yml index 1a2518f..234955d 100644 --- a/data/rules/segment.yml +++ b/data/rules/segment.yml @@ -8,6 +8,8 @@ rules: sgp_[A-Z0-9_-]{60,70} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.3 confidence: medium examples: @@ -33,7 +35,6 @@ rules: id: kingfisher.segment.2 pattern: | (?xi) - \b (?:segment|sgmt) (?:.|[\n\r]){0,16}? 
(?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) @@ -42,7 +43,8 @@ rules: ( [A-Z0-9_-]{40,50}\.[A-Z0-9_-]{40,50} ) - \b + pattern_requirements: + min_digits: 2 min_entropy: 3.3 confidence: medium examples: diff --git a/data/rules/sendbird.yml b/data/rules/sendbird.yml index 9c36a46..d82ec94 100644 --- a/data/rules/sendbird.yml +++ b/data/rules/sendbird.yml @@ -12,6 +12,8 @@ rules: [0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12} ) \b + pattern_requirements: + min_digits: 2 confidence: medium visible: false min_entropy: 3.0 @@ -31,6 +33,8 @@ rules: [a-f0-9]{40} ) \b + pattern_requirements: + min_digits: 2 confidence: medium min_entropy: 4.0 examples: diff --git a/data/rules/sendgrid.yml b/data/rules/sendgrid.yml index 89145fc..96c5bff 100644 --- a/data/rules/sendgrid.yml +++ b/data/rules/sendgrid.yml @@ -11,7 +11,8 @@ rules: \. [0-9A-Z_-]{39,47} ) - \b + pattern_requirements: + min_digits: 2 min_entropy: 3.5 confidence: medium examples: diff --git a/data/rules/sendinblue.yml b/data/rules/sendinblue.yml index 9b53d07..f57a613 100644 --- a/data/rules/sendinblue.yml +++ b/data/rules/sendinblue.yml @@ -8,6 +8,8 @@ rules: xkeysib-[a-f0-9]{64}-[a-z0-9]{16} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.2 confidence: medium examples: diff --git a/data/rules/sentry.yml b/data/rules/sentry.yml index 8eebda8..a53b647 100644 --- a/data/rules/sentry.yml +++ b/data/rules/sentry.yml @@ -6,16 +6,20 @@ rules: \b sentry (?:.|[\n\r]){0,32}? + (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) + (?:.|[\n\r]){0,32}? 
\b ( [a-f0-9]{64} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.5 confidence: medium examples: - - SENTRY_TOKEN=abcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcd - - '"sentry": "1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef"' + - SENTRY_TOKEN=cbadefcbadefcbadefcbadefcbadefcbadefcbadefcbadefcbadefcbadefcbad + - '"sentry-key": "3214567890cbadef3214567890cbadef3214567890cbadef3214567890cbadef"' references: - https://docs.sentry.io/api/auth/ validation: @@ -37,15 +41,17 @@ rules: id: kingfisher.sentry.2 pattern: | (?xi) - \b ( sntrys_eyJpYXQiO[a-zA-Z0-9+/]{10,200}(?:LCJyZWdpb25fdXJs|InJlZ2lvbl91cmwi|cmVnaW9uX3VybCI6)[a-zA-Z0-9+/]{10,200}={0,2}_[a-zA-Z0-9+/]{43} ) + \b + pattern_requirements: + min_digits: 2 min_entropy: 4.2 confidence: medium examples: - - sntrys_eyJpYXQiOjE2OTA4ODAwMDAsInJlZ2lvbl91cmwiOiJodHRwczovL3NlbnRyeS5pby9vcmdzL215LW9yZy8ifQ==_abcdefghijklmnopqrstuvwx1234567890abcdefabc - - sntrys_eyJpYXQiOiIxNjkwODgwMDAwIiwicmVnaW9uX3VybCI6Imh0dHBzOi8vc2VudHJ5LmlvLyJ9_abcdABCD1234567890abcdABCD1234567890abcdABCD + - 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 + - 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 references: - https://docs.sentry.io/api/auth/ validation: @@ -72,11 +78,13 @@ rules: sntryu_[a-f0-9]{64} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.5 confidence: medium examples: - - sntryu_abcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcdefabcd - - SNTRY_USER="sntryu_1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef" + - 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 + - SNTRY_USER="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" references: - https://docs.sentry.io/api/auth/ validation: diff --git a/data/rules/shippo.yml b/data/rules/shippo.yml 
index cd769e2..e1bae07 100644 --- a/data/rules/shippo.yml +++ b/data/rules/shippo.yml @@ -8,6 +8,8 @@ rules: shippo_(?:live|test)_[a-f0-9]{40} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.3 confidence: medium examples: diff --git a/data/rules/shodan.yml b/data/rules/shodan.yml index 99ab150..76f946c 100644 --- a/data/rules/shodan.yml +++ b/data/rules/shodan.yml @@ -13,6 +13,8 @@ rules: [A-Z0-9]{32} ) \b + pattern_requirements: + min_digits: 2 confidence: medium min_entropy: 4.0 validation: diff --git a/data/rules/shopify.yml b/data/rules/shopify.yml index da340e5..bb92ea4 100644 --- a/data/rules/shopify.yml +++ b/data/rules/shopify.yml @@ -8,6 +8,8 @@ rules: (?:shpat|shpca|shppa|shpss)_[a-f0-9]{30,34} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.5 examples: - 'shopify_app_secret: "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"' diff --git a/data/rules/slack.yml b/data/rules/slack.yml index 9544d10..0e61776 100644 --- a/data/rules/slack.yml +++ b/data/rules/slack.yml @@ -15,6 +15,8 @@ rules: [0-9a-z]{10,66} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.5 examples: - xapp-1-A05V64V7F2B-5062360157732-9f01726eebe77df2c096a65e95acdd02107b2c1e92ca341cff27ca271b7251b4 @@ -55,7 +57,10 @@ rules: ( xoxe-\d- [A-Z0-9]{140,150} - )\b + ) + \b + pattern_requirements: + min_digits: 2 min_entropy: 3.5 examples: - xoxb-853BAAEE-1B2eDb6A4c75-01bB6Da1CE3E98f6fED5AeC07Dc3E94C @@ -89,7 +94,8 @@ rules: B[a-z0-9_-]{8,12}/ [a-z0-9_-]{20,30} ) - \b + pattern_requirements: + min_digits: 2 min_entropy: 3.3 examples: - https://hooks.slack.com/services/TY40v9sZ9/BxIqhIXIi/NGUyXK6nK7HMAqd0ASzXluoV diff --git a/data/rules/snyk.yml b/data/rules/snyk.yml index e6237e3..3ba783f 100644 --- a/data/rules/snyk.yml +++ b/data/rules/snyk.yml @@ -8,10 +8,12 @@ rules: (?:.|[\n\r]){0,32}? (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) (?:.|[\n\r]){0,32}? 
- \b ( [A-Z0-9]{8}-(?:[A-Z0-9]{4}-){3}[A-Z0-9]{12} ) + \b + pattern_requirements: + min_digits: 2 min_entropy: 3.5 examples: - snyk_token = 123e4567-e89b-12d3-a456-426614174000 diff --git a/data/rules/sonarcloud.yml b/data/rules/sonarcloud.yml index f0b874d..c3307e9 100644 --- a/data/rules/sonarcloud.yml +++ b/data/rules/sonarcloud.yml @@ -13,6 +13,8 @@ rules: [0-9a-z]{40} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 2.5 examples: - sonar_api_token=abcdef0123456789abcdef0123456789abcdef23 diff --git a/data/rules/sonarqube.yml b/data/rules/sonarqube.yml index ceddbee..63be8fe 100644 --- a/data/rules/sonarqube.yml +++ b/data/rules/sonarqube.yml @@ -8,6 +8,9 @@ rules: (?:sq[pua]) _[a-z0-9]{40} ) + \b + pattern_requirements: + min_digits: 2 min_entropy: 3.5 examples: - sonar.login=sqp_4b78f8494075e310d62dfdcaeb14be2c78fca2fc @@ -35,11 +38,9 @@ rules: (?xi) sonar.{0,8}host (?:.|[\n\r]){0,64}? - \b ( https?://.*?:\d{2,6} ) - \b min_entropy: 3.5 visible: false examples: diff --git a/data/rules/sourcegraph.yml b/data/rules/sourcegraph.yml index 3aed539..cbda95d 100644 --- a/data/rules/sourcegraph.yml +++ b/data/rules/sourcegraph.yml @@ -4,8 +4,12 @@ rules: pattern: | (?xi) \b - sgp_(?:[a-f0-9]{16}_local_)?[a-f0-9]{40} + ( + sgp_(?:[a-f0-9]{16}_local_)?[a-f0-9]{40} + ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.3 examples: - sgp_210f1131b08e93adcfc3f05faa2d768ff883a61f @@ -41,6 +45,8 @@ rules: (?:sgp_(?:[a-f0-9]{16}_local_)?[a-f0-9]{40}|[a-f0-9]{40}) ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.5 confidence: medium examples: diff --git a/data/rules/square.yml b/data/rules/square.yml index d3fd851..24a635a 100644 --- a/data/rules/square.yml +++ b/data/rules/square.yml @@ -10,10 +10,13 @@ rules: ( EAAA[a-z0-9\-\+=]{60} ) + \b + pattern_requirements: + min_digits: 2 min_entropy: 3.3 confidence: medium examples: - - square EAAA7h9fL9zQJR8P0eAioAf9239345rDA2349bQ8edUA9FgA5JojdsF3A9f6nKLmn + - square 
EAAA7h9fL9zQJR8P0eAioAf9239345rDA2349bQ8edUA9FgA5JojdsF3A9f6nKLm - square EAAAvlYh9H7dZwC9ash2hrHjtlL5D2srERGK5OM6F2nvle23he3NzA60PAeFXNHj validation: type: Http @@ -33,7 +36,15 @@ rules: - name: Square Access Token id: kingfisher.square.2 - pattern: '(?i)\b(sq0atp-[a-z0-9_-]{22})\b' + pattern: | + (?xi) + \b + ( + sq0atp-[a-z0-9_-]{22} + ) + \b + pattern_requirements: + min_digits: 2 min_entropy: 3.3 confidence: medium examples: @@ -57,7 +68,15 @@ rules: - name: Square OAuth Secret id: kingfisher.square.3 - pattern: '(?i)\b(sq0csp-[a-z0-9_-]{43})\b' + pattern: | + (?xi) + \b + ( + sq0csp-[a-z0-9_-]{43} + ) + \b + pattern_requirements: + min_digits: 2 min_entropy: 3.3 confidence: medium examples: diff --git a/data/rules/stabilityai.yml b/data/rules/stabilityai.yml index 1ac8fd1..8848ce2 100644 --- a/data/rules/stabilityai.yml +++ b/data/rules/stabilityai.yml @@ -2,13 +2,15 @@ rules: - name: Stability AI API Key id: kingfisher.stabilityai.1 pattern: | - (?x) + (?x) \b ( sk- [A-Za-z0-9]{48} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 4.0 confidence: medium examples: diff --git a/data/rules/stackhawk.yml b/data/rules/stackhawk.yml index b8fec69..d001e40 100644 --- a/data/rules/stackhawk.yml +++ b/data/rules/stackhawk.yml @@ -8,6 +8,8 @@ rules: hawk\.[0-9A-Z_-]{20}\.[0-9A-Z_-]{20} ) \b + pattern_requirements: + min_digits: 2 confidence: medium min_entropy: 3.0 examples: diff --git a/data/rules/stripe.yml b/data/rules/stripe.yml index faaa1ab..9d48341 100644 --- a/data/rules/stripe.yml +++ b/data/rules/stripe.yml @@ -11,7 +11,11 @@ rules: ( pk_live_ (?:[0-9A-Z]{6}){4,30} - ) + ) + pattern_requirements: + min_digits: 2 + min_uppercase: 1 + min_lowercase: 1 min_entropy: 3.3 confidence: medium categories: [api, key] @@ -33,6 +37,8 @@ rules: )_live_ (?:[0-9A-Z]{8}){3,25} ) + pattern_requirements: + min_digits: 2 min_entropy: 3.3 confidence: medium examples: diff --git a/data/rules/supabase.yml b/data/rules/supabase.yml index d297514..b2d7c32 100644 --- 
a/data/rules/supabase.yml +++ b/data/rules/supabase.yml @@ -8,6 +8,8 @@ rules: sbp_[a-z0-9_-]{40} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.5 confidence: medium examples: @@ -37,8 +39,10 @@ rules: sb_secret_[a-z0-9_-]{31} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 4.0 - confidence: high + confidence: medium validation: type: Http content: @@ -67,9 +71,11 @@ rules: id: kingfisher.supabase.3 pattern: (?xi) + \b ( https:\/\/[a-z0-9]{16,32}\.supabase\.co ) + \b confidence: medium min_entropy: 3.0 visible: false diff --git a/data/rules/tailscale.yml b/data/rules/tailscale.yml index 3544b1f..08fa9eb 100644 --- a/data/rules/tailscale.yml +++ b/data/rules/tailscale.yml @@ -8,6 +8,8 @@ rules: tskey-[a-z]{3,10}-[A-Z0-9_-]{20,36} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.0 confidence: medium examples: diff --git a/data/rules/tavily.yml b/data/rules/tavily.yml index a908a21..1775eb4 100644 --- a/data/rules/tavily.yml +++ b/data/rules/tavily.yml @@ -9,6 +9,8 @@ rules: tvly-[a-zA-Z0-9]{32} ) \b + pattern_requirements: + min_digits: 2 confidence: medium min_entropy: 3.0 references: diff --git a/data/rules/teamcity.yml b/data/rules/teamcity.yml index c3bf68b..b475c32 100644 --- a/data/rules/teamcity.yml +++ b/data/rules/teamcity.yml @@ -12,6 +12,8 @@ rules: \. [A-Za-z0-9_-]{48} ) + pattern_requirements: + min_digits: 2 examples: - '' references: diff --git a/data/rules/telegram.yml b/data/rules/telegram.yml index 9b6dc98..3f628a6 100644 --- a/data/rules/telegram.yml +++ b/data/rules/telegram.yml @@ -4,12 +4,16 @@ rules: pattern: | (?xi) \b + (?:telegram|tgram:) + (?:.|[\n\r]){0,32}? 
( - [0-9]{8,10} + [0-9]{7,10} : [A-Z0-9_-]{35} ) \b + pattern_requirements: + min_digits: 2 confidence: medium min_entropy: 3.5 validation: @@ -25,6 +29,6 @@ rules: words: - '"ok":true' examples: - - "110201543:AAHdqTcvCH1vGWJxfSeofSAs0K5PALDsawd" - - "508627689:AAEuLPKs-EhrjrYGnz60bnYNZqakf6HJxc0" - - "3628091811:BAG9RuJiqgOGIfFbOPBpAo6QhIJoD9mCdDs" + - "tgram://110201543:AAHdqTcvCH1vGWJxfSeofSAs0K5PALDsawd" + - "telegram: 508627689:AAEuLPKs-EhrjrYGnz60bnYNZqakf6HJxc0" + - "telegram token is 3628091811:BAG9RuJiqgOGIfFbOPBpAo6QhIJoD9mCdDs" \ No newline at end of file diff --git a/data/rules/thingsboard.yml b/data/rules/thingsboard.yml index 562ee25..ce5ed78 100644 --- a/data/rules/thingsboard.yml +++ b/data/rules/thingsboard.yml @@ -3,8 +3,14 @@ rules: id: kingfisher.thingsboard.1 pattern: | (?x) + \b thingsboard\.cloud/api/v1/ - ([a-z0-9]{20}) + ( + [a-z0-9]{20} + ) + \b + pattern_requirements: + min_digits: 2 confidence: medium min_entropy: 3.0 examples: @@ -20,8 +26,10 @@ rules: pattern: | (?x) "provisionDeviceKey"\s*:\s*" - ([a-z0-9]{20}) - " + ( + [a-z0-9]{20} + ) + \b confidence: medium min_entropy: 3.0 examples: @@ -34,8 +42,10 @@ rules: pattern: | (?x) "provisionDeviceSecret"\s*:\s*" - ([a-z0-9]{20}) - " + ( + [a-z0-9]{20} + ) + \b confidence: medium min_entropy: 3.0 examples: diff --git a/data/rules/togetherai.yml b/data/rules/togetherai.yml index ee43097..e3d9b04 100644 --- a/data/rules/togetherai.yml +++ b/data/rules/togetherai.yml @@ -7,6 +7,9 @@ rules: ( tgp_v1_[A-Z0-9_-]{43} ) + \b + pattern_requirements: + min_digits: 2 confidence: medium min_entropy: 3.0 examples: diff --git a/data/rules/travisci.yml b/data/rules/travisci.yml index 73e75c4..1d4b188 100644 --- a/data/rules/travisci.yml +++ b/data/rules/travisci.yml @@ -13,6 +13,8 @@ rules: [A-Z-_0-9]{22} ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.0 confidence: medium examples: @@ -35,10 +37,8 @@ rules: id: kingfisher.travisci.2 pattern: | (?xis) - \b 
(?:language|env|deploy|script):[\r\n] (?:.|[\\n\r]){0,256}? - \b ( secure:\s*"?[A-Za-z0-9+/=\\]+"?\s* ) diff --git a/data/rules/truenas.yml b/data/rules/truenas.yml index a0a6d96..57ffcff 100644 --- a/data/rules/truenas.yml +++ b/data/rules/truenas.yml @@ -6,6 +6,8 @@ rules: "params"\s*:\s*\[\s*" (\d+-[a-zA-Z0-9]{64}) "\s*\] + pattern_requirements: + min_digits: 2 min_entropy: 3.3 confidence: medium examples: @@ -36,6 +38,8 @@ rules: Bearer\s* (\d+-[a-zA-Z0-9]{64}) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.3 confidence: medium examples: diff --git a/data/rules/twilio.yml b/data/rules/twilio.yml index b3b2d32..14c5b0e 100644 --- a/data/rules/twilio.yml +++ b/data/rules/twilio.yml @@ -8,6 +8,9 @@ rules: (?:SK|AC)[A-F0-9]{32} ) \b + pattern_requirements: + min_digits: 2 + visible: false min_entropy: 3.5 examples: - | @@ -31,7 +34,10 @@ rules: ( [a-z0-9]{32} ) - \b + pattern_requirements: + min_digits: 2 + min_uppercase: 1 + min_lowercase: 1 min_entropy: 3.0 examples: - Twilio_key=Cd2Bd1dE1201aE2DFFEcfeBafCc3c31D diff --git a/data/rules/twitch.yml b/data/rules/twitch.yml index c17c225..d9ecc55 100644 --- a/data/rules/twitch.yml +++ b/data/rules/twitch.yml @@ -3,18 +3,22 @@ rules: id: kingfisher.twitch.1 pattern: | (?xi) - \b twitch (?:.|[\n\r]){0,32}? 
+ \b ( [a-z0-9]{30} ) \b + pattern_requirements: + min_digits: 2 + min_uppercase: 1 + min_lowercase: 1 min_entropy: 3.5 confidence: medium examples: - TWITCH_TOKEN=abcdefghijklmnopqrstuvwx123456 - - "twitch_api_token: '0123456789abcdefghijklmnopqrstuv'" + - "twitch_api_token: '0123456789abcdefghijklmnopqrst'" references: - https://dev.twitch.tv/docs/authentication/validate-tokens/ validation: diff --git a/data/rules/twitter.yml b/data/rules/twitter.yml index e75b48e..941d48d 100644 --- a/data/rules/twitter.yml +++ b/data/rules/twitter.yml @@ -13,6 +13,10 @@ rules: A{10,}[A-Za-z0-9_\-]{40,200} ) \b + pattern_requirements: + min_digits: 2 + min_uppercase: 1 + min_lowercase: 1 min_entropy: 4.0 confidence: medium examples: @@ -45,11 +49,9 @@ rules: (?:.|[\n\r]){0,32}? (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) (?:.|[\n\r]){0,32}? - \b ( [A-Z0-9]{25} ) - \b min_entropy: 3.5 visible: false examples: @@ -63,11 +65,9 @@ rules: (?:.|[\n\r]){0,32}? (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) (?:.|[\n\r]){0,32}? - \b ( [A-Z0-9]{50} ) - \b min_entropy: 4.5 examples: - "TWITTER_SECRET=ZGwXeK2DNCqv49Z9ofwYdqlBgeoHDyh8uoAgHju6OeYC7wTQJq" diff --git a/data/rules/typeform.yml b/data/rules/typeform.yml index a91578d..cfbc3ee 100644 --- a/data/rules/typeform.yml +++ b/data/rules/typeform.yml @@ -9,6 +9,8 @@ rules: ( tfp_[a-z0-9_\-=\.]{59} ) + pattern_requirements: + min_digits: 2 min_entropy: 4.0 confidence: medium examples: diff --git a/data/rules/uri.yml b/data/rules/uri.yml index e5542be..04c8057 100644 --- a/data/rules/uri.yml +++ b/data/rules/uri.yml @@ -3,17 +3,19 @@ rules: id: kingfisher.uri.1 pattern: | (?xi) - (https?):// - [a-z][a-z0-9+\-.]* - : - [a-z0-9\-._~%!$&'()*,;=]+ - @ - (?:[a-z0-9\-._~%]+|\[[a-f0-9:.]+\]|\[v[a-f0-9][a-z0-9\-._~%!$&'()*,;=:]+\]) - (:?[0-9]+)? - (?:/[a-z0-9\-._~%!$&'()*,;=:@]*)* # Match path - /? - (?:\?[a-z0-9\-._~%!$&'()*,;=:@/?]*)? - (?:\#[a-z0-9\-._~%!$&'()*,;=:@/?]*)? 
+ ( + (?:https?):// + [A-Za-z](?:[A-Za-z0-9+\-._~!$&'()*+,;=]|%[0-9A-Fa-f]{2})* + : + (?:[A-Za-z0-9\-._~!$&'()*+,;=]|%[0-9A-Fa-f]{2})+ + @ + (?:[A-Za-z0-9\-._~%]+|\[[A-Fa-f0-9:.]+\]|\[v[A-Fa-f0-9][A-Za-z0-9\-._~%!$&'()*,;=:]+\]) + (:?[0-9]+)? + (?:/[A-Za-z0-9\-._~%!$&'()*,;=:@%]*)* + /? + (?:\?[A-Za-z0-9\-._~%!$&'()*,;=:@/?%]*)? + (?:\#[A-Za-z0-9\-._~%!$&'()*,;=:@/?%]*)? + ) min_entropy: 4.0 confidence: medium examples: diff --git a/data/rules/vercel.yml b/data/rules/vercel.yml index 121b5fe..3ace650 100644 --- a/data/rules/vercel.yml +++ b/data/rules/vercel.yml @@ -11,6 +11,10 @@ rules: [A-Z0-9]{24} ) \b + pattern_requirements: + min_digits: 6 + min_uppercase: 1 + min_lowercase: 1 confidence: medium min_entropy: 3.5 validation: diff --git a/data/rules/vmware.yml b/data/rules/vmware.yml index 9332a37..c623ebf 100644 --- a/data/rules/vmware.yml +++ b/data/rules/vmware.yml @@ -8,7 +8,6 @@ rules: -User \s+ (\S{3,30}) \s+ (?# username ) .{0,50} -Password \s+ (\S{3,30}) (?# password ) - examples: - 'Connect-VIServer -Server 192.168.1.51 -User administrator@vSphere.local -Password VMware1!' - | diff --git a/data/rules/weightsandbiases.yml b/data/rules/weightsandbiases.yml index bed9ca5..88bb37b 100644 --- a/data/rules/weightsandbiases.yml +++ b/data/rules/weightsandbiases.yml @@ -3,13 +3,15 @@ rules: id: kingfisher.wandb.1 pattern: | (?xi) - \b (?:wandb|weightsandbiases) (?:.|[\n\r]){0,16}? 
+ \b ( [a-f0-9]{40} ) \b + pattern_requirements: + min_digits: 2 confidence: medium min_entropy: 3.5 examples: diff --git a/data/rules/wireguard.yml b/data/rules/wireguard.yml index 70aa1ae..47223c1 100644 --- a/data/rules/wireguard.yml +++ b/data/rules/wireguard.yml @@ -2,6 +2,8 @@ rules: - name: WireGuard Private Key id: kingfisher.wireguard.1 pattern: PrivateKey\s*=\s*([A-Za-z0-9+/]{43}=) + pattern_requirements: + min_digits: 2 min_entropy: 3.3 confidence: medium examples: diff --git a/data/rules/xAI.yml b/data/rules/xAI.yml index 398ac03..67a04ae 100644 --- a/data/rules/xAI.yml +++ b/data/rules/xAI.yml @@ -6,8 +6,10 @@ rules: \b ( xai-[A-Za-z0-9_-]{70,120} - ) + ) \b + pattern_requirements: + min_digits: 2 min_entropy: 3.8 confidence: medium examples: diff --git a/data/rules/zhipu.yml b/data/rules/zhipu.yml index bc1bffd..3d7ddab 100644 --- a/data/rules/zhipu.yml +++ b/data/rules/zhipu.yml @@ -10,6 +10,8 @@ rules: [A-Z0-9]{16} ) \b + pattern_requirements: + min_digits: 2 confidence: medium min_entropy: 4.0 examples: diff --git a/data/rules/zuplo.yml b/data/rules/zuplo.yml new file mode 100644 index 0000000..bbfdb7b --- /dev/null +++ b/data/rules/zuplo.yml @@ -0,0 +1,36 @@ +rules: + - name: Zuplo API Key + id: kingfisher.zuplo.1 + pattern: | + (?xi) + \b + ( + zpka_(?P<body>[a-z0-9]{32})_(?P<checksum>[0-9a-f]{8}) + ) + pattern_requirements: + checksum: + actual: + template: "{{ CHECKSUM | downcase }}" + requires_capture: checksum + expected: "{{ BODY | crc32_hex }}" + min_entropy: 3.3 + confidence: medium + examples: + - zpka_3e6c4f7d39954ca29353b7ab88589b64_de26cd55 + - zpka_b3f94d8d3d4d4a6ea5c5b20d0a5bb407_18eb262b + references: + - https://zuplo.com/blog/api-key-authentication + validation: + type: Http + content: + request: + headers: + authorization: "Bearer {{ TOKEN }}" + x-api-key: "{{ TOKEN }}" + method: GET + response_matcher: + - report_response: true + - status: + - 200 + type: StatusMatch + url: https://dev.zuplo.com/v1/who-am-i \ No newline at end of file diff
--git a/docs/RULES.md b/docs/RULES.md index 5a2fbfc..4a3dd8d 100644 --- a/docs/RULES.md +++ b/docs/RULES.md @@ -38,6 +38,15 @@ rules: - rule_id: kingfisher.aws.id variable: AKID # referenced as {{ AKID }} + pattern_requirements: # (optional) character/word requirements + min_digits: 1 # require at least 1 digit + min_uppercase: 1 # require at least 1 uppercase letter + min_lowercase: 1 # require at least 1 lowercase letter + min_special_chars: 1 # require at least 1 special character + special_chars: "!@#$%^&*()" # optional: custom special character set + ignore_if_contains: # optional: drop matches containing these words + - test + validation: # (optional) live validation type: Http content: @@ -61,17 +70,18 @@ rules: - type: JsonValid ``` -| Field | What it does | -| ----------------- | -------------------------------------------------------------------- | -| name | Friendly name shown in reports | -| id | Unique text ID (namespace.v#) used internally | -| pattern | Regex used to spot secrets (free‑spacing & flags allowed) | -| min_entropy | Threshold to guard against low‑complexity false positives | -| confidence | Suggests severity: low → high | -| examples | Good matches; used for testing | -| visible | false to hide non‑secret captures (e.g. IDs) | -| depends_on_rule | Chain rules: use captures from one rule in another’s validation | -| validation | Configure HTTP, AWS, GCP, etc. checks to verify live validity | +| Field | What it does | +| ----------------------- | -------------------------------------------------------------------- | +| name | Friendly name shown in reports | +| id | Unique text ID (namespace.v#) used internally | +| pattern | Regex used to spot secrets (free‑spacing & flags allowed) | +| min_entropy | Threshold to guard against low‑complexity false positives | +| confidence | Suggests severity: low → high | +| examples | Good matches; used for testing | +| visible | false to hide non‑secret captures (e.g.
IDs) | +| depends_on_rule | Chain rules: use captures from one rule in another's validation | +| pattern_requirements | Require character types and/or exclude placeholder words from matches | +| validation | Configure HTTP, AWS, GCP, etc. checks to verify live validity | *responser_matcher* variants. Multiple can be used @@ -107,12 +117,19 @@ Below is the complete list of Liquid filters available in Kingfisher, along with | --------------------- | -------------------------------------------- | -------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------- | | `b64enc` | – | Base64-encodes the input using the standard alphabet. | `{{ TOKEN \| b64enc }}` | | `b64url_enc` | – | URL-safe Base64 (no padding). Useful for JWT headers & payloads. | `{{ TOKEN \| b64url_enc }}` | -| `b64dec` | – | Decodes a Base64 string. | `{{ "aGVsbG8=" \| b64dec }}` | +| `b64dec` | – | Decodes a Base64 string. | `{{ "aGVsbG8=" \| b64dec }}` | | `sha256` | – | Computes the SHA-256 hex digest of the input. | `{{ TOKEN \| sha256 }}` | +| `crc32` | – | Computes the CRC32 checksum of the input and returns a decimal value. | `{{ TOKEN \| crc32 }}` | +| `crc32_dec` | `digits` (integer, optional) | Computes the CRC32 checksum and returns the last `digits` decimal characters (zero-padded). Defaults to the full value when omitted. | `{{ TOKEN \| crc32_dec: 6 }}` | +| `crc32_hex` | `digits` (integer, optional) | Computes the CRC32 checksum and returns the last `digits` hexadecimal characters (zero-padded). Defaults to the full value when omitted. | `{{ TOKEN \| crc32_hex: 8 }}` | +| `crc32_le_b64` | `len` (integer, optional) | Computes the CRC32 checksum, encodes the little-endian bytes using Base64, and optionally truncates to the first `len` characters.
| `{{ TOKEN \| crc32_le_b64: 6 }}` | | `hmac_sha1` | `key` (string) | Computes HMAC-SHA1 over the input, returns Base64-encoded result. | `{{ TOKEN \| hmac_sha1: "secret-key" }}` | | `hmac_sha256` | `key` (string) | Computes HMAC-SHA256 over the input, returns Base64-encoded result. | `{{ TOKEN \| hmac_sha256: "secret-key" }}` | | `hmac_sha384` | `key` (string) | Computes HMAC-SHA384 over the input, returns Base64-encoded result. | `{{ TOKEN \| hmac_sha384: "secret-key" }}` | | `random_string` | `len` (integer, optional) | Generates a cryptographically-secure random alphanumeric string of the specified length (default: 32). | `{{ "" \| random_string: 16 }}` | +| `prefix` | `len` (integer, optional) | Returns the first `len` characters from the string (default: full). | `{{ TOKEN \| prefix: 6 }}` | +| `suffix` | `len` (integer, optional) | Returns the last `len` characters from the string (default: full). | `{{ TOKEN \| suffix: 6 }}` | +| `base62` | `width` (integer, optional) | Encodes the input number as Base62, left-padding with zeros as needed. | `{{ TOKEN \| crc32 \| base62: 6 }}` | | `url_encode` | – | Percent-encodes the input according to RFC 3986. | `{{ TOKEN \| url_encode }}` | | `json_escape` | – | Escapes special characters so a string can be safely injected into JSON contexts. | `{{ TOKEN \| json_escape }}` | | `unix_timestamp` | – | Returns the current Unix epoch time in seconds (UTC). | `{{ "" \| unix_timestamp }}` | @@ -237,6 +254,138 @@ For example, a rule might match a username, an email address, an AWS Access Key `visible: false` helps keep the scan output focused on actual secrets while still capturing important contextual data needed for comprehensive validation. +## Character Requirements + +The `pattern_requirements` field allows you to specify character-type requirements for matched secrets.
This is particularly useful when: + +- Your regex pattern must be permissive (due to Hyperscan limitations) +- You want to enforce password complexity requirements +- You need to filter out low-quality matches that lack certain character types + +Kingfisher's regex engine (Hyperscan) does not support lookahead assertions like `(?=.*\d)` to require specific character types. Instead, use the `pattern_requirements` field to filter matches post-detection. + +### Available Requirements + +```yaml +pattern_requirements: + min_digits: 1 # Require at least 1 digit (0-9) + min_uppercase: 1 # Require at least 1 uppercase letter (A-Z) + min_lowercase: 1 # Require at least 1 lowercase letter (a-z) + min_special_chars: 1 # Require at least 1 special character + special_chars: "!@#$%^&*" # Optional: define which characters are "special" + ignore_if_contains: # Optional: reject matches containing any of these (case-insensitive) + - test + - demo + checksum: # Optional: compare rendered values to drop mismatched formats + actual: + template: "{{ MATCH | suffix: 6 }}" # Liquid template for the observed checksum + requires_capture: checksum # (optional) skip unless this capture is present + expected: "{{ BODY | crc32 | base62: 6 }}" # Liquid template to render the expected checksum + skip_if_missing: true # (optional) treat missing captures as legacy tokens +``` + +All fields are optional. If `special_chars` is not specified, the default set includes: `!@#$%^&*()_+-=[]{}|;:'",.<>?/\`~` + +`ignore_if_contains` performs a case-insensitive substring check. If any entry (after trimming whitespace) appears within the match, the match is discarded. This is helpful for dropping known dummy tokens such as "test" or "demo" that otherwise satisfy the regex. + +The optional `checksum` block renders Liquid templates against the match to determine whether the captured checksum matches your expectation. 
Both templates gain access to `{{ MATCH }}`, `{{ FULL_MATCH }}`, and every named capture in two forms: the original capture name and its uppercase alias (e.g. `{{ body }}` and `{{ BODY }}`). Use helper filters like `suffix`, `crc32`, and `base62` to mirror provider-specific checksum pipelines. If a required capture is missing or the rendered values differ, Kingfisher skips the finding—logging the reason, including checksum lengths, at the `DEBUG` level. Set `skip_if_missing` to `true` to treat absent captures as legacy matches. + +When any of these filters remove a match it is logged at the `DEBUG` level so you can see exactly why the skip occurred. If you need to keep every match even when an `ignore_if_contains` substring appears, pass `--no-ignore-if-contains` to `kingfisher scan`. The flag disables this post-processing step without changing the rule definitions. + +### Are `requires_capture` and `skip_if_missing` equivalent? + +`requires_capture` + - Optional field that names a specific regex capture that must be present before the checksum templates are evaluated. + - In the engine, Kingfisher checks whether that capture exists in the match context. If it’s missing, the behavior falls back to whatever `skip_if_missing` dictates (fail or treat as a legacy match). + +`skip_if_missing` + - Boolean switch that controls what happens when Kingfisher can’t render the checksum—because there’s no match context or a required capture is absent. + - `true`: silently skip (pass) the match so legacy, non-checksum tokens are still accepted. + - `false`: treat the situation as a validation failure. + +In short, `requires_capture` identifies which capture must exist, while `skip_if_missing` determines whether missing data is a hard failure or an allowed legacy case. + +### Example: Secure API Key + +```yaml +rules: + - name: Secure API Key + id: custom.secure_api.1 + pattern: | + (?xi) + api[_-]?key + (?:.|[\n\r]){0,32}?
+ \b + ([A-Za-z0-9!@#$%^&*]{20,}) + \b + min_entropy: 4.0 + confidence: high + pattern_requirements: + min_digits: 1 # Must contain at least 1 digit + min_uppercase: 1 # Must contain at least 1 uppercase letter + min_lowercase: 1 # Must contain at least 1 lowercase letter + min_special_chars: 1 # Must contain at least 1 special character + ignore_if_contains: + - test + examples: + - api_key = "MyS3cur3K3y!2024xQ7pLzWd" + - 'api-key: "Abc123!@#TokenQw7Zx9Lp"' +``` + +In this example: +- The regex pattern is permissive: `[A-Za-z0-9!@#$%^&*]{20,}` matches any combination of those characters +- The `pattern_requirements` filters out matches that don't have at least one of each required type +- A match like `"abcdefghijklmnopqrst"` would be rejected (no uppercase, no digit, no special) +- A match like `"Abc123!SecureTokenXy"` would be accepted (has all required types) +- A match like `"Test123!SecureToken9"` would be rejected because it contains the `ignore_if_contains` term `test` + +### Example: Excluding Dummy Values + +```yaml +rules: + - name: Token without placeholders + id: custom.token.2 + pattern: |- + (?i)token[:=]\s*([A-Za-z0-9]{12,}) + pattern_requirements: + ignore_if_contains: + - placeholder + - sample + examples: + - "token=REALVALUE1234" + negative_examples: + - "token=SAMPLETOKEN9999" # dropped by ignore_if_contains +``` + +### Example: Custom Special Characters + +```yaml +rules: + - name: Token with Custom Special Chars + id: custom.token.1 + pattern: | + (?xi) + token + (?:.|[\n\r]){0,16}? + \b([A-Za-z0-9$%^]{16,})\b + min_entropy: 3.5 + confidence: medium + pattern_requirements: + min_special_chars: 2 + special_chars: "$%^" # Only these characters count as "special" + examples: + - token = "abc$%defgh123456" +``` + +### How It Works + +1. Hyperscan regex matches a pattern in the input +2. Entropy check filters low-complexity matches (if `min_entropy` is set) +3. **Character requirements check filters matches that don't meet the criteria** +4.
Validation checks verify the secret is live (if `validation` is configured) + +Matches that fail the character requirements check are silently dropped with a debug log message. + ## Writing Custom Rules diff --git a/src/baseline.rs b/src/baseline.rs index 6f3dab5..68f5d93 100644 --- a/src/baseline.rs +++ b/src/baseline.rs @@ -147,6 +147,7 @@ mod tests { references: vec![], validation: None, depends_on_rule: vec![], + pattern_requirements: None, })) } diff --git a/src/cli/commands/scan.rs b/src/cli/commands/scan.rs index 8deb595..f30d028 100644 --- a/src/cli/commands/scan.rs +++ b/src/cli/commands/scan.rs @@ -144,6 +144,10 @@ pub struct ScanArgs { /// Disable inline ignore directives entirely #[arg(long = "no-ignore", default_value_t = false)] pub no_inline_ignore: bool, + + /// Disable rule-level `ignore_if_contains` filtering for pattern requirements + #[arg(long = "no-ignore-if-contains", default_value_t = false)] + pub no_ignore_if_contains: bool, } /// Confidence levels for findings @@ -416,6 +420,10 @@ impl ScanCommandArgs { self.scan_args.input_specifier_args.emit_deprecated_warnings(); } + if self.scan_args.manage_baseline { + self.scan_args.no_dedup = true; + } + Ok(ScanOperation::Scan(self.scan_args)) } } diff --git a/src/decompress.rs b/src/decompress.rs index 46ae5d5..bef07c6 100644 --- a/src/decompress.rs +++ b/src/decompress.rs @@ -340,7 +340,7 @@ mod tests { fn smoke_decompress_tar_gz_archive() -> anyhow::Result<()> { let dir = tempdir()?; let tar_gz = dir.path().join("payload.tar.gz"); - let github_pat = "ghp_1wuHFikBKQtCcH3EB2FBUkyn8krXhP2qLqPa"; // this is not a real secret + let github_pat = "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"; // this is not a real secret // build payload.tar.gz containing secret.txt { @@ -393,7 +393,7 @@ mod tests { fn smoke_decompress_without_extract_archives() -> anyhow::Result<()> { let dir = tempdir()?; let tar_gz = dir.path().join("payload.tar.gz"); - let github_pat = "ghp_1wuHFikBKQtCcH3EB2FBUkyn8krXhP2qLqPa"; + let 
github_pat = "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"; // ── build payload.tar.gz containing secret.txt ────────────────────────────── { diff --git a/src/findings_store.rs b/src/findings_store.rs index 23d8c7b..6148fc6 100644 --- a/src/findings_store.rs +++ b/src/findings_store.rs @@ -147,12 +147,23 @@ impl FindingsStore { β”‚ 1. Optional duplicate filter (unchanged) β”‚ └───────────────────────────────────────────────────────────────*/ if dedup { + // Prefer the full unnamed match (index 0). Fall back to a named TOKEN capture + // before using whatever capture is available. let snippet = m .groups .captures - .get(1) - .or_else(|| m.groups.captures.get(0)) - .map_or("", |c| c.value); + .iter() + .find(|c| c.name.is_none() && c.match_number == 0) + .map(|c| c.value) + .or_else(|| { + m.groups + .captures + .iter() + .find(|c| matches!(c.name.as_deref(), Some("TOKEN"))) + .map(|c| c.value) + }) + .or_else(|| m.groups.captures.get(0).map(|c| c.value)) + .unwrap_or(""); let origin_kind = match origin.first() { Origin::GitRepo(_) => "git", diff --git a/src/liquid_filters.rs b/src/liquid_filters.rs index e9d6ea4..66a2fab 100644 --- a/src/liquid_filters.rs +++ b/src/liquid_filters.rs @@ -1,6 +1,7 @@ //! 
Collection of small Liquid filters that make HTTP validations & API-signing templates easy use base64::{engine::general_purpose, Engine}; +use crc32fast::Hasher; use hmac::{Hmac, Mac}; use liquid_core::{ Display_filter, Error as LiquidError, Expression, Filter, FilterParameters, FilterReflection, @@ -108,6 +109,41 @@ impl Filter for ReplaceFilter { } } +#[derive(Debug, FilterParameters)] +struct LstripCharsArgs { + #[parameter( + description = "Characters to remove from the start of the input.", + arg_type = "str" + )] + chars: Expression, +} + +#[derive(Clone, ParseFilter, FilterReflection, Default)] +#[filter( + name = "lstrip_chars", + description = "Removes the provided characters from the beginning of the string.", + parameters(LstripCharsArgs), + parsed(LstripCharsFilter) +)] +pub struct LstripChars; + +#[derive(Debug, FromFilterParameters, Display_filter)] +#[name = "lstrip_chars"] +struct LstripCharsFilter { + #[parameters] + args: LstripCharsArgs, +} + +impl Filter for LstripCharsFilter { + fn evaluate(&self, input: &dyn ValueView, runtime: &dyn Runtime) -> Result { + let args = self.args.evaluate(runtime)?; + let chars = args.chars.to_string(); + let input_str = input.to_kstr(); + let trimmed = input_str.trim_start_matches(|c| chars.contains(c)).to_string(); + Ok(Value::scalar(trimmed)) + } +} + // ── HMAC args ───────────────────────────────────── #[derive(Debug, FilterParameters)] struct HmacArgs { @@ -223,22 +259,133 @@ impl Filter for HmacSha384Filter { } // ── random_string ──────────────────────────────── -static_filter!( - /// Random alphanumeric string (default 32 chars). - RandomStringFilter { len: Option }, - "random_string", - |s: &RandomStringFilter, input: &dyn ValueView| -> String { - let n = s.len // explicit argument? 
- .or_else(|| input.to_kstr().parse().ok()) // else parse input - .unwrap_or(32); // else default +#[derive(Debug, FilterParameters)] +struct RandomStringArgs { + #[parameter(description = "Desired output length", arg_type = "integer")] + len: Option, +} - rand::rng() - .sample_iter(&Alphanumeric) - .take(n) - .map(char::from) - .collect() +#[derive(Clone, ParseFilter, FilterReflection, Default)] +#[filter( + name = "random_string", + description = "Random alphanumeric string (default 32 chars).", + parameters(RandomStringArgs), + parsed(RandomString) +)] +pub struct RandomStringFilter; + +#[derive(Debug, FromFilterParameters, Display_filter)] +#[name = "random_string"] +struct RandomString { + #[parameters] + args: RandomStringArgs, +} + +impl Filter for RandomString { + fn evaluate(&self, input: &dyn ValueView, runtime: &dyn Runtime) -> Result { + let args = self.args.evaluate(runtime)?; + let n = args + .len + .and_then(|value| { + let scalar = Value::scalar(value); + value_to_usize(&scalar) + }) + .or_else(|| input.to_kstr().parse().ok()) + .unwrap_or(32); + + let value: String = + rand::rng().sample_iter(&Alphanumeric).take(n).map(char::from).collect(); + + Ok(Value::scalar(value)) } -); +} + +#[derive(Debug, FilterParameters)] +struct SuffixArgs { + #[parameter(description = "Number of trailing characters to keep", arg_type = "integer")] + len: Option, +} + +#[derive(Clone, ParseFilter, FilterReflection, Default)] +#[filter( + name = "suffix", + description = "Return the suffix (last N characters) of the provided string.", + parameters(SuffixArgs), + parsed(Suffix) +)] +pub struct SuffixFilter; + +#[derive(Debug, FromFilterParameters, Display_filter)] +#[name = "suffix"] +struct Suffix { + #[parameters] + args: SuffixArgs, +} + +impl Filter for Suffix { + fn evaluate(&self, input: &dyn ValueView, runtime: &dyn Runtime) -> Result { + let args = self.args.evaluate(runtime)?; + let text = input.to_kstr(); + let requested = args + .len + .and_then(|value| { + let 
scalar = Value::scalar(value); + value_to_usize(&scalar) + }) + .unwrap_or_else(|| text.len()); + if requested == 0 { + return Ok(Value::scalar(String::new())); + } + + let mut chars: Vec = text.chars().collect(); + let keep = requested.min(chars.len()); + chars.drain(0..chars.len().saturating_sub(keep)); + Ok(Value::scalar(chars.into_iter().collect::())) + } +} + +#[derive(Debug, FilterParameters)] +struct PrefixArgs { + #[parameter(description = "Number of leading characters to keep", arg_type = "integer")] + len: Option, +} + +#[derive(Clone, ParseFilter, FilterReflection, Default)] +#[filter( + name = "prefix", + description = "Return the prefix (first N characters) of the provided string.", + parameters(PrefixArgs), + parsed(Prefix) +)] +pub struct PrefixFilter; + +#[derive(Debug, FromFilterParameters, Display_filter)] +#[name = "prefix"] +struct Prefix { + #[parameters] + args: PrefixArgs, +} + +impl Filter for Prefix { + fn evaluate(&self, input: &dyn ValueView, runtime: &dyn Runtime) -> Result { + let args = self.args.evaluate(runtime)?; + let text = input.to_kstr(); + let requested = args + .len + .and_then(|value| { + let scalar = Value::scalar(value); + value_to_usize(&scalar) + }) + .unwrap_or_else(|| text.len()); + if requested == 0 { + return Ok(Value::scalar(String::new())); + } + + let mut chars: Vec = text.chars().collect(); + chars.truncate(requested.min(chars.len())); + Ok(Value::scalar(chars.into_iter().collect::())) + } +} #[derive(Debug, Clone, Default, FilterReflection, ParseFilter)] #[filter( @@ -307,6 +454,280 @@ static_filter!( } ); +static_filter!( + /// Compute the CRC32 of the input and return it as a decimal number. 
+ Crc32Filter, + "crc32", + |input: &dyn ValueView| -> i64 { + let mut hasher = Hasher::new(); + hasher.update(input.to_kstr().as_bytes()); + i64::from(hasher.finalize()) + } +); + +#[derive(Debug, FilterParameters)] +struct Crc32DecArgs { + #[parameter( + description = "Number of trailing decimal digits to return (zero padded)", + arg_type = "integer" + )] + digits: Option, +} + +#[derive(Clone, ParseFilter, FilterReflection, Default)] +#[filter( + name = "crc32_dec", + description = "Compute the CRC32 and optionally return the last N decimal digits.", + parameters(Crc32DecArgs), + parsed(Crc32Dec) +)] +pub struct Crc32DecFilter; + +#[derive(Debug, FromFilterParameters, Display_filter)] +#[name = "crc32_dec"] +struct Crc32Dec { + #[parameters] + args: Crc32DecArgs, +} + +impl Filter for Crc32Dec { + fn evaluate(&self, input: &dyn ValueView, runtime: &dyn Runtime) -> Result { + let args = self.args.evaluate(runtime)?; + let mut hasher = Hasher::new(); + hasher.update(input.to_kstr().as_bytes()); + let checksum = u128::from(hasher.finalize()); + + let digits = args + .digits + .and_then(|value| { + let scalar = Value::scalar(value); + value_to_usize(&scalar) + }) + .unwrap_or(0); + + if digits == 0 { + return Ok(Value::scalar(checksum.to_string())); + } + + let clamped_digits = digits.min(38); // 10^38 fits within u128 + let modulus = 10u128.pow(clamped_digits as u32); + let truncated = checksum % modulus; + let mut value = truncated.to_string(); + if clamped_digits > value.len() { + let mut padded = String::with_capacity(clamped_digits); + for _ in 0..(clamped_digits - value.len()) { + padded.push('0'); + } + padded.push_str(&value); + value = padded; + } + + Ok(Value::scalar(value)) + } +} + +#[derive(Debug, FilterParameters)] +struct Crc32HexArgs { + #[parameter( + description = "Number of trailing hexadecimal digits to return (zero padded)", + arg_type = "integer" + )] + digits: Option, +} + +#[derive(Clone, ParseFilter, FilterReflection, Default)] +#[filter( + 
name = "crc32_hex", + description = "Compute the CRC32 and optionally return the last N hexadecimal digits.", + parameters(Crc32HexArgs), + parsed(Crc32Hex) +)] +pub struct Crc32HexFilter; + +#[derive(Debug, FromFilterParameters, Display_filter)] +#[name = "crc32_hex"] +struct Crc32Hex { + #[parameters] + args: Crc32HexArgs, +} + +impl Filter for Crc32Hex { + fn evaluate(&self, input: &dyn ValueView, runtime: &dyn Runtime) -> Result { + let args = self.args.evaluate(runtime)?; + let mut hasher = Hasher::new(); + hasher.update(input.to_kstr().as_bytes()); + let checksum = hasher.finalize(); + let mut hex = format!("{checksum:08x}"); + + let digits = args + .digits + .and_then(|value| { + let scalar = Value::scalar(value); + value_to_usize(&scalar) + }) + .unwrap_or(0); + + if digits == 0 { + return Ok(Value::scalar(hex)); + } + + let clamped = digits.min(32); + if clamped > hex.len() { + let mut padded = String::with_capacity(clamped); + for _ in 0..(clamped - hex.len()) { + padded.push('0'); + } + padded.push_str(&hex); + hex = padded; + } else { + let start = hex.len() - clamped; + hex = hex[start..].to_string(); + } + + Ok(Value::scalar(hex)) + } +} + +#[derive(Debug, FilterParameters)] +struct Crc32LeB64Args { + #[parameter( + description = "Number of leading characters from the Base64 string to keep", + arg_type = "integer" + )] + len: Option, +} + +#[derive(Clone, ParseFilter, FilterReflection, Default)] +#[filter( + name = "crc32_le_b64", + description = "Compute the CRC32, encode little-endian bytes as Base64, optionally truncating.", + parameters(Crc32LeB64Args), + parsed(Crc32LeB64) +)] +pub struct Crc32LeB64Filter; + +#[derive(Debug, FromFilterParameters, Display_filter)] +#[name = "crc32_le_b64"] +struct Crc32LeB64 { + #[parameters] + args: Crc32LeB64Args, +} + +impl Filter for Crc32LeB64 { + fn evaluate(&self, input: &dyn ValueView, runtime: &dyn Runtime) -> Result { + let args = self.args.evaluate(runtime)?; + let mut hasher = Hasher::new(); + 
hasher.update(input.to_kstr().as_bytes()); + let checksum = hasher.finalize(); + let encoded = general_purpose::STANDARD.encode(checksum.to_le_bytes()); + + let output = if let Some(len) = args.len.and_then(|value| { + let scalar = Value::scalar(value); + value_to_usize(&scalar) + }) { + encoded.chars().take(len).collect::() + } else { + encoded + }; + + Ok(Value::scalar(output)) + } +} + +#[derive(Debug, FilterParameters)] +struct Base62Args { + #[parameter( + description = "Pad the encoded value to at least this width", + arg_type = "integer" + )] + width: Option, +} + +#[derive(Clone, ParseFilter, FilterReflection, Default)] +#[filter( + name = "base62", + description = "Encode the provided integer value using Base62.", + parameters(Base62Args), + parsed(Base62) +)] +pub struct Base62Filter; + +#[derive(Debug, FromFilterParameters, Display_filter)] +#[name = "base62"] +struct Base62 { + #[parameters] + args: Base62Args, +} + +impl Filter for Base62 { + fn evaluate(&self, input: &dyn ValueView, runtime: &dyn Runtime) -> Result { + let args = self.args.evaluate(runtime)?; + let value = input + .as_scalar() + .and_then(|scalar| { + if let Some(int) = scalar.to_integer() { + Some(if int < 0 { 0 } else { int as u64 }) + } else if let Some(float) = scalar.to_float() { + Some(if float.is_sign_negative() { 0 } else { float.floor() as u64 }) + } else if let Some(boolean) = scalar.to_bool() { + Some(u64::from(boolean)) + } else { + scalar.to_kstr().to_string().parse::().ok() + } + }) + .or_else(|| input.to_kstr().to_string().parse::().ok()) + .unwrap_or(0); + + let mut encoded = encode_base62(value); + if let Some(width) = args.width.and_then(|value| { + let scalar = Value::scalar(value); + value_to_usize(&scalar) + }) { + if encoded.len() < width { + let mut padded = String::with_capacity(width); + for _ in 0..(width - encoded.len()) { + padded.push('0'); + } + padded.push_str(&encoded); + encoded = padded; + } + } + + Ok(Value::scalar(encoded)) + } +} + +fn 
encode_base62(mut value: u64) -> String { + const ALPHABET: &[u8; 62] = b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; + if value == 0 { + return "0".to_string(); + } + let mut buf = Vec::new(); + while value > 0 { + let rem = (value % 62) as usize; + buf.push(ALPHABET[rem] as char); + value /= 62; + } + buf.iter().rev().collect() +} + +fn value_to_usize(value: &Value) -> Option { + let view = value.as_view(); + view.as_scalar() + .and_then(|scalar| { + if let Some(int) = scalar.to_integer() { + Some(if int < 0 { 0 } else { int as usize }) + } else if let Some(float) = scalar.to_float() { + Some(if float.is_sign_negative() { 0 } else { float.floor() as usize }) + } else if let Some(boolean) = scalar.to_bool() { + Some(if boolean { 1 } else { 0 }) + } else { + scalar.to_kstr().parse::().ok() + } + }) + .or_else(|| view.to_kstr().parse::().ok()) +} + // {{ value | b64url_enc }} – URL-safe base64 w/o padding static_filter!( /// Base64 URL-safe (no β€˜=’ padding). @@ -415,6 +836,14 @@ pub fn register_all(builder: liquid::ParserBuilder) -> liquid::ParserBuilder { .filter(B64EncFilter::default()) .filter(B64DecFilter::default()) .filter(RandomStringFilter::default()) + .filter(SuffixFilter::default()) + .filter(PrefixFilter::default()) + .filter(LstripChars::default()) + .filter(Crc32Filter::default()) + .filter(Crc32DecFilter::default()) + .filter(Crc32HexFilter::default()) + .filter(Crc32LeB64Filter::default()) + .filter(Base62Filter::default()) .filter(HmacSha256::default()) .filter(HmacSha1::default()) .filter(HmacSha384::default()) @@ -461,6 +890,46 @@ mod tests { assert_eq!(render(r#"{{ "hello" | sha256 }}"#), expect); } + #[test] + fn suffix_filter() { + assert_eq!(render(r#"{{ "abcdef" | suffix: 3 }}"#), "def"); + assert_eq!(render(r#"{{ "short" | suffix: 10 }}"#), "short"); + assert_eq!(render(r#"{{ "value" | suffix: 0 }}"#), ""); + } + + #[test] + fn prefix_filter() { + assert_eq!(render(r#"{{ "abcdef" | prefix: 3 }}"#), "abc"); + 
assert_eq!(render(r#"{{ "short" | prefix: 10 }}"#), "short"); + assert_eq!(render(r#"{{ "value" | prefix: 0 }}"#), ""); + } + + #[test] + fn crc32_and_base62_filters() { + assert_eq!(render(r#"{{ "hello" | crc32 }}"#), "907060870"); + assert_eq!(render(r#"{{ "hello" | crc32 | base62 }}"#), "zNvy2"); + assert_eq!(render(r#"{{ "hello" | crc32 | base62: 6 }}"#), "0zNvy2"); + } + + #[test] + fn crc32_dec_filter() { + assert_eq!(render(r#"{{ "hello" | crc32_dec }}"#), "907060870"); + assert_eq!(render(r#"{{ "hello" | crc32_dec: 6 }}"#), "060870"); + } + + #[test] + fn crc32_hex_filter() { + assert_eq!(render(r#"{{ "hello" | crc32_hex }}"#), "3610a686"); + assert_eq!(render(r#"{{ "hello" | crc32_hex: 4 }}"#), "a686"); + assert_eq!(render(r#"{{ "hello" | crc32_hex: 10 }}"#), "003610a686"); + } + + #[test] + fn crc32_le_b64_filter() { + assert_eq!(render(r#"{{ "hello" | crc32_le_b64 }}"#), "hqYQNg=="); + assert_eq!(render(r#"{{ "hello" | crc32_le_b64: 6 }}"#), "hqYQNg"); + } + #[test] fn hmac_sha1_filter() { let key = b"key1"; @@ -580,6 +1049,16 @@ mod tests { assert_eq!(render(r#"{{ "hello world" | replace: "world", "mars" }}"#), "hello mars"); } + #[test] + fn lstrip_chars_single() { + assert_eq!(render(r#"{{ "000abc" | lstrip_chars: "0" }}"#), "abc"); + } + + #[test] + fn lstrip_chars_multiple_chars() { + assert_eq!(render(r#"{{ "-=--token" | lstrip_chars: "-=" }}"#), "token"); + } + // ------------------------------------------------------------------------- // iso_timestamp_no_frac filter // ------------------------------------------------------------------------- diff --git a/src/main.rs b/src/main.rs index a7deda6..b52fcd3 100644 --- a/src/main.rs +++ b/src/main.rs @@ -446,6 +446,7 @@ fn create_default_scan_args() -> cli::commands::scan::ScanArgs { output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty }, no_base64: false, no_inline_ignore: false, + no_ignore_if_contains: false, } } /// Run the rules check command diff --git a/src/matcher.rs 
b/src/matcher.rs index f93576a..b820233 100644 --- a/src/matcher.rs +++ b/src/matcher.rs @@ -29,7 +29,7 @@ use crate::{ parser, parser::{Checker, Language}, rule_profiling::{ConcurrentRuleProfiler, RuleStats, RuleTimer}, - rules::rule::Rule, + rules::rule::{PatternRequirementContext, PatternValidationResult, Rule}, rules_database::RulesDatabase, safe_list::{is_safe_match, is_user_match}, scanner_pool::ScannerPool, @@ -203,6 +203,9 @@ pub struct Matcher<'a> { /// Configuration that controls inline ignore directives inline_ignore_config: InlineIgnoreConfig, + + /// Whether matches should honour `ignore_if_contains` requirements. + respect_ignore_if_contains: bool, } /// This `Drop` implementation updates the `global_stats` with the local stats impl<'a> Drop for Matcher<'a> { @@ -232,6 +235,7 @@ impl<'a> Matcher<'a> { shared_profiler: Option>, extra_ignore_directives: &[String], disable_inline_ignores: bool, + respect_ignore_if_contains: bool, ) -> Result { // Changed: removed `with_capacity(16384)` so we don't pre-allocate a large Vec let raw_matches_scratch = Vec::new(); @@ -258,6 +262,7 @@ impl<'a> Matcher<'a> { } else { InlineIgnoreConfig::new(extra_ignore_directives) }, + respect_ignore_if_contains, }) } @@ -414,6 +419,7 @@ impl<'a> Matcher<'a> { redact, &filename, self.profiler.as_ref(), + self.respect_ignore_if_contains, &self.inline_ignore_config, ); } @@ -439,6 +445,7 @@ impl<'a> Matcher<'a> { redact, &filename, self.profiler.as_ref(), + self.respect_ignore_if_contains, &self.inline_ignore_config, ); } @@ -470,6 +477,7 @@ impl<'a> Matcher<'a> { redact, &filename, self.profiler.as_ref(), + self.respect_ignore_if_contains, &self.inline_ignore_config, ); } @@ -574,6 +582,7 @@ fn filter_match<'b>( redact: bool, filename: &str, profiler: Option<&Arc>, + respect_ignore_if_contains: bool, inline_ignore_config: &InlineIgnoreConfig, ) { let mut timer = @@ -602,6 +611,42 @@ fn filter_match<'b>( ); continue; } + + // Check character requirements if specified + if let 
Some(char_reqs) = rule.pattern_requirements() { + let context = PatternRequirementContext { + regex: re, + captures: &captures, + full_match: full_bytes, + }; + match char_reqs.validate(mi_bytes, Some(context), respect_ignore_if_contains) { + PatternValidationResult::Passed => {} + PatternValidationResult::Failed => { + debug!( + "Skipping match that does not meet character requirements for rule {}", + rule.id() + ); + continue; + } + PatternValidationResult::FailedChecksum { actual_len, expected_len } => { + debug!( + "Skipping match for rule {} due to checksum mismatch (actual_len={}, expected_len={})", + rule.id(), + actual_len, + expected_len + ); + continue; + } + PatternValidationResult::IgnoredBySubstring { matched_term } => { + debug!( + "Skipping match for rule {} because it contains ignored term {matched_term}", + rule.id() + ); + continue; + } + } + } + let matching_input_offset_span = OffsetSpan::from_range( (start + matching_input.start())..(start + matching_input.end()), ); @@ -759,40 +804,31 @@ impl SerializableCaptures { redact: bool, ) -> Self { let mut serialized_captures: SmallVec<[SerializableCapture; 2]> = SmallVec::new(); - // Process named captures - for name in re.capture_names().flatten() { - if let Some(capture) = captures.name(name) { - let value = if redact { - redact_value(&String::from_utf8_lossy(capture.as_bytes())) - } else { - String::from_utf8_lossy(capture.as_bytes()).to_string() - }; - serialized_captures.push(SerializableCapture { - name: Some(name.to_string()), - match_number: -1, - start: capture.start(), - end: capture.end(), - value: intern(&value), - }); - } - } - // Process unnamed captures (numbered groups) + + let capture_names: SmallVec<[Option; 4]> = + re.capture_names().map(|name| name.map(str::to_string)).collect(); + for i in 0..captures.len() { - if let Some(capture) = captures.get(i) { + if let Some(cap) = captures.get(i) { let value = if redact { - redact_value(&String::from_utf8_lossy(capture.as_bytes())) + 
redact_value(&String::from_utf8_lossy(cap.as_bytes())) } else { - String::from_utf8_lossy(capture.as_bytes()).to_string() + String::from_utf8_lossy(cap.as_bytes()).to_string() }; + let interned = intern(&value); + + let name = capture_names.get(i).and_then(|opt| opt.as_ref()).cloned(); + serialized_captures.push(SerializableCapture { - name: None, + name, match_number: i32::try_from(i).unwrap_or(0), - start: capture.start(), - end: capture.end(), - value: intern(&value), + start: cap.start(), + end: cap.end(), + value: interned, }); } } + SerializableCaptures { captures: serialized_captures } } } @@ -992,7 +1028,9 @@ mod test { use crate::{ blob::{Blob, BlobIdMap}, origin::{Origin, OriginSet}, - rules::rule::{DependsOnRule, HttpRequest, HttpValidation, RuleSyntax, Validation}, + rules::rule::{ + DependsOnRule, HttpRequest, HttpValidation, PatternRequirements, RuleSyntax, Validation, + }, }; proptest! { @@ -1027,6 +1065,7 @@ mod test { references: vec![], validation: None::, // no HTTP validation needed depends_on_rule: vec![], + pattern_requirements: None, }); let rules_db = RulesDatabase::from_rules(vec![rule]).unwrap(); @@ -1041,6 +1080,7 @@ mod test { None, &[], false, + true, ) .unwrap(); @@ -1098,6 +1138,7 @@ mod test { variable: "domain".to_string(), }), ], + pattern_requirements: None, })]; let rules_db = RulesDatabase::from_rules(rules)?; let input = "some test data for vectorscan"; @@ -1115,6 +1156,7 @@ mod test { None, // Pass the shared profiler &[], false, + true, )?; matcher.scan_bytes_raw(input.as_bytes(), "fname")?; assert_eq!( @@ -1124,6 +1166,131 @@ mod test { Ok(()) } + #[test] + fn test_pattern_requirements_ignore_if_contains_filters_matches() -> Result<()> { + let rules = vec![Rule::new(RuleSyntax { + id: "test.exclude".to_string(), + name: "exclude words".to_string(), + pattern: "(?Pprefix[A-Za-z]+)".to_string(), + confidence: crate::rules::rule::Confidence::Medium, + min_entropy: 0.0, + visible: true, + examples: vec![], + negative_examples: 
vec![], + references: vec![], + validation: None, + depends_on_rule: vec![], + pattern_requirements: Some(PatternRequirements { + min_digits: None, + min_uppercase: None, + min_lowercase: None, + min_special_chars: None, + special_chars: None, + ignore_if_contains: Some(vec!["TEST".to_string()]), + checksum: None, + }), + })]; + + let rules_db = RulesDatabase::from_rules(rules)?; + let input = b"prefixgood prefixtest"; + let seen_blobs: BlobIdMap = BlobIdMap::new(); + let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vsdb.clone()))); + let mut matcher = Matcher::new( + &rules_db, + scanner_pool, + &seen_blobs, + None, + false, + None, + &[], + false, + true, + )?; + + let blob = Blob::from_bytes(input.to_vec()); + let origin = OriginSet::from(Origin::from_file(PathBuf::from("exclude.txt"))); + + let matches = match matcher.scan_blob(&blob, &origin, None, false, false, false)? { + ScanResult::New(matches) => matches, + ScanResult::SeenWithMatches => { + panic!("unexpected scan result: blob should not be considered previously seen with matches") + } + ScanResult::SeenSansMatches => { + panic!("unexpected scan result: blob should not be considered previously seen without matches") + } + }; + + assert_eq!(matches.len(), 1, "ignore_if_contains should drop filtered matches"); + assert_eq!( + matches[0].matching_input, b"prefixgood", + "remaining match should be the non-excluded token", + ); + + Ok(()) + } + + #[test] + fn test_pattern_requirements_ignore_if_contains_can_be_disabled_in_matcher() -> Result<()> { + let rules = vec![Rule::new(RuleSyntax { + id: "test.exclude".to_string(), + name: "exclude words".to_string(), + pattern: "(?Pprefix[A-Za-z]+)".to_string(), + confidence: crate::rules::rule::Confidence::Medium, + min_entropy: 0.0, + visible: true, + examples: vec![], + negative_examples: vec![], + references: vec![], + validation: None, + depends_on_rule: vec![], + pattern_requirements: Some(PatternRequirements { + min_digits: None, + min_uppercase: 
None, + min_lowercase: None, + min_special_chars: None, + special_chars: None, + ignore_if_contains: Some(vec!["TEST".to_string()]), + checksum: None, + }), + })]; + + let rules_db = RulesDatabase::from_rules(rules)?; + let input = b"prefixgood prefixtest"; + let seen_blobs: BlobIdMap = BlobIdMap::new(); + let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vsdb.clone()))); + let mut matcher = Matcher::new( + &rules_db, + scanner_pool, + &seen_blobs, + None, + false, + None, + &[], + false, + false, + )?; + + let blob = Blob::from_bytes(input.to_vec()); + let origin = OriginSet::from(Origin::from_file(PathBuf::from("exclude-disabled.txt"))); + + let matches = match matcher.scan_blob(&blob, &origin, None, false, false, false)? { + ScanResult::New(matches) => matches, + ScanResult::SeenWithMatches => { + panic!( + "unexpected scan result: blob should not be considered previously seen with matches" + ) + } + ScanResult::SeenSansMatches => { + panic!( + "unexpected scan result: blob should not be considered previously seen without matches" + ) + } + }; + + assert_eq!(matches.len(), 2, "disabling ignore_if_contains should keep all matches"); + Ok(()) + } + // --------------------------------------------------------------------- // additional deterministic unit-tests // --------------------------------------------------------------------- @@ -1197,12 +1364,14 @@ mod test { references: vec![], validation: None::, depends_on_rule: vec![], + pattern_requirements: None, }); let rules_db = RulesDatabase::from_rules(vec![rule])?; let seen = BlobIdMap::new(); let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vsdb.clone()))); - let mut m = Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, &[], false)?; + let mut m = + Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, &[], false, true)?; let buf = b"dup dup"; // two literal hits, same rule @@ -1234,12 +1403,13 @@ mod test { references: vec![], validation: None::, 
depends_on_rule: vec![], + pattern_requirements: None, }); let rules_db = RulesDatabase::from_rules(vec![rule])?; let seen = BlobIdMap::new(); let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vsdb.clone()))); let mut matcher = - Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, &[], false)?; + Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, &[], false, true)?; let blob = Blob::from_bytes(b"let key = \"secret_token\" # kingfisher:ignore".to_vec()); let origin = OriginSet::from(Origin::from_file(PathBuf::from("inline.txt"))); @@ -1266,12 +1436,13 @@ mod test { references: vec![], validation: None::, depends_on_rule: vec![], + pattern_requirements: None, }); let rules_db = RulesDatabase::from_rules(vec![rule])?; let seen = BlobIdMap::new(); let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vsdb.clone()))); let mut matcher = - Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, &[], false)?; + Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, &[], false, true)?; let blob = Blob::from_bytes( br#"let data = """ @@ -1306,6 +1477,7 @@ line2 references: vec![], validation: None::, depends_on_rule: vec![], + pattern_requirements: None, }); let rules_db = RulesDatabase::from_rules(vec![rule])?; @@ -1315,7 +1487,7 @@ line2 let seen = BlobIdMap::new(); let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vsdb.clone()))); let mut matcher = - Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, &[], false)?; + Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, &[], false, true)?; let matches_without_compat = match matcher.scan_blob(&blob, &origin, None, false, false, false)? 
{ ScanResult::New(matches) => matches.len(), @@ -1327,7 +1499,7 @@ line2 let scanner_pool = Arc::new(ScannerPool::new(Arc::new(rules_db.vsdb.clone()))); let extra = vec![String::from("gitleaks:allow")]; let mut matcher = - Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, &extra, false)?; + Matcher::new(&rules_db, scanner_pool, &seen, None, false, None, &extra, false, true)?; match matcher.scan_blob(&blob, &origin, None, false, false, false)? { ScanResult::New(matches) => assert!(matches.is_empty()), _ => panic!("unexpected scan result"), @@ -1335,4 +1507,24 @@ line2 Ok(()) } + + #[test] + fn serializes_captures_in_numeric_order() { + let re = + Regex::new(r"(?xi)\b(ghp_(?P[A-Z0-9]{3})(?P[A-Z0-9]{2}))").unwrap(); + let caps = re.captures(b"ghp_ABC12").expect("expected captures"); + + let serialized = SerializableCaptures::from_captures(&caps, b"", &re, false); + let entries: Vec<(Option<&str>, i32, &str)> = serialized + .captures + .iter() + .map(|cap| (cap.name.as_deref(), cap.match_number, cap.value)) + .collect(); + + assert_eq!(entries.len(), 4); + assert_eq!(entries[0], (None, 0, "ghp_ABC12")); + assert_eq!(entries[1], (None, 1, "ghp_ABC12")); + assert_eq!(entries[2], (Some("body"), 2, "ABC")); + assert_eq!(entries[3], (Some("checksum"), 3, "12")); + } } diff --git a/src/reporter.rs b/src/reporter.rs index 9ec0f66..e8e29ba 100644 --- a/src/reporter.rs +++ b/src/reporter.rs @@ -807,6 +807,7 @@ mod tests { skip_aws_account: Vec::new(), skip_aws_account_file: None, no_inline_ignore: false, + no_ignore_if_contains: false, } } @@ -840,6 +841,7 @@ mod tests { references: vec![], validation: None, depends_on_rule: vec![], + pattern_requirements: None, })); let blob_id = BlobId::new(b"blob-data"); diff --git a/src/reporter/json_format.rs b/src/reporter/json_format.rs index 26d1b27..f9fe2bf 100644 --- a/src/reporter/json_format.rs +++ b/src/reporter/json_format.rs @@ -180,6 +180,7 @@ mod tests { skip_aws_account_file: None, no_base64: false, 
no_inline_ignore: false, + no_ignore_if_contains: false, } } @@ -196,6 +197,7 @@ mod tests { references: vec![], validation: None, depends_on_rule: vec![], + pattern_requirements: None, }; let rule = Arc::new(Rule::new(syntax)); Match { diff --git a/src/rules/rule.rs b/src/rules/rule.rs index dbf52ef..1c2fb08 100644 --- a/src/rules/rule.rs +++ b/src/rules/rule.rs @@ -10,6 +10,10 @@ use std::{ use anyhow::{anyhow, Context, Result}; use lazy_static::lazy_static; +use liquid::{ + model::{KString, Value}, + object, ParserBuilder, +}; use regex::Regex; use schemars::{ gen::SchemaGenerator, @@ -17,9 +21,12 @@ use schemars::{ JsonSchema, }; use serde::{Deserialize, Serialize}; +use tracing::debug; // use sha1::{Digest, Sha1}; use xxhash_rust::xxh3::xxh3_64; +use crate::liquid_filters; + /// Returns false as the default value. fn default_false() -> bool { false @@ -52,6 +59,233 @@ pub struct DependsOnRule { pub variable: String, } +/// Specifies character type requirements for matched secrets. +#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash, Clone)] +pub struct PatternRequirements { + /// Minimum number of digits required (0-9) + #[serde(default)] + pub min_digits: Option, + /// Minimum number of uppercase letters required (A-Z) + #[serde(default)] + pub min_uppercase: Option, + /// Minimum number of lowercase letters required (a-z) + #[serde(default)] + pub min_lowercase: Option, + /// Minimum number of special characters required + #[serde(default)] + pub min_special_chars: Option, + /// Custom set of characters to consider as "special" (defaults to common punctuation) + #[serde(default)] + pub special_chars: Option, + /// Words that should cause the match to be excluded when present (case-insensitive) + #[serde(default)] + pub ignore_if_contains: Option>, + /// Optional checksum validation configuration. + #[serde(default)] + pub checksum: Option, +} + +/// Defines a checksum validation strategy for a matched pattern. 
+#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash, Clone)] +pub struct ChecksumRequirement { + /// Template describing how to extract the checksum from the match. + pub actual: ChecksumActual, + /// Template describing how to compute the expected checksum. + pub expected: String, + /// When true, checksum evaluation is skipped if the required capture is missing. + #[serde(default)] + pub skip_if_missing: bool, +} + +/// Describes how to extract the checksum value from a match. +#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash, Clone)] +pub struct ChecksumActual { + /// Liquid template used to compute the checksum from the match. + pub template: String, + /// Optional capture group that must be present before evaluating the checksum. + #[serde(default)] + pub requires_capture: Option, +} + +/// Contextual information available when validating pattern requirements. +#[derive(Clone, Copy)] +pub struct PatternRequirementContext<'a> { + /// Compiled regex associated with the rule. + pub regex: &'a regex::bytes::Regex, + /// Captures for the current match. + pub captures: &'a regex::bytes::Captures<'a>, + /// Full bytes matched by the rule (capture group 0). + pub full_match: &'a [u8], +} + +impl PatternRequirements { + /// Default special characters if none are specified + const DEFAULT_SPECIAL_CHARS: &'static str = "!@#$%^&*()_+-=[]{}|;:'\",.<>?/\\`~"; + + /// Validates whether the given byte slice meets the character requirements. + /// Returns the validation outcome, including whether the match should be ignored + /// due to `ignore_if_contains` entries when that behaviour is enabled. 
+ pub fn validate( + &self, + input: &[u8], + context: Option>, + respect_ignore_if_contains: bool, + ) -> PatternValidationResult { + // Convert to string (lossy for non-UTF8) + let s = String::from_utf8_lossy(input); + + // Check digit requirement + if let Some(min_digits) = self.min_digits { + let digit_count = s.chars().filter(|c| c.is_ascii_digit()).count(); + if digit_count < min_digits { + return PatternValidationResult::Failed; + } + } + + // Check uppercase requirement + if let Some(min_uppercase) = self.min_uppercase { + let uppercase_count = s.chars().filter(|c| c.is_ascii_uppercase()).count(); + if uppercase_count < min_uppercase { + return PatternValidationResult::Failed; + } + } + + // Check lowercase requirement + if let Some(min_lowercase) = self.min_lowercase { + let lowercase_count = s.chars().filter(|c| c.is_ascii_lowercase()).count(); + if lowercase_count < min_lowercase { + return PatternValidationResult::Failed; + } + } + + // Check special character requirement + if let Some(min_special) = self.min_special_chars { + let special_chars = + self.special_chars.as_deref().unwrap_or(Self::DEFAULT_SPECIAL_CHARS); + let special_count = s.chars().filter(|c| special_chars.contains(*c)).count(); + if special_count < min_special { + return PatternValidationResult::Failed; + } + } + + // Check ignore-if-contains requirement + if respect_ignore_if_contains { + if let Some(ignore_terms) = self.ignore_if_contains.as_ref() { + let lowercase_input = s.to_lowercase(); + if let Some(matched_term) = ignore_terms + .iter() + .filter_map(|term| { + let trimmed = term.trim(); + if trimmed.is_empty() { + None + } else { + Some((trimmed, trimmed.to_lowercase())) + } + }) + .find_map(|(original, lowered)| { + if lowercase_input.contains(&lowered) { + Some(original.to_string()) + } else { + None + } + }) + { + return PatternValidationResult::IgnoredBySubstring { matched_term }; + } + } + } + + if let Some(checksum) = &self.checksum { + let Some(ctx) = context else { + 
return if checksum.skip_if_missing { + PatternValidationResult::Passed + } else { + PatternValidationResult::Failed + }; + }; + + if let Some(required) = checksum.actual.requires_capture.as_deref() { + if ctx.captures.name(required).is_none() { + return if checksum.skip_if_missing { + PatternValidationResult::Passed + } else { + PatternValidationResult::Failed + }; + } + } + + let mut globals = object!({ + "MATCH": s.to_string(), + "FULL_MATCH": String::from_utf8_lossy(ctx.full_match).to_string(), + }); + + for name in ctx.regex.capture_names().flatten() { + if let Some(capture) = ctx.captures.name(name) { + let value = String::from_utf8_lossy(capture.as_bytes()).to_string(); + globals.insert(KString::from_ref(name), Value::scalar(value.clone())); + globals.insert( + KString::from_string(name.to_ascii_uppercase()), + Value::scalar(value), + ); + } + } + + let actual = + match render_pattern_requirement_template(&checksum.actual.template, &globals) { + Ok(rendered) => rendered, + Err(err) => { + debug!( + "Failed to render checksum actual template '{}': {}", + checksum.actual.template, err + ); + return PatternValidationResult::Failed; + } + }; + let expected = match render_pattern_requirement_template(&checksum.expected, &globals) { + Ok(rendered) => rendered, + Err(err) => { + debug!( + "Failed to render checksum expected template '{}': {}", + checksum.expected, err + ); + return PatternValidationResult::Failed; + } + }; + + if actual != expected { + let actual_len = actual.chars().count(); + let expected_len = expected.chars().count(); + return PatternValidationResult::FailedChecksum { actual_len, expected_len }; + } + } + + PatternValidationResult::Passed + } +} + +fn render_pattern_requirement_template( + template: &str, + globals: &liquid::Object, +) -> Result { + PATTERN_REQUIREMENTS_TEMPLATE_PARSER + .parse(template) + .map_err(|e| e.to_string()) + .and_then(|parsed| parsed.render(globals).map_err(|e| e.to_string())) +} + +/// Result of validating 
[`PatternRequirements`] against a potential match. +#[derive(Debug, PartialEq, Eq)] +pub enum PatternValidationResult { + /// All requirements are satisfied and the match should be kept. + Passed, + /// Requirements were not satisfied. + Failed, + /// Checksum requirements were not satisfied; captures basic mismatch details for debugging. + FailedChecksum { actual_len: usize, expected_len: usize }, + /// The match contains one of the `ignore_if_contains` substrings and should be skipped. + IgnoredBySubstring { matched_term: String }, +} + /// Configuration for HTTP validation. This contains a request configuration /// and an optional multipart configuration. #[derive(Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash, Clone)] @@ -283,6 +517,9 @@ pub struct RuleSyntax { /// Optional dependencies on other rules. #[serde(default)] pub depends_on_rule: Vec>, + /// Optional character type requirements for matched secrets. + #[serde(default)] + pub pattern_requirements: Option, } lazy_static! { @@ -290,6 +527,10 @@ lazy_static! { pub static ref RULE_COMMENTS_PATTERN: Regex = Regex::new( r"(?m)(\(\?#[^)]*\))|(\s\#[\sa-zA-Z]*$)" ).expect("comment-stripping regex should compile"); + static ref PATTERN_REQUIREMENTS_TEMPLATE_PARSER: liquid::Parser = + liquid_filters::register_all(ParserBuilder::with_stdlib()) + .build() + .expect("pattern requirement template parser should compile"); } impl RuleSyntax { @@ -333,6 +574,7 @@ impl RuleSyntax { /// visible: true, /// validation: None, /// depends_on_rule: vec![], + /// pattern_requirements: None, /// }; /// assert_eq!(r.as_anchored_regex().unwrap().as_str(), r"hello\s*world$"); /// ``` @@ -436,4 +678,305 @@ impl Rule { pub fn confidence(&self) -> Confidence { self.syntax.confidence } + + /// Returns the character requirements for this rule, if any. 
+ pub fn pattern_requirements(&self) -> Option<&PatternRequirements> { + self.syntax.pattern_requirements.as_ref() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use regex::bytes::Regex as BytesRegex; + + #[test] + fn test_pattern_requirements_digits() { + let reqs = PatternRequirements { + min_digits: Some(2), + min_uppercase: None, + min_lowercase: None, + min_special_chars: None, + special_chars: None, + ignore_if_contains: None, + checksum: None, + }; + + // Should pass: has 3 digits + assert!(matches!(reqs.validate(b"abc123def", None, true), PatternValidationResult::Passed)); + + // Should fail: only 1 digit + assert!(matches!(reqs.validate(b"abc1def", None, true), PatternValidationResult::Failed)); + + // Should fail: no digits + assert!(matches!(reqs.validate(b"abcdef", None, true), PatternValidationResult::Failed)); + } + + #[test] + fn test_pattern_requirements_checksum() { + let reqs = PatternRequirements { + min_digits: None, + min_uppercase: None, + min_lowercase: None, + min_special_chars: None, + special_chars: None, + ignore_if_contains: None, + checksum: Some(ChecksumRequirement { + actual: ChecksumActual { + template: "{{ MATCH | suffix: 6 }}".to_string(), + requires_capture: Some("checksum".to_string()), + }, + expected: "{{ BODY | crc32 | base62: 6 }}".to_string(), + skip_if_missing: true, + }), + }; + + let token = b"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"; + let regex = + BytesRegex::new(r"(?x) ghp_(?P[A-Za-z0-9]{30})(?P[A-Za-z0-9]{6})") + .unwrap(); + let captures = regex.captures(token).expect("token should match"); + assert!(matches!( + reqs.validate( + token, + Some(PatternRequirementContext { + regex: ®ex, + captures: &captures, + full_match: token + }), + true + ), + PatternValidationResult::Passed + )); + + let mut invalid = token.to_vec(); + *invalid.last_mut().unwrap() = b'0'; + let captures_invalid = + regex.captures(&invalid).expect("invalid token should still match pattern"); + assert!(matches!( + reqs.validate( + &invalid, 
+ Some(PatternRequirementContext { + regex: ®ex, + captures: &captures_invalid, + full_match: &invalid, + }), + true + ), + PatternValidationResult::FailedChecksum { .. } + )); + + let legacy = b"ghp_legacy_token"; + assert!(matches!(reqs.validate(legacy, None, true), PatternValidationResult::Passed)); + } + + #[test] + fn test_pattern_requirements_uppercase() { + let reqs = PatternRequirements { + min_digits: None, + min_uppercase: Some(2), + min_lowercase: None, + min_special_chars: None, + special_chars: None, + ignore_if_contains: None, + checksum: None, + }; + + // Should pass: has 3 uppercase + assert!(matches!(reqs.validate(b"ABCdef", None, true), PatternValidationResult::Passed)); + + // Should fail: only 1 uppercase + assert!(matches!(reqs.validate(b"Adef", None, true), PatternValidationResult::Failed)); + + // Should fail: no uppercase + assert!(matches!(reqs.validate(b"abcdef", None, true), PatternValidationResult::Failed)); + } + + #[test] + fn test_pattern_requirements_lowercase() { + let reqs = PatternRequirements { + min_digits: None, + min_uppercase: None, + min_lowercase: Some(2), + min_special_chars: None, + special_chars: None, + ignore_if_contains: None, + checksum: None, + }; + + // Should pass: has 3 lowercase + assert!(matches!(reqs.validate(b"ABCdef", None, true), PatternValidationResult::Passed)); + + // Should fail: only 1 lowercase + assert!(matches!(reqs.validate(b"ABCd", None, true), PatternValidationResult::Failed)); + + // Should fail: no lowercase + assert!(matches!(reqs.validate(b"ABC123", None, true), PatternValidationResult::Failed)); + } + + #[test] + fn test_pattern_requirements_special_chars() { + let reqs = PatternRequirements { + min_digits: None, + min_uppercase: None, + min_lowercase: None, + min_special_chars: Some(2), + special_chars: None, // uses default + ignore_if_contains: None, + checksum: None, + }; + + // Should pass: has 2 special chars + assert!(matches!(reqs.validate(b"abc!@def", None, true), 
PatternValidationResult::Passed)); + + // Should fail: only 1 special char + assert!(matches!(reqs.validate(b"abc!def", None, true), PatternValidationResult::Failed)); + + // Should fail: no special chars + assert!(matches!(reqs.validate(b"abcdef", None, true), PatternValidationResult::Failed)); + } + + #[test] + fn test_pattern_requirements_custom_special_chars() { + let reqs = PatternRequirements { + min_digits: None, + min_uppercase: None, + min_lowercase: None, + min_special_chars: Some(2), + special_chars: Some("$%^".to_string()), + ignore_if_contains: None, + checksum: None, + }; + + // Should pass: has 2 custom special chars + assert!(matches!(reqs.validate(b"abc$%def", None, true), PatternValidationResult::Passed)); + + // Should fail: has special chars but not the custom ones + assert!(matches!(reqs.validate(b"abc!@def", None, true), PatternValidationResult::Failed)); + + // Should fail: only 1 custom special char + assert!(matches!(reqs.validate(b"abc$def", None, true), PatternValidationResult::Failed)); + } + + #[test] + fn test_pattern_requirements_combined() { + let reqs = PatternRequirements { + min_digits: Some(1), + min_uppercase: Some(1), + min_lowercase: Some(1), + min_special_chars: Some(1), + special_chars: None, + ignore_if_contains: None, + checksum: None, + }; + + // Should pass: has all requirements + assert!(matches!(reqs.validate(b"Abc1!", None, true), PatternValidationResult::Passed)); + + // Should fail: missing digit + assert!(matches!(reqs.validate(b"Abc!", None, true), PatternValidationResult::Failed)); + + // Should fail: missing uppercase + assert!(matches!(reqs.validate(b"abc1!", None, true), PatternValidationResult::Failed)); + + // Should fail: missing lowercase + assert!(matches!(reqs.validate(b"ABC1!", None, true), PatternValidationResult::Failed)); + + // Should fail: missing special + assert!(matches!(reqs.validate(b"Abc1", None, true), PatternValidationResult::Failed)); + } + + #[test] + fn 
test_pattern_requirements_ignore_if_contains() { + let reqs = PatternRequirements { + min_digits: None, + min_uppercase: None, + min_lowercase: None, + min_special_chars: None, + special_chars: None, + ignore_if_contains: Some(vec!["test".to_string(), "Demo".to_string()]), + checksum: None, + }; + + // Should fail: contains "test" (case-insensitive) + assert!(matches!( + reqs.validate(b"MyTestToken", None, true), + PatternValidationResult::IgnoredBySubstring { .. } + )); + + // Should fail: contains "demo" (case-insensitive) + assert!(matches!( + reqs.validate(b"example-demo-value", None, true), + PatternValidationResult::IgnoredBySubstring { .. } + )); + + // Should pass: does not contain excluded words + assert!(matches!( + reqs.validate(b"example-value", None, true), + PatternValidationResult::Passed + )); + } + + #[test] + fn test_pattern_requirements_ignore_if_contains_ignores_empty_entries() { + let reqs = PatternRequirements { + min_digits: None, + min_uppercase: None, + min_lowercase: None, + min_special_chars: None, + special_chars: None, + ignore_if_contains: Some(vec![" ".to_string(), "".to_string(), "BLOCK".to_string()]), + checksum: None, + }; + + // Should fail only when non-empty exclusion matches + assert!(matches!( + reqs.validate(b"needs-blocking", None, true), + PatternValidationResult::IgnoredBySubstring { .. } + )); + assert!(matches!(reqs.validate(b"allowed", None, true), PatternValidationResult::Passed)); + } + + #[test] + fn test_pattern_requirements_ignore_if_contains_can_be_disabled() { + let reqs = PatternRequirements { + min_digits: None, + min_uppercase: None, + min_lowercase: None, + min_special_chars: None, + special_chars: None, + ignore_if_contains: Some(vec!["ignoreme".to_string()]), + checksum: None, + }; + + // With ignoring enabled, the match is skipped + assert!(matches!( + reqs.validate(b"value-ignoreme", None, true), + PatternValidationResult::IgnoredBySubstring { .. 
} + )); + + // With ignoring disabled, the same input passes requirements + assert!(matches!( + reqs.validate(b"value-ignoreme", None, false), + PatternValidationResult::Passed + )); + } + + #[test] + fn test_pattern_requirements_none() { + let reqs = PatternRequirements { + min_digits: None, + min_uppercase: None, + min_lowercase: None, + min_special_chars: None, + special_chars: None, + ignore_if_contains: None, + checksum: None, + }; + + // Should pass: no requirements + assert!(matches!(reqs.validate(b"anything", None, true), PatternValidationResult::Passed)); + assert!(matches!(reqs.validate(b"123", None, true), PatternValidationResult::Passed)); + assert!(matches!(reqs.validate(b"!@#", None, true), PatternValidationResult::Passed)); + } } diff --git a/src/scanner/enumerate.rs b/src/scanner/enumerate.rs index 06cc3c9..1a354ae 100644 --- a/src/scanner/enumerate.rs +++ b/src/scanner/enumerate.rs @@ -169,6 +169,7 @@ pub fn enumerate_filesystem_inputs( Some(shared_profiler), &args.extra_ignore_comments, args.no_inline_ignore, + !args.no_ignore_if_contains, )?; let blob_processor_init_time = Mutex::new(t1.elapsed()); let make_blob_processor = || -> BlobProcessor { diff --git a/src/scanner/repos.rs b/src/scanner/repos.rs index f041049..331f610 100644 --- a/src/scanner/repos.rs +++ b/src/scanner/repos.rs @@ -683,6 +683,7 @@ pub async fn fetch_s3_objects( Some(shared_profiler.clone()), &args.extra_ignore_comments, args.no_inline_ignore, + !args.no_ignore_if_contains, )?; let mut processor = BlobProcessor { matcher }; @@ -764,6 +765,7 @@ pub async fn fetch_gcs_objects( Some(shared_profiler.clone()), &args.extra_ignore_comments, args.no_inline_ignore, + !args.no_ignore_if_contains, )?; let mut processor = BlobProcessor { matcher }; diff --git a/src/validation.rs b/src/validation.rs index 073b615..778127c 100644 --- a/src/validation.rs +++ b/src/validation.rs @@ -332,9 +332,7 @@ async fn timed_validate_single_match<'a>( } let mut globals = Object::new(); - for (k, v, ..) 
in &captured_values { - globals.insert(k.to_uppercase().into(), Value::scalar(v.clone())); - } + populate_globals_from_captures(&mut globals, &captured_values); let rule_syntax = m.rule.syntax(); @@ -961,6 +959,58 @@ async fn timed_validate_single_match<'a>( commit_and_return(m); } +fn populate_globals_from_captures( + globals: &mut Object, + captured_values: &[(String, String, usize, usize)], +) { + let mut best_token: Option<&String> = None; + + for (k, v, ..) in captured_values { + if k.eq_ignore_ascii_case("TOKEN") { + if best_token.map_or(true, |best| v.len() >= best.len()) { + best_token = Some(v); + } + } else { + globals.insert(k.to_uppercase().into(), Value::scalar(v.clone())); + } + } + + if let Some(token) = best_token { + globals.insert("TOKEN".into(), Value::scalar(token.clone())); + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn populate_globals_prefers_longest_token() { + let captured_values = vec![ + ("TOKEN".to_string(), "short".to_string(), 0usize, 5usize), + ("BODY".to_string(), "body".to_string(), 0usize, 4usize), + ("TOKEN".to_string(), "longervalue".to_string(), 0usize, 11usize), + ]; + + let mut globals = Object::new(); + populate_globals_from_captures(&mut globals, &captured_values); + + assert_eq!(globals.get("TOKEN"), Some(Value::scalar("longervalue")).as_ref()); + assert_eq!(globals.get("BODY"), Some(Value::scalar("body")).as_ref()); + } + + #[test] + fn populate_globals_handles_missing_token() { + let captured_values = vec![("CHECKSUM".to_string(), "123456".to_string(), 0usize, 6usize)]; + + let mut globals = Object::new(); + populate_globals_from_captures(&mut globals, &captured_values); + + assert!(globals.get("TOKEN").is_none()); + assert_eq!(globals.get("CHECKSUM"), Some(Value::scalar("123456")).as_ref()); + } +} + // #[cfg(test)] // mod tests { // use std::sync::Arc; diff --git a/src/validation/utils.rs b/src/validation/utils.rs index 9e8422d..4db50a3 100644 --- a/src/validation/utils.rs +++ 
b/src/validation/utils.rs @@ -6,19 +6,11 @@ use crate::validation::SerializableCaptures; /// Return (NAME, value, start, end) for every capture we care about. /// /// * If a capture has a name, use that (upper-cased) -/// * If it’s unnamed, fall back to `"TOKEN"` -/// * Skip the unnamed β€œwhole-match” capture **only when** there are -/// additional captures to return. +/// * If it’s unnamed, fall back to `"TOKEN"` pub fn process_captures(captures: &SerializableCaptures) -> Vec<(String, String, usize, usize)> { - let multiple = captures.captures.len() > 1; - captures .captures .iter() - // Skip the whole-match capture (match_number == 0) only when there - // are additional captures. All other captures – named or unnamed – - // should be preserved. - .filter(|cap| !multiple || cap.match_number != 0) .map(|cap| { let name = cap.name.as_ref().map(|n| n.to_uppercase()).unwrap_or_else(|| "TOKEN".to_string()); @@ -140,7 +132,7 @@ mod tests { } #[test] - fn skips_whole_match_when_multiple() { + fn includes_whole_match_when_multiple() { let captures = SerializableCaptures { captures: smallvec![ SerializableCapture { @@ -160,11 +152,17 @@ mod tests { ], }; let result = process_captures(&captures); - assert_eq!(result, vec![("FOO".to_string(), "bcd".to_string(), 1usize, 4usize)]); + assert_eq!( + result, + vec![ + ("TOKEN".to_string(), "abcde".to_string(), 0usize, 5usize), + ("FOO".to_string(), "bcd".to_string(), 1usize, 4usize), + ] + ); } #[test] - fn includes_unnamed_groups_but_skips_whole_match() { + fn includes_whole_match_and_unnamed_groups() { let captures = SerializableCaptures { captures: smallvec![ SerializableCapture { @@ -188,6 +186,7 @@ mod tests { assert_eq!( result, vec![ + ("TOKEN".to_string(), "aabbcc".to_string(), 0usize, 6usize), ("FOO".to_string(), "aa".to_string(), 0usize, 2usize), ("TOKEN".to_string(), "cc".to_string(), 4usize, 6usize), ] diff --git a/testdata/test_char_requirements.yaml b/testdata/test_char_requirements.yaml new file mode 100644 index 
0000000..f1f9801 --- /dev/null +++ b/testdata/test_char_requirements.yaml @@ -0,0 +1,24 @@ +rules: + - name: Secure API Key with Character Requirements + id: test.char_requirements.1 + pattern: | + (?xi) + api[_-]?key + (?:.|[\n\r]){0,32}? + \b + ([A-Za-z0-9!@#$%^&*]{16,}) + \b + min_entropy: 3.0 + confidence: high + pattern_requirements: + min_digits: 1 + min_uppercase: 1 + min_lowercase: 1 + min_special_chars: 1 + examples: + - api_key = "Abc123!SecureToken" + - api-key: "MyK3y!WithSpecial" + negative_examples: + - api_key = "abcdefghijklmnop" # no uppercase, no digit, no special + - api_key = "ABCDEFGHIJKLMNOP" # no lowercase, no digit, no special + - api_key = "abc123defghijklm" # no uppercase, no special diff --git a/tests/cli.rs b/tests/cli.rs index 0486b73..ee845cd 100644 --- a/tests/cli.rs +++ b/tests/cli.rs @@ -6,8 +6,7 @@ mod test { use super::*; #[test] fn cli_lists_rules_pretty() { - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args(["rules", "list", "--format", "pretty", "--no-update-check"]) .assert() .success() @@ -15,8 +14,7 @@ mod test { } #[test] fn cli_lists_rules_json() { - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args(["rules", "list", "--format", "json", "--no-update-check"]) .assert() .success() @@ -25,8 +23,7 @@ mod test { #[test] fn cli_version_flag() { - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .arg("--version") .assert() .success() diff --git a/tests/cli_failure.rs b/tests/cli_failure.rs index ab352f6..0968f4f 100644 --- a/tests/cli_failure.rs +++ b/tests/cli_failure.rs @@ -8,8 +8,7 @@ use tempfile::TempDir; /// 1. 
Path-does-not-exist β‡’ run_async_scan bails with β€œInvalid input” #[test] fn scan_fails_for_missing_path() { - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args(["scan", "no/such/path/here", "--no-update-check"]) .assert() .failure() // exit-code β‰  0 @@ -22,8 +21,7 @@ fn scan_fails_for_bad_rule_yaml() { let tmp = TempDir::new().unwrap(); fs::write(tmp.path().join("broken.yml"), "this: is: : not yaml").unwrap(); - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args([ "scan", tmp.path().to_str().unwrap(), // dummy input dir (exists) @@ -68,8 +66,7 @@ rules: // Create a dummy input file that matches the rule fs::write(tmp.path().join("input.txt"), "dummy_dead").unwrap(); - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args([ "scan", tmp.path().join("input.txt").to_str().unwrap(), diff --git a/tests/cli_subcommands.rs b/tests/cli_subcommands.rs index 2be09ab..b51fc29 100644 --- a/tests/cli_subcommands.rs +++ b/tests/cli_subcommands.rs @@ -19,8 +19,7 @@ mod github { #[test] fn scan_github_help() { - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args(["scan", "github", "--help"]) .assert() .success() @@ -29,8 +28,7 @@ mod github { #[test] fn scan_github_list_only_help() { - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args(["scan", "github", "--help"]) .assert() .success() @@ -42,8 +40,7 @@ mod github { #[test] fn scan_github_requires_specifier() { - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args(["scan", "github", "--no-update-check"]) .assert() .failure() @@ -52,8 +49,7 @@ mod github { #[test] fn scan_github_with_user() { - Command::cargo_bin("kingfisher") - .unwrap() + 
Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args(["scan", "github", "--user", "testuser", "--list-only", "--no-update-check"]) .assert() .code(predicates::function::function(|code: &i32| { @@ -64,8 +60,7 @@ mod github { #[test] fn scan_github_with_organization() { - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args([ "scan", "github", @@ -80,8 +75,7 @@ mod github { #[test] fn scan_github_multiple_users() { - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args([ "scan", "github", @@ -98,8 +92,7 @@ mod github { #[test] fn scan_github_with_exclude() { - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args([ "scan", "github", @@ -116,8 +109,7 @@ mod github { #[test] fn scan_github_with_repo_type_fork() { - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args([ "scan", "github", @@ -134,8 +126,7 @@ mod github { #[test] fn scan_github_with_repo_type_source() { - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args([ "scan", "github", @@ -152,8 +143,7 @@ mod github { #[test] fn scan_github_custom_api_url() { - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args([ "scan", "github", @@ -170,8 +160,7 @@ mod github { #[test] fn scan_github_all_organizations() { - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args([ "scan", "github", @@ -187,8 +176,7 @@ mod github { #[test] fn scan_github_invalid_repo_type() { - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args([ "scan", "github", @@ -205,8 +193,7 @@ mod github { #[test] fn scan_github_mixed_user_and_org() { - Command::cargo_bin("kingfisher") - 
.unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args([ "scan", "github", @@ -231,8 +218,7 @@ mod gitlab { #[test] fn scan_gitlab_help() { - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args(["scan", "gitlab", "--help"]) .assert() .success() @@ -241,8 +227,7 @@ mod gitlab { #[test] fn scan_gitlab_list_only_flag() { - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args(["scan", "gitlab", "--help"]) .assert() .success() @@ -251,8 +236,7 @@ mod gitlab { #[test] fn scan_gitlab_requires_specifier() { - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args(["scan", "gitlab", "--no-update-check"]) .assert() .failure() @@ -261,8 +245,7 @@ mod gitlab { #[test] fn scan_gitlab_with_user() { - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args(["scan", "gitlab", "--user", "testuser", "--list-only", "--no-update-check"]) .assert() .code(predicates::function::function(|code: &i32| *code == 0 || *code == 1)); @@ -270,8 +253,7 @@ mod gitlab { #[test] fn scan_gitlab_with_group() { - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args(["scan", "gitlab", "--group", "testgroup", "--list-only", "--no-update-check"]) .assert() .code(predicates::function::function(|code: &i32| *code == 0 || *code == 1)); @@ -279,8 +261,7 @@ mod gitlab { #[test] fn scan_gitlab_with_include_subgroups() { - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args([ "scan", "gitlab", @@ -296,8 +277,7 @@ mod gitlab { #[test] fn scan_gitlab_with_repo_type() { - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args([ "scan", "gitlab", @@ -314,8 +294,7 @@ mod gitlab { #[test] fn 
scan_gitlab_with_exclude() { - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args([ "scan", "gitlab", @@ -332,8 +311,7 @@ mod gitlab { #[test] fn scan_gitlab_custom_api_url() { - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args([ "scan", "gitlab", @@ -350,8 +328,7 @@ mod gitlab { #[test] fn scan_gitlab_all_groups() { - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args([ "scan", "gitlab", @@ -375,8 +352,7 @@ mod azure { #[test] fn scan_azure_help() { - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args(["scan", "azure", "--help"]) .assert() .success() @@ -385,8 +361,7 @@ mod azure { #[test] fn scan_azure_list_only_flag() { - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args(["scan", "azure", "--help"]) .assert() .success() @@ -395,8 +370,7 @@ mod azure { #[test] fn scan_azure_requires_specifier() { - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args(["scan", "azure", "--no-update-check"]) .assert() .failure() @@ -405,8 +379,7 @@ mod azure { #[test] fn scan_azure_with_organization() { - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args([ "scan", "azure", @@ -421,8 +394,7 @@ mod azure { #[test] fn scan_azure_with_project() { - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args([ "scan", "azure", @@ -437,8 +409,7 @@ mod azure { #[test] fn scan_azure_all_projects() { - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args([ "scan", "azure", @@ -454,8 +425,7 @@ mod azure { #[test] fn scan_azure_with_exclude() { - 
Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args([ "scan", "azure", @@ -472,8 +442,7 @@ mod azure { #[test] fn scan_azure_with_repo_type() { - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args([ "scan", "azure", @@ -498,8 +467,7 @@ mod bitbucket { #[test] fn scan_bitbucket_help() { - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args(["scan", "bitbucket", "--help"]) .assert() .stdout(is_match(r"kingfisher(\.exe)? scan bitbucket \[OPTIONS\]").unwrap()); @@ -507,8 +475,7 @@ mod bitbucket { #[test] fn scan_bitbucket_list_only_flag() { - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args(["scan", "bitbucket", "--help"]) .assert() .success() @@ -517,8 +484,7 @@ mod bitbucket { #[test] fn scan_bitbucket_requires_specifier() { - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args(["scan", "bitbucket", "--no-update-check"]) .assert() .failure() @@ -527,8 +493,7 @@ mod bitbucket { #[test] fn scan_bitbucket_with_workspace() { - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args([ "scan", "bitbucket", @@ -543,8 +508,7 @@ mod bitbucket { #[test] fn scan_bitbucket_with_user() { - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args(["scan", "bitbucket", "--user", "testuser", "--list-only", "--no-update-check"]) .assert() .code(predicates::function::function(|code: &i32| *code == 0 || *code == 1)); @@ -552,8 +516,7 @@ mod bitbucket { #[test] fn scan_bitbucket_with_project() { - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args([ "scan", "bitbucket", @@ -568,8 +531,7 @@ mod bitbucket { #[test] fn 
scan_bitbucket_all_workspaces() { - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args([ "scan", "bitbucket", @@ -585,8 +547,7 @@ mod bitbucket { #[test] fn scan_bitbucket_with_exclude() { - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args([ "scan", "bitbucket", @@ -603,8 +564,7 @@ mod bitbucket { #[test] fn scan_bitbucket_with_repo_type() { - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args([ "scan", "bitbucket", @@ -629,8 +589,7 @@ mod gitea { #[test] fn scan_gitea_help() { - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args(["scan", "gitea", "--help"]) .assert() .success() @@ -639,8 +598,7 @@ mod gitea { #[test] fn scan_gitea_requires_specifier() { - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args(["scan", "gitea", "--no-update-check"]) .assert() .failure() @@ -649,8 +607,7 @@ mod gitea { #[test] fn scan_gitea_with_organization() { - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args([ "scan", "gitea", @@ -665,8 +622,7 @@ mod gitea { #[test] fn scan_gitea_with_user() { - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args(["scan", "gitea", "--user", "testuser", "--list-only", "--no-update-check"]) .assert() .code(predicates::function::function(|code: &i32| *code == 0 || *code == 1)); @@ -674,8 +630,7 @@ mod gitea { #[test] fn scan_gitea_all_organizations() { - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args([ "scan", "gitea", @@ -689,8 +644,7 @@ mod gitea { #[test] fn scan_gitea_with_exclude() { - Command::cargo_bin("kingfisher") - .unwrap() + 
Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args([ "scan", "gitea", @@ -707,8 +661,7 @@ mod gitea { #[test] fn scan_gitea_with_repo_type() { - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args([ "scan", "gitea", @@ -725,8 +678,7 @@ mod gitea { #[test] fn scan_gitea_custom_api_url() { - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args([ "scan", "gitea", @@ -751,8 +703,7 @@ mod huggingface { #[test] fn scan_huggingface_help() { - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args(["scan", "huggingface", "--help"]) .assert() .success() @@ -761,8 +712,7 @@ mod huggingface { #[test] fn scan_huggingface_requires_specifier() { - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args(["scan", "huggingface", "--no-update-check"]) .assert() .failure() @@ -771,8 +721,7 @@ mod huggingface { #[test] fn scan_huggingface_with_user() { - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args([ "scan", "huggingface", @@ -787,8 +736,7 @@ mod huggingface { #[test] fn scan_huggingface_with_organization() { - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args([ "scan", "huggingface", @@ -803,8 +751,7 @@ mod huggingface { #[test] fn scan_huggingface_with_model() { - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args([ "scan", "huggingface", @@ -819,8 +766,7 @@ mod huggingface { #[test] fn scan_huggingface_with_dataset() { - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args([ "scan", "huggingface", @@ -835,8 +781,7 @@ mod huggingface { #[test] fn scan_huggingface_with_space() { - 
Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args([ "scan", "huggingface", @@ -851,8 +796,7 @@ mod huggingface { #[test] fn scan_huggingface_with_exclude() { - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args([ "scan", "huggingface", @@ -888,8 +832,7 @@ mod cross_platform { for mut platform_args in platforms { platform_args.extend_from_slice(&["--list-only", "--no-update-check"]); - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args(&platform_args) .assert() .code(predicates::function::function(|code: &i32| *code == 0 || *code == 1)); @@ -898,8 +841,7 @@ mod cross_platform { #[test] fn scan_requires_subcommand_or_path() { - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args(["scan", "--no-update-check"]) .assert() .failure() @@ -908,8 +850,7 @@ mod cross_platform { #[test] fn scan_invalid_platform_subcommand() { - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args(["scan", "invalid-platform", "--no-update-check"]) .assert() .failure() @@ -919,8 +860,7 @@ mod cross_platform { #[test] fn scan_github_without_scanning_no_paths() { // list-only should work without providing actual scan paths - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args(["scan", "github", "--user", "testuser", "--list-only", "--no-update-check"]) .assert() .code(predicates::function::function(|code: &i32| *code == 0 || *code == 1)); @@ -941,8 +881,7 @@ mod legacy_compatibility { let root = PathBuf::from(env!("CARGO_MANIFEST_DIR")); let test_file = root.join("testdata").join("generic_secrets.py"); - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args([ "scan", 
test_file.to_str().expect("REASON"), @@ -962,8 +901,7 @@ mod legacy_compatibility { // This just validates the CLI parsing works // New syntax - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args(["scan", "github", "--user", "test", "--list-only", "--no-update-check"]) .assert() .code(predicates::function::function(|code: &i32| *code == 0 || *code == 1)); diff --git a/tests/fingerprint_dedup.rs b/tests/fingerprint_dedup.rs index f458e7d..5771cc4 100644 --- a/tests/fingerprint_dedup.rs +++ b/tests/fingerprint_dedup.rs @@ -33,6 +33,7 @@ fn make_match(fp: u64) -> Match { references: vec![], validation: None, depends_on_rule: vec![], + pattern_requirements: None, }; let rule = Arc::new(Rule::new(syntax)); Match { diff --git a/tests/int_allowlist.rs b/tests/int_allowlist.rs index d950c47..dcd9067 100644 --- a/tests/int_allowlist.rs +++ b/tests/int_allowlist.rs @@ -148,6 +148,7 @@ fn run_skiplist(skip_regex: Vec, skip_skipword: Vec) -> Result anyhow::Result<()> { let dir = tempdir()?; let file_path = dir.path().join("secret.txt"); - // Base64 for ghp_1wuHFikBKQtCcH3EB2FBUkyn8krXhP2qLqPa - let encoded = "Z2hwXzF3dUhGaWtCS1F0Q2NIM0VCMkZCVWt5bjhrclhoUDJxTHFQYQ=="; + // Base64 for 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 + let encoded = "Z2hwXzF3dUhGaWtCS1F0Q2NIM0VCMkZCVWt5bjhrclhoUDBNV0h4cw=="; fs::write(&file_path, encoded)?; - Command::cargo_bin("kingfisher")? 
+ Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args([ "scan", dir.path().to_str().unwrap(), @@ -26,7 +26,7 @@ fn detects_base64_encoded_secret() -> anyhow::Result<()> { .assert() .code(200) .stdout( - predicate::str::contains("ghp_1wuHFikBKQtCcH3EB2FBUkyn8krXhP2qLqPa") + predicate::str::contains("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") .and(predicate::str::contains("\"encoding\": \"base64\"")), ); @@ -39,10 +39,10 @@ fn detects_base64_encoded_secret() -> anyhow::Result<()> { fn skips_base64_when_disabled() -> anyhow::Result<()> { let dir = tempdir()?; let file_path = dir.path().join("secret.txt"); - let encoded = "Z2hwXzF3dUhGaWtCS1F0Q2NIM0VCMkZCVWt5bjhrclhoUDJxTHFQYQ=="; + let encoded = "Z2hwXzF3dUhGaWtCS1F0Q2NIM0VCMkZCVWt5bjhrclhoUDBNV0h4cw=="; fs::write(&file_path, encoded)?; - Command::cargo_bin("kingfisher")? + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args([ "scan", dir.path().to_str().unwrap(), @@ -68,7 +68,7 @@ fn no_base64_skips_empty_files() -> anyhow::Result<()> { let file_path = dir.path().join("empty.py"); fs::write(&file_path, "")?; - Command::cargo_bin("kingfisher")? + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args([ "scan", dir.path().to_str().unwrap(), @@ -92,11 +92,11 @@ fn no_base64_skips_empty_files() -> anyhow::Result<()> { fn detects_base64_in_code_with_tree_sitter() -> anyhow::Result<()> { let dir = tempdir()?; let file_path = dir.path().join("secret.py"); - // Base64 for ghp_1wuHFikBKQtCcH3EB2FBUkyn8krXhP2qLqPa - let encoded = "Z2hwXzF3dUhGaWtCS1F0Q2NIM0VCMkZCVWt5bjhrclhoUDJxTHFQYQ=="; + // Base64 for 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 + let encoded = "Z2hwXzF3dUhGaWtCS1F0Q2NIM0VCMkZCVWt5bjhrclhoUDBNV0h4cw=="; fs::write(&file_path, format!("token = \"{}\"\n", encoded))?; - Command::cargo_bin("kingfisher")? 
+ Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args([ "scan", dir.path().to_str().unwrap(), @@ -110,7 +110,7 @@ fn detects_base64_in_code_with_tree_sitter() -> anyhow::Result<()> { .assert() .code(200) .stdout( - predicate::str::contains("ghp_1wuHFikBKQtCcH3EB2FBUkyn8krXhP2qLqPa") + predicate::str::contains("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") .and(predicate::str::contains("\"encoding\": \"base64\"")), ); diff --git a/tests/int_bitbucket.rs b/tests/int_bitbucket.rs index fd05911..507f3d7 100644 --- a/tests/int_bitbucket.rs +++ b/tests/int_bitbucket.rs @@ -148,6 +148,7 @@ fn test_bitbucket_remote_scan() -> Result<()> { no_base64: false, extra_ignore_comments: Vec::new(), no_inline_ignore: false, + no_ignore_if_contains: false, }; let global_args = GlobalArgs { diff --git a/tests/int_dedup.rs b/tests/int_dedup.rs index b6eaae9..d059e73 100644 --- a/tests/int_dedup.rs +++ b/tests/int_dedup.rs @@ -168,6 +168,7 @@ rules: no_base64: false, extra_ignore_comments: Vec::new(), no_inline_ignore: false, + no_ignore_if_contains: false, }; let global_args = GlobalArgs { diff --git a/tests/int_github.rs b/tests/int_github.rs index f96dd16..91af8db 100644 --- a/tests/int_github.rs +++ b/tests/int_github.rs @@ -155,6 +155,7 @@ fn test_github_remote_scan() -> Result<()> { no_base64: false, extra_ignore_comments: Vec::new(), no_inline_ignore: false, + no_ignore_if_contains: false, }; // Create global arguments let global_args = GlobalArgs { diff --git a/tests/int_gitlab.rs b/tests/int_gitlab.rs index 71421d1..6651fa7 100644 --- a/tests/int_gitlab.rs +++ b/tests/int_gitlab.rs @@ -153,6 +153,7 @@ fn test_gitlab_remote_scan() -> Result<()> { skip_aws_account_file: None, no_base64: false, no_inline_ignore: false, + no_ignore_if_contains: false, }; let global_args = GlobalArgs { @@ -304,6 +305,7 @@ fn test_gitlab_remote_scan_no_history() -> Result<()> { no_base64: false, extra_ignore_comments: Vec::new(), no_inline_ignore: false, + no_ignore_if_contains: false, }; 
let global_args = GlobalArgs { diff --git a/tests/int_quiet.rs b/tests/int_quiet.rs index 1968972..9b3c403 100644 --- a/tests/int_quiet.rs +++ b/tests/int_quiet.rs @@ -10,8 +10,7 @@ fn contains_bytes(haystack: &[u8], needle: &[u8]) -> bool { #[test] fn scan_quiet_suppresses_summary() { for format in FORMATS { - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .env("NO_COLOR", "1") .args([ "scan", @@ -35,8 +34,7 @@ fn scan_quiet_suppresses_summary() { #[test] fn scan_quiet_with_rule_stats_prints_rule_stats() { for format in FORMATS { - Command::cargo_bin("kingfisher") - .unwrap() + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .env("NO_COLOR", "1") .args([ "scan", diff --git a/tests/int_redact.rs b/tests/int_redact.rs index 86b9dd1..b9fb1f8 100644 --- a/tests/int_redact.rs +++ b/tests/int_redact.rs @@ -131,6 +131,7 @@ async fn test_redact_hashes_finding_values() -> Result<()> { no_base64: false, extra_ignore_comments: Vec::new(), no_inline_ignore: false, + no_ignore_if_contains: false, }; let global_args = GlobalArgs { diff --git a/tests/int_rules_no_validated_findings.rs b/tests/int_rules_no_validated_findings.rs index bd0f4f7..a8531c2 100644 --- a/tests/int_rules_no_validated_findings.rs +++ b/tests/int_rules_no_validated_findings.rs @@ -4,7 +4,7 @@ use serde_json::Value; #[test] fn scan_rules_has_no_validated_findings() -> Result<()> { - let output = Command::cargo_bin("kingfisher")? 
+ let output = Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args(["scan", "data/rules", "--format", "json", "--no-update-check", "--only-valid"]) .output()?; diff --git a/tests/int_slack.rs b/tests/int_slack.rs index d2d9048..c3cacc3 100644 --- a/tests/int_slack.rs +++ b/tests/int_slack.rs @@ -139,6 +139,7 @@ impl TestContext { skip_aws_account_file: None, no_base64: false, no_inline_ignore: false, + no_ignore_if_contains: false, }; let loaded = RuleLoader::from_rule_specifiers(&scan_args.rules).load(&scan_args)?; @@ -158,7 +159,7 @@ async fn test_scan_slack_messages() -> Result<()> { "messages": { "matches": [{ "permalink": "https://example.slack.com/archives/C123/p1234", - "text": "This contains a github token ghp_1wuHFikBKQtCcH3EB2FBUkyn8krXhP2qLqPa", + "text": "This contains a github token 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", "ts": "1234.56", "channel": {"id": "C123", "name": "general"} }], @@ -278,6 +279,7 @@ async fn test_scan_slack_messages() -> Result<()> { no_base64: false, extra_ignore_comments: Vec::new(), no_inline_ignore: false, + no_ignore_if_contains: false, }; let global_args = GlobalArgs { diff --git a/tests/int_validation_cache.rs b/tests/int_validation_cache.rs index d868936..970ad0a 100644 --- a/tests/int_validation_cache.rs +++ b/tests/int_validation_cache.rs @@ -211,6 +211,7 @@ async fn test_validation_cache_and_depvars() -> Result<()> { no_base64: false, extra_ignore_comments: Vec::new(), no_inline_ignore: false, + no_ignore_if_contains: false, }; /* --------------------------------------------------------- * diff --git a/tests/int_vulnerable_files.rs b/tests/int_vulnerable_files.rs index 9a17797..3e8bc4a 100644 --- a/tests/int_vulnerable_files.rs +++ b/tests/int_vulnerable_files.rs @@ -154,6 +154,7 @@ impl TestContext { no_base64: false, extra_ignore_comments: Vec::new(), no_inline_ignore: false, + no_ignore_if_contains: false, }; let loaded = RuleLoader::from_rule_specifiers(&scan_args.rules) @@ -281,6 +282,7 @@ impl 
TestContext { skip_aws_account_file: None, no_base64: false, no_inline_ignore: false, + no_ignore_if_contains: false, }; let global_args = GlobalArgs { diff --git a/tests/smoke_archive.rs b/tests/smoke_archive.rs index c2afc7f..3e08a0a 100644 --- a/tests/smoke_archive.rs +++ b/tests/smoke_archive.rs @@ -7,7 +7,7 @@ fn smoke_scan_tar_gz_archive() -> anyhow::Result<()> { let dir = tempfile::tempdir()?; let tar_gz = dir.path().join("payload.tar.gz"); - let github_pat = "ghp_1wuHFikBKQtCcH3EB2FBUkyn8krXhP2qLqPa"; + let github_pat = "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"; // --- build a payload.tar.gz ------------------------------------------------- { @@ -29,7 +29,7 @@ fn smoke_scan_tar_gz_archive() -> anyhow::Result<()> { let findings_code = 200; // ── 1) extraction ENABLED -- secret should be found ───────────────────────── - Command::cargo_bin("kingfisher")? + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args([ "scan", tar_gz.to_str().unwrap(), @@ -43,7 +43,7 @@ fn smoke_scan_tar_gz_archive() -> anyhow::Result<()> { .stdout(predicates::str::contains(github_pat)); // ── 2) extraction DISABLED -- secret *not* found ──────────────────────────── - Command::cargo_bin("kingfisher")? 
+ Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args([ "scan", tar_gz.to_str().unwrap(), diff --git a/tests/smoke_baseline.rs b/tests/smoke_baseline.rs index f69be7c..734682c 100644 --- a/tests/smoke_baseline.rs +++ b/tests/smoke_baseline.rs @@ -1,10 +1,44 @@ use std::fs; use assert_cmd::Command; +use clap::Parser; use predicates::prelude::*; use tempfile::tempdir; -const GH_PAT: &str = "ghp_1wuHFikBKQtCcH3EB2FBUkyn8krXhP2qLqPa"; +const GH_PAT: &str = "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"; + +#[test] +fn manage_baseline_enables_no_dedup() -> anyhow::Result<()> { + use kingfisher::cli::{ + commands::scan::ScanOperation, + global::{Command, CommandLineArgs}, + }; + + let dir = tempdir()?; + + let args = CommandLineArgs::try_parse_from([ + "kingfisher", + "scan", + dir.path().to_str().unwrap(), + "--manage-baseline", + "--no-update-check", + ])?; + + let command = match args.command { + Command::Scan(scan_args) => scan_args, + other => panic!("unexpected command parsed: {:?}", other), + }; + + let scan_args = match command.into_operation()? { + ScanOperation::Scan(scan_args) => scan_args, + op => panic!("expected scan operation, got {:?}", op), + }; + + assert!(scan_args.manage_baseline); + assert!(scan_args.no_dedup); + + Ok(()) +} #[test] fn baseline_create_and_filter() -> anyhow::Result<()> { @@ -14,7 +48,7 @@ fn baseline_create_and_filter() -> anyhow::Result<()> { let baseline = dir.path().join("baseline.yaml"); // Create baseline with manage flag - Command::cargo_bin("kingfisher")? + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args([ "scan", dir.path().to_str().unwrap(), @@ -39,7 +73,7 @@ fn baseline_create_and_filter() -> anyhow::Result<()> { // Scanning with the baseline should suppress the existing finding and leave // the baseline untouched. - Command::cargo_bin("kingfisher")? 
+ Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args([ "scan", dir.path().to_str().unwrap(), @@ -61,7 +95,7 @@ fn baseline_create_and_filter() -> anyhow::Result<()> { assert_eq!(initial_baseline, baseline_after_scan, "baseline remains stable after reuse"); // Managing the baseline again should not churn entries or report the secret - Command::cargo_bin("kingfisher")? + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args([ "scan", dir.path().to_str().unwrap(), @@ -96,7 +130,7 @@ fn baseline_exclude_prunes_entries() -> anyhow::Result<()> { let baseline = dir.path().join("baseline.yaml"); // Initial baseline includes the .git secret - Command::cargo_bin("kingfisher")? + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args([ "scan", dir.path().to_str().unwrap(), @@ -117,7 +151,7 @@ fn baseline_exclude_prunes_entries() -> anyhow::Result<()> { assert!(content.contains(".git/secret.txt")); // Rescan with exclusion, which should prune the .git entry - Command::cargo_bin("kingfisher")? + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args([ "scan", dir.path().to_str().unwrap(), diff --git a/tests/smoke_branch.rs b/tests/smoke_branch.rs index 96b9d23..27f4d26 100644 --- a/tests/smoke_branch.rs +++ b/tests/smoke_branch.rs @@ -94,7 +94,7 @@ aws_secret_access_key = efnegoUp/WXc3XwlL77dXu1aKIICzvz+n+7Sz88i )?; // ── scan the repository by commit hash ─────────────────────────────────── - Command::cargo_bin("kingfisher")? + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args([ "scan", repo_dir.to_str().unwrap(), @@ -112,7 +112,7 @@ aws_secret_access_key = efnegoUp/WXc3XwlL77dXu1aKIICzvz+n+7Sz88i ); // ── scan only the diff between feature-1 and the merge base ───────────── - Command::cargo_bin("kingfisher")? 
+ Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args([ "scan", repo_dir.to_str().unwrap(), @@ -196,7 +196,7 @@ fn scan_specific_commit_reports_only_that_commit() -> Result<()> { let c1_hex = commits[0].to_string(); // first commit (AWS only) // Scan exactly the initial commit via --branch - Command::cargo_bin("kingfisher")? + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args([ "scan", repo_dir.to_str().unwrap(), @@ -225,7 +225,7 @@ fn scan_with_branch_root_includes_descendants() -> Result<()> { let c1_hex = commits[0].to_string(); // start from first commit // Using --branch-root should include the selected commit and remaining history up to HEAD - Command::cargo_bin("kingfisher")? + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args([ "scan", repo_dir.to_str().unwrap(), @@ -256,7 +256,7 @@ fn scan_branch_tip_with_branch_root_commit() -> Result<()> { // Passing --branch-root-commit should implicitly enable inclusive scanning even // without the legacy --branch-root flag when targeting a named branch tip. - Command::cargo_bin("kingfisher")? + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args([ "scan", repo_dir.to_str().unwrap(), diff --git a/tests/smoke_check_rules.rs b/tests/smoke_check_rules.rs index 7dff499..341b64d 100644 --- a/tests/smoke_check_rules.rs +++ b/tests/smoke_check_rules.rs @@ -7,7 +7,7 @@ use predicates::prelude::*; #[test] fn check_rules() -> anyhow::Result<()> { // ── run kingfisher ──────────────────────────────────────────────── - Command::cargo_bin("kingfisher")? 
+ Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args([ "rules", "check", diff --git a/tests/smoke_docker.rs b/tests/smoke_docker.rs index 8392c4a..3bd4307 100644 --- a/tests/smoke_docker.rs +++ b/tests/smoke_docker.rs @@ -1,9 +1,8 @@ -use assert_cmd::prelude::*; use std::process::Command; #[test] fn smoke_scan_docker_image() -> anyhow::Result<()> { - let mut cmd = Command::cargo_bin("kingfisher")?; + let mut cmd = Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")); let output = cmd .args([ "scan", diff --git a/tests/smoke_exclude.rs b/tests/smoke_exclude.rs index eb82ef8..048c85f 100644 --- a/tests/smoke_exclude.rs +++ b/tests/smoke_exclude.rs @@ -4,7 +4,7 @@ use assert_cmd::Command; use predicates::prelude::*; use tempfile::tempdir; -const SECRET: &str = "ghp_1wuHFikBKQtCcH3EB2FBUkyn8krXhP2qLqPa"; +const SECRET: &str = "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"; #[test] fn exclude_pattern_hides_matches() -> anyhow::Result<()> { @@ -14,7 +14,7 @@ fn exclude_pattern_hides_matches() -> anyhow::Result<()> { fs::write(&py, format!("token = \"{}\"\n", SECRET))?; fs::write(&txt, format!("token = \"{}\"\n", SECRET))?; - Command::cargo_bin("kingfisher")? + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args([ "scan", dir.path().to_str().unwrap(), @@ -41,7 +41,7 @@ fn exclude_git_directory_hides_matches() -> anyhow::Result<()> { fs::write(git_dir.join("config"), format!("token = \"{}\"\n", SECRET))?; fs::write(dir.path().join("bar.txt"), format!("token = \"{}\"\n", SECRET))?; - Command::cargo_bin("kingfisher")? 
+ Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args([ "scan", dir.path().to_str().unwrap(), diff --git a/tests/smoke_fs.rs b/tests/smoke_fs.rs index 1c1b35b..55c45fa 100644 --- a/tests/smoke_fs.rs +++ b/tests/smoke_fs.rs @@ -5,7 +5,7 @@ use assert_cmd::prelude::*; use predicates::prelude::*; use tempfile::tempdir; -const GITHUB_PAT: &str = "ghp_1wuHFikBKQtCcH3EB2FBUkyn8krXhP2qLqPa"; +const GITHUB_PAT: &str = "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"; #[test] fn smoke_scan_filesystem_text_and_binary() -> anyhow::Result<()> { @@ -18,7 +18,7 @@ fn smoke_scan_filesystem_text_and_binary() -> anyhow::Result<()> { fs::write(&bin_path, [0x89, 0x50, 0x4E, 0x47])?; // tiny PNG header // ── run kingfisher ──────────────────────────────────────────────── - Command::cargo_bin("kingfisher")? + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args([ "scan", dir.path().to_str().unwrap(), diff --git a/tests/smoke_git.rs b/tests/smoke_git.rs index 428ce24..b260bb9 100644 --- a/tests/smoke_git.rs +++ b/tests/smoke_git.rs @@ -15,7 +15,7 @@ fn smoke_scan_git_history() -> anyhow::Result<()> { // commit v1 let file_path = repo_dir.join("config.yml"); - fs::write(&file_path, b"ghp_1wuHFikBKQtCcH3EB2FBUkyn8krXhP2qLqPa")?; + fs::write(&file_path, b"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")?; let mut idx = repo.index()?; idx.add_path(std::path::Path::new("config.yml"))?; let oid1 = idx.write_tree()?; @@ -23,7 +23,7 @@ fn smoke_scan_git_history() -> anyhow::Result<()> { repo.commit(Some("HEAD"), &sig, &sig, "init", &tree1, &[])?; // commit v2 (same leak, will test dedup) - fs::write(&file_path, b"ghp_1wuHFikBKQtCcH3EB2FBUkyn8krXhP2qLqPa # unchanged")?; + fs::write(&file_path, b"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 # unchanged")?; idx.add_path(std::path::Path::new("config.yml"))?; let oid2 = idx.write_tree()?; let tree2 = repo.find_tree(oid2)?; @@ -31,7 +31,7 @@ fn smoke_scan_git_history() -> anyhow::Result<()> { repo.commit(Some("HEAD"), &sig, &sig, "update", 
&tree2, &[&head])?; // ── run kingfisher with git-history mode FULL ───────────────────── - Command::cargo_bin("kingfisher")? + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args([ "scan", repo_dir.to_str().unwrap(), @@ -44,7 +44,7 @@ fn smoke_scan_git_history() -> anyhow::Result<()> { ]) .assert() .code(200) // ← kingfisher’s β€œfindings present” status - .stdout(predicate::str::contains("ghp_1wuHFikBKQtCcH3EB2FBUkyn8krXhP2qLqPa")); + .stdout(predicate::str::contains("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")); dir.close()?; Ok(()) diff --git a/tests/smoke_github_homebrew.rs b/tests/smoke_github_homebrew.rs index e2eaa11..65b5527 100644 --- a/tests/smoke_github_homebrew.rs +++ b/tests/smoke_github_homebrew.rs @@ -3,7 +3,7 @@ use predicates::str::contains; #[test] fn scan_homebrew_github_no_findings() -> anyhow::Result<()> { - Command::cargo_bin("kingfisher")? + Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")) .args(["scan", "--git-url", "https://github.com/homebrew/.github", "--no-update-check"]) .assert() .success()