forked from mirrors/kingfisher
commit
6ee70be8be
38 changed files with 1810 additions and 98 deletions
|
|
@ -2,10 +2,18 @@
|
|||
|
||||
All notable changes to this project will be documented in this file.
|
||||
|
||||
## [1.47.0]
|
||||
- MongoDB validator now validates `mongodb+srv://` URIs with a fast timeout instead of skipping them
|
||||
- Improved rules: github oauth2, diffbot, mailchimp, aws
|
||||
- Added validation to SauceLabs rule
|
||||
- Added rules: shodan, bitly, flickr
|
||||
- Decode Base64 blobs and scan their contents for secrets while skipping short strings for performance. This has a small performance impact and can be disabled with `--no-base64`
|
||||
|
||||
## [1.46.0]
|
||||
- Improved rules: AWS, pem
|
||||
- Added rules for Ollama, Weights and Biases, Cerebras, Friendli, Fireworks.ai, NVIDIA NIM, together.ai, zhipu
|
||||
- Added `self-update` command to update the binary independently. It now also supports updating a Homebrew-managed binary
|
||||
- MongoDB validator now checks `mongodb+srv://` URIs with fast-fail timeouts
|
||||
|
||||
## [1.45.0]
|
||||
- Added `--repo-artifacts` flag to scan repository issues, gists/snippets, and wikis when cloning via `--git-url`
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ publish = false
|
|||
|
||||
[package]
|
||||
name = "kingfisher"
|
||||
version = "1.46.0"
|
||||
version = "1.47.0"
|
||||
description = "MongoDB's blazingly fast secret scanning and validation tool"
|
||||
edition.workspace = true
|
||||
rust-version.workspace = true
|
||||
|
|
|
|||
9
Makefile
9
Makefile
|
|
@ -183,14 +183,15 @@ ubuntu-arm64: setup-zig # ensures Zig & cargo-zigbuild exist
|
|||
|
||||
$(MAKE) list-archives
|
||||
|
||||
|
||||
darwin-arm64:
|
||||
@echo "Checking Rust for darwin-arm64..."
|
||||
@$(MAKE) check-rust || ( \
|
||||
echo "Rust not found or out-of-date. Installing via Homebrew..." && \
|
||||
brew install rust \
|
||||
)
|
||||
@brew install boost cmake gcc libpcap pkg-config ragel sqlite coreutils gnu-tar || true
|
||||
@brew list cmake >/dev/null 2>&1 || brew install cmake
|
||||
@brew list boost >/dev/null 2>&1 || brew install boost
|
||||
@brew install gcc libpcap pkg-config ragel sqlite coreutils gnu-tar
|
||||
@rustup target add aarch64-apple-darwin
|
||||
cargo build --release --target aarch64-apple-darwin --features system-alloc
|
||||
@cd target/aarch64-apple-darwin/release && \
|
||||
|
|
@ -212,7 +213,9 @@ darwin-x64:
|
|||
echo "Rust not found or out-of-date. Installing via Homebrew..." && \
|
||||
brew install rust \
|
||||
)
|
||||
@brew install boost cmake gcc libpcap pkg-config ragel sqlite coreutils gnu-tar || true
|
||||
@brew list cmake >/dev/null 2>&1 || brew install cmake
|
||||
@brew list boost >/dev/null 2>&1 || brew install boost
|
||||
@brew install gcc libpcap pkg-config ragel sqlite coreutils gnu-tar
|
||||
@rustup target add x86_64-apple-darwin
|
||||
source $$HOME/.cargo/env && cargo build --release --target x86_64-apple-darwin --features system-alloc
|
||||
@cd target/x86_64-apple-darwin/release && \
|
||||
|
|
|
|||
|
|
@ -23,7 +23,8 @@ Originally forked from Praetorian’s Nosey Parker, Kingfisher adds live cloud-A
|
|||
- **Slack messages**: query‑based scans with `--slack-query`
|
||||
- **AWS S3**: bucket scans via `--s3-bucket`/`--s3-prefix` with credentials from `KF_AWS_KEY`/`KF_AWS_SECRET`, `--role-arn`, `--aws-local-profile`, or anonymous
|
||||
- **Compressed Files**: Supports extracting and scanning compressed files for secrets
|
||||
- **Baseline management**: generate and track baselines to suppress known secrets ([docs/BASELINE.md](/docs/BASELINE.md))
|
||||
- **Base64 decoding**: decodes Base64 blobs and scans their contents for secrets, skipping short strings for performance. This has a small performance impact and can be disabled with `--no-base64`
|
||||
- **Baseline management**: generate and track baselines to suppress known secrets ([docs/BASELINE.md](/docs/BASELINE.md))
|
||||
|
||||
**Learn more:** [Introducing Kingfisher: Real‑Time Secret Detection and Validation](https://www.mongodb.com/blog/post/product-release-announcements/introducing-kingfisher-real-time-secret-detection-validation)
|
||||
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ rules:
|
|||
(?:
|
||||
\b
|
||||
(?:AWS|AMAZON|AMZN|A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA)
|
||||
(?:.|[\n\r]){0,32}?
|
||||
(?:.|[\n\r]){0,64}?
|
||||
\b
|
||||
(
|
||||
[A-Z0-9/+=]{40}
|
||||
|
|
@ -34,7 +34,7 @@ rules:
|
|||
(?:SECRET|PRIVATE|ACCESS)
|
||||
(?:.|[\n\r]){0,16}?
|
||||
(?:KEY|TOKEN)
|
||||
(?:.|[\n\r]){0,32}?
|
||||
(?:.|[\n\r]){0,64}?
|
||||
\b
|
||||
(
|
||||
[A-Z0-9/+=]{40}
|
||||
|
|
|
|||
36
data/rules/bitly.yml
Normal file
36
data/rules/bitly.yml
Normal file
|
|
@ -0,0 +1,36 @@
|
|||
rules:
|
||||
- name: Bitly Access Token
|
||||
id: kingfisher.bitly.1
|
||||
pattern: |
|
||||
(?xi)
|
||||
\b
|
||||
bitly
|
||||
(?:.|[\n\r]){0,32}?
|
||||
(?:SECRET|PRIVATE|ACCESS|KEY|TOKEN)
|
||||
(?:.|[\n\r]){0,32}?
|
||||
\b
|
||||
(
|
||||
[a-f0-9]{40}
|
||||
)
|
||||
\b
|
||||
confidence: medium
|
||||
min_entropy: 3.0
|
||||
validation:
|
||||
type: Http
|
||||
content:
|
||||
request:
|
||||
method: GET
|
||||
url: "https://api-ssl.bitly.com/v4/user"
|
||||
headers:
|
||||
Authorization: "Bearer {{ TOKEN }}"
|
||||
response_matcher:
|
||||
- report_response: true
|
||||
- type: StatusMatch
|
||||
status: [200]
|
||||
- type: WordMatch
|
||||
words:
|
||||
- '"login":'
|
||||
references:
|
||||
- https://dev.bitly.com/api-reference#Authentication
|
||||
examples:
|
||||
- "bitly_token = 20e9817b9c5ddde1b0cec7622bfc557dbc823791"
|
||||
|
|
@ -27,9 +27,8 @@ rules:
|
|||
- report_response: true
|
||||
- type: StatusMatch
|
||||
status: [200]
|
||||
- type: JsonValid
|
||||
- type: WordMatch
|
||||
match_all_words: true
|
||||
words:
|
||||
- '"name"'
|
||||
- '"email"'
|
||||
- '"email"'
|
||||
- '"planCredits"'
|
||||
48
data/rules/docker.yml
Normal file
48
data/rules/docker.yml
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
rules:
|
||||
- name: Docker Registry Credentials (auths JSON)
|
||||
id: kingfisher.docker.1
|
||||
pattern: |
|
||||
(?xis)
|
||||
"auths"\s*:\s*\{
|
||||
[^}]*?
|
||||
" (?P<REG> (?:https?:\/\/)? [a-z0-9.\-:+/]+ ) "\s*:\s*\{
|
||||
[^}]*?
|
||||
"auth"\s*:\s*"(?P<B64> [A-Za-z0-9+/=]{16,} )"
|
||||
[^}]*?
|
||||
\}
|
||||
[^}]*?
|
||||
\}
|
||||
min_entropy: 2.0
|
||||
confidence: medium
|
||||
examples:
|
||||
- |
|
||||
{
|
||||
"auths": {
|
||||
"quay.io": {
|
||||
"auth": "cmhkaCtyaHRhcDowM1BERl1RQTJQTDlaQUE5T1gzSU9IQjFYTUlXOVNGNU1XRzNSRVRHNThKVXpKMzEwV0ZZRVMOQTdGMExMNOYx"
|
||||
}
|
||||
}
|
||||
}
|
||||
- |
|
||||
{"auths":{"index.docker.io/v1/":{"auth":"dXNlcjp0b2tlbg=="}}}
|
||||
references:
|
||||
- https://distribution.github.io/distribution/spec/api/
|
||||
validation:
|
||||
type: Http
|
||||
content:
|
||||
request:
|
||||
method: GET
|
||||
url: >
|
||||
{%- assign r = REG -%}
|
||||
{%- if r contains "://" -%}
|
||||
{{ r | replace: "/$", "" }}/v2/auth
|
||||
{%- else -%}
|
||||
https://{{ r }}/v2/auth
|
||||
{%- endif -%}
|
||||
headers:
|
||||
Authorization: "Basic {{ B64 }}"
|
||||
Accept: application/json
|
||||
response_matcher:
|
||||
- report_response: true
|
||||
- type: StatusMatch
|
||||
status: [200]
|
||||
72
data/rules/flickr.yml
Normal file
72
data/rules/flickr.yml
Normal file
|
|
@ -0,0 +1,72 @@
|
|||
rules:
|
||||
- name: Flickr API Key
|
||||
id: kingfisher.flickr.1
|
||||
pattern: |
|
||||
(?xi)
|
||||
\b
|
||||
flickr
|
||||
(?:.|[\n\r]){0,32}?
|
||||
(?:SECRET|PRIVATE|ACCESS|KEY|TOKEN)?
|
||||
(?:.|[\n\r]){0,32}?
|
||||
\b
|
||||
(
|
||||
[a-f0-9]{32}
|
||||
)
|
||||
\b
|
||||
confidence: medium
|
||||
min_entropy: 3.0
|
||||
validation:
|
||||
type: Http
|
||||
content:
|
||||
request:
|
||||
method: GET
|
||||
url: "https://www.flickr.com/services/rest/?method=flickr.test.login&api_key={{TOKEN}}&format=json&nojsoncallback=1"
|
||||
response_matcher:
|
||||
- report_response: true
|
||||
- type: StatusMatch
|
||||
status: [200]
|
||||
- type: WordMatch
|
||||
words:
|
||||
- '"Invalid API Key'
|
||||
negative: true
|
||||
references:
|
||||
- https://www.flickr.com/services/api/
|
||||
- https://www.flickr.com/services/api/flickr.test.login.html
|
||||
examples:
|
||||
- "flickr_api_key: d1953fb62a9798593bfdb4287ed2423e"
|
||||
- name: Flickr OAuth Token
|
||||
id: kingfisher.flickr.2
|
||||
pattern: |
|
||||
(?xi)
|
||||
\b
|
||||
flickr
|
||||
(?:.|[\n\r]){0,32}?
|
||||
(?:OAUTH|ACCESS|TOKEN)?
|
||||
(?:.|[\n\r]){0,32}?
|
||||
\b
|
||||
(
|
||||
[a-f0-9]{32}
|
||||
)
|
||||
\b
|
||||
confidence: medium
|
||||
min_entropy: 3.0
|
||||
validation:
|
||||
type: Http
|
||||
content:
|
||||
request:
|
||||
method: GET
|
||||
url: "https://www.flickr.com/services/rest/?method=flickr.auth.oauth.checkToken&api_key={{TOKEN}}&oauth_token={{TOKEN}}&format=json&nojsoncallback=1"
|
||||
response_matcher:
|
||||
- report_response: true
|
||||
- type: StatusMatch
|
||||
status: [200]
|
||||
- type: WordMatch
|
||||
words:
|
||||
- '"stat":"ok"'
|
||||
- '"oauth":'
|
||||
match_all_words: true
|
||||
references:
|
||||
- https://www.flickr.com/services/api/
|
||||
- https://www.flickr.com/services/api/flickr.auth.oauth.checkToken.html
|
||||
examples:
|
||||
- "flickr_oauth_token: a8c1b9f1d9d34aa5a1edbd43234bcdef"
|
||||
|
|
@ -192,4 +192,22 @@ rules:
|
|||
password = 'abuser123456' # some other comment
|
||||
- |
|
||||
user = 'Aladdin'
|
||||
password = 'open sesame'
|
||||
password = 'open sesame'
|
||||
- name: Docker Robot Credentials (plaintext pair)
|
||||
id: kingfisher.generic.9
|
||||
pattern: |
|
||||
(?xi)
|
||||
\b
|
||||
(
|
||||
(?P<USER> [a-z0-9._-]+ \+ [a-z0-9._-]+ )
|
||||
:
|
||||
(?P<PASS> [A-Z0-9]{32,80} )
|
||||
)
|
||||
\b
|
||||
min_entropy: 2.0
|
||||
confidence: low
|
||||
examples:
|
||||
- some+thing:02PDFMQN2PL2ZAB9OX3IOHC1XMIW1SE5NWG3RETG58JUZJ310WFYESRA7F0LM461
|
||||
- org+builder:1C2F9D0BB1E67E9F6B3B5B9A2A3D4E5F6A7B8C9D0E1F2A3B4C5D6E7F8A9B0C1
|
||||
references:
|
||||
- https://docs.quay.io/use_quay.html#robot-accounts
|
||||
|
|
@ -166,6 +166,7 @@ rules:
|
|||
(?: id | identifier | key )
|
||||
.{0,2} \s{0,20} .{0,2} \s{0,20} .{0,2}
|
||||
\b ([a-z0-9]{20}) \b
|
||||
visible: false
|
||||
examples:
|
||||
- |
|
||||
GITHUB_CLIENT_ID=ac58d6da7d7a84c039b7
|
||||
|
|
@ -181,6 +182,26 @@ rules:
|
|||
(?: key | oauth | sec | secret )?
|
||||
.{0,2} \s{0,20} .{0,2} \s{0,20} .{0,2}
|
||||
\b ([a-z0-9]{40}) \b
|
||||
depends_on_rule:
|
||||
- rule_id: "kingfisher.github.5"
|
||||
variable: GITHUB_CLIENT_ID
|
||||
validation:
|
||||
type: Http
|
||||
content:
|
||||
request:
|
||||
method: POST
|
||||
url: "https://github.com/login/oauth/access_token"
|
||||
headers:
|
||||
Accept: "application/json"
|
||||
Content-Type: "application/json"
|
||||
body: '{"client_id":"{{GITHUB_CLIENT_ID}}","client_secret":"{{TOKEN}}","code":"invalid_code"}'
|
||||
response_matcher:
|
||||
- report_response: true
|
||||
- type: StatusMatch
|
||||
status: [200]
|
||||
- type: WordMatch
|
||||
words:
|
||||
- '"error":"bad_verification_code"'
|
||||
examples:
|
||||
- |
|
||||
GITHUB_CLIENT_ID=ac58d6da7d7a84c039b7
|
||||
|
|
|
|||
|
|
@ -3,8 +3,9 @@ rules:
|
|||
id: kingfisher.mailchimp.1
|
||||
pattern: |
|
||||
(?xi)
|
||||
\b
|
||||
mailchimp
|
||||
(?:.|[\n\r]){0,32}?
|
||||
(?:.|[\n\r]){0,128}?
|
||||
(?:SECRET|PRIVATE|ACCESS|KEY|TOKEN)
|
||||
(?:.|[\n\r]){0,32}?
|
||||
\b
|
||||
|
|
|
|||
|
|
@ -1,23 +1,82 @@
|
|||
rules:
|
||||
- name: Sauce Token
|
||||
id: kingfisher.sauce.1
|
||||
|
||||
- name: Sauce Labs Username
|
||||
id: kingfisher.saucelabs.1
|
||||
pattern: |
|
||||
(?x)(?i)
|
||||
sauce .{0,50}
|
||||
(?xi)
|
||||
\b
|
||||
([a-f0-9-]{36})
|
||||
(?: [^a-f0-9-] | $ )
|
||||
sauce
|
||||
(?:.|[\n\r]){0,16}?
|
||||
(?:USER|ID|NAME|CLIENT|OAUTH)
|
||||
(?:.|[\n\r]){0,16}?
|
||||
\b
|
||||
(
|
||||
[A-Z0-9_.-]{2,70}
|
||||
)
|
||||
\b
|
||||
confidence: medium
|
||||
visible: false
|
||||
min_entropy: 1.0
|
||||
examples:
|
||||
- "SAUCE_USERNAME=oauth-someusername-487ea"
|
||||
- SAUCE_USERNAME="oauth-ci-bot-487ea"
|
||||
- '"sauce_username":"build-user"'
|
||||
- 'saucelabs user oauth-release-bot'
|
||||
- name: Sauce Labs API Endpoint
|
||||
id: kingfisher.saucelabs.2
|
||||
pattern: |
|
||||
(?xi)
|
||||
\b
|
||||
(
|
||||
(?:api|ondemand)\.(?:us|eu)-(?:west|east|central)-[0-9]\.saucelabs\.com
|
||||
)
|
||||
\b
|
||||
confidence: medium
|
||||
visible: false
|
||||
min_entropy: 2.0
|
||||
examples:
|
||||
- "api.us-west-1.saucelabs.com"
|
||||
- "api.eu-central-1.saucelabs.com"
|
||||
- "ondemand.eu-central-1.saucelabs.com"
|
||||
|
||||
- name: Sauce Labs Access Key
|
||||
id: kingfisher.saucelabs.3
|
||||
pattern: |
|
||||
(?xi)
|
||||
\b
|
||||
sauce
|
||||
(?:.|[\n\r]){0,32}?
|
||||
(?:SECRET|PRIVATE|ACCESS|KEY|TOKEN)
|
||||
(?:.|[\n\r]){0,32}?
|
||||
\b
|
||||
(
|
||||
[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}
|
||||
)
|
||||
\b
|
||||
confidence: medium
|
||||
min_entropy: 3.0
|
||||
|
||||
examples:
|
||||
- |
|
||||
- SAUCE_USERNAME=vitess
|
||||
- SAUCE_ACCESS_KEY=2397f603-c2c4-4897-a8ca-587ace5dc8dd
|
||||
- SAUCE_ACCESS_KEY=2397f603-c2c4-4897-a8ca-587ace5dc8d-
|
||||
|
||||
depends_on_rule:
|
||||
- rule_id: "kingfisher.saucelabs.1"
|
||||
variable: SAUCE_USERNAME
|
||||
- rule_id: "kingfisher.saucelabs.2"
|
||||
variable: SAUCE_URL
|
||||
validation:
|
||||
type: Http
|
||||
content:
|
||||
request:
|
||||
method: GET
|
||||
url: "https://{{ SAUCE_URL | default: 'api.us-west-1.saucelabs.com' | replace: 'ondemand.', 'api.' }}/rest/v1/users/{{SAUCE_USERNAME}}"
|
||||
headers:
|
||||
Authorization: "Basic {{ SAUCE_USERNAME | append: ':' | append: TOKEN | b64enc }}"
|
||||
response_matcher:
|
||||
- report_response: true
|
||||
- type: StatusMatch
|
||||
status: [200]
|
||||
- type: WordMatch
|
||||
words:
|
||||
- '"username":'
|
||||
references:
|
||||
- https://docs.saucelabs.com/dev/api/
|
||||
- https://docs.saucelabs.com/dev/api/#authentication
|
||||
examples:
|
||||
- "SAUCE_ACCESS_KEY=1736468d-b178-39cd-bfde-30fabdc371e4"
|
||||
|
||||
|
|
|
|||
34
data/rules/shodan.yml
Normal file
34
data/rules/shodan.yml
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
rules:
|
||||
- name: SHODAN API Key
|
||||
id: kingfisher.shodan.1
|
||||
pattern: |
|
||||
(?xi)
|
||||
\b
|
||||
shodan
|
||||
(?:.|[\n\r]){0,32}?
|
||||
(?:SECRET|PRIVATE|ACCESS|KEY|TOKEN)
|
||||
(?:.|[\n\r]){0,32}?
|
||||
\b
|
||||
(
|
||||
[A-Z0-9]{32}
|
||||
)
|
||||
\b
|
||||
confidence: medium
|
||||
min_entropy: 4.0
|
||||
validation:
|
||||
type: Http
|
||||
content:
|
||||
request:
|
||||
method: GET
|
||||
url: "https://api.shodan.io/api-info?key={{TOKEN}}"
|
||||
response_matcher:
|
||||
- report_response: true
|
||||
- type: StatusMatch
|
||||
status: [200]
|
||||
- type: WordMatch
|
||||
words:
|
||||
- '"scan_credits"'
|
||||
references:
|
||||
- https://developer.shodan.io/api
|
||||
examples:
|
||||
- "shodan_api_key = dqlblS2CmTOc5zYn4nZkJljYsXRnNuiq"
|
||||
|
|
@ -92,6 +92,10 @@ pub struct ScanArgs {
|
|||
#[arg(long, short = 'r', default_value_t = false)]
|
||||
pub redact: bool,
|
||||
|
||||
/// Skip decoding Base64 blobs before scanning
|
||||
#[arg(long, default_value_t = false)]
|
||||
pub no_base64: bool,
|
||||
|
||||
/// Timeout for Git repository scanning in seconds
|
||||
#[arg(long, default_value_t = 1800, value_name = "SECONDS")]
|
||||
pub git_repo_timeout: u64,
|
||||
|
|
|
|||
|
|
@ -255,7 +255,9 @@ async fn async_main(args: CommandLineArgs) -> Result<()> {
|
|||
}
|
||||
},
|
||||
},
|
||||
Command::SelfUpdate => unreachable!(),
|
||||
Command::SelfUpdate => {
|
||||
anyhow::bail!("SelfUpdate command should not reach this branch")
|
||||
}
|
||||
}
|
||||
if let Some(msg) = update_msg {
|
||||
info!("{msg}");
|
||||
|
|
@ -335,6 +337,7 @@ fn create_default_scan_args() -> cli::commands::scan::ScanArgs {
|
|||
skip_regex: Vec::new(),
|
||||
skip_word: Vec::new(),
|
||||
output_args: OutputArgs { output: None, format: ReportOutputFormat::Pretty },
|
||||
no_base64: false,
|
||||
}
|
||||
}
|
||||
/// Run the rules check command
|
||||
|
|
|
|||
118
src/matcher.rs
118
src/matcher.rs
|
|
@ -65,6 +65,7 @@ pub struct OwnedBlobMatch {
|
|||
pub validation_response_status: StatusCode,
|
||||
pub validation_success: bool,
|
||||
pub calculated_entropy: f32,
|
||||
pub is_base64: bool,
|
||||
}
|
||||
impl<'a> Matcher<'a> {
|
||||
pub fn get_profiling_report(&self) -> Option<Vec<RuleStats>> {
|
||||
|
|
@ -85,6 +86,7 @@ impl OwnedBlobMatch {
|
|||
.unwrap_or(StatusCode::CONTINUE),
|
||||
validation_success: m.validation_success,
|
||||
calculated_entropy: m.calculated_entropy,
|
||||
is_base64: m.is_base64,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -108,6 +110,7 @@ impl OwnedBlobMatch {
|
|||
validation_success: blob_match.validation_success,
|
||||
calculated_entropy: blob_match.calculated_entropy,
|
||||
finding_fingerprint: 0, //default
|
||||
is_base64: blob_match.is_base64,
|
||||
};
|
||||
|
||||
// Convert matching_finding to a &str (using lossy conversion if needed)
|
||||
|
|
@ -154,6 +157,7 @@ pub struct BlobMatch<'a> {
|
|||
|
||||
pub validation_success: bool,
|
||||
pub calculated_entropy: f32,
|
||||
pub is_base64: bool,
|
||||
}
|
||||
#[derive(Clone)]
|
||||
struct UserData {
|
||||
|
|
@ -273,6 +277,7 @@ impl<'a> Matcher<'a> {
|
|||
lang: Option<String>,
|
||||
redact: bool,
|
||||
no_dedup: bool,
|
||||
no_base64: bool,
|
||||
) -> Result<ScanResult<'b>>
|
||||
where
|
||||
'a: 'b,
|
||||
|
|
@ -305,8 +310,12 @@ impl<'a> Matcher<'a> {
|
|||
// Perform the scan
|
||||
self.scan_bytes_raw(&blob.bytes(), &filename)?;
|
||||
|
||||
// Early exit if no matches found
|
||||
if self.user_data.raw_matches_scratch.is_empty() {
|
||||
// Opportunistically look for standalone Base64 blobs. If neither
|
||||
// the raw scan nor this check yields anything, we can return early
|
||||
// before doing any heavier work.
|
||||
let mut b64_items = if no_base64 { Vec::new() } else { get_base64_strings(blob.bytes()) };
|
||||
|
||||
if self.user_data.raw_matches_scratch.is_empty() && b64_items.is_empty() {
|
||||
// Only record in seen_blobs if deduplication is enabled
|
||||
if !no_dedup {
|
||||
return Ok(match self.seen_blobs.insert(blob.id, false) {
|
||||
|
|
@ -322,18 +331,22 @@ impl<'a> Matcher<'a> {
|
|||
let rules_db = self.rules_db;
|
||||
let mut seen_matches = FxHashSet::default();
|
||||
let mut previous_matches = Vec::new();
|
||||
let tree_sitter_result = lang.and_then(|lang_str| {
|
||||
get_language_and_queries(&lang_str).and_then(|(language, queries)| {
|
||||
let checker = Checker { language, rules: queries };
|
||||
match checker.check(&blob.bytes()) {
|
||||
Ok(results) => Some(results),
|
||||
Err(e) => {
|
||||
println!("Error in checker.check: {}", e);
|
||||
None
|
||||
let tree_sitter_result = if self.user_data.raw_matches_scratch.is_empty() {
|
||||
None
|
||||
} else {
|
||||
lang.and_then(|lang_str| {
|
||||
get_language_and_queries(&lang_str).and_then(|(language, queries)| {
|
||||
let checker = Checker { language, rules: queries };
|
||||
match checker.check(&blob.bytes()) {
|
||||
Ok(results) => Some(results),
|
||||
Err(e) => {
|
||||
println!("Error in checker.check: {}", e);
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
})
|
||||
});
|
||||
};
|
||||
// Process matches
|
||||
let mut matches = Vec::new();
|
||||
let owned_ts_results = tree_sitter_result.map(|ts_results| {
|
||||
|
|
@ -383,6 +396,7 @@ impl<'a> Matcher<'a> {
|
|||
&mut seen_matches,
|
||||
origin,
|
||||
None,
|
||||
false,
|
||||
redact,
|
||||
&filename,
|
||||
self.profiler.as_ref(),
|
||||
|
|
@ -406,6 +420,7 @@ impl<'a> Matcher<'a> {
|
|||
&mut seen_matches,
|
||||
origin,
|
||||
Some(ts_match.clone()),
|
||||
*is_base64_decoded,
|
||||
redact,
|
||||
&filename,
|
||||
self.profiler.as_ref(),
|
||||
|
|
@ -414,6 +429,48 @@ impl<'a> Matcher<'a> {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !no_base64 {
|
||||
// If the blob contains standalone Base64 blobs, decode and scan them as well
|
||||
const MAX_B64_DEPTH: usize = 2; // decode at most two levels deep
|
||||
let mut b64_stack: Vec<(DecodedData, usize)> =
|
||||
b64_items.drain(..).map(|d| (d, 0)).collect();
|
||||
while let Some((item, depth)) = b64_stack.pop() {
|
||||
for (rule_id_usize, rule) in rules_db.rules.iter().enumerate() {
|
||||
let re = &rules_db.anchored_regexes[rule_id_usize];
|
||||
filter_match(
|
||||
blob,
|
||||
rule.clone(),
|
||||
re,
|
||||
item.pos_start,
|
||||
item.pos_end,
|
||||
&mut matches,
|
||||
&mut previous_matches,
|
||||
rule_id_usize,
|
||||
&mut seen_matches,
|
||||
origin,
|
||||
Some(item.decoded.clone()),
|
||||
true,
|
||||
redact,
|
||||
&filename,
|
||||
self.profiler.as_ref(),
|
||||
);
|
||||
}
|
||||
if depth + 1 < MAX_B64_DEPTH {
|
||||
for nested in get_base64_strings(item.decoded.as_bytes()) {
|
||||
b64_stack.push((
|
||||
DecodedData {
|
||||
original: nested.original,
|
||||
decoded: nested.decoded,
|
||||
pos_start: item.pos_start,
|
||||
pos_end: item.pos_end,
|
||||
},
|
||||
depth + 1,
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Finalize
|
||||
// Only record in seen_blobs if deduplication is enabled
|
||||
if !no_dedup {
|
||||
|
|
@ -457,6 +514,7 @@ fn filter_match<'b>(
|
|||
seen_matches: &mut FxHashSet<u64>,
|
||||
_origin: &OriginSet,
|
||||
ts_match: Option<String>,
|
||||
is_base64: bool,
|
||||
redact: bool,
|
||||
filename: &str,
|
||||
profiler: Option<&Arc<ConcurrentRuleProfiler>>,
|
||||
|
|
@ -521,6 +579,7 @@ fn filter_match<'b>(
|
|||
validation_response_status: StatusCode::from_u16(0).unwrap_or(StatusCode::CONTINUE),
|
||||
validation_success: false,
|
||||
calculated_entropy,
|
||||
is_base64,
|
||||
});
|
||||
previous_matches.push((rule_id, matching_input_offset_span));
|
||||
}
|
||||
|
|
@ -729,6 +788,8 @@ pub struct Match {
|
|||
pub calculated_entropy: f32,
|
||||
|
||||
pub visible: bool,
|
||||
#[serde(default)]
|
||||
pub is_base64: bool,
|
||||
}
|
||||
impl Match {
|
||||
#[inline]
|
||||
|
|
@ -780,6 +841,7 @@ impl Match {
|
|||
validation_response_status: owned_blob_match.validation_response_status.as_u16(),
|
||||
validation_success: owned_blob_match.validation_success,
|
||||
calculated_entropy: owned_blob_match.calculated_entropy,
|
||||
is_base64: owned_blob_match.is_base64,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -832,33 +894,26 @@ pub struct DecodedData {
|
|||
}
|
||||
pub fn get_base64_strings(input: &[u8]) -> Vec<DecodedData> {
|
||||
lazy_static! {
|
||||
static ref RE_BASE64: Regex =
|
||||
Regex::new(r"(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?").unwrap();
|
||||
// Require a reasonably long run of valid Base64 characters to reduce
|
||||
// noise. 32 bytes corresponds to 24 decoded bytes.
|
||||
static ref RE_BASE64: Regex = Regex::new(r"[A-Za-z0-9+/]{32,}={0,2}").unwrap();
|
||||
}
|
||||
let mut results = Vec::new();
|
||||
for capture in RE_BASE64.captures_iter(input) {
|
||||
let base64_match = capture.get(0).unwrap();
|
||||
|
||||
if base64_match.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let start = base64_match.start();
|
||||
let end = base64_match.end();
|
||||
let base64_string = &input[start..end];
|
||||
// Check if the length is a multiple of 4
|
||||
for m in RE_BASE64.find_iter(input) {
|
||||
let base64_string = m.as_bytes();
|
||||
// Skip candidates whose length isn't a multiple of four – they cannot
|
||||
// be valid Base64.
|
||||
if base64_string.len() % 4 != 0 {
|
||||
continue;
|
||||
}
|
||||
if let Ok(decoded) = general_purpose::STANDARD.decode(base64_string) {
|
||||
// Check if the decoded string is valid UTF-8
|
||||
if let Ok(decoded_str) = std::str::from_utf8(&decoded) {
|
||||
if decoded_str.is_ascii() {
|
||||
results.push(DecodedData {
|
||||
original: String::from_utf8_lossy(base64_string).into_owned(),
|
||||
decoded: decoded_str.to_string(),
|
||||
pos_start: start,
|
||||
pos_end: end,
|
||||
pos_start: m.start(),
|
||||
pos_end: m.end(),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
@ -1026,12 +1081,13 @@ mod test {
|
|||
/// and report correct byte-offsets.
|
||||
#[test]
|
||||
fn test_get_base64_strings_basic() {
|
||||
let raw = b"foo SGVsbG8gV29ybGQ= bar"; // "Hello World"
|
||||
let raw = b"foo MDEyMzQ1Njc4OWFiY2RlZjAxMjM0NTY3ODlhYmNkZWY= bar";
|
||||
// decodes to "0123456789abcdef0123456789abcdef"
|
||||
let hits = get_base64_strings(raw);
|
||||
assert_eq!(hits.len(), 1);
|
||||
let item = &hits[0];
|
||||
assert_eq!(item.decoded, "Hello World");
|
||||
assert_eq!(item.original, "SGVsbG8gV29ybGQ=");
|
||||
assert_eq!(item.decoded, "0123456789abcdef0123456789abcdef");
|
||||
assert_eq!(item.original, "MDEyMzQ1Njc4OWFiY2RlZjAxMjM0NTY3ODlhYmNkZWY=");
|
||||
// "foo␠" is 4 bytes, so the start offset is 4
|
||||
assert_eq!((item.pos_start, item.pos_end), (4, 4 + item.original.len()));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -384,6 +384,7 @@ impl DetailsReporter {
|
|||
column_start: source_span.start.column as u32,
|
||||
column_end: source_span.end.column as u32,
|
||||
path: file_path,
|
||||
encoding: if rm.m.is_base64 { Some("base64".to_string()) } else { None },
|
||||
git_metadata: git_metadata_val,
|
||||
},
|
||||
}
|
||||
|
|
@ -521,6 +522,8 @@ pub struct FindingRecordData {
|
|||
pub column_end: u32,
|
||||
pub path: String,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub encoding: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub git_metadata: Option<serde_json::Value>,
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -128,6 +128,7 @@ mod tests {
|
|||
manage_baseline: false,
|
||||
skip_regex: Vec::new(),
|
||||
skip_word: Vec::new(),
|
||||
no_base64: false,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -165,6 +166,7 @@ mod tests {
|
|||
validation_success,
|
||||
calculated_entropy: 4.5,
|
||||
visible: true,
|
||||
is_base64: false,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -100,6 +100,9 @@ impl<'a> Display for PrettyFindingRecord<'a> {
|
|||
};
|
||||
let finding = &record.finding;
|
||||
writeln!(f, " |Finding.......: {}", style_fn(&finding.snippet))?;
|
||||
if let Some(enc) = &finding.encoding {
|
||||
writeln!(f, " |Encoding.....: {}", enc)?;
|
||||
}
|
||||
writeln!(f, " |Fingerprint...: {}", finding.fingerprint)?;
|
||||
writeln!(f, " |Confidence....: {}", finding.confidence)?;
|
||||
writeln!(f, " |Entropy.......: {}", finding.entropy)?;
|
||||
|
|
|
|||
|
|
@ -181,7 +181,7 @@ pub fn enumerate_filesystem_inputs(
|
|||
return Ok(());
|
||||
}
|
||||
progress.inc(blob.len().try_into().unwrap());
|
||||
match processor.run(origin, blob, args.no_dedup, args.redact) {
|
||||
match processor.run(origin, blob, args.no_dedup, args.redact, args.no_base64) {
|
||||
Ok(None) => {
|
||||
// nothing to record
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,11 +25,12 @@ impl<'a> BlobProcessor<'a> {
|
|||
blob: Blob,
|
||||
no_dedup: bool,
|
||||
redact: bool,
|
||||
no_base64: bool,
|
||||
) -> Result<Option<DatastoreMessage>> {
|
||||
let blob_id = blob.id.hex();
|
||||
let _span = debug_span!("matcher", blob_id).entered();
|
||||
let t1 = Instant::now();
|
||||
let res = self.matcher.scan_blob(&blob, &origin, None, redact, no_dedup)?;
|
||||
let res = self.matcher.scan_blob(&blob, &origin, None, redact, no_dedup, no_base64)?;
|
||||
let scan_us = t1.elapsed().as_micros();
|
||||
match res {
|
||||
// blob already seen, but with no matches; nothing to do!
|
||||
|
|
|
|||
|
|
@ -427,7 +427,7 @@ pub async fn fetch_s3_objects(
|
|||
let blob = crate::blob::Blob::from_bytes(bytes);
|
||||
|
||||
if let Some((origin, blob_md, scored_matches)) =
|
||||
processor.run(origin, blob, args.no_dedup, args.redact)?
|
||||
processor.run(origin, blob, args.no_dedup, args.redact, args.no_base64)?
|
||||
{
|
||||
// Wrap origin & metadata once:
|
||||
let origin_arc = Arc::new(origin);
|
||||
|
|
|
|||
|
|
@ -553,17 +553,24 @@ async fn timed_validate_single_match<'a>(
|
|||
return;
|
||||
}
|
||||
|
||||
let cache_key = mongodb::generate_mongodb_cache_key(&uri);
|
||||
if let Some(cached) = cache.get(&cache_key) {
|
||||
let c = cached.value();
|
||||
if c.timestamp.elapsed() < Duration::from_secs(VALIDATION_CACHE_SECONDS) {
|
||||
m.validation_success = c.is_valid;
|
||||
m.validation_response_body = c.body.clone();
|
||||
m.validation_response_status = c.status;
|
||||
commit_and_return(m);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
match mongodb::validate_mongodb(&uri).await {
|
||||
Ok((ok, msg)) => {
|
||||
m.validation_success = ok;
|
||||
m.validation_response_body = msg;
|
||||
m.validation_response_status = if uri.starts_with("mongodb+srv://") {
|
||||
StatusCode::CONTINUE
|
||||
} else if ok {
|
||||
StatusCode::OK
|
||||
} else {
|
||||
StatusCode::UNAUTHORIZED
|
||||
};
|
||||
m.validation_response_status =
|
||||
if ok { StatusCode::OK } else { StatusCode::UNAUTHORIZED };
|
||||
}
|
||||
Err(e) => {
|
||||
m.validation_success = false;
|
||||
|
|
@ -1021,6 +1028,7 @@ rules:
|
|||
validation_response_status: StatusCode::OK,
|
||||
validation_success: false,
|
||||
calculated_entropy: 0.0, // or compute your own
|
||||
is_base64: false,
|
||||
};
|
||||
let parser = register_all(liquid::ParserBuilder::with_stdlib()).build()?;
|
||||
let client = reqwest::Client::new();
|
||||
|
|
|
|||
1052
src/validation.rs.orig
Normal file
1052
src/validation.rs.orig
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -1,9 +1,10 @@
|
|||
// src/validation/mongodb.rs
|
||||
use std::time::Duration;
|
||||
use std::{net::IpAddr, time::Duration};
|
||||
|
||||
use anyhow::Result;
|
||||
use bson::doc;
|
||||
use mongodb::{options::ClientOptions, Client};
|
||||
use mongodb::{error::ErrorKind, options::ClientOptions, Client};
|
||||
use tokio::time::timeout;
|
||||
|
||||
pub fn looks_like_mongodb_uri(uri: &str) -> bool {
|
||||
// quick scheme check first
|
||||
|
|
@ -14,10 +15,87 @@ pub fn looks_like_mongodb_uri(uri: &str) -> bool {
|
|||
mongodb::options::ConnectionString::parse(uri).is_ok()
|
||||
}
|
||||
|
||||
/// Return true if the URI targets localhost/loopback or a unix domain socket.
///
/// This is a *string-only* check—no DNS or driver IO. The URI is split by hand:
/// scheme, then the authority (everything up to the first `/`), then optional
/// userinfo (everything up to the last `@`), then a comma-separated seed list.
/// The function returns `true` as soon as any seed is local according to
/// [`is_local_host`] or is a (percent-encoded) unix domain socket path.
fn uri_targets_localhost(uri: &str) -> bool {
    // Strip the scheme; input without a recognised scheme is inspected as-is.
    let rest = uri
        .strip_prefix("mongodb://")
        .or_else(|| uri.strip_prefix("mongodb+srv://"))
        .unwrap_or(uri);

    // An unescaped socket path directly after the scheme ("mongodb:///tmp/x.sock").
    // This must be tested on `rest`: the authority below is cut at the first '/',
    // so it can never itself start with '/'.
    if rest.starts_with('/') {
        return true; // UDS → treat as local
    }

    // Authority ends at the first '/' (before db/path); if missing, take whole rest.
    let authority = rest.split_once('/').map_or(rest, |(auth, _)| auth);

    // Drop userinfo if present. Splitting on the *last* '@' keeps a stray '@'
    // inside the credentials from being mistaken for the delimiter.
    let hostlist = authority.rsplit_once('@').map_or(authority, |(_, hosts)| hosts);

    // Iterate the seed list (mongodb://hostA,hostB,...).
    hostlist.split(',').map(str::trim).any(|entry| {
        // Percent-encoded unix domain socket ("%2Ftmp%2Fmongodb-27017.sock").
        // Checked per seed and after userinfo removal so credentials or an
        // earlier seed cannot hide it.
        if entry.get(..3).map_or(false, |prefix| prefix.eq_ignore_ascii_case("%2f")) {
            return true;
        }

        let host = match entry.strip_prefix('[') {
            // Bracketed IPv6 literal, optionally followed by ":port". Take what
            // is inside the brackets and ignore anything after the closing ']'.
            Some(inner) => inner.split_once(']').map_or(inner, |(addr, _port)| addr),
            // Hostname or IPv4 literal: strip ":port" only when the suffix is
            // all digits and the remainder has no further ':' (a bare IPv6
            // literal such as "::1" must not lose its final group).
            None => match entry.rsplit_once(':') {
                Some((name, port))
                    if !name.contains(':') && port.bytes().all(|b| b.is_ascii_digit()) =>
                {
                    name
                }
                _ => entry,
            },
        };

        is_local_host(host)
    })
}

/// Returns true for localhost/loopback/unspecified IPs and common localhost aliases.
///
/// `h` is a bare host (no port, no brackets). Surrounding whitespace and a
/// trailing root-label dot are ignored, and name comparison is ASCII
/// case-insensitive. Besides the fixed alias list, any name under the
/// `.localhost` domain is treated as local, as are IPv4-mapped IPv6 forms of
/// loopback/unspecified addresses (e.g. `::ffff:127.0.0.1`).
fn is_local_host(h: &str) -> bool {
    let name = h.trim().trim_end_matches('.');
    let lower = name.to_ascii_lowercase();

    // Common aliases seen in hosts files across distros.
    if matches!(
        lower.as_str(),
        "localhost"
            | "localhost.localdomain"
            | "localhost6"
            | "localhost6.localdomain6"
            | "ip6-localhost"
            | "ip6-loopback"
    ) {
        return true;
    }

    // Names below the special-use "localhost" domain (RFC 6761) are loopback too.
    if lower.ends_with(".localhost") {
        return true;
    }

    // Literal IPs. The unspecified addresses "0.0.0.0" and "::" are covered by
    // `is_unspecified`, so no separate string comparison is needed.
    match name.parse::<IpAddr>() {
        Ok(IpAddr::V4(v4)) => v4.is_loopback() || v4.is_unspecified(),
        Ok(IpAddr::V6(v6)) => {
            v6.is_loopback()
                || v6.is_unspecified()
                // `Ipv6Addr::is_loopback` is false for ::ffff:127.0.0.1, so
                // inspect the embedded IPv4 address explicitly.
                || v6
                    .to_ipv4_mapped()
                    .map_or(false, |v4| v4.is_loopback() || v4.is_unspecified())
        }
        Err(_) => false,
    }
}
|
||||
|
||||
const FAST_CONNECT_MS: u64 = 700; // direct single-host URIs
|
||||
const FAST_SELECT_MS: u64 = 300;
|
||||
const SRV_CONNECT_MS: u64 = 15_000; // gives Atlas a fighting chance
|
||||
const SRV_SELECT_MS: u64 = 15_000;
|
||||
const SRV_PARSE_MS: u64 = 2_000; // limit DNS resolution time
|
||||
const SRV_CONNECT_MS: u64 = 2500;
|
||||
const SRV_SELECT_MS: u64 = 2500;
|
||||
|
||||
/// Validates a MongoDB URI using short, bounded timeouts. Returns `(bool, String)` where the
|
||||
/// boolean indicates success and the string provides a status message.
|
||||
|
|
@ -27,25 +105,32 @@ pub async fn validate_mongodb(uri: &str) -> Result<(bool, String)> {
|
|||
return Ok((false, "Invalid MongoDB URI".to_string()));
|
||||
}
|
||||
|
||||
let is_srv = uri.starts_with("mongodb+srv://");
|
||||
|
||||
if is_srv {
|
||||
// Skip SRV URIs to avoid slow DNS lookups and topology discovery.
|
||||
return Ok((
|
||||
false,
|
||||
"Validation skipped for mongodb+srv:// URI (performance reasons)".to_string(),
|
||||
));
|
||||
// ---- refuse localhost/loopback/UDS outright
|
||||
if uri_targets_localhost(uri) {
|
||||
return Ok((false, "Refusing to validate localhost/loopback MongoDB URIs.".to_string()));
|
||||
}
|
||||
|
||||
// ---- build client opts
|
||||
let mut opts = ClientOptions::parse(uri).await?;
|
||||
let is_srv = uri.starts_with("mongodb+srv://");
|
||||
|
||||
// ---- build client opts (guarded so we don't hit DNS/driver first)
|
||||
let mut opts = if is_srv {
|
||||
match timeout(Duration::from_millis(SRV_PARSE_MS), ClientOptions::parse(uri)).await {
|
||||
Ok(res) => res?,
|
||||
Err(_) => {
|
||||
return Ok((false, "MongoDB connection failed: timeout exceeded".to_string()));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
ClientOptions::parse(uri).await?
|
||||
};
|
||||
|
||||
if !is_srv {
|
||||
// one socket, skip cluster discovery for plain 'mongodb://'
|
||||
opts.direct_connection = Some(true);
|
||||
opts.connect_timeout = Some(Duration::from_millis(FAST_CONNECT_MS));
|
||||
opts.server_selection_timeout = Some(Duration::from_millis(FAST_SELECT_MS));
|
||||
} else {
|
||||
// SRV needs DNS and replica-set discovery; give it a couple seconds
|
||||
// SRV needs DNS and replica-set discovery; fail fast
|
||||
opts.connect_timeout = Some(Duration::from_millis(SRV_CONNECT_MS));
|
||||
opts.server_selection_timeout = Some(Duration::from_millis(SRV_SELECT_MS));
|
||||
// leave direct_connection = None (driver decides)
|
||||
|
|
@ -55,18 +140,25 @@ pub async fn validate_mongodb(uri: &str) -> Result<(bool, String)> {
|
|||
|
||||
// ---- dial and ping
|
||||
let client = Client::with_options(opts)?;
|
||||
let ok = client.database("admin").run_command(doc! { "ping": 1 }).await.is_ok();
|
||||
let msg = if ok {
|
||||
"MongoDB connection is valid.".to_string()
|
||||
} else {
|
||||
"MongoDB connection failed.".to_string()
|
||||
};
|
||||
Ok((ok, msg))
|
||||
let res = client.database("admin").run_command(doc! { "ping": 1 }).await;
|
||||
match res {
|
||||
Ok(_) => Ok((true, "MongoDB connection is valid.".to_string())),
|
||||
Err(e) => {
|
||||
let msg = match *e.kind {
|
||||
ErrorKind::ServerSelection { .. } => {
|
||||
"MongoDB connection failed: timeout exceeded".to_string()
|
||||
}
|
||||
_ => "MongoDB connection failed.".to_string(),
|
||||
};
|
||||
Ok((false, msg))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// pub fn generate_mongodb_cache_key(mongodb_uri: &str) -> String {
|
||||
// use sha1::{Digest, Sha1};
|
||||
// let mut hasher = Sha1::new();
|
||||
// hasher.update(mongodb_uri.as_bytes());
|
||||
// format!("MongoDB:{:x}", hasher.finalize())
|
||||
// }
|
||||
/// Return a stable cache key for the given MongoDB URI.
|
||||
pub fn generate_mongodb_cache_key(mongodb_uri: &str) -> String {
|
||||
use sha1::{Digest, Sha1};
|
||||
let mut hasher = Sha1::new();
|
||||
hasher.update(mongodb_uri.as_bytes());
|
||||
format!("MongoDB:{:x}", hasher.finalize())
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,16 +1,30 @@
|
|||
use std::{str::FromStr, time::Duration};
|
||||
use std::{str::FromStr, sync::Once, time::Duration};
|
||||
|
||||
use anyhow::{anyhow, Result};
|
||||
use rustls::crypto::{ring, CryptoProvider};
|
||||
use rustls::{client::ClientConfig, RootCertStore};
|
||||
use rustls_native_certs::{load_native_certs, CertificateResult};
|
||||
use sha1::{Digest, Sha1};
|
||||
use tokio::time::{error::Elapsed, timeout};
|
||||
use tokio_postgres::{config::SslMode, tls::NoTls, Config, Error};
|
||||
use tokio_postgres::{
|
||||
config::{Host, SslMode},
|
||||
tls::NoTls,
|
||||
Config, Error,
|
||||
};
|
||||
use tokio_postgres_rustls::MakeRustlsConnect;
|
||||
use tracing::debug;
|
||||
|
||||
const CONNECT_TIMEOUT: Duration = Duration::from_secs(5);
|
||||
|
||||
static INIT_PROVIDER: Once = Once::new();
|
||||
fn ensure_crypto_provider() {
|
||||
INIT_PROVIDER.call_once(|| {
|
||||
// If another part of the program already installed a provider,
|
||||
// ignore the error — we just need one global provider.
|
||||
let _ = CryptoProvider::install_default(ring::default_provider());
|
||||
});
|
||||
}
|
||||
|
||||
pub fn generate_postgres_cache_key(postgres_url: &str) -> String {
|
||||
let mut hasher = Sha1::new();
|
||||
hasher.update(postgres_url.as_bytes());
|
||||
|
|
@ -21,6 +35,12 @@ pub async fn validate_postgres(postgres_url: &str) -> Result<(bool, Vec<String>)
|
|||
let mut cfg =
|
||||
Config::from_str(postgres_url).map_err(|e| anyhow!("Failed to parse Postgres URL: {e}"))?;
|
||||
|
||||
// --- skip localhost/loopback/unix-socket targets entirely -------------
|
||||
if has_any_local_host(&cfg) {
|
||||
debug!("Skipping Postgres validation: host is localhost/loopback or unix socket");
|
||||
return Ok((false, vec!["skipped localhost/loopback host".into()]));
|
||||
}
|
||||
|
||||
let original_mode = cfg.get_ssl_mode();
|
||||
if original_mode == SslMode::Prefer {
|
||||
cfg.ssl_mode(SslMode::Disable);
|
||||
|
|
@ -29,6 +49,37 @@ pub async fn validate_postgres(postgres_url: &str) -> Result<(bool, Vec<String>)
|
|||
check_postgres_db_connection(cfg, original_mode).await
|
||||
}
|
||||
|
||||
fn has_any_local_host(cfg: &Config) -> bool {
|
||||
cfg.get_hosts().iter().any(|h| match h {
|
||||
Host::Unix(_) => true, // local unix socket
|
||||
Host::Tcp(s) => is_local_tcp_host(s),
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns true when a TCP host string names the local machine.
///
/// Recognised forms are loopback, unspecified and link-local IP literals
/// (with or without URI-style IPv6 brackets), IPv4-mapped IPv6 literals whose
/// embedded IPv4 address is one of those, and the usual localhost hostnames:
/// `localhost`, `localhost6`, anything below `localhost.` / `localhost6.`,
/// `ip6-localhost` and `ip6-loopback`. A trailing root-label dot is ignored.
/// The check is textual only; no name resolution takes place.
fn is_local_tcp_host(s: &str) -> bool {
    use std::net::{IpAddr, Ipv4Addr};

    /// Loopback, unspecified or link-local IPv4 address.
    fn v4_is_local(v4: Ipv4Addr) -> bool {
        v4.is_loopback() || v4.is_unspecified() || v4.is_link_local()
    }

    // strip URI-style IPv6 brackets if present, then any trailing root dot so
    // "127.0.0.1." is still parsed as an address
    let host = s.trim_matches(|c| c == '[' || c == ']').trim_end_matches('.');

    // Direct IPs
    if let Ok(ip) = host.parse::<IpAddr>() {
        return match ip {
            IpAddr::V4(v4) => v4_is_local(v4),
            // `::ffff:a.b.c.d` reaches the embedded IPv4 address, so classify
            // it by that address rather than by the IPv6 predicates
            IpAddr::V6(v6) => match v6.to_ipv4_mapped() {
                Some(v4) => v4_is_local(v4),
                None => v6.is_loopback() || v6.is_unspecified() || v6.is_unicast_link_local(),
            },
        };
    }

    // Common localhost hostnames
    let lower = host.to_ascii_lowercase();
    matches!(
        lower.as_str(),
        "localhost" | "localhost6" | "ip6-localhost" | "ip6-loopback"
    ) || lower.starts_with("localhost.")
        || lower.starts_with("localhost6.")
}
|
||||
|
||||
async fn check_postgres_db_connection(
|
||||
mut cfg: Config,
|
||||
original_mode: SslMode,
|
||||
|
|
@ -52,6 +103,9 @@ async fn check_postgres_db_connection(
|
|||
.await
|
||||
} else {
|
||||
timeout(CONNECT_TIMEOUT, async {
|
||||
// Ensure Rustls crypto provider is installed *before* using the builder
|
||||
ensure_crypto_provider();
|
||||
|
||||
let CertificateResult { certs, errors, .. } = load_native_certs();
|
||||
for err in errors {
|
||||
debug!("native-cert error: {err}");
|
||||
|
|
@ -89,6 +143,21 @@ async fn check_postgres_db_connection(
|
|||
continue;
|
||||
}
|
||||
|
||||
Ok(Err(e))
|
||||
if attempt == 0
|
||||
&& server_requires_encryption(&e.to_string())
|
||||
&& cfg.get_ssl_mode() == SslMode::Disable =>
|
||||
{
|
||||
debug!("Encryption required: {e}; retrying with SSL");
|
||||
cfg.ssl_mode(SslMode::Require);
|
||||
continue;
|
||||
}
|
||||
|
||||
Ok(Err(e)) if missing_cluster_identifier(&e.to_string()) => {
|
||||
debug!("Missing cluster identifier: {e}; treating as valid");
|
||||
return Ok((true, Vec::new()));
|
||||
}
|
||||
|
||||
Ok(Err(e)) if database_not_exists(&e, cfg.get_dbname().unwrap_or("postgres")) => {
|
||||
return Ok((true, Vec::new()));
|
||||
}
|
||||
|
|
@ -108,3 +177,48 @@ fn database_not_exists(err: &Error, db_name: &str) -> bool {
|
|||
let db = if db_name.is_empty() { "postgres" } else { db_name };
|
||||
err.to_string().contains(&format!("database \"{db}\" does not exist"))
|
||||
}
|
||||
|
||||
/// Reports whether `err_msg` contains the phrase `server requires encryption`.
fn server_requires_encryption(err_msg: &str) -> bool {
    const MARKER: &str = "server requires encryption";
    err_msg.find(MARKER).is_some()
}
|
||||
|
||||
/// Reports whether `err_msg` contains the phrase `missing cluster identifier`.
fn missing_cluster_identifier(err_msg: &str) -> bool {
    const MARKER: &str = "missing cluster identifier";
    err_msg.find(MARKER).is_some()
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::{is_local_tcp_host, missing_cluster_identifier, server_requires_encryption};

    /// The encryption marker is found inside a full driver error and not in unrelated text.
    #[test]
    fn detects_encryption_requirement() {
        let fatal = "db error: FATAL: server requires encryption";
        let unrelated = "some other error";

        assert!(server_requires_encryption(fatal));
        assert!(!server_requires_encryption(unrelated));
    }

    /// The cluster-identifier marker is found inside a routing failure and not in unrelated text.
    #[test]
    fn detects_missing_cluster() {
        let routing_failure =
            "db error: FATAL: codeParamsRoutingFailed: missing cluster identifier";
        let unrelated = "another error";

        assert!(missing_cluster_identifier(routing_failure));
        assert!(!missing_cluster_identifier(unrelated));
    }

    /// Local names and addresses are flagged; ordinary remote ones are not.
    #[test]
    fn detects_local_hosts() {
        const LOCAL: [&str; 7] = [
            "localhost",
            "LOCALHOST",
            "localhost.localdomain",
            "localhost6",
            "127.0.0.1",
            "[::1]",
            "::",
        ];
        const REMOTE: [&str; 2] = ["db.example.com", "10.0.0.1"];

        LOCAL
            .iter()
            .for_each(|h| assert!(is_local_tcp_host(h), "should treat {h} as local"));
        REMOTE
            .iter()
            .for_each(|h| assert!(!is_local_tcp_host(h), "should not treat {h} as local"));
    }
}
|
||||
|
|
|
|||
|
|
@ -47,6 +47,7 @@ fn make_match(fp: u64) -> Match {
|
|||
validation_success: false,
|
||||
calculated_entropy: 0.0,
|
||||
visible: true,
|
||||
is_base64: false,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -105,6 +105,7 @@ fn run_skiplist(skip_regex: Vec<String>, skip_skipword: Vec<String>) -> Result<u
|
|||
manage_baseline: false,
|
||||
skip_regex: skip_regex,
|
||||
skip_word: skip_skipword,
|
||||
no_base64: false,
|
||||
};
|
||||
|
||||
let global_args = GlobalArgs {
|
||||
|
|
|
|||
62
tests/int_base64.rs
Normal file
62
tests/int_base64.rs
Normal file
|
|
@ -0,0 +1,62 @@
|
|||
use assert_cmd::prelude::*;
|
||||
use predicates::prelude::*;
|
||||
use std::{fs, process::Command};
|
||||
use tempfile::tempdir;
|
||||
|
||||
// Ensure base64 encoded secrets are decoded and detected
#[test]
fn detects_base64_encoded_secret() -> anyhow::Result<()> {
    // Base64 for ghp_1wuHFikBKQtCcH3EB2FBUkyn8krXhP2qLqPa
    const ENCODED: &str = "Z2hwXzF3dUhGaWtCS1F0Q2NIM0VCMkZCVWt5bjhrclhoUDJxTHFQYQ==";
    const DECODED: &str = "ghp_1wuHFikBKQtCcH3EB2FBUkyn8krXhP2qLqPa";

    // A scratch directory holding one file with only the encoded token.
    let scan_root = tempdir()?;
    fs::write(scan_root.path().join("secret.txt"), ENCODED)?;

    let cli_args = [
        "scan",
        scan_root.path().to_str().unwrap(),
        "--no-binary",
        "--confidence=low",
        "--format",
        "json",
        "--no-validate",
        "--no-update-check",
    ];

    // The JSON report must show the decoded token and mark it as base64.
    let shows_token = predicate::str::contains(DECODED);
    let shows_encoding = predicate::str::contains("\"encoding\": \"base64\"");

    let mut kingfisher = Command::cargo_bin("kingfisher")?;
    kingfisher
        .args(cli_args)
        .assert()
        .code(200)
        .stdout(shows_token.and(shows_encoding));

    scan_root.close()?;
    Ok(())
}
|
||||
|
||||
// Ensure disabling Base64 decoding suppresses encoded secrets
#[test]
fn skips_base64_when_disabled() -> anyhow::Result<()> {
    const ENCODED: &str = "Z2hwXzF3dUhGaWtCS1F0Q2NIM0VCMkZCVWt5bjhrclhoUDJxTHFQYQ==";

    // A scratch directory holding one file with only the encoded token.
    let scan_root = tempdir()?;
    fs::write(scan_root.path().join("secret.txt"), ENCODED)?;

    let cli_args = [
        "scan",
        scan_root.path().to_str().unwrap(),
        "--no-binary",
        "--no-base64",
        "--confidence=low",
        "--format",
        "json",
        "--no-update-check",
    ];

    // With decoding switched off the run must exit 0 and report zero findings.
    let reports_nothing = predicate::str::contains("\"findings\":0");

    let mut kingfisher = Command::cargo_bin("kingfisher")?;
    kingfisher.args(cli_args).assert().code(0).stdout(reports_nothing);

    scan_root.close()?;
    Ok(())
}
|
||||
|
|
@ -121,6 +121,7 @@ rules:
|
|||
manage_baseline: false,
|
||||
skip_regex: Vec::new(),
|
||||
skip_word: Vec::new(),
|
||||
no_base64: false,
|
||||
};
|
||||
|
||||
let global_args = GlobalArgs {
|
||||
|
|
|
|||
|
|
@ -108,6 +108,7 @@ fn test_github_remote_scan() -> Result<()> {
|
|||
manage_baseline: false,
|
||||
skip_regex: Vec::new(),
|
||||
skip_word: Vec::new(),
|
||||
no_base64: false,
|
||||
};
|
||||
// Create global arguments
|
||||
let global_args = GlobalArgs {
|
||||
|
|
|
|||
|
|
@ -106,6 +106,7 @@ fn test_gitlab_remote_scan() -> Result<()> {
|
|||
manage_baseline: false,
|
||||
skip_regex: Vec::new(),
|
||||
skip_word: Vec::new(),
|
||||
no_base64: false,
|
||||
};
|
||||
|
||||
let global_args = GlobalArgs {
|
||||
|
|
@ -213,6 +214,7 @@ fn test_gitlab_remote_scan_no_history() -> Result<()> {
|
|||
manage_baseline: false,
|
||||
skip_regex: Vec::new(),
|
||||
skip_word: Vec::new(),
|
||||
no_base64: false,
|
||||
};
|
||||
|
||||
let global_args = GlobalArgs {
|
||||
|
|
|
|||
|
|
@ -88,6 +88,7 @@ async fn test_redact_hashes_finding_values() -> Result<()> {
|
|||
manage_baseline: false,
|
||||
skip_regex: Vec::new(),
|
||||
skip_word: Vec::new(),
|
||||
no_base64: false,
|
||||
};
|
||||
|
||||
let global_args = GlobalArgs {
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ fn scan_rules_has_no_validated_findings() -> Result<()> {
|
|||
Some(i) => i,
|
||||
None => return Ok(()), // no array found
|
||||
};
|
||||
|
||||
|
||||
let mut depth = 0usize;
|
||||
let mut end = None;
|
||||
for (i, ch) in stdout.char_indices().skip(start) {
|
||||
|
|
|
|||
|
|
@ -94,6 +94,7 @@ impl TestContext {
|
|||
manage_baseline: false,
|
||||
skip_regex: Vec::new(),
|
||||
skip_word: Vec::new(),
|
||||
no_base64: false,
|
||||
};
|
||||
|
||||
let loaded = RuleLoader::from_rule_specifiers(&scan_args.rules).load(&scan_args)?;
|
||||
|
|
@ -191,6 +192,7 @@ async fn test_scan_slack_messages() -> Result<()> {
|
|||
manage_baseline: false,
|
||||
skip_regex: Vec::new(),
|
||||
skip_word: Vec::new(),
|
||||
no_base64: false,
|
||||
};
|
||||
|
||||
let global_args = GlobalArgs {
|
||||
|
|
|
|||
|
|
@ -164,6 +164,7 @@ async fn test_validation_cache_and_depvars() -> Result<()> {
|
|||
manage_baseline: false,
|
||||
skip_regex: Vec::new(),
|
||||
skip_word: Vec::new(),
|
||||
no_base64: false,
|
||||
};
|
||||
|
||||
/* --------------------------------------------------------- *
|
||||
|
|
|
|||
|
|
@ -107,6 +107,7 @@ impl TestContext {
|
|||
manage_baseline: false,
|
||||
skip_regex: Vec::new(),
|
||||
skip_word: Vec::new(),
|
||||
no_base64: false,
|
||||
};
|
||||
|
||||
let loaded = RuleLoader::from_rule_specifiers(&scan_args.rules)
|
||||
|
|
@ -189,6 +190,7 @@ impl TestContext {
|
|||
manage_baseline: false,
|
||||
skip_regex: Vec::new(),
|
||||
skip_word: Vec::new(),
|
||||
no_base64: false,
|
||||
};
|
||||
|
||||
let global_args = GlobalArgs {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue