Merge pull request #146 from mongodb/development

v1.64.0
This commit is contained in:
Mick Grove 2025-11-13 15:27:16 -08:00 committed by GitHub
commit 1af21d845d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
19 changed files with 555 additions and 69 deletions

View file

@ -2,6 +2,11 @@
All notable changes to this project will be documented in this file.
## [v1.64.0]
- Fixed a bug when using --redact, that broke validation
- Added JDBC rule with validator
- Filter out empty 'KF_BITBUCKET_*' environment values when constructing the Bitbucket authentication configuration so blank variables no longer override valid credentials
## [v1.63.1]
- Updated allocator

View file

@ -10,7 +10,7 @@ publish = false
[package]
name = "kingfisher"
version = "1.63.1"
version = "1.64.0"
description = "MongoDB's blazingly fast and accurate secret scanning and validation tool"
edition.workspace = true
rust-version.workspace = true

View file

@ -882,7 +882,7 @@ KF_GITEA_TOKEN="gtoken" kingfisher scan gitea --user johndoe --gitea-api-url htt
```bash
kingfisher scan bitbucket --workspace my-team
# include Bitbucket Cloud repositories from every accessible workspace
KF_BITBUCKET_USERNAME="$USER" KF_BITBUCKET_APP_PASSWORD="$APP_PASSWORD" \
KF_BITBUCKET_TOKEN="$BITBUCKET_TOKEN" \
kingfisher scan bitbucket --all-workspaces
```
@ -915,8 +915,7 @@ require credentials (see [Authenticate to Bitbucket](#authenticate-to-bitbucket)
kingfisher scan --git-url https://bitbucket.org/hashashash/secretstest.git
# Include repository issues
KF_BITBUCKET_USERNAME="user" \
KF_BITBUCKET_APP_PASSWORD="app-password" \
KF_BITBUCKET_TOKEN="$BITBUCKET_TOKEN" \
kingfisher scan --git-url https://bitbucket.org/workspace/project.git --repo-artifacts
```
@ -925,7 +924,7 @@ KF_BITBUCKET_APP_PASSWORD="app-password" \
```bash
kingfisher scan bitbucket --workspace my-team --list-only
# enumerate all accessible workspaces or projects
KF_BITBUCKET_USERNAME="$USER" KF_BITBUCKET_APP_PASSWORD="$APP_PASSWORD" \
KF_BITBUCKET_TOKEN="$BITBUCKET_TOKEN" \
kingfisher scan bitbucket --all-workspaces --list-only
# filter out repositories using glob patterns
kingfisher scan bitbucket --workspace my-team --bitbucket-exclude my-team/**/experimental-* --list-only
@ -935,14 +934,23 @@ kingfisher scan bitbucket --workspace my-team --bitbucket-exclude my-team/**/exp
Kingfisher supports Bitbucket Cloud and Bitbucket Server credentials:
- **App password or server token** set `KF_BITBUCKET_USERNAME` and either
`KF_BITBUCKET_APP_PASSWORD`, `KF_BITBUCKET_TOKEN`, or
`KF_BITBUCKET_PASSWORD`.
- **Workspace API token (Cloud)** set `KF_BITBUCKET_TOKEN`. `KF_BITBUCKET_USERNAME`
is optional; Kingfisher automatically uses the token for Bitbucket REST APIs
and authenticates git operations as `x-token-auth`.
- **Bitbucket Server token** set `KF_BITBUCKET_USERNAME` and either
`KF_BITBUCKET_TOKEN` or `KF_BITBUCKET_PASSWORD`.
- **Legacy app password (Cloud)** set `KF_BITBUCKET_USERNAME` and
`KF_BITBUCKET_APP_PASSWORD`.
- **OAuth/PAT token** set `KF_BITBUCKET_OAUTH_TOKEN`.
These credentials match the options described in the [ghorg setup
guide](https://github.com/gabrie30/ghorg/blob/master/README.md#bitbucket-setup).
Bitbucket no longer supports App Tokens as of September 9, 2025:
https://support.atlassian.com/bitbucket-cloud/docs/api-tokens/
> As of September 9, 2025, app passwords can no longer be created. Use API tokens with scopes instead. All existing app passwords will be disabled on June 9, 2026. Migrate any integrations before then to avoid disruptions.
### Self-hosted Bitbucket Server
Use `--bitbucket-api-url` to point Kingfisher at your server's REST endpoint, for example
@ -1050,8 +1058,9 @@ KF_SLACK_TOKEN="xoxp-1234..." kingfisher scan slack "akia" \
| `KF_GITEA_USERNAME` | Username for private Gitea clones (used with `KF_GITEA_TOKEN`) |
| `KF_AZURE_TOKEN` / `KF_AZURE_PAT` | Azure Repos Personal Access Token |
| `KF_AZURE_USERNAME` | Username to use with Azure Repos PATs (defaults to `pat` when unset) |
| `KF_BITBUCKET_USERNAME` | Bitbucket username for basic authentication |
| `KF_BITBUCKET_APP_PASSWORD` / `KF_BITBUCKET_TOKEN` | Bitbucket app password or server token |
| `KF_BITBUCKET_TOKEN` | Bitbucket Cloud workspace API token or Bitbucket Server PAT |
| `KF_BITBUCKET_USERNAME` | Optional Bitbucket username for legacy app passwords or server tokens |
| `KF_BITBUCKET_APP_PASSWORD` | Legacy Bitbucket app password (deprecated September 9, 2025; disabled June 9, 2026) |
| `KF_BITBUCKET_OAUTH_TOKEN` | Bitbucket OAuth or PAT token |
| `KF_HUGGINGFACE_TOKEN` | Hugging Face access token for API enumeration and git cloning |
| `KF_HUGGINGFACE_USERNAME` | Optional username for Hugging Face git operations (defaults to `hf_user`) |

24
data/rules/jdbc.yml Normal file
View file

@ -0,0 +1,24 @@
rules:
- name: JDBC connection string with embedded credentials
id: kingfisher.jdbc.1
pattern: |
(?xi)
(
jdbc:
[a-z][a-z0-9+.-]{2,30}
(?:[:][a-z0-9+.-]{1,30})*
:
[^\s"'<>,(){}\[\]]{10,448}
)
min_entropy: 3.3
confidence: medium
validation:
type: Jdbc
examples:
- jdbc:postgresql://db.example.com:5432/app?user=admin&password=s3cr3t
- jdbc:mysql://admin:s3cr3t@prod.internal:3306/inventory
- jdbc:oracle:thin:@ora.example.net:1521/ORCLPDB1
- jdbc:sqlserver://sql.example.org:1433;databaseName=inventory;user=sa;password=s3cr3t!
references:
- https://docs.oracle.com/javase/8/docs/api/java/sql/DriverManager.html
- https://www.postgresql.org/docs/current/jdbc-use.html

View file

@ -40,18 +40,43 @@ pub struct AuthConfig {
pub bearer_token: Option<String>,
}
pub(crate) fn is_bitbucket_access_token(token: &str) -> bool {
token.len() > 40 && token.starts_with("AT") && token.contains('_')
}
impl AuthConfig {
pub fn from_options(
username: Option<String>,
password: Option<String>,
bearer_token: Option<String>,
) -> Self {
let username = username.or_else(|| env::var("KF_BITBUCKET_USERNAME").ok());
let password = password
.or_else(|| env::var("KF_BITBUCKET_APP_PASSWORD").ok())
.or_else(|| env::var("KF_BITBUCKET_TOKEN").ok())
.or_else(|| env::var("KF_BITBUCKET_PASSWORD").ok());
let bearer_token = bearer_token.or_else(|| env::var("KF_BITBUCKET_OAUTH_TOKEN").ok());
fn normalized(value: Option<String>) -> Option<String> {
value.and_then(|v| if v.trim().is_empty() { None } else { Some(v) })
}
fn env_var(name: &str) -> Option<String> {
match env::var(name) {
Ok(value) if value.trim().is_empty() => None,
Ok(value) => Some(value),
Err(_) => None,
}
}
let username = normalized(username).or_else(|| env_var("KF_BITBUCKET_USERNAME"));
let password = normalized(password)
.or_else(|| env_var("KF_BITBUCKET_APP_PASSWORD"))
.or_else(|| env_var("KF_BITBUCKET_TOKEN"))
.or_else(|| env_var("KF_BITBUCKET_PASSWORD"));
let mut bearer_token =
normalized(bearer_token).or_else(|| env_var("KF_BITBUCKET_OAUTH_TOKEN"));
if bearer_token.is_none() {
if let Some(password) = &password {
if is_bitbucket_access_token(password) {
bearer_token = Some(password.clone());
}
}
}
Self { username, password, bearer_token }
}
@ -709,4 +734,52 @@ mod tests {
Some("ws/repo")
);
}
#[test]
fn auth_config_ignores_empty_environment_values() {
temp_env::with_vars(
&[
("KF_BITBUCKET_USERNAME", Some("")),
("KF_BITBUCKET_APP_PASSWORD", Some("")),
("KF_BITBUCKET_OAUTH_TOKEN", Some(" ")),
],
|| {
let auth = AuthConfig::from_env();
assert!(auth.username.is_none());
assert!(auth.password.is_none());
assert!(auth.bearer_token.is_none());
},
);
}
#[test]
fn auth_config_prefers_basic_auth_when_bearer_is_empty() {
temp_env::with_vars(
&[
("KF_BITBUCKET_USERNAME", Some("user")),
("KF_BITBUCKET_APP_PASSWORD", Some("pass")),
("KF_BITBUCKET_OAUTH_TOKEN", Some("")),
],
|| {
let auth = AuthConfig::from_env();
assert_eq!(auth.username.as_deref(), Some("user"));
assert_eq!(auth.password.as_deref(), Some("pass"));
assert!(auth.bearer_token.is_none());
},
);
}
#[test]
fn auth_config_treats_access_token_as_bearer() {
let token = "AT1234567890_ACCESS_TOKEN_EXAMPLE_WITH_UNDERSCORE";
temp_env::with_vars(
&[("KF_BITBUCKET_USERNAME", Some("user")), ("KF_BITBUCKET_TOKEN", Some(token))],
|| {
let auth = AuthConfig::from_env();
assert_eq!(auth.username.as_deref(), Some("user"));
assert_eq!(auth.password.as_deref(), Some(token));
assert_eq!(auth.bearer_token.as_deref(), Some(token));
},
);
}
}

View file

@ -154,15 +154,15 @@ impl FindingsStore {
.captures
.iter()
.find(|c| c.name.is_none() && c.match_number == 0)
.map(|c| c.value)
.map(|c| c.raw_value())
.or_else(|| {
m.groups
.captures
.iter()
.find(|c| matches!(c.name.as_deref(), Some("TOKEN")))
.map(|c| c.value)
.map(|c| c.raw_value())
})
.or_else(|| m.groups.captures.get(0).map(|c| c.value))
.or_else(|| m.groups.captures.get(0).map(|c| c.raw_value()))
.unwrap_or("");
let origin_kind = match origin.first() {

View file

@ -5,7 +5,7 @@ use std::{
use tracing::{debug, debug_span};
use crate::git_url::GitUrl;
use crate::{bitbucket::is_bitbucket_access_token, git_url::GitUrl};
const BITBUCKET_CREDENTIAL_HELPER: &str = r#"credential.helper=!_bbcreds() {
if [ -n "$KF_BITBUCKET_OAUTH_TOKEN" ]; then
@ -13,6 +13,11 @@ const BITBUCKET_CREDENTIAL_HELPER: &str = r#"credential.helper=!_bbcreds() {
echo password="$KF_BITBUCKET_OAUTH_TOKEN";
return;
fi
if [ -n "$KF_BITBUCKET_ACCESS_TOKEN" ]; then
echo username="x-token-auth";
echo password="$KF_BITBUCKET_ACCESS_TOKEN";
return;
fi
if [ -n "$KF_BITBUCKET_USERNAME" ]; then
bb_pass="${KF_BITBUCKET_APP_PASSWORD:-${KF_BITBUCKET_TOKEN:-${KF_BITBUCKET_PASSWORD:-}}}";
if [ -n "$bb_pass" ]; then
@ -95,6 +100,7 @@ fn summarize_output(output: &[u8]) -> Option<String> {
pub struct Git {
credentials: Vec<String>,
ignore_certs: bool,
bitbucket_access_token: Option<String>,
}
impl Git {
@ -112,14 +118,18 @@ impl Git {
matches!(std::env::var("KF_GITEA_TOKEN"), Ok(token) if !token.is_empty());
let has_bitbucket_username =
matches!(std::env::var("KF_BITBUCKET_USERNAME"), Ok(value) if !value.is_empty());
let bitbucket_access_token = std::env::var("KF_BITBUCKET_TOKEN")
.ok()
.filter(|value| !value.is_empty() && is_bitbucket_access_token(value));
let has_bitbucket_password =
["KF_BITBUCKET_APP_PASSWORD", "KF_BITBUCKET_TOKEN", "KF_BITBUCKET_PASSWORD"]
.iter()
.any(|key| matches!(std::env::var(key), Ok(value) if !value.is_empty()));
let has_bitbucket_oauth_token =
matches!(std::env::var("KF_BITBUCKET_OAUTH_TOKEN"), Ok(value) if !value.is_empty());
let has_bitbucket_credentials =
has_bitbucket_oauth_token || (has_bitbucket_username && has_bitbucket_password);
let has_bitbucket_credentials = has_bitbucket_oauth_token
|| bitbucket_access_token.is_some()
|| (has_bitbucket_username && has_bitbucket_password);
let has_azure_token = ["KF_AZURE_TOKEN", "KF_AZURE_PAT"]
.iter()
.any(|key| matches!(std::env::var(key), Ok(value) if !value.is_empty()));
@ -176,7 +186,7 @@ impl Git {
credentials.push(HUGGINGFACE_CREDENTIAL_HELPER.into());
}
Self { credentials, ignore_certs }
Self { credentials, ignore_certs, bitbucket_access_token }
}
/// Create a basic `git` `Command` with environment variables set to
@ -191,6 +201,9 @@ impl Git {
if self.ignore_certs {
cmd.env("GIT_SSL_NO_VERIFY", "1");
}
if let Some(token) = &self.bitbucket_access_token {
cmd.env("KF_BITBUCKET_ACCESS_TOKEN", token);
}
cmd.args(&self.credentials);
cmd.stdin(Stdio::null());
cmd
@ -302,6 +315,7 @@ mod tests {
let git = Git::new(false);
assert!(!git.ignore_certs);
assert!(git.credentials.is_empty());
assert!(git.bitbucket_access_token.is_none());
temp_env::with_var("KF_GITHUB_TOKEN", Some("test_token"), || {
let git = Git::new(false);
@ -315,6 +329,7 @@ mod tests {
let git = Git::new(false);
assert_eq!(git.credentials.len(), 4);
assert!(git.credentials.iter().any(|value| value == BITBUCKET_CREDENTIAL_HELPER));
assert!(git.bitbucket_access_token.is_none());
});
}
@ -329,10 +344,22 @@ mod tests {
let git = Git::new(false);
assert_eq!(git.credentials.len(), 4);
assert!(git.credentials.iter().any(|value| value == BITBUCKET_CREDENTIAL_HELPER));
assert!(git.bitbucket_access_token.is_none());
},
);
}
#[test]
fn test_git_new_bitbucket_access_token() {
let token = "AT1234567890_ACCESS_TOKEN_EXAMPLE_WITH_UNDERSCORE";
temp_env::with_var("KF_BITBUCKET_TOKEN", Some(token), || {
let git = Git::new(false);
assert_eq!(git.credentials.len(), 4);
assert!(git.credentials.iter().any(|value| value == BITBUCKET_CREDENTIAL_HELPER));
assert_eq!(git.bitbucket_access_token.as_deref(), Some(token));
});
}
#[test]
fn test_clone_mode_arg() {
assert_eq!(CloneMode::Bare.arg(), Some("--bare"));

View file

@ -34,7 +34,7 @@ use crate::{
safe_list::{is_safe_match, is_user_match},
scanner_pool::ScannerPool,
snippet::Base64BString,
util::{intern, redact_value},
util::intern,
};
const MAX_CHUNK_SIZE: usize = 1 << 30; // 1 GiB per scan segment
@ -100,7 +100,7 @@ impl OwnedBlobMatch {
.captures
.get(1)
.or_else(|| blob_match.captures.captures.get(0))
.map(|capture| capture.value.as_bytes().to_vec())
.map(|capture| capture.raw_value().as_bytes().to_vec())
.unwrap_or_else(Vec::new);
let mut owned_blob_match = OwnedBlobMatch {
@ -714,7 +714,7 @@ fn filter_match<'b>(
&blob.bytes()[matching_input_offset_span.start..matching_input_offset_span.end];
// Pass the *full* capture object to from_captures
let groups = SerializableCaptures::from_captures(&captures, haystack, re, redact);
let groups = SerializableCaptures::from_captures(&captures, haystack, re);
matches.push(BlobMatch {
rule: Arc::clone(&rule),
@ -829,16 +829,47 @@ impl JsonSchema for Groups {
// pub end: usize, // End position of the match
// pub value: String, // The actual captured value
// }
#[derive(Debug, Clone, Serialize, JsonSchema)]
#[derive(Debug, Clone, JsonSchema)]
pub struct SerializableCapture {
pub name: Option<String>,
pub match_number: i32,
pub start: usize,
pub end: usize,
/// Interned value of the capture.
/// Interned original (unredacted) value.
#[serde(skip_serializing, skip_deserializing)]
pub value: &'static str,
}
impl SerializableCapture {
/// Returns the original captured value.
pub fn raw_value(&self) -> &'static str {
self.value
}
/// Returns the value that should be shown in user-facing output.
pub fn display_value(&self) -> std::borrow::Cow<'static, str> {
crate::util::display_value(self.value)
}
}
impl serde::Serialize for SerializableCapture {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
use serde::ser::SerializeStruct;
let mut state = serializer.serialize_struct("SerializableCapture", 5)?;
state.serialize_field("name", &self.name)?;
state.serialize_field("match_number", &self.match_number)?;
state.serialize_field("start", &self.start)?;
state.serialize_field("end", &self.end)?;
let value = self.display_value();
state.serialize_field("value", &value)?;
state.end()
}
}
#[derive(Debug, Clone, Serialize, JsonSchema)]
pub struct SerializableCaptures {
#[schemars(with = "Vec<SerializableCapture>")]
@ -846,12 +877,7 @@ pub struct SerializableCaptures {
}
impl SerializableCaptures {
pub fn from_captures(
captures: &regex::bytes::Captures,
_input: &[u8],
re: &Regex,
redact: bool,
) -> Self {
pub fn from_captures(captures: &regex::bytes::Captures, _input: &[u8], re: &Regex) -> Self {
let mut serialized_captures: SmallVec<[SerializableCapture; 2]> = SmallVec::new();
let capture_names: SmallVec<[Option<String>; 4]> =
@ -863,12 +889,8 @@ impl SerializableCaptures {
for i in 1..captures.len() {
// Start from 1
if let Some(cap) = captures.get(i) {
let value = if redact {
redact_value(&String::from_utf8_lossy(cap.as_bytes()))
} else {
String::from_utf8_lossy(cap.as_bytes()).to_string()
};
let interned = intern(&value);
let raw_value = String::from_utf8_lossy(cap.as_bytes()).to_string();
let raw_interned = intern(&raw_value);
let name = capture_names.get(i).and_then(|opt| opt.as_ref()).cloned();
serialized_captures.push(SerializableCapture {
@ -876,7 +898,7 @@ impl SerializableCaptures {
match_number: i32::try_from(i).unwrap_or(0),
start: cap.start(),
end: cap.end(),
value: interned,
value: raw_interned,
});
}
}
@ -884,12 +906,8 @@ impl SerializableCaptures {
// ELSE, if there is ONLY the full match (len == 1),
// serialize just that full match (group 0) as the fallback.
if let Some(cap) = captures.get(0) {
let value = if redact {
redact_value(&String::from_utf8_lossy(cap.as_bytes()))
} else {
String::from_utf8_lossy(cap.as_bytes()).to_string()
};
let interned = intern(&value);
let raw_value = String::from_utf8_lossy(cap.as_bytes()).to_string();
let raw_interned = intern(&raw_value);
let name = capture_names.get(0).and_then(|opt| opt.as_ref()).cloned();
serialized_captures.push(SerializableCapture {
@ -897,7 +915,7 @@ impl SerializableCaptures {
match_number: 0,
start: cap.start(),
end: cap.end(),
value: interned,
value: raw_interned,
});
}
}
@ -959,7 +977,7 @@ impl Match {
.captures
.get(1)
.or_else(|| owned_blob_match.captures.captures.get(0))
.map(|capture| capture.value.as_bytes())
.map(|capture| capture.raw_value().as_bytes())
.unwrap_or_default();
// The fingerprint will be based on the content of the secret.
@ -1596,7 +1614,7 @@ line2
Regex::new(r"(?xi)\b(ghp_(?P<body>[A-Z0-9]{3})(?P<checksum>[A-Z0-9]{2}))").unwrap();
let caps = re.captures(b"ghp_ABC12").expect("expected captures");
let serialized = SerializableCaptures::from_captures(&caps, b"", &re, false);
let serialized = SerializableCaptures::from_captures(&caps, b"", &re);
let entries: Vec<(Option<&str>, i32, &str)> = serialized
.captures
.iter()

View file

@ -421,14 +421,12 @@ impl DetailsReporter {
// We now correctly serialize *only* the explicit capture groups (or group 0
// as a fallback). The primary "secret" is therefore always at index 0
// of the captures SmallVec.
let snippet = Escaped(
rm.m.groups
.captures
.get(0) // Get the first (and primary) serialized capture
.map(|capture| capture.value.as_bytes())
.unwrap_or_default(),
)
.to_string();
let snippet = if let Some(capture) = rm.m.groups.captures.get(0) {
let displayed = capture.display_value();
Escaped(displayed.as_ref().as_bytes()).to_string()
} else {
String::new()
};
// --- END FIX ---
let validation_status = if rm.validation_success {

View file

@ -47,6 +47,7 @@ pub enum Validation {
GCP,
MongoDB,
Postgres,
Jdbc,
JWT,
Raw(String),
Http(HttpValidation),
@ -598,11 +599,23 @@ impl RuleSyntax {
let path = path.as_ref();
let contents = std::fs::read_to_string(path)
.with_context(|| format!("Failed to read file: {}", path.display()))?;
serde_yaml::from_str(&contents).map_err(|e| {
#[derive(Deserialize)]
#[serde(untagged)]
enum RuleSyntaxDocument {
Sequence(Vec<RuleSyntax>),
Mapped { rules: Vec<RuleSyntax> },
}
let parsed: RuleSyntaxDocument = serde_yaml::from_str(&contents).map_err(|e| {
let context = e.location().map_or(String::new(), |loc| {
format!(" at line {} column {}", loc.line(), loc.column())
});
anyhow!("Failed to parse YAML from {}{}: {}", path.display(), context, e)
})?;
Ok(match parsed {
RuleSyntaxDocument::Sequence(rules) => rules,
RuleSyntaxDocument::Mapped { rules } => rules,
})
}
}

View file

@ -198,6 +198,13 @@ pub fn is_safe_match_reason(input: &[u8]) -> Option<&'static str> {
.map(|rule| rule.description)
}
/// Test helper: clear all user-provided allow-list configuration.
#[doc(hidden)]
pub fn clear_user_filters_for_tests() {
USER_SAFE_REGEXES.lock().unwrap().clear();
USER_SAFE_SKIPWORDS.lock().unwrap().clear();
}
/// Returns true if the input likely contains *benign* placeholder/test strings,
/// and logs which rule triggered at `debug!` level.
pub fn is_safe_match(input: &[u8]) -> bool {

View file

@ -33,6 +33,7 @@ use crate::{
run_secret_validation, save_docker_images,
summary::print_scan_summary,
},
util::set_redaction_enabled,
};
pub async fn run_scan(
@ -75,6 +76,8 @@ pub async fn run_async_scan(
let progress_enabled = global_args.use_progress();
initialize_environment()?;
set_redaction_enabled(args.redact);
let mut repo_urls = enumerate_github_repos(args, global_args).await?;
let gitlab_repo_urls = enumerate_gitlab_repos(args, global_args).await?;
let gitea_repo_urls = enumerate_gitea_repos(args, global_args).await?;

View file

@ -73,7 +73,7 @@ pub async fn run_secret_validation(
.captures
.get(1)
.or_else(|| arc_msg.2.groups.captures.get(0))
.map_or("", |c| c.value);
.map_or("", |c| c.raw_value());
groups.entry(format!("{}|{}", arc_msg.2.rule.id(), secret)).or_default().push(arc_msg);
}
@ -111,7 +111,7 @@ pub async fn run_secret_validation(
.captures
.get(1)
.or_else(|| rep_arc.2.groups.captures.get(0))
.map_or("", |c| c.value);
.map_or("", |c| c.raw_value());
let key = format!("{}|{}", rep_arc.2.rule.id(), secret);
match val_res.entry(key.clone()) {
@ -352,7 +352,7 @@ async fn validate_single(
sorted.into_iter().map(|(k, v)| format!("{}={}", k, v)).collect::<Vec<_>>().join("|")
})
.unwrap_or_default();
let capture0 = om.captures.captures.get(0).map_or(String::new(), |c| c.value.to_string());
let capture0 = om.captures.captures.get(0).map_or(String::new(), |c| c.raw_value().to_string());
let cache_key = format!("{}|{}|{}", om.rule.name(), capture0, dep_vars_str);
// Check cache first
if let Some(cached) = cache.get(&cache_key) {
@ -443,6 +443,6 @@ fn build_cache_key(
.unwrap_or_default();
// For demonstration, well do a simplistic approach
// You can adapt from your existing logic
let capture0 = om.captures.captures.get(0).map_or(String::new(), |c| c.value.to_string());
let capture0 = om.captures.captures.get(0).map_or(String::new(), |c| c.raw_value().to_string());
format!("{}|{}|{}", om.rule.name(), capture0, dep_vars_str)
}

View file

@ -1,7 +1,9 @@
use std::{
borrow::Cow,
fs::File,
io::{stdin, stdout, BufReader, BufWriter},
path::Path,
sync::atomic::{AtomicBool, Ordering},
};
use blake3::Hasher;
@ -11,6 +13,7 @@ use path_dedot::ParseDot;
use ring::rand::{SecureRandom, SystemRandom};
// Generate a random salt once and use it for the entire application runtime
static APP_SALT: Lazy<String> = Lazy::new(|| generate_salt());
static REDACTION_ENABLED: AtomicBool = AtomicBool::new(false);
/// Interns a string once and returns a `'static` reference to it.
pub fn intern(s: &str) -> &'static str {
@ -41,6 +44,26 @@ pub fn redact_value(value: &str) -> String {
let hash = hasher.finalize();
format!("[REDACTED:{}]", hash_to_short_id(&hash))
}
/// Enables or disables global output redaction.
pub fn set_redaction_enabled(enabled: bool) {
REDACTION_ENABLED.store(enabled, Ordering::Relaxed);
}
/// Returns true if redaction is enabled for user-facing output.
pub fn redaction_enabled() -> bool {
REDACTION_ENABLED.load(Ordering::Relaxed)
}
/// Returns either the original value or a redacted placeholder depending on
/// the current redaction setting.
pub fn display_value(value: &'static str) -> Cow<'static, str> {
if redaction_enabled() {
Cow::Owned(redact_value(value))
} else {
Cow::Borrowed(value)
}
}
// Generate a random salt (16-character alphanumeric string)
fn generate_salt() -> String {
let rng = SystemRandom::new();

View file

@ -29,6 +29,7 @@ mod azure;
mod coinbase;
mod gcp;
mod httpvalidation;
mod jdbc;
mod jwt;
mod mongodb;
mod postgres;
@ -80,7 +81,7 @@ fn secret_fingerprint(m: &OwnedBlobMatch) -> u64 {
// first capture = the secret text itself
if let Some(c0) = m.captures.captures.get(0) {
c0.value.hash(&mut hasher);
c0.raw_value().hash(&mut hasher);
}
hasher.finish()
}
@ -148,7 +149,7 @@ pub fn collect_variables_and_dependencies(
.entry(dependency.variable.to_uppercase())
.or_insert_with(Vec::new)
.push((
matching_input.value.to_string(),
matching_input.raw_value().to_string(),
other_match.matching_input_offset_span,
));
}
@ -676,6 +677,58 @@ async fn timed_validate_single_match<'a>(
);
}
// ---------------------------------------------------- JDBC validator
Some(Validation::Jdbc) => {
let jdbc_conn = captured_values
.iter()
.find(|(n, ..)| n == "TOKEN")
.map(|(_, v, ..)| v.clone())
.unwrap_or_default();
if jdbc_conn.is_empty() {
m.validation_success = false;
m.validation_response_body = "JDBC connection string not found.".to_string();
m.validation_response_status = StatusCode::BAD_REQUEST;
commit_and_return(m);
return;
}
let cache_key = jdbc::generate_jdbc_cache_key(&jdbc_conn);
if let Some(cached) = cache.get(&cache_key) {
let c = cached.value();
if c.timestamp.elapsed() < Duration::from_secs(VALIDATION_CACHE_SECONDS) {
m.validation_success = c.is_valid;
m.validation_response_body = c.body.clone();
m.validation_response_status = c.status;
commit_and_return(m);
return;
}
}
match jdbc::validate_jdbc(&jdbc_conn).await {
Ok(outcome) => {
m.validation_success = outcome.valid;
m.validation_response_body = outcome.message;
m.validation_response_status = outcome.status;
}
Err(e) => {
m.validation_success = false;
m.validation_response_body = format!("JDBC validation error: {}", e);
m.validation_response_status = StatusCode::BAD_GATEWAY;
}
}
cache.insert(
cache_key,
CachedResponse {
body: m.validation_response_body.clone(),
status: m.validation_response_status,
is_valid: m.validation_success,
timestamp: Instant::now(),
},
);
}
// ------------------------------------------------ Postgres validator
Some(Validation::Postgres) => {
let pg_url = globals

154
src/validation/jdbc.rs Normal file
View file

@ -0,0 +1,154 @@
use anyhow::{anyhow, Context, Result};
use http::StatusCode;
use tracing::debug;
use url::Url;
use xxhash_rust::xxh3::xxh3_64;
use super::postgres;
/// Result of attempting to validate a JDBC connection string.
pub struct JdbcValidationOutcome {
pub valid: bool,
pub status: StatusCode,
pub message: String,
}
/// Produce a short-lived cache key for JDBC validations.
pub fn generate_jdbc_cache_key(raw: &str) -> String {
format!("Jdbc:{:016x}", xxh3_64(raw.as_bytes()))
}
/// Validate a JDBC connection string by dispatching to the supported backend validators.
pub async fn validate_jdbc(jdbc_conn: &str) -> Result<JdbcValidationOutcome> {
let trimmed = jdbc_conn.trim();
if !trimmed.to_ascii_lowercase().starts_with("jdbc:") {
return Err(anyhow!("JDBC connection string must start with `jdbc:`"));
}
let without_prefix = &trimmed[5..];
let (raw_subprotocol, subname) = without_prefix
.split_once(':')
.ok_or_else(|| anyhow!("JDBC connection string is missing a subprotocol"))?;
let subprotocol = raw_subprotocol.trim();
let subprotocol_lower = subprotocol.to_ascii_lowercase();
match subprotocol_lower.as_str() {
"postgres" | "postgresql" | "postgis" => {
validate_postgres_jdbc(subname).await.context("Postgres JDBC validation failed")
}
other => {
debug!("Unsupported JDBC subprotocol encountered: {}", other);
Ok(JdbcValidationOutcome {
valid: false,
status: StatusCode::NOT_IMPLEMENTED,
message: format!(
"JDBC validation not implemented for subprotocol `{}`.",
subprotocol
),
})
}
}
}
async fn validate_postgres_jdbc(subname: &str) -> Result<JdbcValidationOutcome> {
let normalized = normalize_postgres_url(subname)?;
let (ok, meta) = postgres::validate_postgres(&normalized).await?;
let mut message = if ok {
"JDBC Postgres connection is valid.".to_string()
} else {
"JDBC Postgres connection failed.".to_string()
};
if !meta.is_empty() {
let joined = meta.join("; ");
if ok {
message.push_str(&format!(" Details: {}", joined));
} else {
message = format!("JDBC Postgres validation result: {}", joined);
}
}
let status = if ok {
StatusCode::OK
} else if meta.iter().any(|m| m.to_ascii_lowercase().contains("skip")) {
StatusCode::CONTINUE
} else {
StatusCode::UNAUTHORIZED
};
Ok(JdbcValidationOutcome { valid: ok, status, message })
}
fn normalize_postgres_url(subname: &str) -> Result<String> {
let trimmed = subname.trim();
if trimmed.is_empty() {
return Err(anyhow!("Postgres JDBC connection string is empty"));
}
// First try parsing using the standard JDBC layout, otherwise fall back to a canonical URL.
let candidate = format!("postgresql:{}", trimmed);
let mut url = Url::parse(&candidate).or_else(|_| {
let fallback = format!("postgresql://{}", trimmed.trim_start_matches('/'));
Url::parse(&fallback)
})?;
// Extract credentials from the query string when they are present.
let mut user = None;
let mut password = None;
if url.query().is_some() {
let mut preserved = Vec::new();
for (key, value) in url.query_pairs() {
match key.to_ascii_lowercase().as_str() {
"user" | "username" => user = Some(value.into_owned()),
"password" | "pass" | "pwd" => password = Some(value.into_owned()),
_ => preserved.push((key.into_owned(), value.into_owned())),
}
}
{
let mut pairs = url.query_pairs_mut();
pairs.clear();
for (key, value) in preserved {
pairs.append_pair(&key, &value);
}
}
}
if let Some(user) = user {
url.set_username(&user).map_err(|_| anyhow!("Failed to apply Postgres username"))?;
}
if let Some(password) = password {
url.set_password(Some(&password))
.map_err(|_| anyhow!("Failed to apply Postgres password"))?;
}
Ok(url.to_string())
}
#[cfg(test)]
mod tests {
use super::normalize_postgres_url;
use pretty_assertions::assert_eq;
#[test]
fn normalizes_postgres_query_credentials() {
let normalized = normalize_postgres_url(
"//db.example.com:5432/app?user=admin&password=s3cr3t&sslmode=require",
)
.unwrap();
assert_eq!(normalized, "postgresql://admin:s3cr3t@db.example.com:5432/app?sslmode=require");
}
#[test]
fn preserves_existing_credentials() {
let normalized =
normalize_postgres_url("//db.example.com:5432/app?sslmode=prefer").unwrap();
assert_eq!(normalized, "postgresql://db.example.com:5432/app?sslmode=prefer");
}
#[test]
fn rejects_empty_input() {
assert!(normalize_postgres_url("").is_err());
}
}

View file

@ -15,10 +15,10 @@ pub fn process_captures(captures: &SerializableCaptures) -> Vec<(String, String,
.iter()
.filter_map(|cap| {
if let Some(name) = &cap.name {
Some((name.to_uppercase(), cap.value.to_string(), cap.start, cap.end))
Some((name.to_uppercase(), cap.raw_value().to_string(), cap.start, cap.end))
} else if !saw_unnamed {
saw_unnamed = true;
Some(("TOKEN".to_string(), cap.value.to_string(), cap.start, cap.end))
Some(("TOKEN".to_string(), cap.raw_value().to_string(), cap.start, cap.end))
} else {
// Ignore any additional unnamed captures (e.g., from unintended groups)
None
@ -201,7 +201,7 @@ mod tests {
match_number: 2, // Corrected match_number
start: 4,
end: 6,
value: "cc"
value: "cc",
},
],
};

View file

@ -157,7 +157,7 @@ async fn test_redact_hashes_finding_values() -> Result<()> {
assert!(!matches.is_empty());
for m_arc in matches {
let m = &m_arc.2;
assert!(m.groups.captures.iter().any(|cap| cap.value.starts_with("[REDACTED:")));
assert!(m.groups.captures.iter().any(|cap| cap.display_value().starts_with("[REDACTED:")));
}
Ok(())

79
tests/jdbc_rule.rs Normal file
View file

@ -0,0 +1,79 @@
use std::collections::BTreeSet;
use anyhow::{anyhow, Result};
use kingfisher::{rules::rule::RuleSyntax, safe_list};
fn load_jdbc_rule() -> Result<RuleSyntax> {
let rules = RuleSyntax::from_yaml_file("data/rules/jdbc.yml")?;
rules
.into_iter()
.find(|rule| rule.id == "kingfisher.jdbc.1")
.ok_or_else(|| anyhow!("JDBC rule not found"))
}
#[test]
fn jdbc_rule_matches_expected_patterns() -> Result<()> {
let rule = load_jdbc_rule()?;
let regex = rule.as_regex()?;
let sample = r#"
datasource.url=jdbc:postgresql://db.acme.local:5432/app?user=svc_writer&password=P@s5w0rd
connection.read=jdbc:mysql://analyst:letmein@reports.internal:3306/analytics
cache="jdbc:sqlite:/var/lib/app/cache.db"
vendor.dsn=jdbc:oracle:thin:@ora.example.net:1521/ORCLPDB1
backup=jdbc:mysql://host:3306/db,other_token
jdbc:xyz:short // this should be ignored
somejdbc:mysql://host/db // false prefix
jdbc:mysql://host/db>next // malformed with trailing bracket
"#;
let matches: BTreeSet<String> = regex
.captures_iter(sample.as_bytes())
.filter_map(|caps| caps.get(1))
.map(|m| String::from_utf8_lossy(m.as_bytes()).into_owned())
.collect();
let expected = BTreeSet::from([
"jdbc:postgresql://db.acme.local:5432/app?user=svc_writer&password=P@s5w0rd".to_string(),
"jdbc:mysql://analyst:letmein@reports.internal:3306/analytics".to_string(),
"jdbc:sqlite:/var/lib/app/cache.db".to_string(),
"jdbc:oracle:thin:@ora.example.net:1521/ORCLPDB1".to_string(),
"jdbc:mysql://host:3306/db".to_string(),
]);
assert_eq!(matches, expected);
Ok(())
}
#[test]
fn jdbc_rule_respects_user_skip_regex() -> Result<()> {
safe_list::clear_user_filters_for_tests();
safe_list::add_user_regex(r"^jdbc:sqlite::temporary_ignore_secret$")?;
let rule = load_jdbc_rule()?;
let regex = rule.as_regex()?;
let sample = r#"
jdbc:sqlite::temporary_ignore_secret
jdbc:mysql://data_ingest:pa55word@analytics.internal:3306/raw
"#;
let matches: Vec<String> = regex
.captures_iter(sample.as_bytes())
.filter_map(|caps| caps.get(1))
.map(|m| String::from_utf8_lossy(m.as_bytes()).into_owned())
.collect();
let retained: Vec<String> = matches
.into_iter()
.filter(|m| !safe_list::is_user_match(m.as_bytes(), m.as_bytes()))
.collect();
safe_list::clear_user_filters_for_tests();
assert_eq!(
retained,
vec!["jdbc:mysql://data_ingest:pa55word@analytics.internal:3306/raw".to_string()]
);
Ok(())
}