This commit is contained in:
Mick Grove 2026-02-10 19:24:19 -08:00
commit e518fb30f2
139 changed files with 1185 additions and 221 deletions

View file

@ -29,7 +29,8 @@ use crate::{
azure::validate_azure_storage_credentials,
coinbase::validate_cdp_api_key,
gcp::GcpValidator,
httpvalidation::{build_request_builder, retry_request, validate_response},
httpvalidation::validate_response,
httpvalidation::{build_request_builder, retry_request},
jdbc::validate_jdbc,
jwt::validate_jwt,
mongodb::validate_mongodb,
@ -40,6 +41,8 @@ use crate::{
validation_body,
};
use crate::grpc_validation;
/// Result of a direct validation attempt.
#[derive(Debug, Clone, Serialize)]
pub struct DirectValidationResult {
@ -135,6 +138,21 @@ fn extract_validation_vars(validation: &Validation) -> BTreeSet<String> {
vars.extend(extract_template_vars(body));
}
}
Validation::Grpc(grpc) => {
// Extract from URL
vars.extend(extract_template_vars(&grpc.request.url));
// Extract from headers
for (key, value) in &grpc.request.headers {
vars.extend(extract_template_vars(key));
vars.extend(extract_template_vars(value));
}
// Extract from body
if let Some(body) = &grpc.request.body {
vars.extend(extract_template_vars(body));
}
}
// Non-HTTP validators typically use fixed variable names
Validation::AWS => {
vars.insert("AKID".to_string());
@ -312,6 +330,60 @@ async fn execute_http_validation(
})
}
/// Execute gRPC validation against the provided rule.
///
/// Renders the rule's URL template, performs a single unary gRPC call via
/// `grpc_validation::grpc_unary_call_from_rule`, condenses the response into a
/// printable body, and evaluates the rule's response matchers against it.
///
/// # Arguments
/// * `grpc_validation_cfg` - the rule's gRPC validation section (URL, headers,
///   body template, response matchers).
/// * `globals` - Liquid variables used to render the templates.
/// * `parser` - Liquid parser used for rendering.
/// * `timeout` - timeout forwarded to the gRPC call.
///
/// # Errors
/// Returns an error when the URL cannot be rendered/parsed or when the gRPC
/// request itself fails. A completed request that does not satisfy the
/// matchers is reported through `is_valid == false`, not as an error.
async fn execute_grpc_validation(
    grpc_validation_cfg: &kingfisher_rules::GrpcValidation,
    globals: &Object,
    parser: &liquid::Parser,
    timeout: Duration,
) -> Result<DirectValidationResult> {
    /// Maximum number of body bytes echoed back in the result message.
    const MAX_DISPLAY_BODY_LEN: usize = 500;

    // Render the URL
    let url = render_and_parse_url(parser, globals, &grpc_validation_cfg.request.url).await?;
    debug!("Validating against gRPC URL: {}", url);

    let res = grpc_validation::grpc_unary_call_from_rule(
        &url,
        &grpc_validation_cfg.request.headers,
        &grpc_validation_cfg.request.body,
        parser,
        globals,
        timeout,
    )
    .await
    .map_err(|e| anyhow!("gRPC request failed: {e}"))?;

    let status = res.http_status;
    let headers = res.headers;
    let mut body = String::from_utf8_lossy(&res.body_bytes).into_owned();

    // `headers` holds response headers and trailers merged together; a missing
    // or non-text value is treated as an empty string.
    let header_text = |name: &str| -> String {
        headers.get(name).and_then(|v| v.to_str().ok()).unwrap_or("").to_string()
    };
    let grpc_status = header_text("grpc-status");
    let grpc_message = header_text("grpc-message");

    if grpc_status == "0" {
        body = "grpc-status=0".to_string();
    } else {
        // Replace the body with a compact status line when it is blank (and
        // gRPC status information is available) or when it contains NUL bytes.
        let has_grpc_info = !grpc_status.is_empty() || !grpc_message.is_empty();
        let blank_with_info = body.trim().is_empty() && has_grpc_info;
        if blank_with_info || body.as_bytes().contains(&0) {
            body = format!("grpc-status={grpc_status} grpc-message={grpc_message}");
        }
    }

    // Truncate body for display if too long. The cut must land on a UTF-8
    // character boundary: slicing a `str` in the middle of a multi-byte
    // character panics, and lossy decoding can introduce 3-byte U+FFFD.
    let display_body = if body.len() > MAX_DISPLAY_BODY_LEN {
        let mut end = MAX_DISPLAY_BODY_LEN;
        while !body.is_char_boundary(end) {
            end -= 1;
        }
        format!("{}...", &body[..end])
    } else {
        body.clone()
    };

    // Validate the response
    let matchers = grpc_validation_cfg.request.response_matcher.as_deref().unwrap_or(&[]);
    let is_valid = validate_response(matchers, &body, &status, &headers, false);

    Ok(DirectValidationResult {
        rule_id: String::new(), // Will be filled in by caller
        rule_name: String::new(),
        is_valid,
        status_code: Some(status.as_u16()),
        message: display_body,
    })
}
/// Run direct validation of a secret against one or more rules.
///
/// If the rule selector matches multiple rules, all matching rules are tried.
@ -476,6 +548,9 @@ pub async fn run_direct_validation(
)
.await?
}
Validation::Grpc(grpc_validation_cfg) => {
execute_grpc_validation(grpc_validation_cfg, &globals, &parser, timeout).await?
}
Validation::AWS => {
// AWS needs AKID and TOKEN (secret access key)

202
src/grpc_validation.rs Normal file
View file

@ -0,0 +1,202 @@
use std::{collections::BTreeMap, sync::Arc, time::Duration};
use anyhow::{anyhow, Context, Result};
use bytes::Bytes;
use h2::client;
use http::{header::HeaderName, HeaderMap, HeaderValue, Request, Uri};
use liquid::Object;
use reqwest::Url;
use rustls::{ClientConfig, RootCertStore};
use tokio::net::TcpStream;
use tokio_rustls::TlsConnector;
/// Result of a gRPC unary call over HTTP/2.
///
/// Derives `Debug` so results can be logged or inspected while diagnosing
/// validation failures.
#[derive(Debug)]
pub struct GrpcCallResult {
    /// HTTP status code taken from the response head.
    pub http_status: http::StatusCode,
    /// Response headers + trailers merged into one map.
    pub headers: HeaderMap,
    /// Concatenated payload of all DATA frames received on the stream.
    pub body_bytes: Vec<u8>,
}
/// Build a rustls root store from the platform's native trust anchors.
///
/// Loading is best-effort: some platforms ship a few roots that cannot be
/// loaded or parsed, so individual failures are skipped instead of aborting
/// the whole validation.
///
/// # Errors
/// Returns an error only when no usable root certificate could be added, since
/// every TLS handshake would fail with an empty trust store anyway.
fn build_root_store() -> Result<RootCertStore> {
    let native = rustls_native_certs::load_native_certs();
    let load_errors = native.errors.len();

    let mut roots = RootCertStore::empty();
    let mut added = 0usize;
    let mut last_add_error = None;
    for cert in native.certs {
        match roots.add(cert) {
            Ok(()) => added += 1,
            // Skip roots rustls cannot parse; remember the reason for diagnostics.
            Err(e) => last_add_error = Some(e),
        }
    }

    if added == 0 {
        return Err(anyhow!(
            "No usable native root certificates found \
             (load errors: {load_errors}, last add error: {last_add_error:?})"
        ));
    }
    Ok(roots)
}
/// Convert a parsed URL into the `http::Uri` used for the HTTP/2 request.
///
/// Only `https` URLs are accepted. The authority is the host plus the port
/// when `Url::port` reports one, and the request target is everything from
/// the start of the path onwards.
///
/// # Errors
/// Fails for non-https schemes, URLs without a host, or when the URI builder
/// rejects the assembled parts.
fn url_to_h2_uri(url: &Url) -> Result<Uri> {
    match url.scheme() {
        "https" => {}
        scheme => {
            return Err(anyhow!("gRPC validation only supports https URLs, got: {scheme}"));
        }
    }

    let Some(host) = url.host_str() else {
        return Err(anyhow!("URL is missing host: {url}"));
    };

    let authority = if let Some(port) = url.port() {
        format!("{host}:{port}")
    } else {
        host.to_string()
    };

    let request_target = &url[url::Position::BeforePath..];

    let builder = Uri::builder()
        .scheme("https")
        .authority(authority)
        .path_and_query(request_target);
    builder.build().context("Failed to build HTTP/2 URI for gRPC request")
}
/// Render the rule's header templates into a concrete `HeaderMap`.
///
/// Header names are taken verbatim from the rule; only the values are treated
/// as Liquid templates and rendered with `globals`.
///
/// # Errors
/// Fails on an invalid header name, a template that cannot be parsed or
/// rendered, or a rendered value that is not a legal header value.
fn header_map_from_templates(
    templates: &BTreeMap<String, String>,
    parser: &liquid::Parser,
    globals: &Object,
) -> Result<HeaderMap> {
    let mut rendered_headers = HeaderMap::new();

    for (raw_name, value_template) in templates.iter() {
        // Header names in YAML are expected to be static.
        let header_name = HeaderName::from_bytes(raw_name.as_bytes())
            .with_context(|| format!("Invalid header name in GrpcValidation: '{raw_name}'"))?;

        let rendered_value = parser
            .parse(value_template)
            .map_err(|e| anyhow!("Failed to parse header template '{raw_name}': {e}"))?
            .render(globals)
            .map_err(|e| anyhow!("Failed to render header template '{raw_name}': {e}"))?;

        let header_value = HeaderValue::from_str(&rendered_value)
            .with_context(|| format!("Invalid header value for '{raw_name}'"))?;

        rendered_headers.append(header_name, header_value);
    }

    Ok(rendered_headers)
}
/// Execute a single unary gRPC request over HTTP/2 and return headers + trailers.
///
/// This is intentionally low-level so that rules can validate gRPC-only APIs
/// without pretending they are REST endpoints.
///
/// # Arguments
/// * `url` - target endpoint; must be an `https` URL with a host.
/// * `headers` - request headers, sent as given.
/// * `body` - request payload, sent as given in a single DATA frame. The caller
///   is responsible for including the 5-byte gRPC frame prefix.
/// * `timeout` - applied separately to each stage (TCP connect, TLS handshake,
///   HTTP/2 handshake, response head, every data read, trailers), not to the
///   call as a whole.
///
/// # Errors
/// Returns an error when the URL is unusable, when any stage fails or times
/// out, or when the response stream reports an error.
pub async fn grpc_unary_call(
    url: &Url,
    headers: HeaderMap,
    body: Vec<u8>,
    timeout: Duration,
) -> Result<GrpcCallResult> {
    let host = url.host_str().ok_or_else(|| anyhow!("URL is missing host: {url}"))?;
    let port = url.port_or_known_default().unwrap_or(443);

    // Build (and thereby validate) the request URI up front so that unsupported
    // URLs, e.g. non-https schemes, are rejected before any network I/O happens.
    let uri = url_to_h2_uri(url)?;

    let addr = format!("{host}:{port}");
    let tcp = tokio::time::timeout(timeout, TcpStream::connect(addr))
        .await
        .context("Timed out connecting to gRPC host")?
        .context("Failed to connect to gRPC host")?;

    let mut tls_config =
        ClientConfig::builder().with_root_certificates(build_root_store()?).with_no_client_auth();
    // Offer HTTP/2 via ALPN.
    tls_config.alpn_protocols = vec![b"h2".to_vec()];
    let connector = TlsConnector::from(Arc::new(tls_config));

    let server_name = rustls::pki_types::ServerName::try_from(host.to_string())
        .map_err(|_| anyhow!("Invalid TLS server name: {host}"))?;
    let tls = tokio::time::timeout(timeout, connector.connect(server_name, tcp))
        .await
        .context("Timed out during TLS handshake")?
        .context("TLS handshake failed")?;

    let (mut h2_client, connection) = tokio::time::timeout(timeout, client::handshake(tls))
        .await
        .context("Timed out during HTTP/2 handshake")?
        .context("HTTP/2 handshake failed")?;

    // Drive the HTTP/2 connection in the background.
    tokio::spawn(async move {
        let _ = connection.await;
    });

    // Build the request head, then install the caller's headers wholesale.
    // The freshly built request has no headers, so assigning the map is
    // equivalent to appending each entry and avoids a fallible builder lookup.
    let mut request = Request::builder()
        .method("POST")
        .uri(uri)
        .body(())
        .context("Failed to build HTTP/2 request")?;
    *request.headers_mut() = headers;

    let (response_future, mut send_stream) =
        h2_client.send_request(request, false).context("Failed to send gRPC request headers")?;

    // Send gRPC request bytes (including the 5-byte gRPC frame prefix).
    send_stream.send_data(Bytes::from(body), true).context("Failed to send gRPC request body")?;

    let response = tokio::time::timeout(timeout, response_future)
        .await
        .context("Timed out waiting for gRPC response headers")?
        .context("Failed to receive gRPC response headers")?;

    let http_status = response.status();
    let (parts, mut recv_stream) = response.into_parts();
    let mut merged_headers = parts.headers;

    // Read data frames (may be empty).
    let mut body_bytes: Vec<u8> = Vec::new();
    loop {
        // h2 returns `Option<Result<Bytes, h2::Error>>` here:
        // - None => end of stream
        // - Some(Ok(bytes)) => a data chunk
        // - Some(Err(err)) => stream error
        let next_opt = tokio::time::timeout(timeout, recv_stream.data())
            .await
            .context("Timed out reading gRPC response data")?;

        match next_opt {
            Some(Ok(chunk)) => {
                body_bytes.extend_from_slice(chunk.as_ref());
                // Hand the consumed bytes back to the HTTP/2 flow-control
                // window. Without this the peer stops sending once the window
                // is exhausted and larger responses stall until the timeout.
                // A failure here is deliberately ignored: the data has already
                // been received, and a broken stream surfaces on the next read.
                let _ = recv_stream.flow_control().release_capacity(chunk.len());
            }
            Some(Err(e)) => return Err(anyhow!("Error reading gRPC response data: {e}")),
            None => break,
        }
    }

    // Read trailers (where grpc-status is typically reported).
    if let Some(trailers) = tokio::time::timeout(timeout, recv_stream.trailers())
        .await
        .context("Timed out reading gRPC response trailers")?
        .context("Error reading gRPC response trailers")?
    {
        // Append rather than replace so values from the response head survive.
        for (k, v) in trailers.iter() {
            merged_headers.append(k, v.clone());
        }
    }

    Ok(GrpcCallResult { http_status, headers: merged_headers, body_bytes })
}
/// Render a rule's header and body templates, then perform the unary gRPC call.
///
/// Headers are rendered first, then the optional body template. A rule without
/// a body template sends an empty payload. The rendered body is sent as its raw
/// UTF-8 bytes.
///
/// # Errors
/// Fails when a template cannot be parsed or rendered, or when the underlying
/// `grpc_unary_call` fails.
pub async fn grpc_unary_call_from_rule(
    url: &Url,
    header_templates: &BTreeMap<String, String>,
    body_template: &Option<String>,
    parser: &liquid::Parser,
    globals: &Object,
    timeout: Duration,
) -> Result<GrpcCallResult> {
    let headers = header_map_from_templates(header_templates, parser, globals)?;

    let body = if let Some(template_src) = body_template {
        parser
            .parse(template_src)
            .map_err(|e| anyhow!("Failed to parse gRPC body template: {e}"))?
            .render(globals)
            .map_err(|e| anyhow!("Failed to render gRPC body template: {e}"))?
            .into_bytes()
    } else {
        Vec::new()
    };

    grpc_unary_call(url, headers, body, timeout).await
}

View file

@ -30,6 +30,7 @@ pub mod git_url;
pub mod gitea;
pub mod github;
pub mod gitlab;
pub mod grpc_validation;
pub mod huggingface;
pub mod inline_ignore;
pub mod jira;

View file

@ -80,6 +80,9 @@ use crate::cli::commands::{
fn main() -> anyhow::Result<()> {
color_backtrace::install();
// Rustls 0.23 requires an explicit crypto provider selection when multiple
// providers are present in the dependency graph.
let _ = rustls::crypto::ring::default_provider().install_default();
// Parse command-line arguments
let CommandLineArgs { command, global_args } = CommandLineArgs::parse_args();
@ -583,55 +586,112 @@ pub fn run_rules_check(args: &RulesCheckArgs) -> Result<()> {
num_errors += 1;
continue;
}
// Test each example against both vectorscan and regex
// Test each example against regex and pattern_requirements
for (example_index, example) in rule_syntax.examples.iter().enumerate() {
// Create a test blob from the example
// let blob = Blob::new(BlobId::new(example.as_bytes()),
// example.as_bytes().to_vec()); let origin = OriginSet::new(
// Origin::from_file(PathBuf::from("test_example")),
// Vec::new(),
// );
// // Check vectorscan match
// let vectorscan_matched = match matcher.scan_blob(&blob, &origin, None)? {
// ScanResult::New(matches) => !matches.is_empty(),
// _ => false,
// };
// Check regex match
// Get the regex using the public method
let re =
rules_db.get_regex_by_rule_id(rule.id()).expect("Failed to get regex for rule");
let regex_matched = re.is_match(example.as_bytes());
// Check if the example matches the pattern
let example_bytes = example.as_bytes();
let regex_matched = re.is_match(example_bytes);
if !regex_matched {
// ||!vectorscan_matched {
println!("\nTesting rule {} - {}", rule_index + 1, rule_syntax.name);
println!(" Processing example {}", example_index + 1);
println!(" [!] Mismatch detected for example: {}", example);
// if !vectorscan_matched {
// println!(" Vectorscan match: {}", vectorscan_matched);
// num_errors += 1;
// }
if !regex_matched {
println!(" Regex match: {}", regex_matched);
num_errors += 1;
}
println!(" [!] Pattern mismatch detected for example: {}", example);
println!(" Regex match: {}", regex_matched);
num_errors += 1;
continue;
}
// // Report any mismatches
// if !vectorscan_matched || !regex_matched {
// error!("Rule '{}' example {} failed validation:",
// rule.name(), example_index + 1); println!("
// Example text: {}", example);
// If the rule has pattern_requirements, validate them against the match
if let Some(pattern_reqs) = rule.pattern_requirements() {
// Get the captures from the match
if let Some(captures) = re.captures(example_bytes) {
// Get the full match (group 0)
let full_capture = captures.get(0).expect("Group 0 should always exist");
let full_bytes = full_capture.as_bytes();
// if !vectorscan_matched {
// error!(" - Vectorscan pattern did not match example");
// num_errors += 1;
// }
// Determine which bytes to validate (same logic as in matcher.rs)
// Find the primary capture group for validation
let matching_input_for_validation = 'block: {
// 1. Look for a named capture "secret" (case-insensitive).
if let Some(secret_cap) =
captures.name("secret").or_else(|| captures.name("SECRET"))
{
break 'block secret_cap;
}
// if !regex_matched {
// error!(" - Regex pattern did not match example");
// num_errors += 1;
// }
// }
// 2. Look for any other named capture.
if let Some(named_cap) = (1..captures.len()).find_map(|i| {
let name_opt = re.capture_names().nth(i).and_then(|n| n);
name_opt.and_then(|_| captures.get(i))
}) {
break 'block named_cap;
}
// 3. Fall back to first positional capture (group 1) if it exists.
if let Some(pos_cap) = captures.get(1) {
break 'block pos_cap;
}
// 4. Finally, fall back to the full match (group 0).
break 'block full_capture;
};
let validation_bytes = matching_input_for_validation.as_bytes();
// Create context for pattern requirements validation
use kingfisher_rules::PatternRequirementContext;
let context = PatternRequirementContext {
regex: re,
captures: &captures,
full_match: full_bytes,
};
// Validate pattern requirements (without respect_ignore_if_contains for examples)
use kingfisher_rules::PatternValidationResult;
match pattern_reqs.validate(validation_bytes, Some(context), false) {
PatternValidationResult::Passed => {
// All requirements met
}
PatternValidationResult::Failed => {
println!("\nTesting rule {} - {}", rule_index + 1, rule_syntax.name);
println!(" Processing example {}", example_index + 1);
println!(
" [!] Pattern requirements not met for example: {}",
example
);
println!(" The match does not satisfy the character requirements (min_digits, min_uppercase, etc.)");
num_errors += 1;
}
PatternValidationResult::FailedChecksum { actual_len, expected_len } => {
println!("\nTesting rule {} - {}", rule_index + 1, rule_syntax.name);
println!(" Processing example {}", example_index + 1);
println!(" [!] Checksum validation failed for example: {}", example);
println!(
" Actual checksum length: {}, Expected checksum length: {}",
actual_len, expected_len
);
num_errors += 1;
}
PatternValidationResult::IgnoredBySubstring { matched_term } => {
// For examples, we don't want to treat this as an error in check mode
// since ignore_if_contains is meant for runtime filtering
// But we can warn about it
println!("\nTesting rule {} - {}", rule_index + 1, rule_syntax.name);
println!(" Processing example {}", example_index + 1);
println!(
" [!] Example would be ignored due to containing term: {}",
matched_term
);
println!(" Example: {}", example);
num_errors += 1;
}
}
}
}
}
}
// Print summary

View file

@ -181,6 +181,15 @@ fn build_validate_command(
escape_for_shell(snippet)
))
}
Validation::Grpc(_) => {
// gRPC-based validation with dependent variables
Some(format!(
"kingfisher validate --rule {} {}{}",
rule_id,
var_args,
escape_for_shell(snippet)
))
}
Validation::MongoDB
| Validation::MySQL
| Validation::Postgres
@ -645,8 +654,17 @@ impl DetailsReporter {
let source_span = rm.m.location.resolved_source_span();
let line_num = source_span.start.line;
// Get raw snippet value (for revoke command) and display snippet (for output)
let (raw_snippet, snippet) = if let Some(capture) = rm.m.groups.captures.get(0) {
// Prefer the named TOKEN capture (when present) for display + validate/revoke commands.
// This avoids cases like Modal CLI pairs where capture(0) is an ID and TOKEN is the secret.
let snippet_capture =
rm.m.groups
.captures
.iter()
.find(|c| c.name.map(|n| n.eq_ignore_ascii_case("TOKEN")).unwrap_or(false))
.or_else(|| rm.m.groups.captures.get(0));
// Get raw snippet value (for revoke/validate command) and display snippet (for output)
let (raw_snippet, snippet) = if let Some(capture) = snippet_capture {
let raw = capture.raw_value().to_string();
let displayed = capture.display_value();
(raw, Escaped(displayed.as_ref().as_bytes()).to_string())
@ -718,11 +736,24 @@ impl DetailsReporter {
// Generate validate command for findings with validation support
let validate_cmd = if let Some(validation) = &rm.m.rule.syntax().validation {
// Merge dependent captures with named regex captures so the generated command is runnable.
// (E.g., Modal needs TOKEN_ID, which is a named capture on the same rule.)
let mut merged_vars = rm.m.dependent_captures.clone();
for cap in rm.m.groups.captures.iter() {
let Some(name) = cap.name else { continue };
if name.eq_ignore_ascii_case("TOKEN") {
continue;
}
merged_vars
.entry(name.to_uppercase())
.or_insert_with(|| cap.raw_value().to_string());
}
build_validate_command(
rm.m.rule.id(),
validation,
&raw_snippet,
&rm.m.dependent_captures,
&merged_vars,
akid_from_captures.as_deref(),
akid_from_body.as_deref(),
)

View file

@ -11,8 +11,8 @@ pub mod rule {
pub use kingfisher_rules::rule::Revocation;
pub use kingfisher_rules::rules::{Rules, RulesError};
pub use kingfisher_rules::{
ChecksumActual, ChecksumRequirement, Confidence, DependsOnRule, HttpRequest, HttpValidation,
MultipartConfig, MultipartPart, PatternRequirementContext, PatternRequirements,
PatternValidationResult, ReportResponseData, ResponseMatcher, Rule, RuleSyntax, Validation,
RULE_COMMENTS_PATTERN,
ChecksumActual, ChecksumRequirement, Confidence, DependsOnRule, GrpcRequest, GrpcValidation,
HttpRequest, HttpValidation, MultipartConfig, MultipartPart, PatternRequirementContext,
PatternRequirements, PatternValidationResult, ReportResponseData, ResponseMatcher, Rule,
RuleSyntax, Validation, RULE_COMMENTS_PATTERN,
};

View file

@ -25,6 +25,8 @@ use crate::{
validation_body::{self},
};
use crate::grpc_validation;
// Re-export TlsMode from kingfisher_rules for use in client_for_rule
pub use kingfisher_rules::TlsMode as RuleTlsMode;
@ -464,6 +466,15 @@ async fn timed_validate_single_match<'a>(
let mut globals = Object::new();
populate_globals_from_captures(&mut globals, &captured_values);
// Persist named captures (non-TOKEN) for validate/revoke command generation.
// This is especially important for gRPC validators like Modal where TOKEN_ID is required.
for (k, v, ..) in &captured_values {
if k.eq_ignore_ascii_case("TOKEN") {
continue;
}
m.dependent_captures.entry(k.to_uppercase()).or_insert_with(|| v.clone());
}
let rule_syntax = m.rule.syntax();
// ──────────────────────────────────────────────────────────
@ -717,6 +728,87 @@ async fn timed_validate_single_match<'a>(
}
}
// ---------------------------------------------------- gRPC validator
Some(Validation::Grpc(grpc_validation_cfg)) => {
let request_timeout = validation_timeout;
// Render URL
let url = match render_and_parse_url(
parser,
&globals,
&rule_syntax.name,
&grpc_validation_cfg.request.url,
)
.await
{
Ok(u) => u,
Err(e) => {
m.validation_success = false;
m.validation_response_body = validation_body::from_string(e);
m.validation_response_status = StatusCode::BAD_REQUEST;
commit_and_return(m);
return;
}
};
// Execute gRPC unary call (HTTP/2 + trailers).
let res = match grpc_validation::grpc_unary_call_from_rule(
&url,
&grpc_validation_cfg.request.headers,
&grpc_validation_cfg.request.body,
parser,
&globals,
request_timeout,
)
.await
{
Ok(r) => r,
Err(e) => {
m.validation_success = false;
m.validation_response_body =
validation_body::from_string(format!("gRPC error: {}", e));
m.validation_response_status = StatusCode::BAD_GATEWAY;
commit_and_return(m);
return;
}
};
let status = StatusCode::from_u16(res.http_status.as_u16()).unwrap_or(StatusCode::OK);
let headers = res.headers;
let mut body = String::from_utf8_lossy(&res.body_bytes).to_string();
// gRPC errors are typically reported in trailers, not the body.
// Surface them for debugging and for `--full-validation-response` output.
let grpc_status =
headers.get("grpc-status").and_then(|v| v.to_str().ok()).unwrap_or("").to_string();
let grpc_message =
headers.get("grpc-message").and_then(|v| v.to_str().ok()).unwrap_or("").to_string();
// Avoid storing raw protobuf bytes in the report (they often contain NULs and make
// output logs non-UTF8). Prefer a compact status/message string.
if grpc_status == "0" {
body = "grpc-status=0".to_string();
} else if body.trim().is_empty()
&& (!grpc_status.is_empty() || !grpc_message.is_empty())
{
body = format!("grpc-status={grpc_status} grpc-message={grpc_message}");
} else if body.as_bytes().contains(&0) {
body = format!("grpc-status={grpc_status} grpc-message={grpc_message}");
}
truncate_to_char_boundary(&mut body, MAX_VALIDATION_BODY_LEN);
m.validation_response_status = status;
m.validation_response_body = validation_body::from_string(body.clone());
let matchers = grpc_validation_cfg
.request
.response_matcher
.as_ref()
.expect("missing response_matcher");
m.validation_success =
httpvalidation::validate_response(matchers, &body, &status, &headers, false);
}
// ---------------------------------------------------- MongoDB validator
Some(Validation::MongoDB) => {
let uri = globals