diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 819b8a4..0e27143 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -16,6 +16,8 @@ jobs: linux-arm64: name: Linux arm64 runs-on: ubuntu-24.04-arm + env: + CARGO_HOME: ${{ github.workspace }}/.cargo-home steps: - uses: actions/checkout@v4 @@ -36,13 +38,17 @@ jobs: override: true - uses: swatinem/rust-cache@v2 + with: + workspaces: | + . -> target + cache-directories: | + .cargo-home + cache-all-crates: true + shared-key: kingfisher-${{ runner.os }}-${{ runner.arch }} + save-if: ${{ github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == github.repository }} - - name: Build (Makefile linux-arm64) - run: make ubuntu-arm64 - - name: Run tests - run: make tests - env: - CARGO_BUILD_JOBS: 1 + - name: Build and test (Makefile linux-arm64) + run: make linux-arm64 macos-arm64: name: macOS arm64 @@ -55,6 +61,12 @@ jobs: profile: minimal override: true - uses: swatinem/rust-cache@v2 + with: + workspaces: | + . -> target + cache-all-crates: true + shared-key: kingfisher-${{ runner.os }}-${{ runner.arch }} + save-if: ${{ github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == github.repository }} - name: Build (Makefile darwin-arm64) run: make darwin-arm64 - name: Run tests @@ -136,6 +148,12 @@ jobs: Get-ChildItem $env:VCPKG_DOWNLOADS - uses: swatinem/rust-cache@v2 + with: + workspaces: | + . 
-> target + cache-all-crates: true + shared-key: kingfisher-${{ runner.os }}-${{ runner.arch }} + save-if: ${{ github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == github.repository }} - name: Build run: .\buildwin.bat diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index b80f8b7..bc58fc6 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -24,6 +24,8 @@ jobs: linux-x64: name: Linux x64 runs-on: ubuntu-24.04 + env: + CARGO_HOME: ${{ github.workspace }}/.cargo-home steps: - uses: actions/checkout@v4 @@ -44,6 +46,13 @@ jobs: override: true - uses: swatinem/rust-cache@v2 + with: + workspaces: | + . -> target + cache-directories: | + .cargo-home + cache-all-crates: true + shared-key: kingfisher-${{ runner.os }}-${{ runner.arch }} - name: Install packaging tools run: cargo install cargo-deb cargo-generate-rpm @@ -52,7 +61,7 @@ jobs: run: make linux-x64 - name: Fix permissions - run: sudo chown -R $(id -u):$(id -g) target + run: sudo chown -R $(id -u):$(id -g) target .cargo-home || true - name: Build Debian package run: | @@ -88,6 +97,8 @@ jobs: linux-arm64: name: Linux arm64 runs-on: ubuntu-24.04-arm + env: + CARGO_HOME: ${{ github.workspace }}/.cargo-home steps: - uses: actions/checkout@v4 @@ -108,6 +119,13 @@ jobs: override: true - uses: swatinem/rust-cache@v2 + with: + workspaces: | + . -> target + cache-directories: | + .cargo-home + cache-all-crates: true + shared-key: kingfisher-${{ runner.os }}-${{ runner.arch }} - name: Install packaging tools run: cargo install cargo-deb cargo-generate-rpm @@ -116,7 +134,7 @@ jobs: run: make linux-arm64 - name: Fix permissions - run: sudo chown -R $(id -u):$(id -g) target + run: sudo chown -R $(id -u):$(id -g) target .cargo-home || true - name: Build Debian package run: | @@ -162,6 +180,11 @@ jobs: override: true - uses: swatinem/rust-cache@v2 + with: + workspaces: | + . 
-> target + cache-all-crates: true + shared-key: kingfisher-${{ runner.os }}-${{ runner.arch }} - name: Build Darwin x64 run: make darwin-x64 @@ -190,6 +213,11 @@ jobs: override: true - uses: swatinem/rust-cache@v2 + with: + workspaces: | + . -> target + cache-all-crates: true + shared-key: kingfisher-${{ runner.os }}-${{ runner.arch }} - name: Build Darwin arm64 run: make darwin-arm64 @@ -285,6 +313,11 @@ jobs: - uses: swatinem/rust-cache@v2 + with: + workspaces: | + . -> target + cache-all-crates: true + shared-key: kingfisher-${{ runner.os }}-${{ runner.arch }} - name: Build run: .\buildwin.bat diff --git a/.gitignore b/.gitignore index 7f860a0..ac1e268 100644 --- a/.gitignore +++ b/.gitignore @@ -59,6 +59,8 @@ Temporary Items debug/ target/ bin/ +.cargo-home/ +.rustup-home/ # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html diff --git a/CHANGELOG.md b/CHANGELOG.md index 5ff6d43..29cc8ad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ All notable changes to this project will be documented in this file. - Fixed validate/revoke command generation to omit regex named captures (e.g., `BODY`, `CHECKSUM`) when they are not used by validation/revocation templates, so rules like Vercel no longer produce unnecessary `--var BODY=...` arguments. - Fixed HTTP validation incorrectly marking valid credentials as inactive when response bodies exceeded 2048 bytes. Matchers (`JsonValid`, `WordMatch`, etc.) now run against the full response; only the stored preview remains truncated for reporting. - Fixed validation flakiness under service rate limiting by retrying HTTP validations on 429/408 in addition to transient 5xx failures. +- Added optional validation rate limiting via `--validation-rps` (global) and repeatable `--validation-rps-rule ` (per-rule override) for both `scan` and `validate`. 
Throttling now applies across built-in validator types (HTTP/gRPC plus AWS, GCP, Coinbase, MongoDB, Postgres, MySQL, JDBC, JWT, and Azure Storage). Rule selectors support the short form (for example, `github=2` matches `kingfisher.github.*`) with longest-prefix precedence when multiple selectors apply. - Prevented transient HTTP validation failures (429/5xx) from being cached, avoiding cache poisoning that could suppress later successful validations in the same scan. - Added `kingfisher.temporal.1` rule for Temporal Cloud API keys (namespace-scoped and user-scoped JWT formats) with Temporal-specific pattern matching. - Added Temporal Cloud active credential validation via `GET https://saas-api.tmprl.cloud/cloud/current-identity` using bearer auth, so Temporal keys validate against provider APIs instead of generic OIDC discovery. diff --git a/Makefile b/Makefile index b9b69ff..5ef842a 100644 --- a/Makefile +++ b/Makefile @@ -248,7 +248,9 @@ endif linux-x64: check-docker create-dockerignore @mkdir -p target/release docker run --platform linux/amd64 --rm \ + -e CARGO_HOME=/src/.cargo-home \ -v "$$(pwd):/src" -w /src rust:1.92-alpine sh -eu -c '\ + mkdir -p /src/.cargo-home && \ apk add --no-cache \ musl-dev \ gcc g++ make cmake pkgconfig \ @@ -256,8 +258,8 @@ linux-x64: check-docker create-dockerignore bzip2-dev bzip2-static \ xz-dev xz-static \ boost-dev linux-headers \ - patch perl ragel && \ - git openssl-dev curl && \ + patch perl ragel \ + git openssl-dev curl && \ \ cargo test --workspace --all-targets ; \ \ @@ -277,7 +279,9 @@ linux-x64: check-docker create-dockerignore linux-arm64: check-docker create-dockerignore @mkdir -p target/release docker run --platform linux/arm64 --rm \ + -e CARGO_HOME=/src/.cargo-home \ -v "$$(pwd):/src" -w /src rust:1.92-alpine sh -eu -c '\ + mkdir -p /src/.cargo-home && \ apk add --no-cache \ musl-dev \ gcc g++ make cmake pkgconfig \ @@ -285,8 +289,8 @@ linux-arm64: check-docker create-dockerignore bzip2-dev bzip2-static \ xz-dev 
xz-static \ boost-dev linux-headers \ - patch perl ragel && \ - git openssl-dev curl && \ + patch perl ragel \ + git openssl-dev curl && \ \ rustup target add aarch64-unknown-linux-musl && \ \ diff --git a/README.md b/README.md index cd4b44c..72f7b10 100644 --- a/README.md +++ b/README.md @@ -355,6 +355,18 @@ kingfisher revoke --rule slack "xoxb-..." kingfisher revoke --rule github "ghp_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" ``` +Validation throttling is also available for direct validation: + +- `--validation-rps ` sets a global request rate. +- `--validation-rps-rule ` sets per-rule overrides (repeatable). +- Rule selectors accept short names, so `github=2` matches `kingfisher.github.*`. + +```bash +# Limit direct validation to 1 req/sec for GitHub rules +kingfisher validate --rule github "ghp_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" \ + --validation-rps-rule github=1 +``` + ## Advanced Scanning Options ```bash @@ -370,6 +382,15 @@ kingfisher scan /path/to/repo --rule kingfisher.aws # Display rule performance statistics kingfisher scan /path/to/repo --rule-stats +# Throttle validation request rate globally +kingfisher scan /path/to/repo --validation-rps 5 + +# Override specific rule families (kingfisher. prefix optional) +kingfisher scan /path/to/repo \ + --validation-rps 10 \ + --validation-rps-rule github=2 \ + --validation-rps-rule pypi=0.5 + # Include full validation response bodies (not truncated to 512 characters) # Useful for parsing complete validation responses (e.g., GitHub token metadata) kingfisher scan /path/to/repo --full-validation-response @@ -385,6 +406,8 @@ kingfisher scan . \ --branch "$CI_BRANCH" ``` +> Validation rate limiting applies to all built-in validator types (HTTP/gRPC, cloud SDK validators such as AWS/GCP/Coinbase, and database/token validators such as MongoDB, Postgres, MySQL, JDBC, JWT, and Azure Storage). `Raw` validators are excluded. 
+ # Platform Integrations Kingfisher can scan multiple platforms and services directly: diff --git a/docs/USAGE.md b/docs/USAGE.md index 8622331..9660306 100644 --- a/docs/USAGE.md +++ b/docs/USAGE.md @@ -116,6 +116,28 @@ This is useful for: > **Note:** The `kingfisher.` prefix is optional for built-in rules. You can use `--rule aws` instead of `--rule kingfisher.aws`. +To reduce API pressure during validation, you can limit request rate: + +- `--validation-rps ` applies a global rate limit to network validators. +- `--validation-rps-rule ` applies a rule-scoped override and can be repeated. + +Rule selectors use the same prefix behavior as `--rule`: `github=2` targets `kingfisher.github.*`. + +```bash +# Global limit for all validation requests +kingfisher scan ./repo --validation-rps 5 + +# Per-rule overrides (prefix match, kingfisher. prefix optional) +kingfisher scan ./repo \ + --validation-rps 10 \ + --validation-rps-rule github=2 \ + --validation-rps-rule pypi=0.5 + +# Direct validation can use the same limiter options +kingfisher validate --rule github "ghp_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" \ + --validation-rps-rule github=1 +``` + ```bash # Validate an OpsGenie API key (using rule prefix matching) kingfisher validate --rule opsgenie "12345678-9abc-def0-1234-56789abcdef0" diff --git a/src/cli/commands/scan.rs b/src/cli/commands/scan.rs index 9cacc22..49a3e35 100644 --- a/src/cli/commands/scan.rs +++ b/src/cli/commands/scan.rs @@ -102,6 +102,14 @@ pub struct ScanArgs { )] pub validation_retries: u32, + /// Global validation request rate limit in requests per second + #[arg(global = true, long = "validation-rps", value_name = "RPS")] + pub validation_rps: Option, + + /// Rule-scoped validation request rate limit (RULE_SELECTOR=RPS), repeatable + #[arg(global = true, long = "validation-rps-rule", value_name = "RULE_SELECTOR=RPS")] + pub validation_rps_rule: Vec, + /// Include full validation response bodies without truncation #[arg(global = true, long, 
default_value_t = false)] pub full_validation_response: bool, diff --git a/src/cli/commands/validate.rs b/src/cli/commands/validate.rs index e1ee48a..138e650 100644 --- a/src/cli/commands/validate.rs +++ b/src/cli/commands/validate.rs @@ -42,6 +42,14 @@ pub struct ValidateArgs { )] pub retries: u32, + /// Global validation request rate limit in requests per second + #[arg(long = "validation-rps", value_name = "RPS")] + pub validation_rps: Option, + + /// Rule-scoped validation request rate limit (RULE_SELECTOR=RPS), repeatable + #[arg(long = "validation-rps-rule", value_name = "RULE_SELECTOR=RPS")] + pub validation_rps_rule: Vec, + /// Path to custom rules file or directory #[arg(long = "rules-path", value_hint = ValueHint::AnyPath)] pub rules_path: Vec, diff --git a/src/direct_validate.rs b/src/direct_validate.rs index 95570c2..a3d4a07 100644 --- a/src/direct_validate.rs +++ b/src/direct_validate.rs @@ -39,6 +39,7 @@ use crate::{ GLOBAL_USER_AGENT, }, validation_body, + validation_rate_limit::{should_rate_limit_validation, ValidationRateLimiter}, }; use crate::grpc_validation; @@ -452,6 +453,9 @@ pub async fn run_direct_validation( let parser = register_all(liquid::ParserBuilder::with_stdlib()).build()?; let timeout = Duration::from_secs(args.timeout); + let rate_limiter = + ValidationRateLimiter::from_cli(args.validation_rps, &args.validation_rps_rule)? + .map(Arc::new); let mut results = Vec::new(); @@ -549,6 +553,12 @@ pub async fn run_direct_validation( ); } + if let Some(limiter) = rate_limiter.as_deref() { + if should_rate_limit_validation(validation) { + limiter.wait_for_rule(&rule_id).await; + } + } + // Execute validation based on type let mut result = match validation { Validation::Http(http_validation) => { @@ -563,7 +573,13 @@ pub async fn run_direct_validation( .await? } Validation::Grpc(grpc_validation_cfg) => { - execute_grpc_validation(grpc_validation_cfg, &globals, &parser, timeout).await? 
+ execute_grpc_validation( + grpc_validation_cfg, + &globals, + &parser, + timeout, + ) + .await? } Validation::AWS => { @@ -955,6 +971,8 @@ pub(crate) fn create_minimal_scan_args() -> crate::cli::commands::scan::ScanArgs no_ignore_if_contains: false, validation_timeout: 10, validation_retries: 1, + validation_rps: None, + validation_rps_rule: Vec::new(), full_validation_response: false, } } diff --git a/src/lib.rs b/src/lib.rs index 5c2e716..f1ca2a5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -54,6 +54,7 @@ pub mod update; pub mod util; pub mod validation; pub mod validation_body; +pub mod validation_rate_limit; use std::path::{Path, PathBuf}; diff --git a/src/main.rs b/src/main.rs index de79caa..ecb0a44 100644 --- a/src/main.rs +++ b/src/main.rs @@ -557,6 +557,8 @@ fn create_default_scan_args() -> cli::commands::scan::ScanArgs { no_ignore_if_contains: false, validation_timeout: 10, validation_retries: 1, + validation_rps: None, + validation_rps_rule: Vec::new(), full_validation_response: false, } } diff --git a/src/reporter.rs b/src/reporter.rs index 20c5327..fc545b2 100644 --- a/src/reporter.rs +++ b/src/reporter.rs @@ -1492,6 +1492,8 @@ mod tests { no_ignore_if_contains: false, validation_timeout: 10, validation_retries: 1, + validation_rps: None, + validation_rps_rule: Vec::new(), full_validation_response: false, } } diff --git a/src/reporter/json_format.rs b/src/reporter/json_format.rs index 00b94c6..e78cf6a 100644 --- a/src/reporter/json_format.rs +++ b/src/reporter/json_format.rs @@ -197,6 +197,8 @@ mod tests { no_ignore_if_contains: false, validation_timeout: 10, validation_retries: 1, + validation_rps: None, + validation_rps_rule: Vec::new(), full_validation_response: false, } } diff --git a/src/scanner/runner.rs b/src/scanner/runner.rs index adb72df..8f2d679 100644 --- a/src/scanner/runner.rs +++ b/src/scanner/runner.rs @@ -41,6 +41,7 @@ use crate::{ summary::{compute_scan_totals, print_scan_summary}, AccessMapCollector, }, + 
validation_rate_limit::ValidationRateLimiter, util::set_redaction_enabled, }; @@ -273,12 +274,17 @@ pub async fn run_async_scan( repo_roots.iter().filter(|p| p.join(".git").is_dir()).count() + repo_urls.len(); let use_parallel_repo_scan = git_repo_count > 10; + let validation_rate_limiter = + ValidationRateLimiter::from_cli(args.validation_rps, &args.validation_rps_rule)? + .map(Arc::new); + let validation_deps = if !args.no_validate { info!("Starting secret validation phase..."); Some(Arc::new(( register_all(liquid::ParserBuilder::with_stdlib()).build()?, crate::validation::ValidationClients::new(global_args.tls_mode)?, Arc::new(SkipMap::new()), + validation_rate_limiter.clone(), ))) } else { None @@ -347,7 +353,8 @@ pub async fn run_async_scan( } if let Some(validation) = &validation_deps { - let (parser, clients, cache) = (&validation.0, &validation.1, &validation.2); + let (parser, clients, cache, rate_limiter) = + (&validation.0, &validation.1, &validation.2, &validation.3); run_secret_validation( Arc::clone(&datastore), parser, @@ -356,6 +363,7 @@ pub async fn run_async_scan( args.num_jobs, None, access_map_collector.clone(), + rate_limiter.clone(), Duration::from_secs(args.validation_timeout), args.validation_retries, ) @@ -430,7 +438,8 @@ pub async fn run_async_scan( } if let Some(validation) = &validation_deps { - let (parser, clients, cache) = (&validation.0, &validation.1, &validation.2); + let (parser, clients, cache, rate_limiter) = + (&validation.0, &validation.1, &validation.2, &validation.3); let initial_match_count = { datastore.lock().unwrap().get_matches().len() }; if initial_match_count > 0 { run_secret_validation( @@ -441,6 +450,7 @@ pub async fn run_async_scan( args.num_jobs, Some(0..initial_match_count), access_map_collector.clone(), + rate_limiter.clone(), Duration::from_secs(args.validation_timeout), args.validation_retries, ) @@ -512,8 +522,8 @@ pub async fn run_async_scan( } if let Some(validation) = validation_deps.clone() { - let 
(parser, clients, cache) = - (&validation.0, &validation.1, &validation.2); + let (parser, clients, cache, rate_limiter) = + (&validation.0, &validation.1, &validation.2, &validation.3); let match_count = { repo_datastore.lock().unwrap().get_matches().len() }; if match_count > 0 { @@ -525,6 +535,7 @@ pub async fn run_async_scan( args.num_jobs, Some(0..match_count), access_map.clone(), + rate_limiter.clone(), Duration::from_secs(args.validation_timeout), args.validation_retries, ))?; @@ -593,7 +604,8 @@ pub async fn run_async_scan( } if let Some(validation) = &validation_deps { - let (parser, clients, cache) = (&validation.0, &validation.1, &validation.2); + let (parser, clients, cache, rate_limiter) = + (&validation.0, &validation.1, &validation.2, &validation.3); run_secret_validation( Arc::clone(&datastore), parser, @@ -602,6 +614,7 @@ pub async fn run_async_scan( args.num_jobs, None, access_map_collector.clone(), + rate_limiter.clone(), Duration::from_secs(args.validation_timeout), args.validation_retries, ) diff --git a/src/scanner/validation.rs b/src/scanner/validation.rs index 3969b80..335faa6 100644 --- a/src/scanner/validation.rs +++ b/src/scanner/validation.rs @@ -28,6 +28,7 @@ use crate::{ collect_variables_and_dependencies, utils, validate_single_match, CachedResponse, }, validation_body, + validation_rate_limit::ValidationRateLimiter, }; #[derive(Clone, Default)] @@ -113,6 +114,7 @@ pub async fn run_secret_validation( num_jobs: usize, range: Option>, access_map: Option, + rate_limiter: Option>, validation_timeout: Duration, validation_retries: u32, ) -> Result<()> { @@ -213,6 +215,7 @@ pub async fn run_secret_validation( // *** FIX: Clone the progress bar for each concurrent task *** let pb = pb.clone(); let access_map = access_map.clone(); + let rate_limiter = rate_limiter.clone(); async move { // VALIDATION DEDUP: Use get(0) for the primary secret value. 
@@ -250,6 +253,7 @@ pub async fn run_secret_validation( &fail, &cache_glob, access_map.as_ref(), + rate_limiter.as_deref(), validation_timeout, validation_retries, ) @@ -325,6 +329,7 @@ pub async fn run_secret_validation( let fail = fail_count.clone(); let cache_glob = cache.clone(); let access_map = access_map.clone(); + let rate_limiter = rate_limiter.clone(); let validation_timeout = validation_timeout; let validation_retries = validation_retries; @@ -361,6 +366,7 @@ pub async fn run_secret_validation( let fail = fail.clone(); let cache_glob = cache_glob.clone(); let access_map = access_map.clone(); + let rate_limiter = rate_limiter.clone(); async move { validate_single( &mut rep, @@ -374,6 +380,7 @@ pub async fn run_secret_validation( &fail, &cache_glob, access_map.as_ref(), + rate_limiter.as_deref(), validation_timeout, validation_retries, ) @@ -461,6 +468,7 @@ async fn validate_single( fail_count: &AtomicUsize, cache2: &Arc>, access_map: Option<&AccessMapCollector>, + rate_limiter: Option<&ValidationRateLimiter>, validation_timeout: Duration, validation_retries: u32, ) { @@ -523,6 +531,7 @@ async fn validate_single( cache2, validation_timeout, validation_retries, + rate_limiter, ) .await }) diff --git a/src/validation.rs b/src/validation.rs index 94324de..2c8e1c5 100644 --- a/src/validation.rs +++ b/src/validation.rs @@ -26,6 +26,7 @@ use crate::{ }; use crate::grpc_validation; +use crate::validation_rate_limit::should_rate_limit_validation; // Re-export TlsMode from kingfisher_rules for use in client_for_rule pub use kingfisher_rules::TlsMode as RuleTlsMode; @@ -331,6 +332,7 @@ pub async fn validate_single_match( cache: &Cache, validation_timeout: Duration, validation_retries: u32, + rate_limiter: Option<&crate::validation_rate_limit::ValidationRateLimiter>, ) { let timeout_result = time::timeout(validation_timeout, async { timed_validate_single_match( @@ -342,6 +344,7 @@ pub async fn validate_single_match( cache, validation_timeout, validation_retries, + 
rate_limiter, ) .await }) @@ -369,6 +372,7 @@ async fn timed_validate_single_match<'a>( cache: &Cache, validation_timeout: Duration, validation_retries: u32, + rate_limiter: Option<&crate::validation_rate_limit::ValidationRateLimiter>, ) { // Select the appropriate HTTP client based on rule's TLS mode preference let rule_tls_mode = m.rule.tls_mode(); @@ -477,6 +481,12 @@ async fn timed_validate_single_match<'a>( let rule_syntax = m.rule.syntax(); + if let (Some(limiter), Some(validation)) = (rate_limiter, rule_syntax.validation.as_ref()) { + if should_rate_limit_validation(validation) { + limiter.wait_for_rule(m.rule.id()).await; + } + } + // ────────────────────────────────────────────────────────── // 4. validator switch // ────────────────────────────────────────────────────────── diff --git a/src/validation_rate_limit.rs b/src/validation_rate_limit.rs new file mode 100644 index 0000000..9912f1e --- /dev/null +++ b/src/validation_rate_limit.rs @@ -0,0 +1,187 @@ +use std::{sync::Arc, time::Duration}; + +use anyhow::{bail, Result}; +use dashmap::DashMap; +use tokio::{ + sync::Mutex, + time::{sleep_until, Instant}, +}; + +use crate::rules::rule::Validation; + +const DEFAULT_BUCKET: &str = "__default__"; + +#[derive(Clone, Debug)] +pub struct ValidationRateLimiter { + default_rps: Option<f64>, + per_rule: Vec<(String, f64)>, + next_allowed: Arc<DashMap<String, Arc<Mutex<Instant>>>>, +} + +impl ValidationRateLimiter { + pub fn from_cli(default_rps: Option<f64>, per_rule: &[String]) -> Result<Option<Self>> { + let default_rps = default_rps.map(validate_rps).transpose()?; + let mut normalized = Vec::with_capacity(per_rule.len()); + for item in per_rule { + let (selector, rps) = parse_rule_rps_mapping(item)?; + normalized.push((selector, rps)); + } + + if default_rps.is_none() && normalized.is_empty() { + return Ok(None); + } + + Ok(Some(Self { + default_rps, + per_rule: normalized, + next_allowed: Arc::new(DashMap::new()), + })) + } + + pub fn effective_rps(&self, rule_id: &str) -> Option<f64> { + 
self.effective_limit(rule_id).map(|(_, rps)| rps) + } + + pub async fn wait_for_rule(&self, rule_id: &str) { + let Some((bucket, rps)) = self.effective_limit(rule_id) else { + return; + }; + + let interval = Duration::from_secs_f64(1.0 / rps); + let gate = self + .next_allowed + .entry(bucket) + .or_insert_with(|| Arc::new(Mutex::new(Instant::now()))) + .clone(); + + let mut next_slot = gate.lock().await; + let now = Instant::now(); + if *next_slot > now { + sleep_until(*next_slot).await; + } + + *next_slot = Instant::now() + interval; + } + + fn effective_limit(&self, rule_id: &str) -> Option<(String, f64)> { + let mut best: Option<(&str, f64)> = None; + for (selector, rps) in &self.per_rule { + if selector_matches(rule_id, selector) + && best.as_ref().is_none_or(|(current, _)| selector.len() > current.len()) + { + best = Some((selector.as_str(), *rps)); + } + } + + if let Some((selector, rps)) = best { + return Some((selector.to_string(), rps)); + } + + self.default_rps.map(|rps| (DEFAULT_BUCKET.to_string(), rps)) + } +} + +pub fn parse_rule_rps_mapping(input: &str) -> Result<(String, f64)> { + let (raw_selector, raw_rps) = input + .split_once('=') + .ok_or_else(|| anyhow::anyhow!("Invalid value '{input}'. 
Expected RULE=RPS"))?; + let selector = normalize_rule_selector(raw_selector)?; + let rps = validate_rps(raw_rps.parse::<f64>().map_err(|_| { + anyhow::anyhow!("Invalid RPS value '{raw_rps}' for selector '{raw_selector}'") + })?)?; + Ok((selector, rps)) +} + +pub fn normalize_rule_selector(input: &str) -> Result<String> { + let selector = input.trim(); + if selector.is_empty() { + bail!("Rule selector cannot be empty"); + } + + if selector.starts_with("kingfisher.") { + return Ok(selector.to_string()); + } + + if selector == "kingfisher" { + return Ok("kingfisher".to_string()); + } + + Ok(format!("kingfisher.{selector}")) +} + +fn validate_rps(value: f64) -> Result<f64> { + if !value.is_finite() || value <= 0.0 { + bail!("RPS must be a positive number"); + } + Ok(value) +} + +fn selector_matches(rule_id: &str, selector: &str) -> bool { + rule_id == selector + || rule_id + .strip_prefix(selector) + .is_some_and(|suffix| suffix.starts_with('.')) +} + +pub fn should_rate_limit_validation(validation: &Validation) -> bool { + !matches!(validation, Validation::Raw(_)) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn normalize_rule_selector_allows_short_names() { + assert_eq!(normalize_rule_selector("github").unwrap(), "kingfisher.github"); + assert_eq!(normalize_rule_selector(" kingfisher.github ").unwrap(), "kingfisher.github"); + } + + #[test] + fn parse_rule_rps_mapping_parses_rule_and_rate() { + let (selector, rps) = parse_rule_rps_mapping("github=2.5").unwrap(); + assert_eq!(selector, "kingfisher.github"); + assert_eq!(rps, 2.5); + } + + #[test] + fn effective_rps_uses_longest_prefix_match() { + let limiter = ValidationRateLimiter::from_cli( + Some(10.0), + &["github=2".to_string(), "kingfisher.github.1=1".to_string()], + ) + .unwrap() + .unwrap(); + + assert_eq!(limiter.effective_rps("kingfisher.github.1"), Some(1.0)); + assert_eq!(limiter.effective_rps("kingfisher.github.9"), Some(2.0)); + assert_eq!(limiter.effective_rps("kingfisher.gitlab.1"), Some(10.0)); + } + + 
#[tokio::test] + async fn wait_for_rule_spaces_requests_for_same_bucket() { + let limiter = ValidationRateLimiter::from_cli(Some(50.0), &[]).unwrap().unwrap(); + + limiter.wait_for_rule("kingfisher.github.1").await; + + let start = std::time::Instant::now(); + limiter.wait_for_rule("kingfisher.github.2").await; + + // Allow timing jitter from runtime scheduling while still asserting spacing happened. + assert!(start.elapsed() >= Duration::from_millis(15)); + } + + #[test] + fn should_rate_limit_non_http_validators() { + assert!(should_rate_limit_validation(&Validation::AWS)); + assert!(should_rate_limit_validation(&Validation::GCP)); + assert!(should_rate_limit_validation(&Validation::MongoDB)); + assert!(should_rate_limit_validation(&Validation::Postgres)); + assert!(should_rate_limit_validation(&Validation::Coinbase)); + } + + #[test] + fn should_skip_rate_limit_for_raw_validation() { + assert!(!should_rate_limit_validation(&Validation::Raw("custom".to_string()))); + } +} diff --git a/tests/int_allowlist.rs b/tests/int_allowlist.rs index 76b2239..cddaac9 100644 --- a/tests/int_allowlist.rs +++ b/tests/int_allowlist.rs @@ -158,6 +158,8 @@ fn run_skiplist(skip_regex: Vec, skip_skipword: Vec) -> Result Result<()> { no_inline_ignore: false, no_ignore_if_contains: false, validation_retries: 1, + validation_rps: None, + validation_rps_rule: Vec::new(), validation_timeout: 10, full_validation_response: false, }; diff --git a/tests/int_dedup.rs b/tests/int_dedup.rs index 1b2d1c4..b2ac6a5 100644 --- a/tests/int_dedup.rs +++ b/tests/int_dedup.rs @@ -178,6 +178,8 @@ rules: no_inline_ignore: false, no_ignore_if_contains: false, validation_retries: 1, + validation_rps: None, + validation_rps_rule: Vec::new(), validation_timeout: 10, full_validation_response: false, }; diff --git a/tests/int_github.rs b/tests/int_github.rs index 7aeb386..e388699 100644 --- a/tests/int_github.rs +++ b/tests/int_github.rs @@ -165,6 +165,8 @@ fn test_github_remote_scan() -> Result<()> { 
no_inline_ignore: false, no_ignore_if_contains: false, validation_retries: 1, + validation_rps: None, + validation_rps_rule: Vec::new(), validation_timeout: 10, full_validation_response: false, }; diff --git a/tests/int_gitlab.rs b/tests/int_gitlab.rs index 0cbf2cd..4603a30 100644 --- a/tests/int_gitlab.rs +++ b/tests/int_gitlab.rs @@ -163,6 +163,8 @@ fn test_gitlab_remote_scan() -> Result<()> { no_inline_ignore: false, no_ignore_if_contains: false, validation_retries: 1, + validation_rps: None, + validation_rps_rule: Vec::new(), validation_timeout: 10, full_validation_response: false, }; @@ -328,6 +330,8 @@ fn test_gitlab_remote_scan_no_history() -> Result<()> { no_ignore_if_contains: false, view_report: false, validation_retries: 1, + validation_rps: None, + validation_rps_rule: Vec::new(), validation_timeout: 10, full_validation_response: false, }; diff --git a/tests/int_redact.rs b/tests/int_redact.rs index 87c3879..70114aa 100644 --- a/tests/int_redact.rs +++ b/tests/int_redact.rs @@ -141,6 +141,8 @@ async fn test_redact_hashes_finding_values() -> Result<()> { no_inline_ignore: false, no_ignore_if_contains: false, validation_retries: 1, + validation_rps: None, + validation_rps_rule: Vec::new(), validation_timeout: 10, full_validation_response: false, }; diff --git a/tests/int_slack.rs b/tests/int_slack.rs index 2f9afd3..b3660af 100644 --- a/tests/int_slack.rs +++ b/tests/int_slack.rs @@ -146,6 +146,8 @@ impl TestContext { no_inline_ignore: false, no_ignore_if_contains: false, validation_retries: 1, + validation_rps: None, + validation_rps_rule: Vec::new(), validation_timeout: 10, full_validation_response: false, }; @@ -297,6 +299,8 @@ async fn test_scan_slack_messages() -> Result<()> { no_ignore_if_contains: false, view_report: false, validation_retries: 1, + validation_rps: None, + validation_rps_rule: Vec::new(), validation_timeout: 10, full_validation_response: false, }; diff --git a/tests/int_validation_cache.rs b/tests/int_validation_cache.rs index 
64e1cb5..7ba7909 100644 --- a/tests/int_validation_cache.rs +++ b/tests/int_validation_cache.rs @@ -221,6 +221,8 @@ async fn test_validation_cache_and_depvars() -> Result<()> { no_inline_ignore: false, no_ignore_if_contains: false, validation_retries: 1, + validation_rps: None, + validation_rps_rule: Vec::new(), validation_timeout: 10, full_validation_response: false, }; diff --git a/tests/int_vulnerable_files.rs b/tests/int_vulnerable_files.rs index 76e984f..8f058bf 100644 --- a/tests/int_vulnerable_files.rs +++ b/tests/int_vulnerable_files.rs @@ -164,6 +164,8 @@ impl TestContext { no_inline_ignore: false, no_ignore_if_contains: false, validation_retries: 1, + validation_rps: None, + validation_rps_rule: Vec::new(), validation_timeout: 10, full_validation_response: false, }; @@ -302,6 +304,8 @@ impl TestContext { no_inline_ignore: false, no_ignore_if_contains: false, validation_retries: 1, + validation_rps: None, + validation_rps_rule: Vec::new(), validation_timeout: 10, full_validation_response: false, };