diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml new file mode 100644 index 0000000..add9557 --- /dev/null +++ b/.github/workflows/pypi.yml @@ -0,0 +1,126 @@ +name: pypi-wheels + +on: + release: + types: [published] + workflow_dispatch: + inputs: + tag: + description: "Release tag to package (e.g., v1.2.3). Leave blank to use Cargo.toml." + required: false + type: string + +jobs: + build-wheels: + name: Build PyPI wheels + runs-on: ubuntu-latest + permissions: + contents: read + id-token: write + steps: + - uses: actions/checkout@v4 + + - name: Determine version/tag + id: version + shell: bash + run: | + set -euo pipefail + if [[ "${GITHUB_EVENT_NAME}" == "release" ]]; then + TAG="${{ github.event.release.tag_name }}" + elif [[ "${GITHUB_EVENT_NAME}" == "workflow_dispatch" && -n "${{ github.event.inputs.tag }}" ]]; then + TAG="${{ github.event.inputs.tag }}" + else + VERSION=$(grep -m1 '^version\s*=' Cargo.toml | cut -d '"' -f2) + TAG="v${VERSION}" + fi + VERSION="${TAG#v}" + echo "tag=${TAG}" >> "$GITHUB_OUTPUT" + echo "version=${VERSION}" >> "$GITHUB_OUTPUT" + + - name: Download release assets + env: + GH_TOKEN: ${{ github.token }} + run: | + mkdir -p release-assets + gh release download "${{ steps.version.outputs.tag }}" \ + -p "kingfisher-*.tgz" \ + -p "kingfisher-*.zip" \ + -D release-assets + + - name: Extract binaries + shell: bash + run: | + set -euo pipefail + mkdir -p extracted + for archive in release-assets/*; do + name=$(basename "$archive") + dir="extracted/${name%.*}" + mkdir -p "$dir" + case "$archive" in + *.tgz) + tar -xzf "$archive" -C "$dir" + ;; + *.zip) + unzip -q "$archive" -d "$dir" + ;; + *) + echo "Unknown archive: $archive" >&2 + exit 1 + ;; + esac + done + + mkdir -p extracted/bin + for bin in $(find extracted -type f \( -name "kingfisher" -o -name "kingfisher.exe" \)); do + chmod +x "$bin" || true + done + + - name: Install build tooling + run: python -m pip install --upgrade build + + - name: Build wheels + shell: 
bash + run: | + set -euo pipefail + version="${{ steps.version.outputs.version }}" + + linux_x64=$(find extracted -type f -name "kingfisher" | rg -m1 "linux-x64" -) + linux_arm64=$(find extracted -type f -name "kingfisher" | rg -m1 "linux-arm64" -) + mac_x64=$(find extracted -type f -name "kingfisher" | rg -m1 "darwin-x64" -) + mac_arm64=$(find extracted -type f -name "kingfisher" | rg -m1 "darwin-arm64" -) + win_x64=$(find extracted -type f -name "kingfisher.exe" | rg -m1 "windows-x64" -) + + scripts/build-pypi-wheel.sh \ + --binary "$linux_x64" \ + --version "$version" \ + --plat-name musllinux_1_2_x86_64 + + scripts/build-pypi-wheel.sh \ + --binary "$linux_arm64" \ + --version "$version" \ + --plat-name musllinux_1_2_aarch64 + + scripts/build-pypi-wheel.sh \ + --binary "$mac_x64" \ + --version "$version" \ + --plat-name macosx_10_9_x86_64 + + scripts/build-pypi-wheel.sh \ + --binary "$mac_arm64" \ + --version "$version" \ + --plat-name macosx_11_0_arm64 + + scripts/build-pypi-wheel.sh \ + --binary "$win_x64" \ + --version "$version" \ + --plat-name win_amd64 + + scripts/build-pypi-wheel.sh \ + --binary "$win_x64" \ + --version "$version" \ + --plat-name win_arm64 + + - name: Publish to PyPI (Trusted Publishing) + uses: pypa/gh-action-pypi-publish@release/v1 + with: + packages-dir: dist-pypi diff --git a/.gitignore b/.gitignore index ec3fdbb..f975797 100644 --- a/.gitignore +++ b/.gitignore @@ -212,4 +212,7 @@ fabric.properties rust-project.json .dockerignore -# End of https://www.toptal.com/developers/gitignore/api/intellij,rust-analyzer \ No newline at end of file +# End of https://www.toptal.com/developers/gitignore/api/intellij,rust-analyzer +# build artifacts +/dist-pypi/ +*.whl diff --git a/CHANGELOG.md b/CHANGELOG.md index b862682..55b67a7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,13 @@ All notable changes to this project will be documented in this file. 
+## [v1.79.0] +- Added revocation support for SendGrid, Tailscale, MongoDB Atlas, Twilio, and NPM using a multi-step (look up the ID, then delete) pattern. +- Added a new Sumo Logic rule with direct revocation support. +- Added `docs/TOKEN_REVOCATION_SUPPORT.md` with a detailed revocation implementation guide and testing examples. +- Fixed AWS access key validation to support temporary/session keys (ASIA prefix) in addition to long-lived keys (AKIA prefix). +- Consolidated all validator implementations into the `kingfisher-scanner` crate to eliminate code duplication. Validators for AWS, Azure, Coinbase, GCP, JWT, JDBC, MongoDB, MySQL, Postgres, and HTTP are now maintained in a single location with proper feature gating. + ## [v1.78.0] - Added "Skipped Validations" counter to scan summary output to distinguish between validations that failed (HTTP errors, connection failures) and validations that were skipped due to missing preconditions (e.g., missing dependent rules). This provides better visibility into validation coverage for large scans. - Improved error messages for `kingfisher validate` command when rules require dependent variables from `depends_on` sections. Now clearly explains which variables are needed and from which dependent rules they are normally captured. 
diff --git a/Cargo.toml b/Cargo.toml index a7e79f6..133cf6e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,7 +19,7 @@ publish = false [package] name = "kingfisher" -version = "1.78.0" +version = "1.79.0" description = "MongoDB's blazingly fast and accurate secret scanning and validation tool" edition.workspace = true rust-version.workspace = true @@ -52,7 +52,7 @@ assets = [ # Library crates kingfisher-core = { path = "crates/kingfisher-core" } kingfisher-rules = { path = "crates/kingfisher-rules" } -kingfisher-scanner = { path = "crates/kingfisher-scanner" } +kingfisher-scanner = { path = "crates/kingfisher-scanner", features = ["validation-all"] } clap = { version = "4.5", features = [ "cargo", diff --git a/crates/kingfisher-rules/data/rules/mongodb.yml b/crates/kingfisher-rules/data/rules/mongodb.yml index cf775b1..fb10f65 100644 --- a/crates/kingfisher-rules/data/rules/mongodb.yml +++ b/crates/kingfisher-rules/data/rules/mongodb.yml @@ -51,6 +51,42 @@ rules: depends_on_rule: - rule_id: "kingfisher.mongodb.2" variable: PUBKEY + + revocation: + type: HttpMultiStep + content: + steps: + # Step 1: Get the organization ID + - name: lookup_org_id + request: + method: GET + url: https://cloud.mongodb.com/api/atlas/v2/orgs + headers: + Accept: application/vnd.atlas.2023-02-01+json + Content-Type: application/json + digest: "{{ PUBKEY | append: ':' | append: TOKEN }}" + response_matcher: + - type: StatusMatch + status: [200] + - type: JsonValid + extract: + # Extract the first organization ID + ORG_ID: + type: JsonPath + path: "$.results[0].id" + + # Step 2: Delete the API key using the organization ID and public key + - name: delete_api_key + request: + method: DELETE + url: https://cloud.mongodb.com/api/atlas/v2/orgs/{{ ORG_ID }}/apiKeys/{{ PUBKEY }} + headers: + Accept: application/vnd.atlas.2023-02-01+json + digest: "{{ PUBKEY | append: ':' | append: TOKEN }}" + response_matcher: + - report_response: true + - type: StatusMatch + status: [204] - name: MongoDB API 
PUBLIC Key id: kingfisher.mongodb.2 diff --git a/crates/kingfisher-rules/data/rules/npm.yml b/crates/kingfisher-rules/data/rules/npm.yml index 6132d80..a626205 100644 --- a/crates/kingfisher-rules/data/rules/npm.yml +++ b/crates/kingfisher-rules/data/rules/npm.yml @@ -37,7 +37,41 @@ rules: status: [200] - type: WordMatch words: ['"name":'] - url: https://registry.npmjs.org/-/npm/v1/user + url: https://registry.npmjs.org/-/npm/v1/user + + revocation: + type: HttpMultiStep + content: + steps: + # Step 1: List all tokens to find the current token's key ID + - name: lookup_token_id + request: + method: GET + url: https://registry.npmjs.org/-/npm/v1/tokens + headers: + Authorization: "Bearer {{ TOKEN }}" + Accept: application/json + response_matcher: + - type: StatusMatch + status: [200] + - type: JsonValid + extract: + # Extract the first token's key + TOKEN_KEY: + type: JsonPath + path: "$.objects[0].token.key" + + # Step 2: Revoke the token using its key + - name: revoke_token + request: + method: DELETE + url: https://registry.npmjs.org/-/npm/v1/tokens/token/{{ TOKEN_KEY }} + headers: + Authorization: "Bearer {{ TOKEN }}" + response_matcher: + - report_response: true + - type: StatusMatch + status: [200, 204] - name: NPM Access Token (old format) id: kingfisher.npm.2 @@ -69,4 +103,38 @@ rules: status: [200] - type: WordMatch words: ['"name":'] - url: https://registry.npmjs.org/-/npm/v1/user \ No newline at end of file + url: https://registry.npmjs.org/-/npm/v1/user + + revocation: + type: HttpMultiStep + content: + steps: + # Step 1: List all tokens to find the current token's key ID + - name: lookup_token_id + request: + method: GET + url: https://registry.npmjs.org/-/npm/v1/tokens + headers: + Authorization: "Bearer {{ TOKEN }}" + Accept: application/json + response_matcher: + - type: StatusMatch + status: [200] + - type: JsonValid + extract: + # Extract the first token's key + TOKEN_KEY: + type: JsonPath + path: "$.objects[0].token.key" + + # Step 2: Revoke the 
token using its key + - name: revoke_token + request: + method: DELETE + url: https://registry.npmjs.org/-/npm/v1/tokens/token/{{ TOKEN_KEY }} + headers: + Authorization: "Bearer {{ TOKEN }}" + response_matcher: + - report_response: true + - type: StatusMatch + status: [200, 204] \ No newline at end of file diff --git a/crates/kingfisher-rules/data/rules/sendgrid.yml b/crates/kingfisher-rules/data/rules/sendgrid.yml index 122c222..4383934 100644 --- a/crates/kingfisher-rules/data/rules/sendgrid.yml +++ b/crates/kingfisher-rules/data/rules/sendgrid.yml @@ -37,4 +37,40 @@ rules: status: [200] - type: WordMatch match_all_words: true - words: ['"reputation"', '"type"'] \ No newline at end of file + words: ['"reputation"', '"type"'] + + revocation: + type: HttpMultiStep + content: + steps: + # Step 1: List all API keys to find the current key's ID + - name: lookup_api_key_id + request: + method: GET + url: https://api.sendgrid.com/v3/api_keys + headers: + Authorization: "Bearer {{ TOKEN }}" + Accept: application/json + response_matcher: + - type: StatusMatch + status: [200] + - type: JsonValid + extract: + # Extract the first API key ID from the list + # Note: SendGrid only shows partial keys, so we extract the first one + # which should be the current token if there's only one active key + API_KEY_ID: + type: JsonPath + path: "$.result[0].api_key_id" + + # Step 2: Delete the API key using its ID + - name: delete_api_key + request: + method: DELETE + url: https://api.sendgrid.com/v3/api_keys/{{ API_KEY_ID }} + headers: + Authorization: "Bearer {{ TOKEN }}" + response_matcher: + - report_response: true + - type: StatusMatch + status: [204] \ No newline at end of file diff --git a/crates/kingfisher-rules/data/rules/sourcegraph.yml b/crates/kingfisher-rules/data/rules/sourcegraph.yml index a8838de..431e7c6 100644 --- a/crates/kingfisher-rules/data/rules/sourcegraph.yml +++ b/crates/kingfisher-rules/data/rules/sourcegraph.yml @@ -2,10 +2,10 @@ rules: - name: Sourcegraph 
Access Token id: kingfisher.sourcegraph.1 pattern: | - (?xi) + (?x) \b ( - sgp_(?:[A-F0-9]{16}|local)_[A-F0-9]{40}|sgp_[A-F0-9]{40} + sgp_(?:[a-fA-F0-9]{16}|local)_[a-fA-F0-9]{40}|sgp_[a-fA-F0-9]{40} ) \b pattern_requirements: diff --git a/crates/kingfisher-rules/data/rules/sumologic.yml b/crates/kingfisher-rules/data/rules/sumologic.yml new file mode 100644 index 0000000..6942a6a --- /dev/null +++ b/crates/kingfisher-rules/data/rules/sumologic.yml @@ -0,0 +1,78 @@ +rules: + - name: Sumo Logic Access ID + id: kingfisher.sumologic.1 + pattern: | + (?xi) + \b + sumo + (?:.|[\n\r]){0,32}? + (?:access|id) + (?:.|[\n\r]){0,16}? + \b + ( + su[A-Za-z0-9]{10,14} + ) + \b + pattern_requirements: + min_digits: 2 + min_entropy: 3.5 + confidence: medium + visible: false + examples: + - 'config.sumologic.access.id = "suK9mP2nQ7rT4wX8"' + + - name: Sumo Logic Access Key + id: kingfisher.sumologic.2 + pattern: | + (?xi) + \b + sumo + (?:.|[\n\r]){0,32}? + (?:SECRET|PRIVATE|ACCESS|KEY|TOKEN) + (?:.|[\n\r]){0,32}? 
+ \b + ( + [A-Za-z0-9]{62,64} + ) + \b + pattern_requirements: + min_digits: 2 + min_uppercase: 2 + min_lowercase: 2 + min_entropy: 3.5 + confidence: medium + examples: + - '// SumoLogic Private Token: M7nP4qR2tV9wX5yZ8aB1cD3eF5gH7iJ9kL2mN4oP6qR8sT0uV2wX4yZ6aB8cD0eF' + references: + - https://help.sumologic.com/docs/manage/security/access-keys/ + + validation: + type: Http + content: + request: + method: GET + url: https://api.sumologic.com/api/v1/accessKeys + headers: + Accept: application/json + Authorization: "Basic {{ ACCESS_ID | append: ':' | append: TOKEN | b64enc }}" + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] + - type: JsonValid + depends_on_rule: + - rule_id: "kingfisher.sumologic.1" + variable: ACCESS_ID + + revocation: + type: Http + content: + request: + method: DELETE + url: https://api.sumologic.com/api/v1/accessKeys/{{ ACCESS_ID }} + headers: + Authorization: "Basic {{ ACCESS_ID | append: ':' | append: TOKEN | b64enc }}" + response_matcher: + - report_response: true + - type: StatusMatch + status: [204] diff --git a/crates/kingfisher-rules/data/rules/tailscale.yml b/crates/kingfisher-rules/data/rules/tailscale.yml index 08fa9eb..8eb8997 100644 --- a/crates/kingfisher-rules/data/rules/tailscale.yml +++ b/crates/kingfisher-rules/data/rules/tailscale.yml @@ -30,3 +30,37 @@ rules: - report_response: true - type: StatusMatch status: [200] + + revocation: + type: HttpMultiStep + content: + steps: + # Step 1: List all keys to find the current key's ID + - name: lookup_key_id + request: + method: GET + url: https://api.tailscale.com/api/v2/tailnet/-/keys + headers: + Authorization: "Bearer {{ TOKEN }}" + Accept: application/json + response_matcher: + - type: StatusMatch + status: [200] + - type: JsonValid + extract: + # Extract the first key ID from the list + KEY_ID: + type: JsonPath + path: "$.keys[0].id" + + # Step 2: Delete the key using its ID + - name: delete_key + request: + method: DELETE + url: 
https://api.tailscale.com/api/v2/tailnet/-/keys/{{ KEY_ID }} + headers: + Authorization: "Bearer {{ TOKEN }}" + response_matcher: + - report_response: true + - type: StatusMatch + status: [200] diff --git a/crates/kingfisher-rules/data/rules/twilio.yml b/crates/kingfisher-rules/data/rules/twilio.yml index 14c5b0e..b626499 100644 --- a/crates/kingfisher-rules/data/rules/twilio.yml +++ b/crates/kingfisher-rules/data/rules/twilio.yml @@ -60,4 +60,40 @@ rules: url: https://api.twilio.com/2010-04-01/Accounts.json depends_on_rule: - rule_id: "kingfisher.twilio.1" - variable: TWILIOID \ No newline at end of file + variable: TWILIOID + + revocation: + type: HttpMultiStep + content: + steps: + # Step 1: Get the Account SID (needed if TWILIOID is an API Key starting with SK) + - name: lookup_account_sid + request: + method: GET + url: https://api.twilio.com/2010-04-01/Accounts.json + headers: + Accept: application/json + Authorization: "Basic {{ TWILIOID | append: ':' | append: TOKEN | b64enc }}" + response_matcher: + - type: StatusMatch + status: [200] + - type: JsonValid + extract: + # Extract the first account SID + ACCOUNT_SID: + type: JsonPath + path: "$.accounts[0].sid" + + # Step 2: Delete the API key using the TWILIOID (should be SK...) and ACCOUNT_SID + # Note: This assumes TWILIOID is an API Key SID (SK...). If it's an Account SID (AC...), + # this will fail, but you typically don't delete account SIDs. 
+ - name: delete_api_key + request: + method: DELETE + url: https://api.twilio.com/2010-04-01/Accounts/{{ ACCOUNT_SID }}/Keys/{{ TWILIOID }}.json + headers: + Authorization: "Basic {{ TWILIOID | append: ':' | append: TOKEN | b64enc }}" + response_matcher: + - report_response: true + - type: StatusMatch + status: [204] \ No newline at end of file diff --git a/crates/kingfisher-rules/src/lib.rs b/crates/kingfisher-rules/src/lib.rs index 9eea7f1..cd6ecb9 100644 --- a/crates/kingfisher-rules/src/lib.rs +++ b/crates/kingfisher-rules/src/lib.rs @@ -16,10 +16,11 @@ pub mod rules_database; // Re-export rule types pub use rule::{ - ChecksumActual, ChecksumRequirement, Confidence, DependsOnRule, HttpRequest, HttpValidation, - MultipartConfig, MultipartPart, PatternRequirementContext, PatternRequirements, - PatternValidationResult, ReportResponseData, ResponseMatcher, Revocation, Rule, RuleSyntax, - TlsMode, Validation, RULE_COMMENTS_PATTERN, + ChecksumActual, ChecksumRequirement, Confidence, DependsOnRule, HttpMultiStepRevocation, + HttpRequest, HttpValidation, MultipartConfig, MultipartPart, PatternRequirementContext, + PatternRequirements, PatternValidationResult, ReportResponseData, ResponseExtractor, + ResponseMatcher, Revocation, RevocationStep, Rule, RuleSyntax, TlsMode, Validation, + RULE_COMMENTS_PATTERN, }; // Re-export Rules collection diff --git a/crates/kingfisher-rules/src/rule.rs b/crates/kingfisher-rules/src/rule.rs index ee8c5fc..1d22899 100644 --- a/crates/kingfisher-rules/src/rule.rs +++ b/crates/kingfisher-rules/src/rule.rs @@ -88,6 +88,61 @@ pub enum Revocation { AWS, GCP, Http(HttpValidation), + /// Multi-step HTTP revocation (up to 2 steps). + /// Some services require looking up an ID before deletion. + HttpMultiStep(HttpMultiStepRevocation), +} + +/// Configuration for multi-step HTTP revocation. +/// +/// This allows up to 2 steps where the first step can extract values +/// (e.g., an ID) from its response, which are then used in the second step. 
+#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash, Clone)] +pub struct HttpMultiStepRevocation { + /// Sequential steps to execute (minimum 1, maximum 2). + pub steps: Vec<RevocationStep>, +} + +/// A single step in a multi-step revocation process. +#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash, Clone)] +pub struct RevocationStep { + /// Human-readable name for this step (e.g., "lookup_id", "delete"). + #[serde(default)] + pub name: Option<String>, + + /// HTTP request configuration for this step. + pub request: HttpRequest, + + /// Optional multipart configuration for this step. + #[serde(default)] + pub multipart: Option<MultipartConfig>, + + /// Variables to extract from the response for use in subsequent steps. + /// Keys are variable names (uppercase), values are extraction patterns. + #[serde(default)] + pub extract: Option<BTreeMap<String, ResponseExtractor>>, +} + +/// Describes how to extract a value from an HTTP response. +#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash, Clone)] +#[serde(tag = "type")] +pub enum ResponseExtractor { + /// Extract from JSON response using a JSONPath-like syntax. + /// Example: "$.data.id" or "$.items[0].token_id" + JsonPath { path: String }, + + /// Extract using a regex pattern with a capture group. + /// The first capture group is used as the extracted value. + Regex { pattern: String }, + + /// Extract an HTTP response header value. + Header { name: String }, + + /// Extract the entire response body as-is. + Body, + + /// Extract the HTTP status code as a string. + StatusCode, } /// Specifies that a rule depends on a variable from another rule. @@ -561,14 +616,7 @@ pub struct RuleSyntax { /// Optional character type requirements for matched secrets. #[serde(default)] pub pattern_requirements: Option<PatternRequirements>, - /// TLS validation mode for this rule's validation requests. 
- /// - /// When set to `Lax`, the rule opts into relaxed TLS validation - /// (accepting self-signed/unknown CA certs) when the user enables - /// `--tls-mode=lax` on the command line. - /// - /// This is useful for rules that validate against endpoints commonly - /// using self-signed certificates, such as database connections. + /// Optional TLS mode for validation connections. #[serde(default)] pub tls_mode: Option<TlsMode>, } @@ -749,11 +797,7 @@ impl Rule { self.syntax.pattern_requirements.as_ref() } - /// Returns the TLS validation mode for this rule, if specified. - /// - /// When a rule returns `Some(TlsMode::Lax)`, it indicates the rule - /// is eligible for relaxed TLS validation when the user enables - /// `--tls-mode=lax` on the command line. + /// Returns the TLS mode for this rule, if specified. pub fn tls_mode(&self) -> Option<TlsMode> { self.syntax.tls_mode } @@ -1053,104 +1097,4 @@ mod tests { assert!(matches!(reqs.validate(b"123", None, true), PatternValidationResult::Passed)); assert!(matches!(reqs.validate(b"!@#", None, true), PatternValidationResult::Passed)); } - - #[test] - fn tls_mode_default_is_strict() { - assert_eq!(TlsMode::default(), TlsMode::Strict); - } - - #[test] - fn tls_mode_serializes_to_lowercase() { - assert_eq!(serde_yaml::to_string(&TlsMode::Strict).unwrap().trim(), "strict"); - assert_eq!(serde_yaml::to_string(&TlsMode::Lax).unwrap().trim(), "lax"); - assert_eq!(serde_yaml::to_string(&TlsMode::Off).unwrap().trim(), "off"); - } - - #[test] - fn tls_mode_deserializes_from_lowercase() { - let strict: TlsMode = serde_yaml::from_str("strict").unwrap(); - assert_eq!(strict, TlsMode::Strict); - - let lax: TlsMode = serde_yaml::from_str("lax").unwrap(); - assert_eq!(lax, TlsMode::Lax); - - let off: TlsMode = serde_yaml::from_str("off").unwrap(); - assert_eq!(off, TlsMode::Off); - } - - #[derive(serde::Deserialize)] - struct TestRules { - rules: Vec<RuleSyntax>, - } - - #[test] - fn rule_syntax_parses_tls_mode_from_yaml() { - let yaml = r#" -rules: - - name: 
Test Rule - id: test.rule.1 - pattern: "test" - tls_mode: lax -"#; - let parsed: TestRules = serde_yaml::from_str(yaml).unwrap(); - assert_eq!(parsed.rules.len(), 1); - assert_eq!(parsed.rules[0].tls_mode, Some(TlsMode::Lax)); - } - - #[test] - fn rule_syntax_tls_mode_defaults_to_none_when_missing() { - let yaml = r#" -rules: - - name: Test Rule - id: test.rule.1 - pattern: "test" -"#; - let parsed: TestRules = serde_yaml::from_str(yaml).unwrap(); - assert_eq!(parsed.rules.len(), 1); - assert_eq!(parsed.rules[0].tls_mode, None); - } - - #[test] - fn rule_tls_mode_method_returns_syntax_value() { - let rule = Rule::new(RuleSyntax { - name: "Test".to_string(), - id: "test.1".to_string(), - pattern: "test".to_string(), - min_entropy: 0.0, - confidence: Confidence::Low, - visible: true, - examples: vec![], - negative_examples: vec![], - references: vec![], - validation: None, - revocation: None, - depends_on_rule: vec![], - pattern_requirements: None, - tls_mode: Some(TlsMode::Lax), - }); - - assert_eq!(rule.tls_mode(), Some(TlsMode::Lax)); - } - - #[test] - fn rule_tls_mode_method_returns_none_when_not_set() { - let rule = Rule::new(RuleSyntax { - name: "Test".to_string(), - id: "test.1".to_string(), - pattern: "test".to_string(), - min_entropy: 0.0, - confidence: Confidence::Low, - visible: true, - examples: vec![], - negative_examples: vec![], - references: vec![], - validation: None, - revocation: None, - depends_on_rule: vec![], - pattern_requirements: None, - tls_mode: None, - }); - - assert_eq!(rule.tls_mode(), None); - } } diff --git a/crates/kingfisher-scanner/Cargo.toml b/crates/kingfisher-scanner/Cargo.toml index 563ab7e..edc9e84 100644 --- a/crates/kingfisher-scanner/Cargo.toml +++ b/crates/kingfisher-scanner/Cargo.toml @@ -31,6 +31,7 @@ validation-aws = [ "validation-http", "dep:aws-config", "dep:aws-credential-types", + "dep:aws-sdk-iam", "dep:aws-sdk-sts", "dep:aws-types", "dep:aws-smithy-http-client", @@ -41,10 +42,67 @@ validation-aws = [ "dep:rand", ] 
+# Azure credential validation +validation-azure = [ + "validation-http", + "dep:chrono", + "dep:hmac", + "dep:sha2", +] + +# Coinbase credential validation +validation-coinbase = [ + "validation-http", + "dep:chrono", + "dep:ed25519-dalek", + "dep:p256", + "dep:rand", + "dep:hex", +] + +# GCP credential validation +validation-gcp = [ + "validation-http", + "dep:chrono", + "dep:pem", + "dep:percent-encoding", + "dep:ring", + "dep:tokio", +] + +# JWT validation +validation-jwt = [ + "validation-http", + "dep:chrono", + "dep:ipnet", + "dep:jsonwebtoken", + "dep:serde", + "dep:tokio", +] + +# Database validation (MongoDB/MySQL/Postgres/JDBC) +validation-database = [ + "validation-http", + "dep:bson", + "dep:mongodb", + "dep:mysql_async", + "dep:tokio-postgres", + "dep:tokio-postgres-rustls", + "dep:rustls", + "dep:rustls-native-certs", + "dep:url", + "dep:sha1", +] + # All validation features validation-all = [ "validation", "validation-aws", + "validation-azure", + "validation-coinbase", + "validation-gcp", + "validation-jwt", + "validation-database", ] [dependencies] @@ -57,7 +115,7 @@ anyhow = "1.0" thiserror = "1.0" # Serialization -serde = { version = "1.0", features = ["derive"] } +serde = { version = "1.0", features = ["derive"], optional = true } serde_json = "1.0" schemars = "0.8" @@ -78,6 +136,7 @@ rustc-hash = "2.1" parking_lot = "0.12" thread_local = "1.1" once_cell = "1.21" +crossbeam-skiplist = "0.1.3" # HTTP status codes http = "1.4" @@ -102,10 +161,30 @@ liquid = { version = "0.26", optional = true } liquid-core = { version = "0.26", optional = true } quick-xml = { version = "0.38", features = ["serde", "serialize"], optional = true } sha1 = { version = "0.10", optional = true } +chrono = { version = "0.4.42", optional = true } +hmac = { version = "0.12", optional = true } +sha2 = { version = "0.10", optional = true } +pem = { version = "3.0.6", optional = true } +percent-encoding = { version = "2.3.2", optional = true } +ring = { version = "0.17", 
optional = true } +ipnet = { version = "2.11", optional = true } +jsonwebtoken = { version = "10.2.0", features = ["aws-lc-rs"], optional = true } +p256 = { version = "0.13.2", optional = true } +ed25519-dalek = { version = "2.2", features = ["pkcs8"], optional = true } +hex = { version = "0.4.3", optional = true } +url = { version = "2.5.7", optional = true } +bson = { version = "2.15.0", optional = true } +mongodb = { version = "3.4", default-features = false, features = ["rustls-tls", "aws-auth", "compat-3-0-0", "dns-resolver"], optional = true } +mysql_async = { version = "0.34.2", default-features = false, features = ["default-rustls"], optional = true } +tokio-postgres = { version = "0.7", default-features = false, features = ["runtime"], optional = true } +tokio-postgres-rustls = { version = "0.13.0", optional = true } +rustls = { version = "0.23.35", optional = true } +rustls-native-certs = { version = "0.8.2", optional = true } # AWS validation aws-config = { version = "1.8", optional = true } aws-credential-types = { version = "1.2", optional = true } +aws-sdk-iam = { version = "1.101.0", optional = true } aws-sdk-sts = { version = "1.95", optional = true } aws-types = { version = "1.3", optional = true } aws-smithy-http-client = { version = "1.1", optional = true } diff --git a/crates/kingfisher-scanner/src/validation/aws.rs b/crates/kingfisher-scanner/src/validation/aws.rs index 928d86a..dec8ac3 100644 --- a/crates/kingfisher-scanner/src/validation/aws.rs +++ b/crates/kingfisher-scanner/src/validation/aws.rs @@ -8,6 +8,10 @@ use std::{collections::HashSet, sync::RwLock, time::Duration}; use anyhow::{anyhow, Result}; use aws_config::{retry::RetryConfig, BehaviorVersion, SdkConfig}; use aws_credential_types::Credentials; +use aws_sdk_iam::{ + config::Builder as IamConfigBuilder, error::SdkError as IamSdkError, + operation::update_access_key::UpdateAccessKeyError, types::StatusType, Client as IamClient, +}; use aws_sdk_sts::{ config::Builder as 
StsConfigBuilder, error::SdkError, operation::get_caller_identity::GetCallerIdentityError, Client as StsClient, @@ -188,18 +192,24 @@ pub fn generate_aws_cache_key(aws_access_key_id: &str, aws_secret_access_key: &s /// Validate AWS credentials format before attempting validation. pub fn validate_aws_credentials_input(access_key_id: &str, secret_key: &str) -> Result<(), String> { - // Validate access key ID format (typically starts with "AKIA" and is 20 chars) - if !access_key_id.starts_with("AKIA") || access_key_id.len() != 20 { + // Validate access key ID format (20 chars, known AWS prefixes including STS) + if access_key_id.len() != 20 { + return Err("Invalid AWS access key ID format".to_string()); + } + if !access_key_id.chars().all(|c| c.is_ascii_alphanumeric()) { + return Err("AWS access key ID contains invalid characters".to_string()); + } + let prefix = &access_key_id[..4]; + let valid_prefix = + matches!(prefix, "AKIA" | "AGPA" | "AIDA" | "AROA" | "AIPA" | "ANPA" | "ANVA" | "ASIA") + || prefix.starts_with("A3T"); + if !valid_prefix { return Err("Invalid AWS access key ID format".to_string()); } // Validate secret key format (should be at least 40 chars) if secret_key.len() < 40 { return Err("Invalid AWS secret key format".to_string()); } - // Check for invalid characters - if !access_key_id.chars().all(|c| c.is_ascii_alphanumeric()) { - return Err("AWS access key ID contains invalid characters".to_string()); - } Ok(()) } @@ -222,6 +232,84 @@ fn is_throttling_or_transient(e: &SdkError<GetCallerIdentityError>) -> bool { } } +fn is_iam_throttling_or_transient(e: &IamSdkError<UpdateAccessKeyError>) -> bool { + match e { + IamSdkError::ServiceError(ctx) => { + let code = ctx.err().meta().code().unwrap_or_default(); + let status: StatusCode = ctx.raw().status().into(); + code.contains("Throttl") + || status == StatusCode::TOO_MANY_REQUESTS + || status == StatusCode::SERVICE_UNAVAILABLE + } + IamSdkError::DispatchFailure(df) => df.is_timeout() || df.is_io(), + IamSdkError::ResponseError(ctx) => { + let 
status: StatusCode = ctx.raw().status().into(); + status == StatusCode::TOO_MANY_REQUESTS || status == StatusCode::SERVICE_UNAVAILABLE + } + _ => false, + } +} + +/// Revoke (deactivate) an AWS access key via IAM. +pub async fn revoke_aws_access_key( + aws_access_key_id: &str, + aws_secret_access_key: &str, +) -> Result<(bool, String)> { + // Create static credentials + let credentials = Credentials::new( + aws_access_key_id, + aws_secret_access_key, + None, // session token + None, // expiry + "static", // provider name + ); + let config = build_base_config(credentials).await; + + // Create IAM client + let iam_config = IamConfigBuilder::from(&config).interceptor(UaInterceptor).build(); + let iam_client = IamClient::from_conf(iam_config); + + const MAX_ATTEMPTS: usize = 3; + const ATTEMPT_TIMEOUT: Duration = Duration::from_secs(5); + + for attempt in 1..=MAX_ATTEMPTS { + let result = timeout( + ATTEMPT_TIMEOUT, + iam_client + .update_access_key() + .access_key_id(aws_access_key_id) + .status(StatusType::Inactive) + .send(), + ) + .await; + + match result { + Ok(Ok(_)) => { + return Ok((true, "AWS access key set to Inactive".to_string())); + } + Ok(Err(e)) => { + if is_iam_throttling_or_transient(&e) { + if attempt == MAX_ATTEMPTS { + return Err(anyhow!("AWS revocation failed: {}", e)); + } + } else { + return Ok((false, e.to_string())); + } + } + Err(_) => { + if attempt == MAX_ATTEMPTS { + return Err(anyhow!("AWS revocation timed out")); + } + } + } + let max_delay = 100u64 * 2u64.pow((attempt - 1) as u32); + let sleep_ms = rng().random_range(0..=max_delay); + sleep(Duration::from_millis(sleep_ms)).await; + } + + Err(anyhow!("AWS revocation failed")) +} + /// Validate AWS credentials by calling STS GetCallerIdentity. /// /// Returns `(is_valid, message)` where message is the ARN on success or an error message. 
diff --git a/src/validation/azure.rs b/crates/kingfisher-scanner/src/validation/azure.rs similarity index 81% rename from src/validation/azure.rs rename to crates/kingfisher-scanner/src/validation/azure.rs index 915ee6d..f330811 100644 --- a/src/validation/azure.rs +++ b/crates/kingfisher-scanner/src/validation/azure.rs @@ -10,9 +10,8 @@ use reqwest::{header::HeaderValue, Client}; use serde_json::Value as JsonValue; use sha2::Sha256; -use crate::{ - validation::{Cache, CachedResponse, ValidationResponseBody, VALIDATION_CACHE_SECONDS}, - validation_body, +use super::{ + validation_body, Cache, CachedResponse, ValidationResponseBody, VALIDATION_CACHE_SECONDS, }; pub fn generate_azure_cache_key(azure_json: &str) -> String { @@ -22,14 +21,13 @@ pub fn generate_azure_cache_key(azure_json: &str) -> String { format!("AZURE:{:x}", h.finalize()) } -/// Validate Azure Storage credentials without Azure SDK crates +/// Validate Azure Storage credentials without Azure SDK crates. pub async fn validate_azure_storage_credentials( azure_json: &str, cache: &Cache, ) -> Result<(bool, ValidationResponseBody)> { let cache_key = generate_azure_cache_key(azure_json); - /* ── short-circuit cached result ───────────────────────────── */ if let Some(e) = cache.get(&cache_key) { let c = e.value(); if c.timestamp.elapsed() < Duration::from_secs(VALIDATION_CACHE_SECONDS) { @@ -37,7 +35,6 @@ pub async fn validate_azure_storage_credentials( } } - /* ── pull account + key from caller JSON ──────────────────── */ let tok: JsonValue = serde_json::from_str(azure_json)?; let storage_account = tok["storage_account"].as_str().unwrap_or(""); let storage_key = tok["storage_key"].as_str().unwrap_or(""); @@ -48,12 +45,10 @@ pub async fn validate_azure_storage_credentials( return Ok((false, msg)); } - /* ── build SignedKey GET /?comp=list ──────────────────────── */ let now_rfc = Utc::now().format("%a, %d %b %Y %H:%M:%S GMT").to_string(); let url = 
format!("https://{account}.blob.core.windows.net/?comp=list", account = storage_account); - // canonical string-to-sign per MSFT docs . let canon_headers = format!("x-ms-date:{now_rfc}\nx-ms-version:2023-11-03\n"); let canon_resource = format!("/{account}/\ncomp:list", account = storage_account); let string_to_sign = format!( @@ -62,7 +57,6 @@ pub async fn validate_azure_storage_credentials( resource = canon_resource ); - // HMAC-SHA256 -- Base64 let key_bytes = b64.decode(storage_key)?; let mut mac = Hmac::::new_from_slice(&key_bytes).map_err(|_| anyhow!("invalid key length"))?; @@ -84,7 +78,6 @@ pub async fn validate_azure_storage_credentials( let client = Client::builder().build()?; let resp = client.get(&url).headers(hdrs).send().await?; - /* ── capture status before `.text()` consumes resp ────────── */ let status = resp.status(); let body_txt = resp.text().await?; @@ -95,7 +88,6 @@ pub async fn validate_azure_storage_credentials( return Err(anyhow!(body)); } - // parse XML payload let mut reader = Reader::from_str(&body_txt); reader.config_mut().trim_text(true); let mut buf = Vec::new(); @@ -114,7 +106,6 @@ pub async fn validate_azure_storage_credentials( buf.clear(); } - /* ── success ─────────────────────────────────────────────── */ let body = format!("Account: {}; Containers: {:?}", storage_account, names); let body_opt = validation_body::from_string(body); cache.insert(cache_key, CachedResponse::new(body_opt.clone(), StatusCode::OK, true)); diff --git a/src/validation/coinbase.rs b/crates/kingfisher-scanner/src/validation/coinbase.rs similarity index 94% rename from src/validation/coinbase.rs rename to crates/kingfisher-scanner/src/validation/coinbase.rs index c3f2873..e4d135c 100644 --- a/src/validation/coinbase.rs +++ b/crates/kingfisher-scanner/src/validation/coinbase.rs @@ -15,11 +15,9 @@ use rand::TryRngCore; use reqwest::{Client, StatusCode, Url}; use sha1::{Digest, Sha1}; -use crate::{ - validation::{ - httpvalidation, Cache, CachedResponse, 
ValidationResponseBody, VALIDATION_CACHE_SECONDS, - }, - validation_body, +use super::http_validation as httpvalidation; +use super::{ + validation_body, Cache, CachedResponse, ValidationResponseBody, VALIDATION_CACHE_SECONDS, }; pub fn generate_coinbase_cache_key(cred_name: &str, private_key: &str) -> String { @@ -89,7 +87,6 @@ fn build_jwt( let _ = rng.try_fill_bytes(&mut nonce); - // Try ECDSA (PEM encoded EC key). Fallback to raw Ed25519 base64 key. if let Ok(secret_key) = SecretKey::from_sec1_pem(&pem).or_else(|_| SecretKey::from_pkcs8_pem(&pem)) { @@ -118,7 +115,6 @@ fn build_jwt( return Ok(format!("{signing_input}.{sig_b64}")); } else { - // Assume base64-encoded Ed25519 keypair let key_bytes = base64::engine::general_purpose::STANDARD .decode(pem.as_bytes()) .map_err(|e| anyhow!("invalid base64 key: {e}"))?; diff --git a/src/validation/gcp.rs b/crates/kingfisher-scanner/src/validation/gcp.rs similarity index 97% rename from src/validation/gcp.rs rename to crates/kingfisher-scanner/src/validation/gcp.rs index 2dc8f56..0cbc5ae 100644 --- a/src/validation/gcp.rs +++ b/crates/kingfisher-scanner/src/validation/gcp.rs @@ -1,6 +1,5 @@ use std::sync::Arc; -use crate::validation::GLOBAL_USER_AGENT; use anyhow::{anyhow, Result}; use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine as _}; use chrono::{Duration as ChronoDuration, Utc}; @@ -13,6 +12,8 @@ use serde_json::Value as JsonValue; use tokio::sync::Semaphore; use tracing::debug; +use super::GLOBAL_USER_AGENT; + static GLOBAL_VALIDATOR: OnceCell = OnceCell::new(); pub struct GcpValidator { @@ -52,7 +53,6 @@ impl GcpValidator { let _permit = self.semaphore.acquire().await?; let token_info: JsonValue = serde_json::from_str(gcp_json)?; - // Extract required fields. 
let project_id = token_info["project_id"].as_str().unwrap_or("").to_string(); let client_email = token_info["client_email"].as_str().unwrap_or("").to_string(); let private_key = token_info["private_key"].as_str().unwrap_or("").to_string(); @@ -185,7 +185,6 @@ impl GcpValidator { let iat = now.timestamp(); let exp = (now + ChronoDuration::hours(1)).timestamp(); - // JWT Header and Claims. let header = URL_SAFE_NO_PAD.encode(r#"{"alg":"RS256","typ":"JWT"}"#); let claims = format!( r#"{{ @@ -200,12 +199,10 @@ impl GcpValidator { let claims_encoded = URL_SAFE_NO_PAD.encode(claims); let message = format!("{}.{}", header, claims_encoded); - // Parse PEM and create RSA key pair. let pem = parse(private_key_pem).map_err(|e| anyhow!("Failed to parse PEM: {}", e))?; let key_pair = signature::RsaKeyPair::from_pkcs8(&pem.contents()) .map_err(|_| anyhow!("Invalid RSA private key"))?; - // Sign the message. let rng = rand::SystemRandom::new(); let mut signature = vec![0; key_pair.public().modulus_len()]; key_pair diff --git a/crates/kingfisher-scanner/src/validation/http_validation.rs b/crates/kingfisher-scanner/src/validation/http_validation.rs index 1ddb959..312f395 100644 --- a/crates/kingfisher-scanner/src/validation/http_validation.rs +++ b/crates/kingfisher-scanner/src/validation/http_validation.rs @@ -1,11 +1,4 @@ -//! HTTP-based credential validation. -//! -//! This module provides utilities for validating credentials via HTTP requests. - -use std::collections::BTreeMap; -use std::future::Future; -use std::str::FromStr; -use std::time::Duration; +use std::{collections::BTreeMap, future::Future, str::FromStr, time::Duration}; use anyhow::{anyhow, Error, Result}; use http::StatusCode; @@ -25,11 +18,6 @@ use super::GLOBAL_USER_AGENT; use kingfisher_rules::ResponseMatcher; /// Build a deterministic cache key from the immutable parts of an HTTP request. 
-/// -/// * `method` – case-insensitive HTTP verb ("GET", "POST"…) -/// * `url` – fully-qualified URL (any query string should already be present) -/// * `headers` – *logical* headers you intend to send (template-rendered) -/// * `body` – optional request body pub fn generate_http_cache_key_parts( method: &str, url: &Url, @@ -45,7 +33,6 @@ pub fn generate_http_cache_key_parts( hasher.update(url.as_bytes()); hasher.update(b"\0"); - // Collect headers sorted lexicographically (BTreeMap is already sorted) for (k, v) in headers { hasher.update(k.as_bytes()); hasher.update(b":"); @@ -53,7 +40,6 @@ pub fn generate_http_cache_key_parts( hasher.update(b"\0"); } - // Include the request body in the cache key if present if let Some(b) = body { hasher.update(b"BODY\0"); hasher.update(b.as_bytes()); @@ -87,7 +73,6 @@ pub fn build_request_builder( let custom_headers = process_headers(headers, parser, globals, url) .map_err(|e| format!("Error processing headers: {}", e))?; - // Prepare a standard set of headers let user_agent = GLOBAL_USER_AGENT.as_str(); let standard_headers = [ (header::USER_AGENT, user_agent), @@ -99,7 +84,6 @@ pub fn build_request_builder( (header::ACCEPT_ENCODING, "gzip, deflate, br"), (header::CONNECTION, "keep-alive"), ]; - let mut combined_headers = HeaderMap::new(); for (name, value) in &standard_headers { if let Ok(hv) = HeaderValue::from_str(value) { @@ -111,7 +95,6 @@ pub fn build_request_builder( } request_builder = request_builder.headers(combined_headers); - // If a body template is provided, parse and render it if let Some(body_template) = body { let template = parser .parse(body_template) @@ -157,7 +140,6 @@ pub fn process_headers( let cleaned_key = key.trim().replace(&['\n', '\r'][..], ""); let cleaned_value = header_value.trim().replace(&['\n', '\r'][..], ""); - let name = match HeaderName::from_str(&cleaned_key) { Ok(n) => n, Err(e) => { @@ -170,7 +152,6 @@ pub fn process_headers( continue; } }; - let value = match 
HeaderValue::from_str(&cleaned_value) { Ok(v) => v, Err(e) => { @@ -188,7 +169,7 @@ pub fn process_headers( Ok(headers_map) } -/// Exponential-backoff retry helper. +/// Exponential‐backoff retry helper that always returns `Result`. async fn retry_with_backoff( mut operation: F, is_retryable: impl Fn(&Result, usize) -> bool, @@ -216,7 +197,6 @@ where Err(anyhow!("Max retries reached")) } -/// Retry a multipart request with exponential backoff. pub async fn retry_multipart_request( mut build_request: F, max_retries: usize, @@ -256,7 +236,6 @@ where .await } -/// Retry an HTTP request with exponential backoff. pub async fn retry_request( request_builder: RequestBuilder, max_retries: u32, @@ -414,42 +393,3 @@ pub async fn check_url_resolvable(url: &Url) -> Result<(), Box String { } /// Validate a JDBC connection string by dispatching to the supported backend validators. -/// -/// # Arguments -/// * `jdbc_conn` - The JDBC connection string to validate -/// * `lax_tls` - If true, accept self-signed or invalid certificates pub async fn validate_jdbc(jdbc_conn: &str, lax_tls: bool) -> Result { let trimmed = jdbc_conn.trim(); if !trimmed.to_ascii_lowercase().starts_with("jdbc:") { @@ -90,14 +86,12 @@ fn normalize_postgres_url(subname: &str) -> Result { return Err(anyhow!("Postgres JDBC connection string is empty")); } - // First try parsing using the standard JDBC layout, otherwise fall back to a canonical URL. let candidate = format!("postgresql:{}", trimmed); let mut url = Url::parse(&candidate).or_else(|_| { let fallback = format!("postgresql://{}", trimmed.trim_start_matches('/')); Url::parse(&fallback) })?; - // Extract credentials from the query string when they are present. 
let mut user = None; let mut password = None; if url.query().is_some() { @@ -129,30 +123,3 @@ fn normalize_postgres_url(subname: &str) -> Result { Ok(url.to_string()) } - -#[cfg(test)] -mod tests { - use super::normalize_postgres_url; - use pretty_assertions::assert_eq; - - #[test] - fn normalizes_postgres_query_credentials() { - let normalized = normalize_postgres_url( - "//db.example.com:5432/app?user=admin&password=s3cr3t&sslmode=require", - ) - .unwrap(); - assert_eq!(normalized, "postgresql://admin:s3cr3t@db.example.com:5432/app?sslmode=require"); - } - - #[test] - fn preserves_existing_credentials() { - let normalized = - normalize_postgres_url("//db.example.com:5432/app?sslmode=prefer").unwrap(); - assert_eq!(normalized, "postgresql://db.example.com:5432/app?sslmode=prefer"); - } - - #[test] - fn rejects_empty_input() { - assert!(normalize_postgres_url("").is_err()); - } -} diff --git a/src/validation/jwt.rs b/crates/kingfisher-scanner/src/validation/jwt.rs similarity index 58% rename from src/validation/jwt.rs rename to crates/kingfisher-scanner/src/validation/jwt.rs index 3661386..4900512 100644 --- a/src/validation/jwt.rs +++ b/crates/kingfisher-scanner/src/validation/jwt.rs @@ -10,11 +10,9 @@ use reqwest::{redirect::Policy, Client, Url}; use serde::Deserialize; use tokio::net::lookup_host; -use super::utils::check_url_resolvable; +use super::http_validation::check_url_resolvable; /// Global redirect-free client with strict TLS validation. -/// Building a `Client` is comparatively expensive; re-using it lets reqwest -/// share its internal connection pool and TLS sessions across JWT validations. static STRICT_CLIENT: Lazy = Lazy::new(|| { Client::builder() .redirect(Policy::none()) @@ -32,7 +30,6 @@ static LAX_CLIENT: Lazy = Lazy::new(|| { .expect("failed to build lax Client") }); -/// Get the appropriate client based on TLS mode. 
fn get_client(lax_tls: bool) -> &'static Client { if lax_tls { &LAX_CLIENT @@ -41,16 +38,10 @@ fn get_client(lax_tls: bool) -> &'static Client { } } -/// RFC 1918 + loopback + link-local nets we refuse to contact -const BLOCKED_NETS: &[&str] = &[ - "10.0.0.0/8", - "172.16.0.0/12", - "192.168.0.0/16", // private - "127.0.0.0/8", - "169.254.0.0/16", // loopback / link-local -]; +/// RFC 1918 + loopback + link-local nets we refuse to contact. +const BLOCKED_NETS: &[&str] = + &["10.0.0.0/8", "172.16.0.0/12", "192.168.0.0/16", "127.0.0.0/8", "169.254.0.0/16"]; -// aud is allowed to be either a string or an array, so let Serde flatten it. #[derive(Debug, Deserialize)] #[serde(untagged)] enum Aud { @@ -66,25 +57,15 @@ struct Claims { aud: Option, } -/// Runtime options for JWT validation policy. #[derive(Clone, Default)] pub struct ValidateOptions { /// If true, accept unsigned tokens (`alg: "none"`) as long as temporal checks pass. - /// Default is **false** (more secure). pub allow_alg_none: bool, - /// If provided and `iss` is absent, use this key to cryptographically verify the token. - /// Useful for non-OIDC flows where you already know the verification key. pub fallback_decoding_key: Option, } -/// Backwards-compatible entry point with secure defaults: -/// - `alg: none` is **rejected** -/// - `iss` is **required** unless `fallback_decoding_key` is supplied (not supplied here) -/// -/// # Arguments -/// * `token` - The JWT token to validate -/// * `lax_tls` - If true, accept self-signed or invalid certificates for JWKS fetching +/// Backwards-compatible entry point with secure defaults. pub async fn validate_jwt(token: &str, lax_tls: bool) -> Result<(bool, String)> { validate_jwt_with( token, @@ -95,19 +76,12 @@ pub async fn validate_jwt(token: &str, lax_tls: bool) -> Result<(bool, String)> } /// Strict validator with policy control. -/// Returns (is_active_credential, explanation). 
-/// -/// # Arguments -/// * `token` - The JWT token to validate -/// * `opts` - Validation options -/// * `lax_tls` - If true, accept self-signed or invalid certificates for JWKS fetching pub async fn validate_jwt_with( token: &str, opts: &ValidateOptions, lax_tls: bool, ) -> Result<(bool, String)> { let client = get_client(lax_tls); - // --- insecure payload decode to read claims -------------------------------- let claims: Claims = { let payload_b64 = token.split('.').nth(1).ok_or_else(|| anyhow!("invalid JWT format"))?; let payload_json = URL_SAFE_NO_PAD @@ -116,7 +90,6 @@ pub async fn validate_jwt_with( serde_json::from_slice(&payload_json).map_err(|e| anyhow!("invalid JSON claims: {e}"))? }; - // temporal checks let now = Utc::now().timestamp(); if let Some(nbf) = claims.nbf { if now < nbf { @@ -129,7 +102,6 @@ pub async fn validate_jwt_with( } } - // parse header enough to read "alg" without jsonwebtoken's enum (which rejects "none") let header_b64 = token.split('.').next().ok_or_else(|| anyhow!("invalid JWT format"))?; let header_json = URL_SAFE_NO_PAD.decode(header_b64).map_err(|e| anyhow!("invalid base64 in header: {e}"))?; @@ -137,10 +109,8 @@ pub async fn validate_jwt_with( serde_json::from_slice(&header_json).map_err(|e| anyhow!("invalid header json: {e}"))?; let alg_str = header_val.get("alg").and_then(|v| v.as_str()).unwrap_or(""); - // --- Policy: reject `alg: none` unless explicitly allowed ------------------ if alg_str.eq_ignore_ascii_case("none") { if opts.allow_alg_none { - // time-valid is enough if explicitly allowed return Ok(( true, format!( @@ -154,11 +124,9 @@ pub async fn validate_jwt_with( } } - // Safe to decode full header now that we know alg != none let header = decode_header(token).map_err(|e| anyhow!("decode header: {e}"))?; let alg = header.alg; - // Proactively skip HMAC-signed JWTs to avoid ambiguous liveness results. 
if matches!(alg, Algorithm::HS256 | Algorithm::HS384 | Algorithm::HS512) { return Ok((false, format!("HMAC-signed JWTs are not validated ({alg:?})"))); } @@ -166,16 +134,12 @@ pub async fn validate_jwt_with( let issuer = claims.iss.clone().unwrap_or_default(); let aud_strings = extract_aud_strings(&claims); - // --- New rule: require `iss` OR use fallback key for crypto verification --- if issuer.trim().is_empty() { - // No issuer — we may still accept if we can cryptographically verify with a fallback key if let Some(decoding_key) = opts.fallback_decoding_key.as_ref() { - // Verify signature (aud checked if present) let mut validation = JwtValidation::new(alg); if !aud_strings.is_empty() { validation.set_audience(&aud_strings); } - // We already did exp/nbf manually. validation.validate_exp = false; validation.validate_nbf = false; @@ -194,13 +158,10 @@ pub async fn validate_jwt_with( } } - // --- With `iss`: OIDC discovery + JWKS verification path ------------------- - // require kid before any network I/O let Some(kid) = header.kid.clone() else { return Ok((false, "no kid in header".into())); }; - // build discovery URL and fetch it (redirects disabled) let config_url = format!("{}/.well-known/openid-configuration", issuer.trim_end_matches('/')); let cfg_resp = client .get(&config_url) @@ -215,19 +176,16 @@ pub async fn validate_jwt_with( let cfg_json: serde_json::Value = cfg_resp.json().await.map_err(|e| anyhow!("invalid discovery JSON: {e}"))?; - // extract jwks_uri let jwks_uri = cfg_json .get("jwks_uri") .and_then(|v| v.as_str()) .ok_or_else(|| anyhow!("jwks_uri missing"))?; - // must be HTTPS let url = Url::parse(jwks_uri).map_err(|e| anyhow!("invalid jwks_uri: {e}"))?; if url.scheme() != "https" { return Ok((false, "jwks_uri must use https".to_string())); } - // host must match issuer host let iss_host = Url::parse(&issuer) .map_err(|e| anyhow!("invalid iss: {e}"))? 
.host_str() @@ -241,17 +199,14 @@ pub async fn validate_jwt_with( )); } - // DNS resolution + private-range block for addr in lookup_host((jwks_host.as_str(), 443)).await? { if is_blocked_ip(addr.ip()) { return Ok((false, "jwks_uri resolves to private or link-local IP".to_string())); } } - // reachability check (existing helper) check_url_resolvable(&url).await.map_err(|e| anyhow!("jwks uri unresolvable: {e}"))?; - // fetch JWKS with redirect-free client let jwks_resp = client.get(url).send().await.map_err(|e| anyhow!("jwks fetch failed: {e}"))?; if !jwks_resp.status().is_success() { return Ok((false, format!("jwks fetch failed: {}", jwks_resp.status()))); @@ -259,14 +214,12 @@ pub async fn validate_jwt_with( let jwk_set: JwkSet = jwks_resp.json().await.map_err(|e| anyhow!("invalid jwks json: {e}"))?; - // select key by kid let jwk = jwk_set .keys .iter() .find(|k| k.common.key_id.as_deref() == Some(&kid)) .ok_or_else(|| anyhow!("kid not found in jwks"))?; - // verify signature let decoding_key = DecodingKey::from_jwk(jwk).map_err(|e| anyhow!("invalid jwk: {e}"))?; let mut validation = JwtValidation::new(header.alg); if !aud_strings.is_empty() { @@ -281,7 +234,6 @@ pub async fn validate_jwt_with( Ok((true, format!("JWT valid (alg: {:?}, iss: {issuer}, aud: {:?})", alg, aud_strings))) } -/// Helper: normalize aud into a flat Vec fn extract_aud_strings(claims: &Claims) -> Vec { match &claims.aud { Some(Aud::Str(s)) => vec![s.clone()], @@ -289,97 +241,7 @@ fn extract_aud_strings(claims: &Claims) -> Vec { None => vec![], } } -/// returns true if IP is in a blocked network + fn is_blocked_ip(ip: std::net::IpAddr) -> bool { BLOCKED_NETS.iter().filter_map(|cidr| cidr.parse::().ok()).any(|net| net.contains(&ip)) } - -#[cfg(test)] -mod tests { - use super::{validate_jwt, validate_jwt_with, ValidateOptions}; - use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine as _}; - use chrono::{Duration as ChronoDuration, Utc}; - use jsonwebtoken::{encode, EncodingKey, 
Header}; - - fn build_unsigned_token(exp_offset: i64) -> String { - let header = URL_SAFE_NO_PAD.encode(r#"{"alg":"none"}"#); - let exp = (Utc::now() + ChronoDuration::seconds(exp_offset)).timestamp(); - let payload = URL_SAFE_NO_PAD.encode(format!( - r#"{{ - "exp": {exp}, - "iss": "https://example.com", - "aud": ["test-audience"] - }}"# - )); - format!("{header}.{payload}.") - } - - #[tokio::test] - async fn hmac_signed_tokens_skipped() { - let mut header = Header::new(jsonwebtoken::Algorithm::HS256); - header.kid = Some("dummy".into()); - - let payload = serde_json::json!({ - "iss": "https://example.com", - "exp": (Utc::now() + ChronoDuration::minutes(5)).timestamp(), - }); - - let token = encode(&header, &payload, &EncodingKey::from_secret(b"secret")).unwrap(); - let res = validate_jwt(&token, false).await.unwrap(); - assert!(!res.0); - assert!(res.1.contains("HMAC-signed JWTs are not validated")); - } - - #[tokio::test] - async fn missing_kid_short_circuits_before_network() { - let header = URL_SAFE_NO_PAD.encode(r#"{"alg":"RS256"}"#); - let payload = URL_SAFE_NO_PAD.encode(format!( - r#"{{ - "exp": {}, - "iss": "https://example.com" - }}"#, - (Utc::now() + ChronoDuration::minutes(5)).timestamp() - )); - let signature = URL_SAFE_NO_PAD.encode("sig"); - let token = format!("{header}.{payload}.{signature}"); - - let res = validate_jwt(&token, false).await.unwrap(); - assert!(!res.0); - assert!(res.1.contains("no kid in header")); - } - - #[tokio::test] - async fn unsigned_token_rejected_by_default() { - let token = build_unsigned_token(60); - let res = validate_jwt(&token, false).await.unwrap(); - assert!(!res.0); - assert!(res.1.contains("unsigned JWT (alg: none) not allowed")); - } - - #[tokio::test] - async fn valid_token_allows_alg_none_when_opted_in() { - let token = build_unsigned_token(60); - let res = validate_jwt_with( - &token, - &ValidateOptions { allow_alg_none: true, fallback_decoding_key: None }, - false, - ) - .await - .unwrap(); - assert!(res.0, 
"expected success when alg none is explicitly allowed"); - } - - #[tokio::test] - async fn expired_token_still_rejected() { - let token = build_unsigned_token(-60); - let res = validate_jwt_with( - &token, - &ValidateOptions { allow_alg_none: true, fallback_decoding_key: None }, - false, - ) - .await - .unwrap(); - assert!(!res.0); - assert!(res.1.contains("expired")); - } -} diff --git a/crates/kingfisher-scanner/src/validation/mod.rs b/crates/kingfisher-scanner/src/validation/mod.rs index 856a7b6..2da8631 100644 --- a/crates/kingfisher-scanner/src/validation/mod.rs +++ b/crates/kingfisher-scanner/src/validation/mod.rs @@ -25,11 +25,35 @@ mod utils; mod validation_body; #[cfg(feature = "validation-http")] -mod http_validation; +pub mod http_validation; #[cfg(feature = "validation-aws")] pub mod aws; +#[cfg(feature = "validation-azure")] +pub mod azure; + +#[cfg(feature = "validation-coinbase")] +pub mod coinbase; + +#[cfg(feature = "validation-gcp")] +pub mod gcp; + +#[cfg(feature = "validation-jwt")] +pub mod jwt; + +#[cfg(feature = "validation-database")] +pub mod jdbc; + +#[cfg(feature = "validation-database")] +pub mod mongodb; + +#[cfg(feature = "validation-database")] +pub mod mysql; + +#[cfg(feature = "validation-database")] +pub mod postgres; + // Re-exports pub use utils::{find_closest_variable, process_captures}; pub use validation_body::{as_str, clone_as_string, from_string, ValidationResponseBody}; @@ -42,13 +66,18 @@ pub use http_validation::{ #[cfg(feature = "validation-aws")] pub use aws::{ - aws_key_to_account_number, generate_aws_cache_key, set_aws_skip_account_ids, - set_aws_validation_concurrency, should_skip_aws_validation, validate_aws_credentials, - validate_aws_credentials_input, + aws_key_to_account_number, generate_aws_cache_key, revoke_aws_access_key, + set_aws_skip_account_ids, set_aws_validation_concurrency, should_skip_aws_validation, + validate_aws_credentials, validate_aws_credentials_input, }; use once_cell::sync::OnceCell; -use 
std::time::{Duration, Instant}; +use std::{ + sync::Arc, + time::{Duration, Instant}, +}; + +use crossbeam_skiplist::SkipMap; /// User agent string used for HTTP validation requests. #[cfg(feature = "validation-http")] @@ -92,6 +121,9 @@ pub fn set_user_agent_suffix>(suffix: Option) { /// Cache duration for validation results (20 minutes). pub const VALIDATION_CACHE_SECONDS: u64 = 1200; +/// Cache type used for validation memoization. +pub type Cache = Arc>; + /// A cached validation response. #[derive(Clone, Debug)] pub struct CachedResponse { diff --git a/src/validation/mongodb.rs b/crates/kingfisher-scanner/src/validation/mongodb.rs similarity index 71% rename from src/validation/mongodb.rs rename to crates/kingfisher-scanner/src/validation/mongodb.rs index 363dc04..3709f1e 100644 --- a/src/validation/mongodb.rs +++ b/crates/kingfisher-scanner/src/validation/mongodb.rs @@ -1,4 +1,3 @@ -// src/validation/mongodb.rs use std::{net::IpAddr, time::Duration}; use anyhow::Result; @@ -12,45 +11,34 @@ use tokio::time::timeout; use tracing::debug; pub fn looks_like_mongodb_uri(uri: &str) -> bool { - // quick scheme check first if !(uri.starts_with("mongodb://") || uri.starts_with("mongodb+srv://")) { return false; } - // pure string-level parse – no network, even for +srv mongodb::options::ConnectionString::parse(uri).is_ok() } -/// Return true if the URI targets localhost/loopback or a unix domain socket. -/// This is a *string-only* check—no DNS or driver IO. 
fn uri_targets_localhost(uri: &str) -> bool { - // strip scheme let rest = uri .strip_prefix("mongodb://") .or_else(|| uri.strip_prefix("mongodb+srv://")) .unwrap_or(uri); - // authority ends at first '/' (before db/path); if missing, take whole rest let authority = rest.split_once('/').map(|(a, _)| a).unwrap_or(rest); - // unix domain socket forms (percent-encoded "/path/to.sock") let auth_lower = authority.to_ascii_lowercase(); if auth_lower.starts_with("%2f") || authority.starts_with('/') { - return true; // UDS → treat as local + return true; } - // drop userinfo if present let hostlist = authority.rsplit_once('@').map(|(_, h)| h).unwrap_or(authority); - // iterate seed list (mongodb://hostA,hostB,...) for part in hostlist.split(',') { let mut host = part.trim(); - // strip brackets for IPv6 literals if host.starts_with('[') && host.ends_with(']') && host.len() >= 2 { host = &host[1..host.len() - 1]; } - // strip :port if present (only when suffix is all digits) if let Some(idx) = host.rfind(':') { if host[idx + 1..].chars().all(|c| c.is_ascii_digit()) { host = &host[..idx]; @@ -65,12 +53,10 @@ fn uri_targets_localhost(uri: &str) -> bool { false } -/// Returns true for localhost/loopback/unspecified IPs and common localhost aliases. 
fn is_local_host(h: &str) -> bool { let s = h.trim().trim_end_matches('.'); let s_lower = s.to_ascii_lowercase(); - // common aliases seen in hosts files across distros if matches!( s_lower.as_str(), "localhost" @@ -83,12 +69,10 @@ fn is_local_host(h: &str) -> bool { return true; } - // explicit unspecified forms if s_lower.as_str() == "0.0.0.0" || s_lower.as_str() == "::" { return true; } - // literal IPs if let Ok(ip) = s.parse::() { return ip.is_loopback() || ip.is_unspecified(); } @@ -96,32 +80,24 @@ fn is_local_host(h: &str) -> bool { false } -const FAST_CONNECT_MS: u64 = 700; // direct single-host URIs +const FAST_CONNECT_MS: u64 = 700; const FAST_SELECT_MS: u64 = 300; -const SRV_PARSE_MS: u64 = 2_000; // limit DNS resolution time +const SRV_PARSE_MS: u64 = 2_000; const SRV_CONNECT_MS: u64 = 2500; const SRV_SELECT_MS: u64 = 2500; -/// Validates a MongoDB URI in ≤ 2 s. Returns `(bool, String)` where the -/// boolean indicates success and the string provides a status message. -/// -/// # Arguments -/// * `uri` - The MongoDB connection URI to validate -/// * `lax_tls` - If true, accept self-signed or invalid certificates +/// Validates a MongoDB URI in ≤ 2 s. 
pub async fn validate_mongodb(uri: &str, lax_tls: bool) -> Result<(bool, String)> { - // ---- quick reject without touching the network if !looks_like_mongodb_uri(uri) { return Ok((false, "Invalid MongoDB URI".to_string())); } - // ---- refuse localhost/loopback/UDS outright if uri_targets_localhost(uri) { return Ok((false, "Refusing to validate localhost/loopback MongoDB URIs.".to_string())); } let is_srv = uri.starts_with("mongodb+srv://"); - // ---- build client opts (guarded so we don't hit DNS/driver first) let mut opts = if is_srv { match timeout(Duration::from_millis(SRV_PARSE_MS), ClientOptions::parse(uri)).await { Ok(res) => res?, @@ -134,27 +110,22 @@ pub async fn validate_mongodb(uri: &str, lax_tls: bool) -> Result<(bool, String) }; if !is_srv { - // one socket, skip cluster discovery for plain 'mongodb://' opts.direct_connection = Some(true); opts.connect_timeout = Some(Duration::from_millis(FAST_CONNECT_MS)); opts.server_selection_timeout = Some(Duration::from_millis(FAST_SELECT_MS)); } else { - // SRV needs DNS and replica-set discovery; fail fast opts.connect_timeout = Some(Duration::from_millis(SRV_CONNECT_MS)); opts.server_selection_timeout = Some(Duration::from_millis(SRV_SELECT_MS)); - // leave direct_connection = None (driver decides) } opts.max_pool_size = Some(1); opts.min_pool_size = Some(0); - // Configure TLS options based on lax_tls setting if lax_tls { debug!("Using lax TLS mode for MongoDB connection"); let tls_options = TlsOptions::builder().allow_invalid_certificates(true).build(); opts.tls = Some(Tls::Enabled(tls_options)); } - // ---- dial and ping let client = Client::with_options(opts)?; let res = client.database("admin").run_command(doc! 
{ "ping": 1 }).await; match res { diff --git a/src/validation/mysql.rs b/crates/kingfisher-scanner/src/validation/mysql.rs similarity index 72% rename from src/validation/mysql.rs rename to crates/kingfisher-scanner/src/validation/mysql.rs index 8ef77c3..31e9d48 100644 --- a/src/validation/mysql.rs +++ b/crates/kingfisher-scanner/src/validation/mysql.rs @@ -95,10 +95,6 @@ fn targets_localhost(opts: &Opts) -> bool { } /// Validate a MySQL connection URL. -/// -/// # Arguments -/// * `mysql_url` - The MySQL connection URL to validate -/// * `lax_tls` - If true, accept self-signed or invalid certificates pub async fn validate_mysql(mysql_url: &str, lax_tls: bool) -> Result<(bool, Vec)> { let opts = parse_mysql_url(mysql_url)?; @@ -109,7 +105,6 @@ pub async fn validate_mysql(mysql_url: &str, lax_tls: bool) -> Result<(bool, Vec let mut builder = OptsBuilder::from_opts(opts).stmt_cache_size(Some(0)); - // Configure TLS options based on lax_tls setting if lax_tls { debug!("Using lax TLS mode for MySQL connection"); let ssl_opts = SslOpts::default().with_danger_accept_invalid_certs(true); @@ -139,42 +134,3 @@ pub async fn validate_mysql(mysql_url: &str, lax_tls: bool) -> Result<(bool, Vec Err(_) => Err(anyhow!("MySQL connection timed out after {CONNECT_TIMEOUT:?}")), } } - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn parse_mysql_url_accepts_valid_urls() { - let url = "mysql://user:secret@exmple.com:3306/app"; - let opts = parse_mysql_url(url).expect("expected valid MySQL URL"); - assert_eq!(opts.user(), Some("user")); - assert_eq!(opts.pass(), Some("secret")); - assert_eq!(opts.ip_or_hostname(), "exmple.com"); - } - - #[test] - fn parse_mysql_url_rejects_invalid_urls() { - for candidate in [ - "", // empty - "mysql://user@exmple.com/app", // missing password - "mysql://:secret@exmple.com/app", // missing username - "mysql://user:secret@:3306/app", // missing host - "postgres://user:secret@exmple.com", // wrong scheme - 
"mysql://user:secret@exmple.com:70000/app", // invalid port - ] { - assert!( - parse_mysql_url(candidate).is_err(), - "expected parsing to fail for {candidate}" - ); - } - } - - #[test] - fn parse_mysql_url_allows_trimming_whitespace() { - let opts = - parse_mysql_url(" mysql://user:secret@exmple.com:3306/app ").expect("trimmed URL"); - assert_eq!(opts.user(), Some("user")); - assert_eq!(opts.pass(), Some("secret")); - } -} diff --git a/src/validation/postgres.rs b/crates/kingfisher-scanner/src/validation/postgres.rs similarity index 75% rename from src/validation/postgres.rs rename to crates/kingfisher-scanner/src/validation/postgres.rs index c81bb4c..e19b61e 100644 --- a/src/validation/postgres.rs +++ b/crates/kingfisher-scanner/src/validation/postgres.rs @@ -22,16 +22,10 @@ const CONNECT_TIMEOUT: Duration = Duration::from_secs(5); static INIT_PROVIDER: OnceCell<()> = OnceCell::new(); fn ensure_crypto_provider() { INIT_PROVIDER.get_or_init(|| { - // If another part of the program already installed a provider, - // ignore the error — we just need one global provider. let _ = CryptoProvider::install_default(ring::default_provider()); }); } -/// A certificate verifier that accepts any certificate (for lax TLS mode). -/// -/// This verifier still validates signatures to ensure the connection is encrypted, -/// but does not verify the certificate chain against trusted CAs. #[derive(Debug)] struct LaxCertVerifier(Arc); @@ -44,7 +38,6 @@ impl ServerCertVerifier for LaxCertVerifier { _ocsp_response: &[u8], _now: UnixTime, ) -> std::result::Result { - // Accept any certificate - this is the "lax" behavior Ok(ServerCertVerified::assertion()) } @@ -93,14 +86,9 @@ pub fn parse_postgres_url(postgres_url: &str) -> Result { } /// Validate a Postgres connection URL. 
-/// -/// # Arguments -/// * `postgres_url` - The Postgres connection URL to validate -/// * `lax_tls` - If true, accept self-signed or invalid certificates pub async fn validate_postgres(postgres_url: &str, lax_tls: bool) -> Result<(bool, Vec)> { let mut cfg = parse_postgres_url(postgres_url)?; - // --- skip localhost/loopback/unix-socket targets entirely ------------- if has_any_local_host(&cfg) { debug!("Skipping Postgres validation: host is localhost/loopback or unix socket"); return Ok((false, vec!["skipped localhost/loopback host".into()])); @@ -117,16 +105,14 @@ pub async fn validate_postgres(postgres_url: &str, lax_tls: bool) -> Result<(boo fn has_any_local_host(cfg: &Config) -> bool { cfg.get_hosts().iter().any(|h| match h { #[cfg(unix)] - Host::Unix(_) => true, // local unix socket + Host::Unix(_) => true, Host::Tcp(s) => is_local_tcp_host(s), }) } fn is_local_tcp_host(s: &str) -> bool { - // strip URI-style IPv6 brackets if present let host = s.trim_matches(|c| c == '[' || c == ']'); - // Direct IPs if let Ok(ip) = host.parse::() { return match ip { std::net::IpAddr::V4(v4) => { @@ -138,7 +124,6 @@ fn is_local_tcp_host(s: &str) -> bool { }; } - // Common localhost hostnames let lower = host.to_ascii_lowercase(); lower == "localhost" || lower.starts_with("localhost.") @@ -151,7 +136,6 @@ async fn check_postgres_db_connection( original_mode: SslMode, lax_tls: bool, ) -> Result<(bool, Vec)> { - // First attempt with caller-supplied sslmode, optional retry without TLS. 
for attempt in 0..=1 { let cfg_try = cfg.clone(); @@ -170,11 +154,9 @@ async fn check_postgres_db_connection( .await } else { timeout(CONNECT_TIMEOUT, async { - // Ensure Rustls crypto provider is installed *before* using the builder ensure_crypto_provider(); let tls_cfg = if lax_tls { - // Lax mode: accept any certificate (self-signed, expired, wrong hostname) debug!("Using lax TLS mode for Postgres connection"); let provider = Arc::new(ring::default_provider()); ClientConfig::builder() @@ -182,7 +164,6 @@ async fn check_postgres_db_connection( .with_custom_certificate_verifier(Arc::new(LaxCertVerifier(provider))) .with_no_client_auth() } else { - // Strict mode: full certificate validation let CertificateResult { certs, errors, .. } = load_native_certs(); for err in errors { debug!("native-cert error: {err}"); @@ -262,55 +243,3 @@ fn server_requires_encryption(err_msg: &str) -> bool { fn missing_cluster_identifier(err_msg: &str) -> bool { err_msg.contains("missing cluster identifier") } - -#[cfg(test)] -mod tests { - use super::{ - is_local_tcp_host, missing_cluster_identifier, parse_postgres_url, - server_requires_encryption, - }; - - #[test] - fn detects_encryption_requirement() { - assert!(server_requires_encryption("db error: FATAL: server requires encryption")); - assert!(!server_requires_encryption("some other error")); - } - - #[test] - fn detects_missing_cluster() { - assert!(missing_cluster_identifier( - "db error: FATAL: codeParamsRoutingFailed: missing cluster identifier", - )); - assert!(!missing_cluster_identifier("another error")); - } - - #[test] - fn detects_local_hosts() { - for h in [ - "localhost", - "LOCALHOST", - "localhost.localdomain", - "localhost6", - "127.0.0.1", - "[::1]", - "::", - ] { - assert!(is_local_tcp_host(h), "should treat {h} as local"); - } - for h in ["db.example.com", "10.0.0.1"] { - assert!(!is_local_tcp_host(h), "should not treat {h} as local"); - } - } - - #[test] - fn parse_accepts_postgis_scheme() { - let url = 
"postgis://postgres:secret@exmple.com:5432"; - assert!(parse_postgres_url(url).is_ok(), "postgis scheme should be accepted"); - } - - #[test] - fn parse_rejects_invalid_port() { - let url = "postgres://postgres:secret@exmple.com:70000"; - assert!(parse_postgres_url(url).is_err(), "invalid port should be rejected"); - } -} diff --git a/data/default/ignore.conf b/data/default/ignore.conf deleted file mode 100644 index 0776d1d..0000000 --- a/data/default/ignore.conf +++ /dev/null @@ -1,7 +0,0 @@ -# This file lists gitignore-style patterns: https://git-scm.com/docs/gitignore -# -# These patterns control which paths Kingfisher will scan. - -**/objects/pack/pack-*.pack -**/objects/pack/pack-*.idx -**/packed-refs diff --git a/docs/INSTALLATION.md b/docs/INSTALLATION.md index b5f2c06..b4b072b 100644 --- a/docs/INSTALLATION.md +++ b/docs/INSTALLATION.md @@ -16,6 +16,7 @@ This guide covers all installation methods for Kingfisher, including pre-commit - [Using the pre-commit Framework](#using-the-pre-commit-framework) - [Using Husky (Node.js projects)](#using-husky-nodejs-projects) - [Compile from Source](#compile-from-source) +- [PyPI Wheels](#pypi-wheels) - [Run Kingfisher in Docker](#run-kingfisher-in-docker) ## Pre-built Releases @@ -377,3 +378,22 @@ docker run --rm \ --format json \ --output /out/findings.json ``` + +## PyPI Wheels + +If you want to run Kingfisher from PyPI, install the `kingfisher-bin` package +and use the `kingfisher` command it exposes: + +```bash +pip install kingfisher-bin +kingfisher --help +``` + +Or run it without installation using `uvx`: + +```bash +uvx kingfisher-bin --help +``` + +For maintainers who need to build and publish wheels, see +[docs/PYPI.md](PYPI.md). 
diff --git a/docs/MULTI_STEP_REVOCATION.md b/docs/MULTI_STEP_REVOCATION.md new file mode 100644 index 0000000..356b57c --- /dev/null +++ b/docs/MULTI_STEP_REVOCATION.md @@ -0,0 +1,257 @@ +# Multi-Step Revocation Implementation + +## Overview + +This document describes the implementation of 2-step revocation support in Kingfisher. Some services require a two-step revocation process: + +1. **Step 1 (Lookup)**: Query the API to retrieve an internal ID, token identifier, or other metadata +2. **Step 2 (Delete)**: Use the extracted value(s) to perform the actual revocation/deletion + +## Architecture + +### New Types + +#### `HttpMultiStepRevocation` +```rust +pub struct HttpMultiStepRevocation { + /// Sequential steps to execute (minimum 1, maximum 2). + pub steps: Vec, +} +``` + +#### `RevocationStep` +```rust +pub struct RevocationStep { + /// Human-readable name for this step (e.g., "lookup_id", "delete"). + pub name: Option, + + /// HTTP request configuration for this step. + pub request: HttpRequest, + + /// Optional multipart configuration for this step. + pub multipart: Option, + + /// Variables to extract from the response for use in subsequent steps. + pub extract: Option>, +} +``` + +#### `ResponseExtractor` +```rust +pub enum ResponseExtractor { + /// Extract from JSON response using JSONPath syntax + JsonPath { path: String }, + + /// Extract using regex with a capture group + Regex { pattern: String }, + + /// Extract an HTTP response header value + Header { name: String }, + + /// Use the entire response body as-is + Body, + + /// Extract the HTTP status code as a string + StatusCode, +} +``` + +### Revocation Enum + +The `Revocation` enum has been extended with: + +```rust +pub enum Revocation { + AWS, + GCP, + Http(HttpValidation), + HttpMultiStep(HttpMultiStepRevocation), // New variant +} +``` + +## Implementation Details + +### Execution Flow + +1. **Validation**: Checks that 1-2 steps are defined +2. 
**Sequential Execution**: Each step executes in order +3. **Variable Extraction**: After each step completes, extract variables from response +4. **Variable Injection**: Extracted variables are available as Liquid templates in subsequent steps +5. **Response Validation**: Final step's `response_matcher` determines success/failure + +### Key Functions + +#### `extract_value_from_response()` +Extracts a value from an HTTP response based on the specified extractor type. + +**Supported Extractors:** +- **JsonPath**: Basic JSONPath implementation supporting: + - Nested fields: `$.data.user.id` + - Array indexing: `$.items[0].id` + - Combined: `$.data.sessions[0].session_id` +- **Regex**: Uses first capture group from pattern match +- **Header**: Extracts value from response header by name +- **Body**: Returns entire response body +- **StatusCode**: Returns HTTP status code as string + +#### `execute_revocation_step()` +Executes a single revocation step: +1. Renders URL and request templates with current variables +2. Builds and sends HTTP request +3. Extracts variables from response if configured +4. Adds extracted variables to globals for next step + +#### `execute_multi_step_revocation()` +Orchestrates the multi-step revocation process: +1. Validates step count (1-2 steps) +2. Iterates through steps sequentially +3. Tracks intermediate results +4. 
Returns final result from last step + +### Backwards Compatibility + +All existing single-step revocations continue to work unchanged: +- `Revocation::AWS` +- `Revocation::GCP` +- `Revocation::Http(_)` + +## Usage Examples + +### Basic 2-Step Revocation + +```yaml +revocation: + type: HttpMultiStep + content: + steps: + # Step 1: Get the token ID + - name: lookup_token_id + request: + method: GET + url: https://api.example.com/v1/tokens/current + headers: + Authorization: "Bearer {{ TOKEN }}" + response_matcher: + - type: StatusMatch + status: [200] + extract: + TOKEN_ID: + type: JsonPath + path: "$.data.token_id" + + # Step 2: Delete the token + - name: delete_token + request: + method: DELETE + url: https://api.example.com/v1/tokens/{{ TOKEN_ID }} + headers: + Authorization: "Bearer {{ TOKEN }}" + response_matcher: + - type: StatusMatch + status: [204] +``` + +### Multiple Extractions + +```yaml +revocation: + type: HttpMultiStep + content: + steps: + - name: get_metadata + request: + method: GET + url: https://api.service.com/tokens/info + headers: + Authorization: "Bearer {{ TOKEN }}" + response_matcher: + - type: StatusMatch + status: [200] + extract: + TOKEN_ID: + type: JsonPath + path: "$.id" + ACCOUNT_ID: + type: Header + name: X-Account-ID + TOKEN_TYPE: + type: Regex + pattern: '"type":\s*"([^"]+)"' + + - name: revoke_token + request: + method: POST + url: https://api.service.com/accounts/{{ ACCOUNT_ID }}/tokens/{{ TOKEN_ID }}/revoke + headers: + Authorization: "Bearer {{ TOKEN }}" + Content-Type: application/json + body: '{"token_type":"{{ TOKEN_TYPE }}"}' + response_matcher: + - type: StatusMatch + status: [200, 204] +``` + +## Testing + +Test your multi-step revocation using: + +```bash +# Revoke a token using multi-step revocation +kingfisher revoke --rule <RULE_ID> <TOKEN> + +# With additional variables if needed +kingfisher revoke --rule <RULE_ID> --var EXTRA_VAR=value <TOKEN> +``` + +## Files Modified + +### Core Implementation +- `crates/kingfisher-rules/src/rule.rs`: Added new types 
and enum variants +- `crates/kingfisher-rules/src/lib.rs`: Exported new types +- `src/direct_revoke.rs`: Added multi-step execution logic + +### Documentation +- `docs/RULES.md`: Added comprehensive multi-step revocation documentation +- `docs/MULTI_STEP_REVOCATION.md`: This file + +### Examples +- `crates/kingfisher-rules/data/rules/example_multistep.yml`: Example rules demonstrating multi-step revocation + +### Supporting Changes +- `src/reporter.rs`: Added pattern match for `HttpMultiStep` variant + +## Constraints + +1. **Maximum 2 steps**: The implementation supports 1-2 steps only +2. **Sequential execution**: Steps execute in order; no parallel execution +3. **Final step validation**: The last step must include `response_matcher` +4. **Variable naming**: Extracted variable names should be uppercase (convention) +5. **JSONPath limitations**: Basic implementation supporting common patterns only + +## Error Handling + +The implementation provides clear error messages for: +- Empty steps array +- More than 2 steps +- Missing response_matcher on final step +- Failed variable extraction +- Invalid JSONPath syntax +- Missing required headers or fields +- HTTP request failures + +All errors are propagated with context about which step failed and why. + +## Debug Logging + +Enable debug logging to see multi-step execution details: + +```bash +RUST_LOG=debug kingfisher revoke --rule <RULE_ID> <TOKEN> +``` + +Debug logs include: +- Step execution start/completion +- URLs being called +- Variables extracted and their values +- Response status codes +- Intermediate step results diff --git a/docs/PYPI.md b/docs/PYPI.md new file mode 100644 index 0000000..36f74a5 --- /dev/null +++ b/docs/PYPI.md @@ -0,0 +1,91 @@ +# PyPI Wheel Distribution (Kingfisher CLI) + +This document describes how to package the Kingfisher Rust binary into +platform-specific Python wheels so users can install and run `kingfisher` via +`pip` or `uv`. 
+ +## Overview + +The Python package is a thin wrapper that bundles the compiled Kingfisher binary +inside `kingfisher/bin/` and exposes a `kingfisher` console entry point that +executes it. + +Users can run it without installation via `uvx`: + +```bash +uvx kingfisher-bin --help +``` + +## Build prerequisites + +1. Build the Kingfisher binary for your target platform (see + [INSTALLATION.md](INSTALLATION.md) for `make` targets). +2. Install the Python build tooling: + +```bash +python -m pip install build +``` + +## Build a wheel + +Run the helper script from the repo root: + +```bash +scripts/build-pypi-wheel.sh \ + --binary ./path/to/kingfisher \ + --version 1.2.3 \ + --plat-name manylinux_2_17_x86_64 +``` + +For Windows, pass the `.exe` binary and a Windows platform tag: + +```bash +scripts/build-pypi-wheel.sh \ + --binary .\\path\\to\\kingfisher.exe \ + --version 1.2.3 \ + --plat-name win_amd64 +``` + +If you only build a Windows x64 binary, you can still ship a `win_arm64` wheel +using the same executable (it runs under emulation on ARM64 Windows): + +```bash +scripts/build-pypi-wheel.sh \ + --binary .\\path\\to\\kingfisher.exe \ + --version 1.2.3 \ + --plat-name win_arm64 +``` + +The resulting wheel will be placed in `dist-pypi/` by default. + +## Test locally + +```bash +python -m pip install dist-pypi/kingfisher_bin-*.whl +kingfisher --help +``` + +## Publish + +Upload the wheels to PyPI using `twine` (or your preferred tool): + +```bash +python -m pip install twine +python -m twine upload dist-pypi/* +``` + +### GitHub Actions (recommended) + +The repository includes a `pypi-wheels` workflow that: + +1. Downloads the release binaries. +2. Builds platform-tagged wheels. +3. Publishes them to PyPI using Trusted Publishing (OIDC). + +To use Trusted Publishing, create a PyPI project named `kingfisher-bin` and +enable GitHub Actions as a trusted publisher for this repository and workflow. +No API token is required once Trusted Publishing is configured. 
+ +If you do not use Trusted Publishing, generate a PyPI API token and provide it +to `twine` (for example via `TWINE_USERNAME=__token__` and +`TWINE_PASSWORD=`). diff --git a/docs/RULES.md b/docs/RULES.md index 85f6352..072a500 100644 --- a/docs/RULES.md +++ b/docs/RULES.md @@ -100,6 +100,45 @@ revocation: type: GCP ``` +### Multi-Step Revocation + +Some services require a 2-step revocation process: +1. **Lookup Step**: Make a request to retrieve an ID or token +2. **Delete Step**: Use that ID to perform the actual revocation + +For these cases, use `HttpMultiStep`: + +```yaml +revocation: + type: HttpMultiStep + content: + steps: + - name: lookup_token_id # Step 1: Get the token ID + request: + method: GET + url: https://api.example.com/v1/tokens/current + headers: + Authorization: "Bearer {{ TOKEN }}" + response_matcher: + - type: StatusMatch + status: [200] + extract: # Extract values from response + TOKEN_ID: # Variable name (uppercase) + type: JsonPath # Extraction method + path: "$.data.id" # JSONPath to the value + + - name: revoke_token # Step 2: Delete using the ID + request: + method: DELETE + url: https://api.example.com/v1/tokens/{{ TOKEN_ID }} + headers: + Authorization: "Bearer {{ TOKEN }}" + response_matcher: + - report_response: true + - type: StatusMatch + status: [204] +``` + | Field | What it does | | ----------------------- | -------------------------------------------------------------------- | | name | Friendly name shown in reports | @@ -112,7 +151,7 @@ revocation: | depends_on_rule | Chain rules: use captures from one rule in another's validation | | pattern_requirements | Require character types and/or exclude placeholder words from matches | | validation | Configure HTTP, AWS, GCP, etc. checks to verify live validity | -| revocation | Configure HTTP or AWS revocation actions for a detected secret | +| revocation | Configure HTTP, AWS, or multi-step revocation for a detected secret | *responser_matcher* variants. 
Multiple can be used @@ -125,10 +164,234 @@ revocation: | **XmlValid** | – | Pass only if body parses as well-formed XML. Use when response is expected as XML data | | **ReportResponse** | `report_response` (bool) | Include raw payload in finding for debugging. | -## 2. Templating with Liquid +## 2. Multi-Step Revocation + +Some APIs require a two-step revocation process: + +1. **Step 1 (Lookup)**: Query the API to retrieve an internal ID, token identifier, or other metadata +2. **Step 2 (Delete)**: Use the extracted value(s) to perform the actual revocation/deletion + +Kingfisher supports up to 2 sequential steps in a revocation workflow. Each step can extract values from its response, making them available as variables in subsequent steps. + +### Response Extractors + +Values can be extracted from HTTP responses using the following methods: + +| Extractor Type | Description | Example | +|----------------|-------------|---------| +| **JsonPath** | Extract from JSON response using JSONPath syntax | `$.data.id`, `$.items[0].token_id` | +| **Regex** | Extract using regex with a capture group | `"token_id":\s*"([^"]+)"` | +| **Header** | Extract an HTTP response header value | `X-Token-ID` | +| **Body** | Use the entire response body as-is | - | +| **StatusCode** | Extract the HTTP status code as a string | - | + +### Multi-Step Revocation Schema + +```yaml +revocation: + type: HttpMultiStep + content: + steps: + - name: # Optional: human-readable step name + request: # Standard HTTP request configuration + method: GET|POST|DELETE|... + url: https://api.example.com/... 
+ headers: + Header-Name: "value" + body: "optional request body" + response_matcher: # Required for final step only + - type: StatusMatch + status: [200] + extract: # Optional: extract variables from response + VARIABLE_NAME: # Variable name (uppercase recommended) + type: JsonPath|Regex|Header|Body|StatusCode + path: "$.path.to.value" # For JsonPath + pattern: "regex pattern" # For Regex (use first capture group) + name: "header-name" # For Header + + - name: # Subsequent steps can use extracted variables + request: + method: DELETE + url: https://api.example.com/tokens/{{ VARIABLE_NAME }} + response_matcher: + - type: StatusMatch + status: [204] +``` + +### Multi-Step Revocation Requirements + +- **Minimum 1, Maximum 2 steps**: You must define at least 1 step and no more than 2 steps +- **Final step requires response_matcher**: The last step must include a `response_matcher` to determine success/failure +- **Intermediate steps are optional**: Earlier steps don't require response matchers but can have them for validation +- **Variables flow forward**: Variables extracted in step 1 are available in step 2 via Liquid templates (e.g., `{{ TOKEN_ID }}`) +- **All standard Liquid filters apply**: You can use filters on extracted variables just like with `{{ TOKEN }}` + +### Example 1: Basic Two-Step Revocation + +This example shows a service that requires looking up a token's ID before deletion: + +```yaml +rules: + - name: Example Service Token + id: kingfisher.example.1 + pattern: | + (?xi) + example_token_ + [A-Za-z0-9]{32} + min_entropy: 3.5 + examples: + - example_token_abc123def456ghi789jkl012mno345 + validation: + type: Http + content: + request: + method: GET + url: https://api.example.com/v1/auth/verify + headers: + Authorization: "Bearer {{ TOKEN }}" + response_matcher: + - type: StatusMatch + status: [200] + revocation: + type: HttpMultiStep + content: + steps: + # Step 1: Look up the token ID + - name: lookup_token_id + request: + method: GET + url: 
https://api.example.com/v1/tokens/current + headers: + Authorization: "Bearer {{ TOKEN }}" + response_matcher: + - type: StatusMatch + status: [200] + extract: + TOKEN_ID: + type: JsonPath + path: "$.data.token_id" + + # Step 2: Delete the token using the ID + - name: delete_token + request: + method: DELETE + url: https://api.example.com/v1/tokens/{{ TOKEN_ID }} + headers: + Authorization: "Bearer {{ TOKEN }}" + response_matcher: + - report_response: true + - type: StatusMatch + status: [204] +``` + +### Example 2: Using Multiple Extraction Methods + +This example demonstrates extracting values using different methods: + +```yaml +revocation: + type: HttpMultiStep + content: + steps: + # Step 1: Get metadata from multiple sources + - name: get_token_metadata + request: + method: GET + url: https://api.service.com/tokens/info + headers: + Authorization: "Bearer {{ TOKEN }}" + response_matcher: + - type: StatusMatch + status: [200] + extract: + # Extract from JSON body + TOKEN_ID: + type: JsonPath + path: "$.id" + + # Extract from response header + ACCOUNT_ID: + type: Header + name: X-Account-ID + + # Extract using regex + TOKEN_TYPE: + type: Regex + pattern: '"type":\s*"([^"]+)"' + + # Step 2: Use all extracted values + - name: revoke_token + request: + method: POST + url: https://api.service.com/accounts/{{ ACCOUNT_ID }}/tokens/{{ TOKEN_ID }}/revoke + headers: + Authorization: "Bearer {{ TOKEN }}" + Content-Type: application/json + body: '{"token_type":"{{ TOKEN_TYPE }}"}' + response_matcher: + - type: StatusMatch + status: [200, 204] +``` + +### Example 3: Complex JSONPath Extraction + +JSONPath supports nested objects and array indexing: + +```yaml +extract: + # Extract from nested object + USER_ID: + type: JsonPath + path: "$.data.user.id" + + # Extract from array (first element) + FIRST_TOKEN_ID: + type: JsonPath + path: "$.tokens[0].id" + + # Extract from nested array + SESSION_ID: + type: JsonPath + path: "$.data.sessions[0].session_id" +``` + +### Example 
4: Single-Step Migration Path + +Existing single-step revocations remain unchanged and continue to work: + +```yaml +# This continues to work as before +revocation: + type: Http + content: + request: + method: DELETE + url: https://api.service.com/tokens/revoke + headers: + Authorization: "Bearer {{ TOKEN }}" + response_matcher: + - type: StatusMatch + status: [204] +``` + +### When to Use Multi-Step Revocation + +Use multi-step revocation when: + +- **The API requires looking up an ID first**: Some services don't accept the token directly for revocation +- **You need metadata from the token**: The revocation endpoint requires additional information only available via a separate API call +- **The service uses indirect revocation**: The token must be associated with another resource (session, key, credential) that needs to be identified first + +Do NOT use multi-step revocation when: + +- **The API accepts the token directly**: Use the simpler single-step `Http` revocation +- **You need more than 2 steps**: Kingfisher supports a maximum of 2 steps +- **The service provides a native revocation method**: Use `AWS` or `GCP` types when applicable + +## 3. Templating with Liquid Kingfisher leverages the Liquid template engine for dynamic parts of HTTP request bodies, headers, query parameters, and multipart payloads. The engine supports both built-in and custom filters to manipulate the captured secret (TOKEN) or other named captures ({{ NAME }}). -### Using Liquid Filters in Validation +### Using Liquid Filters in Validation and Revocation - **Capture Injection**: The unnamed capture from your regex becomes {{ TOKEN }}. Named captures are made available as uppercase variables (e.g. {{ RDMVAL }}). - **Filter Pipeline**: You can chain filters using the pipe (|) syntax: @@ -141,7 +404,7 @@ Arguments: Some filters accept parameters, provided after a colon: {{ TOKEN | hmac_sha256: "my-secret-key" }} ``` -### 3. 
Built-in & Custom Liquid Filters +### Built-in & Custom Liquid Filters Below is the complete list of Liquid filters available in Kingfisher, along with their usage patterns and examples. | Filter | Parameters | Description | Example | diff --git a/docs/TOKEN_REVOCATION_SUPPORT.md b/docs/TOKEN_REVOCATION_SUPPORT.md new file mode 100644 index 0000000..2f463c4 --- /dev/null +++ b/docs/TOKEN_REVOCATION_SUPPORT.md @@ -0,0 +1,201 @@ +# Token Revocation Support + +This document provides an overview of the revocation support added to Kingfisher for various service tokens. + +## Overview + +Revocation support has been added for **6 services** that provide verified, documented programmatic API endpoints to delete or revoke access tokens/keys. Most implementations use the **HttpMultiStep** revocation type because they require a two-step process: + +1. **Step 1 (Lookup)**: Query the API to retrieve an internal ID or token identifier +2. **Step 2 (Delete)**: Use the extracted ID to perform the actual revocation + +**Important**: All implementations have been verified against official API documentation to ensure correctness. + +## Services with Revocation Support + +### 1. SendGrid (`sendgrid.yml`) +- **Rule ID**: `kingfisher.sendgrid.1` +- **Revocation Type**: HttpMultiStep (2-step) +- **Endpoint**: `DELETE /v3/api_keys/{api_key_id}` +- **Process**: + 1. List all API keys to find the current key's ID + 2. Delete the API key using its ID +- **Note**: SendGrid only shows partial keys in the list, so the first key is extracted + +### 2. Tailscale (`tailscale.yml`) +- **Rule ID**: `kingfisher.tailscale.1` +- **Revocation Type**: HttpMultiStep (2-step) +- **Endpoint**: `DELETE /api/v2/tailnet/{tailnet}/keys/{keyId}` +- **Process**: + 1. List all keys to find the current key's ID + 2. Delete the key using its ID (using `-` as tailnet for authenticated user's tailnet) + +### 3. 
MongoDB Atlas (`mongodb.yml`) +- **Rule ID**: `kingfisher.mongodb.1` +- **Revocation Type**: HttpMultiStep (2-step) +- **Endpoint**: `DELETE /api/atlas/v2/orgs/{ORG_ID}/apiKeys/{API_KEY_ID}` +- **Process**: + 1. List all organizations to get the first ORG_ID + 2. Delete the API key using the public key as the API_KEY_ID +- **Authentication**: Uses HTTP Digest authentication +- **Note**: The Public Key serves as the API_KEY_ID needed for deletion + +### 4. Sumo Logic (`sumologic.yml`) +- **Rule ID**: `kingfisher.sumologic.2` +- **Revocation Type**: Http (single-step) +- **Endpoint**: `DELETE /api/v1/accessKeys/{id}` +- **Process**: Direct deletion using the Access ID +- **Authentication**: Basic Auth (Access ID as username, Access Key as password) +- **Note**: The Access ID is the ID needed for deletion (captured from `kingfisher.sumologic.1`) + +### 5. Twilio (`twilio.yml`) +- **Rule ID**: `kingfisher.twilio.2` +- **Revocation Type**: HttpMultiStep (2-step) +- **Endpoint**: `DELETE /2010-04-01/Accounts/{Account_SID}/Keys/{Key_SID}.json` +- **Process**: + 1. List accounts to get the Account SID + 2. Delete the API key using both Account SID and Key SID +- **Note**: Assumes TWILIOID is an API Key SID (starts with `SK`) + +### 6. NPM (`npm.yml`) +- **Rule IDs**: `kingfisher.npm.1`, `kingfisher.npm.2` +- **Revocation Type**: HttpMultiStep (2-step) +- **Endpoint**: `DELETE /-/npm/v1/tokens/token/{token_key}` +- **Process**: + 1. List all tokens to find the current token's key ID + 2. 
Revoke the token using its key +- **Alternative**: Can also use the `npm token revoke <TOKEN_KEY>` CLI command + + +## Testing Revocation + +To test revocation for a detected token: + +```bash +# Revoke a token using the rule ID +kingfisher revoke --rule <RULE_ID> <TOKEN> + +# With debug logging to see step-by-step execution +RUST_LOG=debug kingfisher revoke --rule <RULE_ID> <TOKEN> + +# With additional variables if needed (e.g., for services with depends_on_rule) +kingfisher revoke --rule <RULE_ID> --var EXTRA_VAR=value <TOKEN> +``` + +### Example: Revoking a SendGrid API Key + +```bash +# Revoke a SendGrid API key +kingfisher revoke --rule kingfisher.sendgrid.1 "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" +``` + +### Example: Revoking a MongoDB API Key + +```bash +# Revoke a MongoDB Atlas API key (requires both public and private key) +kingfisher revoke --rule kingfisher.mongodb.1 \ + --var PUBKEY=qj4Zrh8e6A \ + "4b18315e-6b7d-4337-b449-5d38f5a189ec" +``` + +## Implementation Details + +### Multi-Step Revocation Pattern + +All multi-step revocations follow this general pattern: + +```yaml +revocation: + type: HttpMultiStep + content: + steps: + # Step 1: Lookup + - name: lookup_id + request: + method: GET + url: https://api.service.com/endpoint + headers: + Authorization: "Bearer {{ TOKEN }}" + response_matcher: + - type: StatusMatch + status: [200] + - type: JsonValid + extract: + ID_VARIABLE: + type: JsonPath + path: "$.path.to.id" + + # Step 2: Delete + - name: delete + request: + method: DELETE + url: https://api.service.com/endpoint/{{ ID_VARIABLE }} + headers: + Authorization: "Bearer {{ TOKEN }}" + response_matcher: + - report_response: true + - type: StatusMatch + status: [200, 204] +``` + +### Variable Extraction Methods + +The following extraction methods are used across different services: + +| Method | Description | Example Services | +|--------|-------------|------------------| +| **JsonPath** | Extract from JSON response using JSONPath syntax | SendGrid, Netlify, Tailscale, ElevenLabs, NPM, 
MongoDB | +| **Regex** | Extract using regex with a capture group | (Not used in current implementations) | +| **Header** | Extract an HTTP response header value | (Not used in current implementations) | +| **Body** | Use the entire response body | (Not used in current implementations) | + +### Common JSONPath Patterns + +- `$.result[0].api_key_id` - SendGrid: Extract first API key ID from result array +- `$.keys[0].id` - Tailscale: Extract first key ID from keys array +- `$.results[0].id` - MongoDB: Extract first organization ID from results array +- `$.objects[0].token.key` - NPM: Extract token key from objects array +- `$.accounts[0].sid` - Twilio: Extract account SID from accounts array + +## Security Considerations + +### Token Identification + +Some services (like SendGrid, NPM, and Tailscale) list all tokens but don't include the full token value in the response. The current implementations extract the **first** token from the list, which assumes: + +1. The user has only one active token, OR +2. The token being revoked is the first one in the list + +**Important**: If multiple tokens exist, the wrong token might be revoked. In production, consider: +- Adding user prompts to confirm which token to revoke +- Matching tokens by creation date, name, or other metadata +- Displaying a list of tokens for user selection + +### Digest Authentication + +MongoDB Atlas uses HTTP Digest authentication, which is properly handled by the Kingfisher HTTP client via the `digest` field in the request configuration. + + +## Limitations + +1. **Maximum 2 steps**: The HttpMultiStep implementation supports only 1-2 steps +2. **Sequential execution**: Steps execute in order; no parallel execution +3. **Token identification**: For services that don't return full token values, the wrong token may be revoked if multiple exist +4. 
**Requires API access**: All revocations require the token to have sufficient permissions to list and delete itself + +## Future Enhancements + +Potential improvements for revocation support: + +1. **Interactive mode**: Prompt user to select which token to revoke when multiple exist +2. **Dry-run mode**: Show what would be revoked without actually revoking +3. **Batch revocation**: Revoke multiple tokens at once +4. **Revocation history**: Track what was revoked and when +5. **Rollback support**: For services that support token restoration +6. **Service-specific CLI support**: For services like NPM that have CLI commands + +## References + +- [Multi-Step Revocation Implementation](MULTI_STEP_REVOCATION.md) +- [Writing Custom Rules](RULES.md) +- [Kingfisher Rules Schema](../crates/kingfisher-rules/src/rule.rs) diff --git a/pypi/README.md b/pypi/README.md new file mode 100644 index 0000000..a6cbc7e --- /dev/null +++ b/pypi/README.md @@ -0,0 +1,17 @@ +# Kingfisher (Python wheel) + +This package ships the Kingfisher CLI as a platform-specific Python wheel. +The `kingfisher` console script executes the bundled binary for your +OS/architecture. + +## Usage + +```bash +pip install kingfisher-bin +kingfisher --help +``` + +## Development + +Use the helper script in `scripts/build-pypi-wheel.sh` from the repo root to +build a wheel for a specific target after compiling the Rust binary. 
diff --git a/pypi/kingfisher/__init__.py b/pypi/kingfisher/__init__.py new file mode 100644 index 0000000..668a87e --- /dev/null +++ b/pypi/kingfisher/__init__.py @@ -0,0 +1,48 @@ +"""Python wrapper for the bundled Kingfisher binary.""" + +from __future__ import annotations + +import os +import stat +import subprocess +import sys +from pathlib import Path + +from ._version import __version__ + + +def _binary_name() -> str: + return "kingfisher.exe" if sys.platform == "win32" else "kingfisher" + + +def get_binary_path() -> str: + """Return the path to the bundled Kingfisher binary.""" + binary = Path(__file__).resolve().parent / "bin" / _binary_name() + + if not binary.exists(): + raise FileNotFoundError( + "Kingfisher binary not found. " + "This wheel may not match your platform." + ) + + if sys.platform != "win32": + current_mode = binary.stat().st_mode + if not (current_mode & stat.S_IXUSR): + binary.chmod( + current_mode + | stat.S_IXUSR + | stat.S_IXGRP + | stat.S_IXOTH + ) + + return os.fspath(binary) + + +def main() -> None: + """Execute the bundled Kingfisher binary.""" + binary = get_binary_path() + + if sys.platform == "win32": + raise SystemExit(subprocess.call([binary, *sys.argv[1:]])) + + os.execvp(binary, [binary, *sys.argv[1:]]) diff --git a/pypi/kingfisher/__main__.py b/pypi/kingfisher/__main__.py new file mode 100644 index 0000000..868d99e --- /dev/null +++ b/pypi/kingfisher/__main__.py @@ -0,0 +1,4 @@ +from . 
import main + +if __name__ == "__main__": + main() diff --git a/pypi/kingfisher/_version.py b/pypi/kingfisher/_version.py new file mode 100644 index 0000000..10aa336 --- /dev/null +++ b/pypi/kingfisher/_version.py @@ -0,0 +1 @@ +__version__ = "1.2.3" diff --git a/pypi/pyproject.toml b/pypi/pyproject.toml new file mode 100644 index 0000000..f485a2e --- /dev/null +++ b/pypi/pyproject.toml @@ -0,0 +1,43 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "kingfisher-bin" +description = "Kingfisher secret scanning CLI (packaged binary)" +readme = "README.md" +requires-python = ">=3.8" +license = { text = "Apache-2.0" } +authors = [ + { name = "MongoDB" } +] +classifiers = [ + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3 :: Only", +] +dynamic = ["version"] + +[project.urls] +Homepage = "https://github.com/mongodb/kingfisher" +Repository = "https://github.com/mongodb/kingfisher" + +[project.scripts] +kingfisher = "kingfisher:main" + +[tool.hatch.version] +path = "kingfisher/_version.py" + +[tool.hatch.build] +ignore-vcs = true +include = [ + "kingfisher/**/*.py", + "kingfisher/bin/*", + "README.md", +] + +[tool.hatch.build.targets.wheel] +only-include = [ + "kingfisher", + "README.md", +] diff --git a/scripts/build-pypi-wheel.sh b/scripts/build-pypi-wheel.sh new file mode 100755 index 0000000..ea797bb --- /dev/null +++ b/scripts/build-pypi-wheel.sh @@ -0,0 +1,129 @@ +#!/usr/bin/env bash +set -euo pipefail + +usage() { + cat <<'USAGE' +Usage: + scripts/build-pypi-wheel.sh \ + --binary /path/to/kingfisher[.exe] \ + --version 1.2.3 \ + --plat-name manylinux_2_17_x86_64 \ + [--out-dir dist-pypi] + +Notes: + - Build the Rust binary for your target platform before running this script. 
+ - Requires: python -m build (pip install build) +USAGE +} + +binary_path="" +version="" +plat_name="" +out_dir="dist-pypi" + +while [[ $# -gt 0 ]]; do + case "$1" in + --binary) + binary_path="$2" + shift 2 + ;; + --version) + version="$2" + shift 2 + ;; + --plat-name) + plat_name="$2" + shift 2 + ;; + --out-dir) + out_dir="$2" + shift 2 + ;; + -h|--help) + usage + exit 0 + ;; + *) + echo "Unknown argument: $1" >&2 + usage + exit 1 + ;; + esac +done + +PYTHON="${PYTHON:-}" + +if [[ -z "${PYTHON}" ]]; then + if command -v python >/dev/null 2>&1; then + PYTHON="python" + elif command -v python3 >/dev/null 2>&1; then + PYTHON="python3" + else + echo "Python not found. Install Python 3 (or set PYTHON=/path/to/python3)." >&2 + exit 1 + fi +fi + +# Ensure build module exists +"$PYTHON" -m build --version >/dev/null 2>&1 || { + echo "Installing Python build backend (build)..." >&2 + "$PYTHON" -m pip install -U build >/dev/null +} + + +# Resolve binary_path to an absolute, normalized path (works without realpath) +if [[ -z "$binary_path" ]]; then + echo "Missing --binary" >&2 + exit 1 +fi + +if [[ "$binary_path" != /* ]]; then + # interpret relative to the directory where the user invoked the script + binary_path="$PWD/$binary_path" +fi + +# Normalize path and verify it exists +if ! binary_path="$(cd "$(dirname "$binary_path")" && pwd)/$(basename "$binary_path")"; then + echo "Failed to resolve binary path: $binary_path" >&2 + exit 1 +fi + +if [[ ! -f "$binary_path" ]]; then + echo "Binary not found: $binary_path" >&2 + echo "Tip: check for typos (e.g. 'kiingfisher' vs 'kingfisher')." 
>&2 + exit 1 +fi + + +root_dir="$(git rev-parse --show-toplevel)" +cd "$root_dir" + +pkg_dir="$root_dir/pypi" +bin_dir="$pkg_dir/kingfisher/bin" + +mkdir -p "$bin_dir" "$out_dir" + +binary_name="kingfisher" +if [[ "$binary_path" == *.exe ]]; then + binary_name="kingfisher.exe" +fi + +cp "$binary_path" "$bin_dir/$binary_name" +chmod +x "$bin_dir/$binary_name" || true +test -x "$bin_dir/$binary_name" || { + echo "Binary copy failed: $bin_dir/$binary_name" >&2 + exit 1 +} +ls -la "$bin_dir/$binary_name" + + +cat > "$pkg_dir/kingfisher/_version.py" < BTreeSet { vars.extend(extract_template_vars(body)); } } + Revocation::HttpMultiStep(multi_step) => { + // Extract variables from all steps + // Note: Variables extracted in step 1 are available in step 2, + // but we only track initial input variables here + for step in &multi_step.steps { + vars.extend(extract_template_vars(&step.request.url)); + for (key, value) in &step.request.headers { + vars.extend(extract_template_vars(key)); + vars.extend(extract_template_vars(value)); + } + if let Some(body) = &step.request.body { + vars.extend(extract_template_vars(body)); + } + } + } } vars @@ -191,6 +209,70 @@ async fn render_and_parse_url( reqwest::Url::parse(&rendered).map_err(|e| anyhow!("Invalid URL '{}': {}", rendered, e)) } +/// Extract a value from an HTTP response using the specified extractor. 
+fn extract_value_from_response( + extractor: &ResponseExtractor, + body: &str, + headers: &reqwest::header::HeaderMap, + status: &reqwest::StatusCode, +) -> Result { + match extractor { + ResponseExtractor::JsonPath { path } => { + let json: serde_json::Value = + serde_json::from_str(body).context("Response body is not valid JSON")?; + + // Simple JSONPath implementation supporting basic paths like: + // $.field, $.field.nested, $.array[0], $.array[0].field + let path_parts: Vec<&str> = path.trim_start_matches("$.").split('.').collect(); + + let mut current = &json; + for part in path_parts { + if let Some((array_name, index_str)) = part.split_once('[') { + let index: usize = + index_str.trim_end_matches(']').parse().context("Invalid array index")?; + + if !array_name.is_empty() { + current = current + .get(array_name) + .ok_or_else(|| anyhow!("Field '{}' not found", array_name))?; + } + + current = current + .get(index) + .ok_or_else(|| anyhow!("Array index {} not found", index))?; + } else { + current = + current.get(part).ok_or_else(|| anyhow!("Field '{}' not found", part))?; + } + } + + match current { + serde_json::Value::String(s) => Ok(s.clone()), + serde_json::Value::Number(n) => Ok(n.to_string()), + serde_json::Value::Bool(b) => Ok(b.to_string()), + _ => Ok(current.to_string()), + } + } + ResponseExtractor::Regex { pattern } => { + let re = Regex::new(pattern).context(format!("Invalid regex pattern: {}", pattern))?; + let caps = re + .captures(body) + .ok_or_else(|| anyhow!("Regex pattern did not match response body"))?; + + caps.get(1) + .map(|m| m.as_str().to_string()) + .ok_or_else(|| anyhow!("No capture group found in regex pattern")) + } + ResponseExtractor::Header { name } => headers + .get(name) + .and_then(|v| v.to_str().ok()) + .map(|s| s.to_string()) + .ok_or_else(|| anyhow!("Header '{}' not found in response", name)), + ResponseExtractor::Body => Ok(body.to_string()), + ResponseExtractor::StatusCode => Ok(status.as_u16().to_string()), + } +} + 
/// Execute HTTP revocation against the provided rule. async fn execute_http_revocation( http_revocation: &HttpValidation, @@ -225,8 +307,7 @@ async fn execute_http_revocation( let status = response.status(); let headers = response.headers().clone(); - let body = - response.text().await.unwrap_or_else(|e| format!("Failed to read response body: {}", e)); + let body = response.text().await.context("Failed to read response body")?; let display_body = if body.len() > 500 { format!("{}...", &body[..500]) } else { body.clone() }; @@ -247,6 +328,138 @@ async fn execute_http_revocation( }) } +/// Execute a single revocation step and extract variables from the response. +async fn execute_revocation_step( + step: &RevocationStep, + globals: &mut Object, + client: &Client, + parser: &liquid::Parser, + timeout: Duration, + retries: u32, + step_number: usize, +) -> Result<(reqwest::StatusCode, reqwest::header::HeaderMap, String)> { + let default_step_name = format!("step_{}", step_number); + let step_name = step.name.as_ref().map(|s| s.as_str()).unwrap_or(&default_step_name); + + debug!("Executing revocation step {}: {}", step_number, step_name); + + let url = render_and_parse_url(parser, globals, &step.request.url).await?; + debug!("Step {} URL: {}", step_number, url); + + let request_builder = build_request_builder( + client, + &step.request.method, + &url, + &step.request.headers, + &step.request.body, + timeout, + parser, + globals, + ) + .map_err(|e| anyhow!("Failed to build request for {}: {}", step_name, e))?; + + let backoff_min = Duration::from_millis(100); + let backoff_max = Duration::from_secs(2); + + let response = retry_request(request_builder, retries, backoff_min, backoff_max) + .await + .map_err(|e| anyhow!("Request failed for {}: {}", step_name, e))?; + + let status = response.status(); + let headers = response.headers().clone(); + let body = response + .text() + .await + .with_context(|| format!("Failed to read response body for {}", step_name))?; + + // 
Extract variables from the response if configured + if let Some(extractors) = &step.extract { + debug!("Extracting {} variable(s) from step {} response", extractors.len(), step_number); + + for (var_name, extractor) in extractors { + match extract_value_from_response(extractor, &body, &headers, &status) { + Ok(value) => { + debug!("Step {}: Extracted variable {} = '{}'", step_number, var_name, value); + globals.insert(var_name.to_uppercase().into(), Value::scalar(value)); + } + Err(e) => { + return Err(anyhow!( + "Failed to extract variable '{}' in step {}: {}", + var_name, + step_number, + e + )); + } + } + } + } + + Ok((status, headers, body)) +} + +/// Execute multi-step HTTP revocation. +async fn execute_multi_step_revocation( + multi_step: &HttpMultiStepRevocation, + globals: &mut Object, + client: &Client, + parser: &liquid::Parser, + timeout: Duration, + retries: u32, +) -> Result { + if multi_step.steps.is_empty() { + bail!("Multi-step revocation must have at least one step"); + } + + if multi_step.steps.len() > 2 { + bail!( + "Multi-step revocation supports a maximum of 2 steps, got {}", + multi_step.steps.len() + ); + } + + let num_steps = multi_step.steps.len(); + debug!("Executing {}-step revocation", num_steps); + + // Execute each step sequentially + for (i, step) in multi_step.steps.iter().enumerate() { + let step_number = i + 1; + let is_final_step = step_number == num_steps; + + let (status, headers, body) = + execute_revocation_step(step, globals, client, parser, timeout, retries, step_number) + .await?; + + if is_final_step { + // Final step: validate response to determine success + let display_body = + if body.len() > 500 { format!("{}...", &body[..500]) } else { body.clone() }; + + let matchers = step + .request + .response_matcher + .as_deref() + .ok_or_else(|| anyhow!("Final revocation step must have response_matcher"))?; + + let html_allowed = step.request.response_is_html; + let revoked = validate_response(matchers, &body, &status, 
&headers, html_allowed); + + return Ok(DirectRevocationResult { + rule_id: String::new(), + rule_name: String::new(), + revoked, + status_code: Some(status.as_u16()), + message: display_body, + }); + } else { + // Intermediate step: just log the response + debug!("Step {} completed with status {}", step_number, status); + } + } + + // This should never happen due to the checks above, but keep for safety + Err(anyhow!("Multi-step revocation did not complete")) +} + /// Run direct revocation of a secret against one or more rules. pub async fn run_direct_revocation( args: &RevokeArgs, @@ -405,6 +618,18 @@ pub async fn run_direct_revocation( ) .await? } + Revocation::HttpMultiStep(multi_step) => { + let mut globals_mut = globals.clone(); + execute_multi_step_revocation( + multi_step, + &mut globals_mut, + &client, + &parser, + timeout, + args.retries, + ) + .await? + } }; result.rule_id = rule_id; @@ -468,3 +693,499 @@ pub fn print_results(results: &[DirectRevocationResult], format: &str, use_color pub fn any_revoked(results: &[DirectRevocationResult]) -> bool { results.iter().any(|r| r.revoked) } + +#[cfg(test)] +mod tests { + use super::*; + use kingfisher_rules::{HttpValidation, ResponseExtractor, Revocation}; + use reqwest::header::{HeaderMap, HeaderValue}; + use reqwest::StatusCode; + use std::collections::{BTreeMap, BTreeSet}; + + // ---- extract_value_from_response: JsonPath ---- + + #[test] + fn jsonpath_simple_field() { + let ext = ResponseExtractor::JsonPath { path: "$.name".into() }; + let body = r#"{"name":"alice"}"#; + let result = extract_value_from_response(&ext, body, &HeaderMap::new(), &StatusCode::OK); + assert_eq!(result.unwrap(), "alice"); + } + + #[test] + fn jsonpath_nested_field() { + let ext = ResponseExtractor::JsonPath { path: "$.data.user.id".into() }; + let body = r#"{"data":{"user":{"id":"u-123"}}}"#; + let result = extract_value_from_response(&ext, body, &HeaderMap::new(), &StatusCode::OK); + assert_eq!(result.unwrap(), "u-123"); + } + + 
#[test] + fn jsonpath_numeric_value() { + let ext = ResponseExtractor::JsonPath { path: "$.count".into() }; + let body = r#"{"count":42}"#; + let result = extract_value_from_response(&ext, body, &HeaderMap::new(), &StatusCode::OK); + assert_eq!(result.unwrap(), "42"); + } + + #[test] + fn jsonpath_boolean_value() { + let ext = ResponseExtractor::JsonPath { path: "$.active".into() }; + let body = r#"{"active":true}"#; + let result = extract_value_from_response(&ext, body, &HeaderMap::new(), &StatusCode::OK); + assert_eq!(result.unwrap(), "true"); + } + + #[test] + fn jsonpath_array_index_zero() { + let ext = ResponseExtractor::JsonPath { path: "$.items[0]".into() }; + let body = r#"{"items":["first","second","third"]}"#; + let result = extract_value_from_response(&ext, body, &HeaderMap::new(), &StatusCode::OK); + assert_eq!(result.unwrap(), "first"); + } + + #[test] + fn jsonpath_array_index_nested_field() { + let ext = ResponseExtractor::JsonPath { path: "$.items[0].token_id".into() }; + let body = r#"{"items":[{"token_id":"tok-abc"},{"token_id":"tok-def"}]}"#; + let result = extract_value_from_response(&ext, body, &HeaderMap::new(), &StatusCode::OK); + assert_eq!(result.unwrap(), "tok-abc"); + } + + #[test] + fn jsonpath_array_second_element() { + let ext = ResponseExtractor::JsonPath { path: "$.data[1].name".into() }; + let body = r#"{"data":[{"name":"a"},{"name":"b"}]}"#; + let result = extract_value_from_response(&ext, body, &HeaderMap::new(), &StatusCode::OK); + assert_eq!(result.unwrap(), "b"); + } + + #[test] + fn jsonpath_missing_top_level_field() { + let ext = ResponseExtractor::JsonPath { path: "$.nonexistent".into() }; + let body = r#"{"name":"alice"}"#; + let result = extract_value_from_response(&ext, body, &HeaderMap::new(), &StatusCode::OK); + let err = result.unwrap_err(); + assert!(err.to_string().contains("not found"), "Expected 'not found', got: {}", err); + } + + #[test] + fn jsonpath_missing_nested_field() { + let ext = 
ResponseExtractor::JsonPath { path: "$.data.missing.deep".into() }; + let body = r#"{"data":{"other":"value"}}"#; + let result = extract_value_from_response(&ext, body, &HeaderMap::new(), &StatusCode::OK); + assert!(result.is_err()); + } + + #[test] + fn jsonpath_array_index_out_of_bounds() { + let ext = ResponseExtractor::JsonPath { path: "$.items[5]".into() }; + let body = r#"{"items":["only","two"]}"#; + let result = extract_value_from_response(&ext, body, &HeaderMap::new(), &StatusCode::OK); + let err = result.unwrap_err(); + assert!(err.to_string().contains("not found"), "Expected 'not found', got: {}", err); + } + + #[test] + fn jsonpath_invalid_json_body() { + let ext = ResponseExtractor::JsonPath { path: "$.field".into() }; + let body = "not json at all"; + let result = extract_value_from_response(&ext, body, &HeaderMap::new(), &StatusCode::OK); + let err = result.unwrap_err(); + assert!( + err.to_string().contains("not valid JSON"), + "Expected JSON parse error, got: {}", + err + ); + } + + #[test] + fn jsonpath_object_value_returns_json_string() { + let ext = ResponseExtractor::JsonPath { path: "$.nested".into() }; + let body = r#"{"nested":{"a":1,"b":2}}"#; + let result = extract_value_from_response(&ext, body, &HeaderMap::new(), &StatusCode::OK); + let val = result.unwrap(); + // When the value is not a string/number/bool, it should be serialized as JSON + let parsed: serde_json::Value = serde_json::from_str(&val).unwrap(); + assert_eq!(parsed["a"], 1); + assert_eq!(parsed["b"], 2); + } + + // ---- extract_value_from_response: Regex ---- + + #[test] + fn regex_with_capture_group() { + let ext = ResponseExtractor::Regex { pattern: r#"token_id":\s*"([^"]+)"#.into() }; + let body = r#"{"token_id": "abc-123-def"}"#; + let result = extract_value_from_response(&ext, body, &HeaderMap::new(), &StatusCode::OK); + assert_eq!(result.unwrap(), "abc-123-def"); + } + + #[test] + fn regex_no_capture_group() { + let ext = ResponseExtractor::Regex { pattern: 
r"token_id".into() }; + let body = r#"{"token_id": "abc"}"#; + let result = extract_value_from_response(&ext, body, &HeaderMap::new(), &StatusCode::OK); + let err = result.unwrap_err(); + assert!( + err.to_string().contains("No capture group"), + "Expected 'No capture group', got: {}", + err + ); + } + + #[test] + fn regex_pattern_does_not_match() { + let ext = ResponseExtractor::Regex { pattern: r"xyz_(\d+)".into() }; + let body = "no match here"; + let result = extract_value_from_response(&ext, body, &HeaderMap::new(), &StatusCode::OK); + let err = result.unwrap_err(); + assert!( + err.to_string().contains("did not match"), + "Expected 'did not match', got: {}", + err + ); + } + + #[test] + fn regex_invalid_pattern() { + let ext = ResponseExtractor::Regex { pattern: r"[invalid".into() }; + let body = "anything"; + let result = extract_value_from_response(&ext, body, &HeaderMap::new(), &StatusCode::OK); + assert!(result.is_err()); + } + + #[test] + fn regex_multiple_capture_groups_uses_first() { + let ext = ResponseExtractor::Regex { pattern: r"(\w+):(\w+)".into() }; + let body = "key:value"; + let result = extract_value_from_response(&ext, body, &HeaderMap::new(), &StatusCode::OK); + assert_eq!(result.unwrap(), "key"); + } + + // ---- extract_value_from_response: Header ---- + + #[test] + fn header_extraction_found() { + let ext = ResponseExtractor::Header { name: "x-request-id".into() }; + let mut headers = HeaderMap::new(); + headers.insert("x-request-id", HeaderValue::from_static("req-456")); + let result = extract_value_from_response(&ext, "", &headers, &StatusCode::OK); + assert_eq!(result.unwrap(), "req-456"); + } + + #[test] + fn header_extraction_missing() { + let ext = ResponseExtractor::Header { name: "x-missing".into() }; + let result = extract_value_from_response(&ext, "", &HeaderMap::new(), &StatusCode::OK); + let err = result.unwrap_err(); + assert!(err.to_string().contains("not found"), "Expected 'not found', got: {}", err); + } + + // ---- 
extract_value_from_response: Body ---- + + #[test] + fn body_extraction() { + let ext = ResponseExtractor::Body; + let body = "the full response body"; + let result = extract_value_from_response(&ext, body, &HeaderMap::new(), &StatusCode::OK); + assert_eq!(result.unwrap(), "the full response body"); + } + + #[test] + fn body_extraction_empty() { + let ext = ResponseExtractor::Body; + let result = extract_value_from_response(&ext, "", &HeaderMap::new(), &StatusCode::OK); + assert_eq!(result.unwrap(), ""); + } + + // ---- extract_value_from_response: StatusCode ---- + + #[test] + fn status_code_extraction_200() { + let ext = ResponseExtractor::StatusCode; + let result = extract_value_from_response(&ext, "", &HeaderMap::new(), &StatusCode::OK); + assert_eq!(result.unwrap(), "200"); + } + + #[test] + fn status_code_extraction_404() { + let ext = ResponseExtractor::StatusCode; + let result = + extract_value_from_response(&ext, "", &HeaderMap::new(), &StatusCode::NOT_FOUND); + assert_eq!(result.unwrap(), "404"); + } + + #[test] + fn status_code_extraction_201() { + let ext = ResponseExtractor::StatusCode; + let result = extract_value_from_response(&ext, "", &HeaderMap::new(), &StatusCode::CREATED); + assert_eq!(result.unwrap(), "201"); + } + + // ---- extract_template_vars ---- + + #[test] + fn template_vars_basic() { + let vars = extract_template_vars("https://api.example.com/{{ TOKEN }}/revoke"); + assert!(vars.contains("TOKEN")); + assert_eq!(vars.len(), 1); + } + + #[test] + fn template_vars_multiple() { + let vars = extract_template_vars( + "https://api.example.com/{{ AKID }}/keys/{{ KEY_ID }}?token={{ TOKEN }}", + ); + assert!(vars.contains("AKID")); + assert!(vars.contains("KEY_ID")); + assert!(vars.contains("TOKEN")); + assert_eq!(vars.len(), 3); + } + + #[test] + fn template_vars_with_filters() { + let vars = extract_template_vars("{{ TOKEN | base64_encode }}"); + assert!(vars.contains("TOKEN")); + assert_eq!(vars.len(), 1); + } + + #[test] + fn 
template_vars_no_vars() { + let vars = extract_template_vars("https://api.example.com/revoke"); + assert!(vars.is_empty()); + } + + #[test] + fn template_vars_case_normalization() { + // Variables are uppercased on extraction + let vars = extract_template_vars("{{ token }}"); + assert!(vars.contains("TOKEN")); + } + + // ---- build_globals ---- + + #[test] + fn build_globals_sets_token() { + let template_vars = BTreeSet::from(["TOKEN".to_string()]); + let globals = build_globals("my-secret", &[], &[], &template_vars).unwrap(); + assert_eq!(globals.get("TOKEN"), Some(Value::scalar("my-secret".to_string())).as_ref()); + } + + #[test] + fn build_globals_auto_assigns_args() { + let template_vars = + BTreeSet::from(["TOKEN".to_string(), "AKID".to_string(), "REGION".to_string()]); + let args = vec!["my-akid".to_string(), "us-east-1".to_string()]; + let globals = build_globals("secret", &args, &[], &template_vars).unwrap(); + + assert_eq!(globals.get("TOKEN"), Some(Value::scalar("secret".to_string())).as_ref()); + assert_eq!(globals.get("AKID"), Some(Value::scalar("my-akid".to_string())).as_ref()); + assert_eq!(globals.get("REGION"), Some(Value::scalar("us-east-1".to_string())).as_ref()); + } + + #[test] + fn build_globals_explicit_variables() { + let template_vars = BTreeSet::from(["TOKEN".to_string(), "AKID".to_string()]); + let vars = vec!["AKID=explicit-value".to_string()]; + let globals = build_globals("secret", &[], &vars, &template_vars).unwrap(); + + assert_eq!(globals.get("AKID"), Some(Value::scalar("explicit-value".to_string())).as_ref()); + } + + #[test] + fn build_globals_invalid_var_format() { + let template_vars = BTreeSet::new(); + let vars = vec!["NO_EQUALS_SIGN".to_string()]; + let result = build_globals("secret", &[], &vars, &template_vars); + assert!(result.is_err()); + assert!(result.unwrap_err().to_string().contains("Expected NAME=VALUE")); + } + + #[test] + fn build_globals_empty_var_name() { + let template_vars = BTreeSet::new(); + let vars = 
vec!["=value".to_string()]; + let result = build_globals("secret", &[], &vars, &template_vars); + assert!(result.is_err()); + assert!(result.unwrap_err().to_string().contains("cannot be empty")); + } + + // ---- extract_revocation_vars ---- + + #[test] + fn extract_revocation_vars_aws() { + let vars = extract_revocation_vars(&Revocation::AWS); + assert!(vars.contains("AKID")); + assert!(vars.contains("TOKEN")); + } + + #[test] + fn extract_revocation_vars_gcp() { + let vars = extract_revocation_vars(&Revocation::GCP); + assert!(vars.contains("TOKEN")); + } + + #[test] + fn extract_revocation_vars_http() { + use kingfisher_rules::HttpRequest; + + let http = HttpValidation { + request: HttpRequest { + method: "DELETE".into(), + url: "https://api.example.com/{{ AKID }}/{{ TOKEN }}".into(), + headers: BTreeMap::from([("Authorization".into(), "Bearer {{ TOKEN }}".into())]), + body: Some(r#"{"key":"{{ KEY_ID }}"}"#.into()), + response_matcher: None, + multipart: None, + response_is_html: false, + }, + multipart: None, + }; + let vars = extract_revocation_vars(&Revocation::Http(http)); + assert!(vars.contains("AKID")); + assert!(vars.contains("TOKEN")); + assert!(vars.contains("KEY_ID")); + } + + #[test] + fn extract_revocation_vars_multi_step() { + use kingfisher_rules::{HttpMultiStepRevocation, HttpRequest, RevocationStep}; + + let multi = HttpMultiStepRevocation { + steps: vec![ + RevocationStep { + name: Some("lookup".into()), + request: HttpRequest { + method: "GET".into(), + url: "https://api.example.com/{{ TOKEN }}/info".into(), + headers: BTreeMap::new(), + body: None, + response_matcher: None, + multipart: None, + response_is_html: false, + }, + multipart: None, + extract: None, + }, + RevocationStep { + name: Some("delete".into()), + request: HttpRequest { + method: "DELETE".into(), + url: "https://api.example.com/{{ KEY_ID }}".into(), + headers: BTreeMap::from([("X-Api-Key".into(), "{{ API_KEY }}".into())]), + body: None, + response_matcher: None, + multipart: 
None, + response_is_html: false, + }, + multipart: None, + extract: None, + }, + ], + }; + let vars = extract_revocation_vars(&Revocation::HttpMultiStep(multi)); + assert!(vars.contains("TOKEN")); + assert!(vars.contains("KEY_ID")); + assert!(vars.contains("API_KEY")); + } + + // ---- find_rules_by_selector ---- + + fn make_test_rule(id: &str, name: &str) -> Rule { + Rule::new(kingfisher_rules::RuleSyntax { + name: name.to_string(), + id: id.to_string(), + pattern: r"\btest\b".to_string(), + min_entropy: 0.0, + confidence: Default::default(), + visible: true, + examples: vec![], + negative_examples: vec![], + references: vec![], + validation: None, + revocation: None, + depends_on_rule: vec![], + pattern_requirements: None, + tls_mode: None, + }) + } + + #[test] + fn find_rules_exact_match() { + let mut rules = BTreeMap::new(); + rules.insert( + "kingfisher.github.1".into(), + make_test_rule("kingfisher.github.1", "GitHub Token"), + ); + rules.insert( + "kingfisher.gitlab.1".into(), + make_test_rule("kingfisher.gitlab.1", "GitLab Token"), + ); + + let matched = find_rules_by_selector("kingfisher.github.1", &rules).unwrap(); + assert_eq!(matched.len(), 1); + assert_eq!(matched[0].id(), "kingfisher.github.1"); + } + + #[test] + fn find_rules_prefix_match() { + let mut rules = BTreeMap::new(); + rules.insert( + "kingfisher.github.1".into(), + make_test_rule("kingfisher.github.1", "GitHub PAT"), + ); + rules.insert( + "kingfisher.github.2".into(), + make_test_rule("kingfisher.github.2", "GitHub App"), + ); + rules.insert( + "kingfisher.gitlab.1".into(), + make_test_rule("kingfisher.gitlab.1", "GitLab Token"), + ); + + let matched = find_rules_by_selector("kingfisher.github", &rules).unwrap(); + assert_eq!(matched.len(), 2); + } + + #[test] + fn find_rules_auto_prefix_kingfisher() { + let mut rules = BTreeMap::new(); + rules.insert( + "kingfisher.github.1".into(), + make_test_rule("kingfisher.github.1", "GitHub Token"), + ); + + // Searching without "kingfisher." 
prefix should still find the rule + let matched = find_rules_by_selector("github.1", &rules).unwrap(); + assert_eq!(matched.len(), 1); + assert_eq!(matched[0].id(), "kingfisher.github.1"); + } + + #[test] + fn find_rules_no_match() { + let mut rules = BTreeMap::new(); + rules.insert( + "kingfisher.github.1".into(), + make_test_rule("kingfisher.github.1", "GitHub Token"), + ); + + let result = find_rules_by_selector("nonexistent", &rules); + assert!(result.is_err()); + assert!(result.unwrap_err().to_string().contains("No rule found")); + } + + #[test] + fn find_rules_prefix_boundary() { + // "kingfisher.git" should NOT match "kingfisher.github.1" because + // "github" does not start after a '.' boundary following "git" + let mut rules = BTreeMap::new(); + rules.insert( + "kingfisher.github.1".into(), + make_test_rule("kingfisher.github.1", "GitHub Token"), + ); + + let result = find_rules_by_selector("kingfisher.git", &rules); + assert!(result.is_err(), "Prefix 'kingfisher.git' should not match 'kingfisher.github.1'"); + } +} diff --git a/src/reporter.rs b/src/reporter.rs index 27d4a3b..9352774 100644 --- a/src/reporter.rs +++ b/src/reporter.rs @@ -121,6 +121,15 @@ fn build_revoke_command( escape_for_shell(snippet) )) } + Revocation::HttpMultiStep(_) => { + // Multi-step HTTP revocation with dependent variables + Some(format!( + "kingfisher revoke --rule {} {}{}", + rule_id, + var_args, + escape_for_shell(snippet) + )) + } } } diff --git a/src/validation.rs b/src/validation.rs index 5279772..0671e3f 100644 --- a/src/validation.rs +++ b/src/validation.rs @@ -7,7 +7,6 @@ use std::{ }; use anyhow::Result; -use crossbeam_skiplist::SkipMap; use dashmap::DashMap; use http::StatusCode; use liquid::Object; @@ -23,24 +22,20 @@ use crate::{ location::OffsetSpan, matcher::{OwnedBlobMatch, SerializableCaptures}, rules::rule::Validation, - validation_body::{self, ValidationResponseBody}, + validation_body::{self}, }; // Re-export TlsMode from kingfisher_rules for use in 
client_for_rule pub use kingfisher_rules::TlsMode as RuleTlsMode; -pub mod aws; -pub mod azure; -pub mod coinbase; -pub mod gcp; -pub mod httpvalidation; -pub mod jdbc; -pub mod jwt; -pub mod mongodb; -pub mod mysql; -pub mod postgres; -pub use mysql::validate_mysql; -pub use postgres::validate_postgres; +pub use kingfisher_scanner::validation::aws; +pub use kingfisher_scanner::validation::http_validation as httpvalidation; +pub use kingfisher_scanner::validation::mysql::validate_mysql; +pub use kingfisher_scanner::validation::postgres::validate_postgres; +pub use kingfisher_scanner::validation::CachedResponse; +pub use kingfisher_scanner::validation::{ + azure, coinbase, gcp, jdbc, jwt, mongodb, mysql, postgres, +}; pub mod utils; const VALIDATION_CACHE_SECONDS: u64 = 1200; // 20 minutes @@ -88,7 +83,8 @@ pub fn set_user_agent_suffix>(suffix: Option) { return; } - let _ = USER_AGENT_SUFFIX.set(trimmed); + let _ = USER_AGENT_SUFFIX.set(trimmed.clone()); + kingfisher_scanner::validation::set_user_agent_suffix(Some(trimmed)); } } @@ -158,7 +154,7 @@ impl ValidationClients { } // Use SkipMap-based cache instead of a mutex-wrapped FxHashMap. -type Cache = Arc>; +type Cache = kingfisher_scanner::validation::Cache; /// Returns an opaque 64-bit key for internal validation deduplication. /// @@ -227,24 +223,6 @@ pub fn is_parseable_mysql_uri(uri: &str) -> bool { mysql::parse_mysql_url(uri).is_ok() } -#[derive(Clone)] -pub struct CachedResponse { - pub body: ValidationResponseBody, - pub status: StatusCode, - pub is_valid: bool, - pub timestamp: Instant, -} - -impl CachedResponse { - pub fn new(body: ValidationResponseBody, status: StatusCode, is_valid: bool) -> Self { - Self { body, status, is_valid, timestamp: Instant::now() } - } - - pub fn is_still_valid(&self, cache_duration: Duration) -> bool { - self.timestamp.elapsed() < cache_duration - } -} - /// Collect dependent variables and missing dependencies from the provided matches. 
pub fn collect_variables_and_dependencies( matches: &[OwnedBlobMatch], diff --git a/src/validation/aws.rs b/src/validation/aws.rs deleted file mode 100644 index b597d82..0000000 --- a/src/validation/aws.rs +++ /dev/null @@ -1,453 +0,0 @@ -use std::{collections::HashSet, sync::RwLock, time::Duration}; - -use anyhow::{anyhow, Result}; -use aws_config::{retry::RetryConfig, BehaviorVersion, SdkConfig}; -use aws_credential_types::Credentials; -use aws_sdk_iam::{ - config::Builder as IamConfigBuilder, error::SdkError as IamSdkError, - operation::update_access_key::UpdateAccessKeyError, types::StatusType, Client as IamClient, -}; -use aws_sdk_sts::{ - config::Builder as StsConfigBuilder, error::SdkError, - operation::get_caller_identity::GetCallerIdentityError, Client as StsClient, -}; -use aws_smithy_http_client::{ - proxy::ProxyConfig, tls, Builder as HttpClientBuilder, ConnectorBuilder, -}; -use aws_smithy_runtime_api::{ - box_error::BoxError, - client::{ - http::SharedHttpClient, - interceptors::{context::BeforeTransmitInterceptorContextMut, Intercept}, - runtime_components::RuntimeComponents, - }, -}; -use aws_smithy_types::config_bag::ConfigBag; -use aws_types::region::Region; -use base32::Alphabet; -use byteorder::{BigEndian, ByteOrder}; -use http::{ - header::{HeaderValue, USER_AGENT}, - StatusCode, -}; -use once_cell::sync::{Lazy, OnceCell}; -use rand::{rng, Rng}; -use regex::Regex; -use tokio::{ - sync::Semaphore, - time::{sleep, timeout}, -}; - -use crate::validation::GLOBAL_USER_AGENT; - -static AWS_VALIDATION_SEMAPHORE: OnceCell = OnceCell::new(); -const BUILTIN_SKIP_ACCOUNT_IDS: &[&str] = &[ - "052310077262", - "171436882533", - "528757803018", - "534261010715", - "538784191382", - "595918472158", - "729780141977", - "893192397702", - "992382622183", -]; - -static AWS_SKIP_ACCOUNT_IDS: Lazy>> = Lazy::new(|| { - let mut set = HashSet::new(); - set.extend(BUILTIN_SKIP_ACCOUNT_IDS.iter().map(|id| id.to_string())); - RwLock::new(set) -}); - -fn 
build_http_client() -> SharedHttpClient { - HttpClientBuilder::new().build_with_connector_fn(|settings, runtime_components| { - let mut conn_builder = ConnectorBuilder::default() - .tls_provider(tls::Provider::Rustls(tls::rustls_provider::CryptoMode::AwsLc)); - - conn_builder.set_connector_settings(settings.cloned()); - if let Some(components) = runtime_components { - conn_builder.set_sleep_impl(components.sleep_impl()); - } - conn_builder.set_proxy_config(Some(ProxyConfig::from_env())); - conn_builder.build() - }) -} - -async fn build_base_config(credentials: Credentials) -> SdkConfig { - let retry_config = RetryConfig::adaptive().with_max_attempts(3); - aws_config::defaults(BehaviorVersion::latest()) - .region(Region::new("us-east-1")) - .credentials_provider(credentials) - .http_client(build_http_client()) - .retry_config(retry_config) - .load() - .await -} - -fn extract_account_id(input: &str) -> Option { - let trimmed = input.trim(); - if trimmed.len() == 12 && trimmed.chars().all(|c| c.is_ascii_digit()) { - return Some(trimmed.to_string()); - } - - static ACCOUNT_ID_RE: Lazy = Lazy::new(|| Regex::new(r"(\d{12})").expect("valid regex")); - ACCOUNT_ID_RE.captures(trimmed).and_then(|caps| caps.get(1)).map(|m| m.as_str().to_string()) -} - -/// Set the maximum number of concurrent AWS validations. Call before first use. 
-pub fn set_aws_validation_concurrency(max: usize) { - AWS_VALIDATION_SEMAPHORE.set(Semaphore::new(max)).ok(); -} - -fn aws_validation_semaphore() -> &'static Semaphore { - AWS_VALIDATION_SEMAPHORE.get_or_init(|| Semaphore::new(15)) -} - -pub fn set_aws_skip_account_ids(ids: I) -where - I: IntoIterator, - S: Into, -{ - let mut guard = match AWS_SKIP_ACCOUNT_IDS.write() { - Ok(g) => g, - Err(poisoned) => poisoned.into_inner(), - }; - guard.clear(); - - guard.extend(BUILTIN_SKIP_ACCOUNT_IDS.iter().map(|id| id.to_string())); - - for raw in ids.into_iter() { - let value = raw.into(); - if value.trim().is_empty() { - continue; - } - if let Some(normalized) = extract_account_id(&value) { - guard.insert(normalized); - } else { - tracing::warn!("Ignoring invalid AWS account ID in skip list: {value}"); - } - } -} - -pub fn should_skip_aws_validation(access_key_id: &str) -> Option { - let guard = AWS_SKIP_ACCOUNT_IDS.read().ok()?; - if guard.is_empty() { - return None; - } - - let account = aws_key_to_account_number(access_key_id).ok()?; - if guard.contains(&account) { - Some(account) - } else { - None - } -} - -#[derive(Debug)] -struct UaInterceptor; - -impl Intercept for UaInterceptor { - fn name(&self) -> &'static str { - "ua" - } - - fn modify_before_transmit( - &self, - context: &mut BeforeTransmitInterceptorContextMut<'_>, - _rc: &RuntimeComponents, - _cfg: &mut ConfigBag, - ) -> std::result::Result<(), BoxError> { - let req = context.request_mut(); - req.headers_mut().insert( - USER_AGENT, - HeaderValue::from_str(GLOBAL_USER_AGENT.as_str()) - .map_err(|e| format!("invalid USER_AGENT header: {e}"))?, - ); - Ok(()) - } -} - -/// Generate a standardized cache key for AWS validation attempts -pub fn generate_aws_cache_key(aws_access_key_id: &str, aws_secret_access_key: &str) -> String { - use sha1::{Digest, Sha1}; - let mut hasher = Sha1::new(); - hasher.update(aws_access_key_id.as_bytes()); - hasher.update(b"\0"); - hasher.update(aws_secret_access_key.as_bytes()); - 
format!("AWS:{:x}", hasher.finalize()) -} - -// Validate AWS credentials before attempting validation -pub fn validate_aws_credentials_input(access_key_id: &str, secret_key: &str) -> Result<(), String> { - // Validate access key ID format (typically starts with "AKIA" and is 20 chars) - if !access_key_id.starts_with("AKIA") || access_key_id.len() != 20 { - return Err("Invalid AWS access key ID format".to_string()); - } - // Validate secret key format (should be at least 40 chars) - if secret_key.len() < 40 { - return Err("Invalid AWS secret key format".to_string()); - } - // Check for invalid characters - if !access_key_id.chars().all(|c| c.is_ascii_alphanumeric()) { - return Err("AWS access key ID contains invalid characters".to_string()); - } - - Ok(()) -} - -fn is_throttling_or_transient(e: &SdkError) -> bool { - match e { - SdkError::ServiceError(ctx) => { - let code = ctx.err().meta().code().unwrap_or_default(); - let status: StatusCode = ctx.raw().status().into(); - code.contains("Throttl") - || status == StatusCode::TOO_MANY_REQUESTS - || status == StatusCode::SERVICE_UNAVAILABLE - } - SdkError::DispatchFailure(df) => df.is_timeout() || df.is_io(), - SdkError::ResponseError(ctx) => { - let status: StatusCode = ctx.raw().status().into(); - status == StatusCode::TOO_MANY_REQUESTS || status == StatusCode::SERVICE_UNAVAILABLE - } - _ => false, - } -} - -fn is_iam_throttling_or_transient(e: &IamSdkError) -> bool { - match e { - IamSdkError::ServiceError(ctx) => { - let code = ctx.err().meta().code().unwrap_or_default(); - let status: StatusCode = ctx.raw().status().into(); - code.contains("Throttl") - || status == StatusCode::TOO_MANY_REQUESTS - || status == StatusCode::SERVICE_UNAVAILABLE - } - IamSdkError::DispatchFailure(df) => df.is_timeout() || df.is_io(), - IamSdkError::ResponseError(ctx) => { - let status: StatusCode = ctx.raw().status().into(); - status == StatusCode::TOO_MANY_REQUESTS || status == StatusCode::SERVICE_UNAVAILABLE - } - _ => false, - } -} 
- -pub async fn revoke_aws_access_key( - aws_access_key_id: &str, - aws_secret_access_key: &str, -) -> Result<(bool, String)> { - // Create static credentials - let credentials = Credentials::new( - aws_access_key_id, - aws_secret_access_key, - None, // session token - None, // expiry - "static", // provider name - ); - let config = build_base_config(credentials).await; - - // Create IAM client - let iam_config = IamConfigBuilder::from(&config).interceptor(UaInterceptor).build(); - let iam_client = IamClient::from_conf(iam_config); - - const MAX_ATTEMPTS: usize = 3; - const ATTEMPT_TIMEOUT: Duration = Duration::from_secs(5); - - for attempt in 1..=MAX_ATTEMPTS { - let result = timeout( - ATTEMPT_TIMEOUT, - iam_client - .update_access_key() - .access_key_id(aws_access_key_id) - .status(StatusType::Inactive) - .send(), - ) - .await; - - match result { - Ok(Ok(_)) => { - return Ok((true, "AWS access key set to Inactive".to_string())); - } - Ok(Err(e)) => { - if is_iam_throttling_or_transient(&e) { - if attempt == MAX_ATTEMPTS { - return Err(anyhow!("AWS revocation failed: {}", e)); - } - } else { - return Ok((false, e.to_string())); - } - } - Err(_) => { - if attempt == MAX_ATTEMPTS { - return Err(anyhow!("AWS revocation timed out")); - } - } - } - - let max_delay = 100u64 * 2u64.pow((attempt - 1) as u32); - let sleep_ms = rng().random_range(0..=max_delay); - sleep(Duration::from_millis(sleep_ms)).await; - } - - Err(anyhow!("AWS revocation failed")) -} -pub async fn validate_aws_credentials( - aws_access_key_id: &str, - aws_secret_access_key: &str, -) -> Result<(bool, String)> { - let _permit = aws_validation_semaphore().acquire().await.expect("semaphore closed"); - - // Create static credentials - let credentials = Credentials::new( - aws_access_key_id, - aws_secret_access_key, - None, // session token - None, // expiry - "static", // provider name - ); - let config = build_base_config(credentials).await; - - // Create STS client - let sts_config = 
StsConfigBuilder::from(&config).interceptor(UaInterceptor).build(); - let sts_client = StsClient::from_conf(sts_config); - - const MAX_ATTEMPTS: usize = 3; - const ATTEMPT_TIMEOUT: Duration = Duration::from_secs(5); - - for attempt in 1..=MAX_ATTEMPTS { - let result = timeout(ATTEMPT_TIMEOUT, sts_client.get_caller_identity().send()).await; - match result { - Ok(Ok(identity)) => { - let arn = identity.arn.unwrap_or_else(|| "Unknown".to_string()); - return Ok((true, arn)); - } - Ok(Err(e)) => { - if is_throttling_or_transient(&e) { - if attempt == MAX_ATTEMPTS { - return Err(anyhow!("AWS validation failed: {}", e)); - } - } else { - return Ok((false, e.to_string())); - } - } - Err(_) => { - if attempt == MAX_ATTEMPTS { - return Err(anyhow!("AWS validation timed out")); - } - } - } - let max_delay = 100u64 * 2u64.pow((attempt - 1) as u32); - let sleep_ms = rng().random_range(0..=max_delay); - sleep(Duration::from_millis(sleep_ms)).await; - } - Err(anyhow!("AWS validation failed")) -} - -/// Converts an AWS Key ID to an AWS Account Number. -/// It assumes that the Key ID has a specific format and extracts the account -/// number encoded within it. 
Reference: https://medium.com/@TalBeerySec/a-short-note-on-aws-key-id-f88cc4317489 -pub fn aws_key_to_account_number(aws_key_id: &str) -> Result> { - // Ensure the AWS Key ID is at least 5 characters long (since we'll access index - // 4) - if aws_key_id.len() < 5 { - return Err("AWSKeyID is too short".into()); - } - // Check if the 5th character is 'I' or 'J' - let fifth_char = aws_key_id.as_bytes()[4] as char; - if fifth_char == 'I' || fifth_char == 'J' { - let err_msg = - format!("Not possible to retrieve account number for {} keys", &aws_key_id[..5]); - return Err(err_msg.into()); - } - // Remove the Key ID prefix (first 4 characters) - let trimmed_aws_key_id = &aws_key_id[4..]; - // Decode the trimmed Key ID from base32, ensuring it's in uppercase - let decoded = - base32::decode(Alphabet::Rfc4648 { padding: false }, &trimmed_aws_key_id.to_uppercase()) - .ok_or("Error decoding AWSKeyID")?; - if decoded.len() < 6 { - return Err("Decoded AWSKeyID is too short".into()); - } - // Create an 8-byte array initialized to zeros - let mut data = [0u8; 8]; - // Copy decoded[0..6] into data[2..8] - data[2..8].copy_from_slice(&decoded[0..6]); - // Interpret data as a big-endian u64 - let z = BigEndian::read_u64(&data); - // Define the mask - const MASK: u64 = 0x7FFFFFFFFF80; - // Calculate the account number - let account_num = (z & MASK) >> 7; - // Return the account number formatted as a 12-digit string - Ok(format!("{:012}", account_num)) -} - -#[cfg(test)] -mod tests { - use super::*; - use once_cell::sync::Lazy; - use std::sync::Mutex; - - static TEST_GUARD: Lazy> = Lazy::new(|| Mutex::new(())); - - #[test] - fn skip_account_list_normalizes_inputs() { - let _lock = TEST_GUARD.lock().unwrap(); - - set_aws_skip_account_ids([ - " 052310077262 ", - "arn:aws:iam::171436882533:role/demo", - "invalid", - ]); - - let guard = AWS_SKIP_ACCOUNT_IDS.read().unwrap(); - assert!(guard.contains("052310077262")); - assert!(guard.contains("171436882533")); - assert_eq!(guard.len(), 
BUILTIN_SKIP_ACCOUNT_IDS.len()); - drop(guard); - - set_aws_skip_account_ids(Vec::::new()); - } - - #[test] - fn should_skip_when_account_matches() { - let _lock = TEST_GUARD.lock().unwrap(); - - set_aws_skip_account_ids(["534261010715"]); - assert_eq!( - should_skip_aws_validation("AKIAXYZDQCEN4B6JSJQI"), - Some("534261010715".to_string()) - ); - - set_aws_skip_account_ids(Vec::::new()); - } - - #[test] - fn builtin_canary_accounts_are_preseeded() { - let _lock = TEST_GUARD.lock().unwrap(); - - set_aws_skip_account_ids(Vec::::new()); - assert_eq!( - should_skip_aws_validation("AKIAXYZDQCEN4B6JSJQI"), - Some("534261010715".to_string()) - ); - - set_aws_skip_account_ids(Vec::::new()); - } - - #[test] - fn duplicate_accounts_are_deduplicated() { - let _lock = TEST_GUARD.lock().unwrap(); - - set_aws_skip_account_ids([ - "534261010715", - "arn:aws:iam::534261010715:user/canarytokens", - " 534261010715 ", - ]); - - let guard = AWS_SKIP_ACCOUNT_IDS.read().unwrap(); - assert_eq!(guard.iter().filter(|id| id.as_str() == "534261010715").count(), 1); - drop(guard); - - set_aws_skip_account_ids(Vec::::new()); - } -} diff --git a/src/validation/httpvalidation.rs b/src/validation/httpvalidation.rs deleted file mode 100644 index c08c890..0000000 --- a/src/validation/httpvalidation.rs +++ /dev/null @@ -1,646 +0,0 @@ -use std::{collections::BTreeMap, future::Future, str::FromStr, time::Duration}; - -use crate::validation::GLOBAL_USER_AGENT; -use anyhow::{anyhow, Error, Result}; -use http::StatusCode; -use liquid::Object; -use quick_xml::de::from_str as xml_from_str; -use reqwest::{ - header, - header::{HeaderMap, HeaderName, HeaderValue}, - Client, Method, RequestBuilder, Response, Url, -}; -use serde::de::IgnoredAny; -use sha1::{Digest, Sha1}; -use tokio::time::sleep; -use tracing::debug; - -use crate::rules::rule::ResponseMatcher; - -/// Build a deterministic cache key from the immutable parts of an HTTP request. 
-/// -/// * `method` – case-insensitive HTTP verb (“GET”, “POST”…) -/// * `url` – fully-qualified URL (any query string should already be present) -/// * `headers` – *logical* headers you intend to send (template-rendered, lower-level additions -/// such as `User-Agent` may be appended by the caller) -/// -/// The parts are concatenated with `\0` separators before hashing to avoid accidental -/// collisions such as `"GET/foo"` vs `"GE" + "T/foo"`. -pub fn generate_http_cache_key_parts( - method: &str, - url: &Url, - headers: &BTreeMap, - body: Option<&str>, -) -> String { - let method = method.to_uppercase(); // ensure "get" == "GET" - let url = url.as_str(); // canonical form from `reqwest::Url` - - let mut hasher = Sha1::new(); - hasher.update(method.as_bytes()); - hasher.update(b"\0"); - hasher.update(url.as_bytes()); - hasher.update(b"\0"); - - // Collect headers sorted lexicographically (BTreeMap is already sorted), - // then hash as `key:value\0` - for (k, v) in headers { - hasher.update(k.as_bytes()); - hasher.update(b":"); - hasher.update(v.as_bytes()); - hasher.update(b"\0"); - } - - // Include the request body in the cache key if present - if let Some(b) = body { - hasher.update(b"BODY\0"); - hasher.update(b.as_bytes()); - hasher.update(b"\0"); - } - - // Hex-encode and prefix so callers can tell this key came from HTTP logic - format!("HTTP:{:x}", hasher.finalize()) -} - -/// Parse an HTTP method from a string. -pub fn parse_http_method(method_str: &str) -> Result { - Method::from_str(method_str).map_err(|_| format!("Invalid HTTP method: {}", method_str)) -} - -/// Build a reqwest RequestBuilder using the provided parameters. 
-pub fn build_request_builder( - client: &Client, - method_str: &str, - url: &Url, - headers: &BTreeMap, - body: &Option, - timeout: Duration, - parser: &liquid::Parser, - globals: &liquid::Object, -) -> Result { - let method = parse_http_method(method_str).map_err(|err_msg| { - debug!("{}", err_msg); - err_msg - })?; - let mut request_builder = client.request(method, url.clone()).timeout(timeout); - let custom_headers = process_headers(headers, parser, globals, url) - .map_err(|e| format!("Error processing headers: {}", e))?; - - // Prepare a standard set of headers. - let user_agent = GLOBAL_USER_AGENT.as_str(); - let standard_headers = [ - (header::USER_AGENT, user_agent), - ( - header::ACCEPT, - "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8", - ), - (header::ACCEPT_LANGUAGE, "en-US,en;q=0.5"), - (header::ACCEPT_ENCODING, "gzip, deflate, br"), - (header::CONNECTION, "keep-alive"), - ]; - // Start with the standard headers and then overlay any custom headers so - // caller-specified values take precedence over defaults. - let mut combined_headers = HeaderMap::new(); - for (name, value) in &standard_headers { - if let Ok(hv) = HeaderValue::from_str(value) { - combined_headers.insert(name.clone(), hv); - } - } - for (name, value) in custom_headers.iter() { - combined_headers.insert(name.clone(), value.clone()); - } - request_builder = request_builder.headers(combined_headers); - - // If a body template is provided, parse and render it - if let Some(body_template) = body { - let template = parser - .parse(body_template) - .map_err(|e| format!("Error parsing body template: {}", e))?; - let rendered_body = template - .render(globals) - .map_err(|e| format!("Error rendering body template: {}", e))?; - request_builder = request_builder.body(rendered_body); - } - - Ok(request_builder) -} - -/// Process headers from a BTreeMap, rendering any Liquid templates. 
-pub fn process_headers( - headers: &BTreeMap, - parser: &liquid::Parser, - globals: &Object, - url: &Url, -) -> Result { - let mut headers_map = HeaderMap::new(); - for (key, value) in headers { - // Render the template - let template = match parser.parse(value) { - Ok(t) => t, - Err(e) => { - debug!("Error parsing Liquid template for '{}': {}", key, e); - continue; - } - }; - - let header_value = match template.render(globals) { - Ok(s) => s, - Err(e) => { - debug!( - "Failed to render header template. URL = <{}> | Key '{}': {}", - url.as_str(), - key, - e - ); - continue; - } - }; - // Clean key and value - let cleaned_key = key.trim().replace(&['\n', '\r'][..], ""); - let cleaned_value = header_value.trim().replace(&['\n', '\r'][..], ""); - // Validate header name - let name = match HeaderName::from_str(&cleaned_key) { - Ok(n) => n, - Err(e) => { - debug!( - "Invalid header name. URL = <{}> | Key '{}': {}", - url.as_str(), - cleaned_key, - e - ); - continue; - } - }; - // Validate header value - let value = match HeaderValue::from_str(&cleaned_value) { - Ok(v) => v, - Err(e) => { - debug!( - "Invalid header value. URL = <{}> | Value '{}': {}", - url.as_str(), - cleaned_value, - e - ); - continue; - } - }; - headers_map.insert(name, value); - } - Ok(headers_map) -} - -/// Exponential‐backoff retry helper that always returns `Result` -async fn retry_with_backoff( - mut operation: F, - is_retryable: impl Fn(&Result, usize) -> bool, - max_retries: usize, - backoff_min: Duration, - backoff_max: Duration, -) -> Result -where - F: FnMut() -> Fut, - Fut: Future>, -{ - let mut retries = 0; - while retries <= max_retries { - let result = operation().await; - // If this result is *not* retryable, return it directly (Ok or Err). 
- if !is_retryable(&result, retries) { - return result; - } - retries += 1; - if retries > max_retries { - break; - } - let backoff = backoff_min.saturating_mul(2u32.pow(retries as u32)).min(backoff_max); - sleep(backoff).await; - } - Err(anyhow!("Max retries reached")) -} - -pub async fn retry_multipart_request( - mut build_request: F, - max_retries: usize, - backoff_min: Duration, - backoff_max: Duration, -) -> Result -where - F: FnMut() -> Fut, - Fut: Future, -{ - retry_with_backoff( - // 1) operation: build + send - move || { - let fut = build_request(); - async move { - let rb = fut.await; - rb.send().await.map_err(Error::from) - } - }, - // 2) same retry logic - |res: &Result<_, Error>, _attempt| match res { - Ok(resp) - if matches!( - resp.status(), - StatusCode::BAD_GATEWAY - | StatusCode::SERVICE_UNAVAILABLE - | StatusCode::GATEWAY_TIMEOUT - ) => - { - true - } - Err(_) => true, - _ => false, - }, - max_retries, - backoff_min, - backoff_max, - ) - .await -} - -pub async fn retry_request( - request_builder: RequestBuilder, - max_retries: u32, - backoff_min: Duration, - backoff_max: Duration, -) -> Result { - retry_with_backoff( - // 1) operation: clone + send, yielding Result - move || { - let rb = - request_builder.try_clone().expect("retry_request: failed to clone RequestBuilder"); - async move { rb.send().await.map_err(Error::from) } - }, - // 2) is_retryable: transient HTTP status or network error - |res: &Result<_, Error>, _attempt| match res { - Ok(resp) - if matches!( - resp.status(), - StatusCode::BAD_GATEWAY - | StatusCode::SERVICE_UNAVAILABLE - | StatusCode::GATEWAY_TIMEOUT - ) => - { - true - } - Err(_) => true, - _ => false, - }, - max_retries as usize, - backoff_min, - backoff_max, - ) - .await -} - -/// Return `true` when the body is very likely HTML. -fn body_looks_like_html(body: &str, headers: &HeaderMap) -> bool { - // ---- 1. 
header heuristic --------------------------------------------- - let header_says_html = headers - .get("content-type") - .and_then(|v| v.to_str().ok()) - .map(|ct| { - let ct = ct.to_ascii_lowercase(); - ct.contains("text/html") || ct.contains("application/xhtml") - }) - .unwrap_or(false); - - // ---- 2. early-body scan (<=1024 bytes) -------------------------------- - // Find the last character boundary at or before 1024 bytes to avoid UTF-8 boundary issues - // Walk backward at most 3 bytes (UTF-8 max char size is 4 bytes) to find valid boundary - let mut end = 1024.min(body.len()); - while end > 0 && !body.is_char_boundary(end) { - end -= 1; - } - let probe = &body[..end]; - // Trim any leading whitespace so we still catch HTML that starts after newlines/indentation. - let trimmed = probe.trim_start_matches(|c: char| c.is_whitespace()); - let probe = trimmed.to_ascii_lowercase(); - let body_looks_htmlish = probe.starts_with('<') && probe.contains(" bool { - // Since match_all_types is always true here, we simply require all word and status conditions - // to hold. - let word_ok = matchers - .iter() - .filter_map(|m| { - if let ResponseMatcher::WordMatch { words, match_all_words, negative, .. } = m { - let raw = if *match_all_words { - words.iter().all(|w| body.contains(w)) - } else { - words.iter().any(|w| body.contains(w)) - }; - Some(if *negative { !raw } else { raw }) - } else { - None - } - }) - .all(|b| b); - - let status_ok = matchers - .iter() - .filter_map(|m| { - if let ResponseMatcher::StatusMatch { - status: expected, - match_all_status, - negative, - .. 
- } = m - { - let raw = if *match_all_status { - expected.iter().all(|s| s.to_string() == status.as_str()) - } else { - expected.iter().any(|s| s.to_string() == status.as_str()) - }; - Some(if *negative { !raw } else { raw }) - } else { - None - } - }) - .all(|b| b); - - // ── Header checks ────────────────────────────────────────── - let header_ok = matchers - .iter() - .filter_map(|m| { - if let ResponseMatcher::HeaderMatch { header, expected, match_all_values, .. } = m { - // header names are case-insensitive - let val = headers - .get(header) - .and_then(|v| v.to_str().ok()) - .unwrap_or_default() - .to_ascii_lowercase(); - Some(if *match_all_values { - expected.iter().all(|e| val.contains(&e.to_ascii_lowercase())) - } else { - expected.iter().any(|e| val.contains(&e.to_ascii_lowercase())) - }) - } else { - None - } - }) - .all(|b| b); - - // ----- JsonValid ---------------------------------------------------------- - let json_ok = matchers - .iter() - .filter_map(|m| { - if matches!(m, ResponseMatcher::JsonValid { .. }) { - Some(serde_json::from_str::(body).is_ok()) - } else { - None - } - }) - .all(|b| b); - - let xml_ok = matchers - .iter() - .filter_map(|m| { - if matches!(m, ResponseMatcher::XmlValid { .. 
}) { - // succeeds if `body` is well-formed XML - Some(xml_from_str::(body).is_ok()) - } else { - None - } - }) - .all(|b| b); - - let html_detected = body_looks_like_html(body, headers); - let html_ok = html_allowed || !html_detected; - - // // ── debug line ─- - // debug!( - // "validate_response -- word:{}, status:{}, header:{}, json:{}, xml:{} ⇒ {}", - // word_ok, status_ok, header_ok, json_ok, xml_ok, all_ok - // ); - // // ────────────────────────────────────────────────────────────── - - let all_ok = word_ok && status_ok && header_ok && json_ok && xml_ok && html_ok; - all_ok -} - -#[cfg(test)] -mod tests { - use std::sync::Once; - - use wiremock::{ - matchers::{method, path}, - Mock, MockServer, ResponseTemplate, - }; - - use super::*; - static INIT: Once = Once::new(); - fn init() { - INIT.call_once(|| { - let _ = tracing_subscriber::fmt::try_init(); - }); - } - - #[test] - fn test_build_request_builder() { - init(); - let client = Client::builder() - .gzip(true) // enable gzip - .deflate(true) // enable deflate - .brotli(true) // enable brotli - .build() - .expect("building reqwest client"); - let parser = liquid::ParserBuilder::with_stdlib().build().unwrap(); - let globals = liquid::Object::new(); - let headers = BTreeMap::from([ - ("Content-Type".to_string(), "application/json".to_string()), - ("Accept".to_string(), "application/custom".to_string()), - ]); - let url = Url::from_str("https://example.com").unwrap(); - let result = build_request_builder( - &client, - "GET", - &url, - &headers, - &None, - Duration::from_secs(10), - &parser, - &globals, - ) - .expect("building request"); - let req = result.build().expect("finalizing request"); - assert_eq!( - req.headers().get(header::ACCEPT).and_then(|v| v.to_str().ok()), - Some("application/custom"), - ); - } - #[tokio::test] - async fn test_retry_request() { - init(); - let mock_server = MockServer::start().await; - Mock::given(method("GET")) - .and(path("/test")) - 
.respond_with(ResponseTemplate::new(200)) - .mount(&mock_server) - .await; - let client = Client::builder() - .gzip(true) // enable gzip - .deflate(true) // enable deflate - .brotli(true) // enable brotli - .build() - .expect("building reqwest client"); - let request_builder = client.get(&format!("{}/test", mock_server.uri())); - let response = retry_request( - request_builder, - 3, - Duration::from_millis(50), - Duration::from_millis(200), - ) - .await; - assert!(response.is_ok()); - } - #[test] - fn test_validate_response() { - // --- arrange ---------------------------------------------------------- - let matchers = vec![ResponseMatcher::WordMatch { - r#type: "word-match".to_string(), - words: vec!["test".to_string()], - match_all_words: true, - negative: false, - }]; - let status = StatusCode::OK; - let body = "This is a test"; - let headers = HeaderMap::new(); // empty header map - let html_allowed = false; - - // --- act -------------------------------------------------------------- - let result = validate_response(&matchers, body, &status, &headers, html_allowed); - - // --- assert ----------------------------------------------------------- - assert!(result); - } - #[test] - fn test_validate_response_slack_webhook() { - // Build matchers equivalent to rule kingfisher.slack.4 - let matchers = vec![ - ResponseMatcher::WordMatch { - r#type: "word-match".to_string(), - words: vec!["invalid_payload".to_string()], - match_all_words: false, // rule omits this → default is false - negative: false, - }, - ResponseMatcher::WordMatch { - r#type: "word-match".to_string(), - words: vec!["invalid_token".to_string()], - match_all_words: false, - negative: true, // body must *not* contain “invalid_token” - }, - ]; - - // Simulate the real Slack response you posted - let body = "invalid_payload"; - let status = StatusCode::BAD_REQUEST; // 400 - let mut headers = HeaderMap::new(); - headers.insert(header::CONTENT_TYPE, HeaderValue::from_static("text/plain")); - - // Call 
validate_response with html_allowed = false - let ok = validate_response(&matchers, body, &status, &headers, false); - - // 4It *should* be valid (true) because all matcher conditions hold - assert!(ok, "Slack webhook response should be considered ACTIVE"); - } - - #[test] - fn test_body_looks_like_html_trims_whitespace() { - let mut headers = HeaderMap::new(); - headers.insert(header::CONTENT_TYPE, HeaderValue::from_static("text/html; charset=utf-8")); - - let body = "\n\n \n\npage"; - - assert!(body_looks_like_html(body, &headers)); - } - - #[test] - fn test_html_response_rejected_when_not_allowed() { - let matchers = vec![ResponseMatcher::StatusMatch { - r#type: "status-match".to_string(), - status: vec![StatusCode::OK.into()], - match_all_status: false, - negative: false, - }]; - - let mut headers = HeaderMap::new(); - headers.insert(header::CONTENT_TYPE, HeaderValue::from_static("text/html; charset=utf-8")); - - let body = "\nSign in"; - - let ok = validate_response(&matchers, body, &StatusCode::OK, &headers, false); - - assert!(!ok, "HTML responses should be rejected unless explicitly allowed"); - } - - #[test] - fn test_body_looks_like_html_utf8_boundary() { - // Test case for UTF-8 boundary issue: multi-byte character at 1024-byte boundary - // This reproduces the bug where slicing at byte 1024 would panic if it's in the middle - // of a multi-byte character (e.g., Chinese character '业' spans bytes 1023..1026) - let mut headers = HeaderMap::new(); - headers.insert(header::CONTENT_TYPE, HeaderValue::from_static("text/html; charset=utf-8")); - - // HTML at the start, with padding to push a multi-byte char to byte 1024 - // This mirrors the real crash: HTML response from Gitee with Chinese chars - let html_start = ""; - let padding_len = 1023 - html_start.len(); - let body = format!( - "{}{}业Gitee", - html_start, - "x".repeat(padding_len) - ); - - // Verify our test setup: multi-byte char should be at byte 1023 - assert_eq!(body.as_bytes()[1023], 0xE4, 
"Expected first byte of '业' at position 1023"); - - // This should not panic AND should correctly identify HTML - let result = body_looks_like_html(&body, &headers); - assert!( - result, - "Should correctly identify HTML even with multi-byte characters at boundary" - ); - } - - #[test] - fn test_cache_key_includes_body() { - let url = Url::from_str("https://example.com/api").unwrap(); - let headers = - BTreeMap::from([("Content-Type".to_string(), "application/json".to_string())]); - - // Same method, url, headers but different bodies should produce different cache keys - let key_no_body = generate_http_cache_key_parts("POST", &url, &headers, None); - let key_body_a = - generate_http_cache_key_parts("POST", &url, &headers, Some(r#"{"value": "abc"}"#)); - let key_body_b = - generate_http_cache_key_parts("POST", &url, &headers, Some(r#"{"value": "xyz"}"#)); - - // All three should be different - assert_ne!( - key_no_body, key_body_a, - "Cache key with body should differ from key without body" - ); - assert_ne!( - key_no_body, key_body_b, - "Cache key with body should differ from key without body" - ); - assert_ne!(key_body_a, key_body_b, "Cache keys with different bodies should be different"); - - // Same body should produce same key - let key_body_a_dup = - generate_http_cache_key_parts("POST", &url, &headers, Some(r#"{"value": "abc"}"#)); - assert_eq!(key_body_a, key_body_a_dup, "Same inputs should produce same cache key"); - } -}