From 078fa16e6a9511b47a5c72413ea567c76376207e Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Thu, 4 Dec 2025 22:02:30 -0800 Subject: [PATCH] - Reduced per-match memory usage by compacting stored source locations and interning repeated capture names. - Stored optional validation response bodies as boxed strings to avoid allocating empty payloads and to streamline validator caches. - Parallelized git cloning based on the configured job count and begin scanning repositories as soon as each clone finishes to reduce end-to-end scan times. - Combined per-repository results into a single aggregate summary after scans complete. - Added initial access-map support and report viewer html file. Currently beta features. --- .gitignore | 5 + CHANGELOG.md | 7 + Cargo.toml | 10 +- Makefile | 24 + README.md | 25 + docs/access-map-viewer/index.html | 1549 +++++++++++++++++ docs/access-map-viewer/sample-report.json | 97 ++ docs/access-map-viewer/viewer.css | 106 ++ docs/access-map-viewer/viewer.js | 195 +++ src/access_map.rs | 240 +++ src/access_map/aws.rs | 798 +++++++++ src/access_map/azure copy.rs | 9 + src/access_map/azure.rs | 9 + src/access_map/gcp.rs | 1321 ++++++++++++++ src/access_map/graph.rs | 48 + src/access_map/report.rs | 1190 +++++++++++++ src/baseline.rs | 14 +- src/blob.rs | 6 +- src/cli/commands/access_map.rs | 34 + src/cli/commands/mod.rs | 1 + src/cli/commands/scan.rs | 16 + src/cli/global.rs | 6 +- src/finding_data.rs | 3 +- src/findings_store.rs | 49 +- src/lib.rs | 2 + src/location.rs | 107 +- src/main.rs | 21 +- src/matcher.rs | 66 +- src/reporter.rs | 172 +- src/reporter/bson_format.rs | 9 +- src/reporter/json_format.rs | 53 +- src/reporter/pretty_format.rs | 46 +- src/reporter/sarif_format.rs | 21 +- src/scanner/enumerate.rs | 7 +- src/scanner/mod.rs | 4 +- src/scanner/repos.rs | 145 +- src/scanner/runner.rs | 558 +++++- src/scanner/summary.rs | 254 ++- src/scanner/validation.rs | 107 +- src/update.rs | 21 + src/validation.rs | 146 +- 
src/validation/azure.rs | 18 +- src/validation/coinbase.rs | 11 +- src/validation/gcp.rs | 114 +- src/validation/utils.rs | 4 +- src/validation_body.rs | 46 + tests/fingerprint_dedup.rs | 20 +- tests/int_allowlist.rs | 2 + tests/int_bitbucket.rs | 2 + tests/int_dedup.rs | 2 + tests/int_github.rs | 2 + tests/int_gitlab.rs | 4 + tests/int_local_path_validation.rs | 32 + tests/int_redact.rs | 2 + tests/int_slack.rs | 4 + tests/int_validation_cache.rs | 2 + tests/int_vulnerable_files.rs | 4 + .../vectorscan-rs/vectorscan-rs/src/native.rs | 4 +- 58 files changed, 7341 insertions(+), 433 deletions(-) create mode 100644 docs/access-map-viewer/index.html create mode 100644 docs/access-map-viewer/sample-report.json create mode 100644 docs/access-map-viewer/viewer.css create mode 100644 docs/access-map-viewer/viewer.js create mode 100644 src/access_map.rs create mode 100644 src/access_map/aws.rs create mode 100644 src/access_map/azure copy.rs create mode 100644 src/access_map/azure.rs create mode 100644 src/access_map/gcp.rs create mode 100644 src/access_map/graph.rs create mode 100644 src/access_map/report.rs create mode 100644 src/cli/commands/access_map.rs create mode 100644 src/validation_body.rs create mode 100644 tests/int_local_path_validation.rs diff --git a/.gitignore b/.gitignore index e552e7c..d6f49ca 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,8 @@ *.sarif *.profile.json *.json +!webserver/static/sample-report.json +!docs/access-map-viewer/sample-report.json *.jsonl *.bson .prettierrc @@ -13,6 +15,9 @@ logs/* *.patch *.orig *.rej +*.html +!docs/access-map-viewer/index.html +*.dot ### macOS ### # General diff --git a/CHANGELOG.md b/CHANGELOG.md index f4862db..ff55907 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,13 @@ All notable changes to this project will be documented in this file. +## [v1.69.0] +- Reduced per-match memory usage by compacting stored source locations and interning repeated capture names. 
+- Stored optional validation response bodies as boxed strings to avoid allocating empty payloads and to streamline validator caches. +- Parallelized git cloning based on the configured job count and begin scanning repositories as soon as each clone finishes to reduce end-to-end scan times. +- Combined per-repository results into a single aggregate summary after scans complete. +- Added initial access-map support and report viewer html file. Currently beta features. + ## [v1.68.0] - Fixed Bitbucket authenticated cloning bug diff --git a/Cargo.toml b/Cargo.toml index f255862..5772636 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ publish = false [package] name = "kingfisher" -version = "1.68.0" +version = "1.69.0" description = "MongoDB's blazingly fast and accurate secret scanning and validation tool" edition.workspace = true rust-version.workspace = true @@ -192,6 +192,12 @@ walkdir = "2.5.0" p256 = "0.13.2" ed25519-dalek = { version = "2.2", features = ["pkcs8"] } aws-sdk-s3 = "1.100.0" +aws-sdk-iam = "1.95.0" +aws-sdk-ec2 = "1.95.0" +aws-sdk-dynamodb = "1.95.0" +aws-sdk-lambda = "1.95.0" +aws-sdk-kms = "1.95.0" +aws-sdk-secretsmanager = "1.95.0" gcloud-storage = { version = "1.1.1", default-features = false, features = [ "rustls-tls", "auth", @@ -235,7 +241,7 @@ incremental = false [profile.dev] opt-level = 0 -# debug = true +debug = true incremental = true codegen-units = 256 diff --git a/Makefile b/Makefile index 0325d64..86280c4 100644 --- a/Makefile +++ b/Makefile @@ -207,6 +207,30 @@ darwin-arm64: fi $(MAKE) list-archives +darwin-dev: +# @echo "Checking Rust for darwin-arm64..." +# @$(MAKE) check-rust || ( \ +# echo "Rust not found or out-of-date. Installing via Homebrew..." 
&& \ +# brew install rust \ +# ) +# @brew list cmake >/dev/null 2>&1 || brew install cmake +# @brew list boost >/dev/null 2>&1 || brew install boost +# @brew install gcc libpcap pkg-config ragel sqlite coreutils gnu-tar +# @rustup target add aarch64-apple-darwin + cargo build --profile=dev --target aarch64-apple-darwin --features system-alloc +# @cd target/aarch64-apple-darwin/release && \ +# find ./$(PROJECT_NAME) -type f -not -name "*.d" -not -name "*.rlib" -exec shasum -a 256 {} \; > CHECKSUM.txt +# @mkdir -p target/release +# @cp target/aarch64-apple-darwin/release/$(PROJECT_NAME) target/release/ +# @cp target/aarch64-apple-darwin/release/CHECKSUM.txt target/release/CHECKSUM-darwin-arm64.txt +# @cd target/release && \ +# rm -rf $(PROJECT_NAME)-darwin-arm64.tgz && \ +# $(ARCHIVE_CMD) $(PROJECT_NAME)-darwin-arm64.tgz $(PROJECT_NAME) CHECKSUM-darwin-arm64.txt && \ +# if [ -f $(PROJECT_NAME)-darwin-arm64.tgz ]; then \ +# shasum -a 256 $(PROJECT_NAME)-darwin-arm64.tgz >> CHECKSUM-darwin-arm64.txt; \ +# fi +# $(MAKE) list-archives + darwin-x64: @echo "Checking Rust for darwin-x64..." @$(MAKE) check-rust || ( \ diff --git a/README.md b/README.md index 09515a5..e90a0f0 100644 --- a/README.md +++ b/README.md @@ -130,6 +130,7 @@ See ([docs/COMPARISON.md](docs/COMPARISON.md)) - [Scan Confluence pages matching a CQL query](#scan-confluence-pages-matching-a-cql-query) - [ Scanning Slack](#-scanning-slack) - [Scan Slack messages matching a search query](#scan-slack-messages-matching-a-search-query) + - [Identity mapping for cloud credentials](#identity-mapping-for-cloud-credentials) - [Environment Variables for Tokens](#environment-variables-for-tokens) - [Exit Codes](#exit-codes) - [Update Checks](#update-checks) @@ -1046,6 +1047,30 @@ KF_SLACK_TOKEN="xoxp-1234..." kingfisher scan slack "akia" \ ``` *The Slack token must be a user token with the `search:read` scope. 
Bot tokens (those beginning with `xoxb-`) cannot call the Slack search API.* +## Identity mapping for cloud credentials + +Use the `identity-map` command to understand the blast radius of cloud credentials by resolving the owning identity, attached roles (including inherited org/folder bindings), and risky permissions. The command prints a JSON summary to stdout by default and can optionally emit a standalone HTML report. + +```bash +# Map AWS credentials using your default CLI environment (env vars, config files), +# write JSON to disk, and emit an interactive HTML report +kingfisher identity-map aws \ + --json-out identity-map.json \ + --html-out identity-map.html + +# Map a GCP service account key and save JSON + HTML to disk +kingfisher identity-map gcp path/to/key.json \ + --json-out identity-map.json \ + --html-out identity-map.html +``` + +Specify the provider (`aws`, `gcp`, or `azure`) as the first argument. +- **AWS**: Uses your default AWS credential chain (environment variables, config/credential profiles, or instance metadata). To map a static key file instead, pass the path as the second argument (supports JSON or `aws_access_key_id=` / `aws_secret_access_key=` key-value pairs). When possible, Kingfisher expands IAM permissions by reading the caller's attached managed and inline policies and surfaces admin, privilege-escalation, or read-only actions in the report. Lacking `iam:List*`/`iam:Get*` access will limit permission expansion but the identity can still be resolved. +- **GCP**: Pass the path to a service account key JSON file. +- **Azure**: Pass the path to a service principal JSON file. + +You can also collect identity maps while running a normal scan by passing `--identity-map`; a consolidated HTML output for every validated credential can be written with `--identity-map-html`. 
Identity mapping runs after the validation phase and will emit an HTML report when validated AWS or GCP credentials are found (the flag requires validation, so it cannot be combined with `--no-validate`). Each validated credential renders as its own card in the unified report, so mixed AWS/GCP findings appear together with direct links to their console views. If you enable `--identity-map` without specifying `--identity-map-html`, the scanner writes `kingfisher_idmap_.html` to the current directory; if no validated credentials are discovered, a debug log call notes that no identity-map output was written. + ## Environment Variables for Tokens | Variable | Purpose | diff --git a/docs/access-map-viewer/index.html b/docs/access-map-viewer/index.html new file mode 100644 index 0000000..44e62e4 --- /dev/null +++ b/docs/access-map-viewer/index.html @@ -0,0 +1,1549 @@ + + + + + + Kingfisher Access Map Viewer + + + + + + +
+
+
K
+
+ Kingfisher Viewer + Access Map & Findings +
+
+ +
+ +
+
+
+
+

Upload Report

+

Analyze Kingfisher JSON / JSONL output

+
+
+
+
+
📄
+
Drag & drop a report here
+
…or click to choose a file
+ +
+ +
+
+ + +
+ + + + diff --git a/docs/access-map-viewer/sample-report.json b/docs/access-map-viewer/sample-report.json new file mode 100644 index 0000000..4818fcd --- /dev/null +++ b/docs/access-map-viewer/sample-report.json @@ -0,0 +1,97 @@ +{ + "findings": [ + { + "rule": { + "name": "Alibaba Access Key Secret", + "id": "kingfisher.alibabacloud.2" + }, + "finding": { + "snippet": "m0qx7h2v4n8c9t3b6p1r5w0kzsdjf", + "fingerprint": "13778709639383676952", + "confidence": "medium", + "entropy": "4.55", + "validation": { + "status": "Inactive Credential", + "response": "{\"RequestId\":\"48F0D2A0-7C0E-5DE2-BC89-39811315C04A\",\"Message\":\"Specified access key is not found.\",\"Recommend\":\"https://api.aliyun.com/troubleshoot?q=InvalidAccessKeyId.NotFound&product=Sts&requestId=48F0D2A0-7C0E-5DE2-BC89-39811315C04A\",\"HostId\":\"sts.aliyuncs.com\",\"Code\":\"InvalidAccessKeyId.NotFound\"}" + }, + "language": "Plain Text", + "line": 5, + "column_start": 0, + "column_end": 29, + "path": "/tmp/repo/tmp/secretstuff/alibaba-test.txt" + } + }, + { + "rule": { + "name": "Alibaba Access Key Secret", + "id": "kingfisher.alibabacloud.2" + }, + "finding": { + "snippet": "z91trw6fap8kq2xmd4s7h3b0vnclpf", + "fingerprint": "8292190854848911527", + "confidence": "medium", + "entropy": "4.44", + "validation": { + "status": "Inactive Credential", + "response": "Validation skipped - missing dependent rules: kingfisher.alibabacloud.1, kingfisher.alibabacloud.1" + }, + "language": "Unknown", + "line": 8, + "column_start": 39, + "column_end": 68, + "path": "/tmp/repo/tmp/secretstuff/alibaba/alibaba-validator/.venv/lib/python3.13/site-packages/alibabacloud_tea_util-0.3.13.dist-info/RECORD" + } + }, + { + "rule": { + "name": "AWS Secret Access Key", + "id": "kingfisher.aws.2" + }, + "finding": { + "snippet": "dB9PyxlN/qa8sF0tJ4uM2qZr7eVw6TgHkC0nBpZq", + "fingerprint": "17034522315778178539", + "confidence": "medium", + "entropy": "4.67", + "validation": { + "status": "Active Credential", + "response": 
"AKIAFAKEKEY123456789 --- ARN: arn:aws:iam::000000000000:user/example_user --- AWS Account Number: 000000000000" + }, + "language": "Unknown", + "line": 1, + "column_start": 65, + "column_end": 104, + "path": "/tmp/repo/tmp/secretstuff/utf8.txt " + } + } + ], + "access_map": [ + { + "provider": "aws", + "account": "prod", + "groups": [ + { "resources": ["arn:aws:s3:::prod-bucket"], "permissions": ["s3:GetObject", "s3:ListBucket"] }, + { "resources": ["arn:aws:iam::123456789012:role/Admin"], "permissions": ["iam:AssumeRole"] } + ] + }, + { + "provider": "gcp", + "account": "test-project", + "groups": [ + { "resources": ["projects/test/instances/primary"], "permissions": ["compute.instances.get", "compute.instances.list"] } + ] + } + ], + "stats": { + "total": 259, + "critical": 37, + "validated": 167, + "unique_paths": 21, + "confidence_buckets": { + "High": 37, + "Medium": 222 + }, + "confidence_order": ["High", "Medium"], + "scan_date": "2025-11-25T15:37:41.863868-08:00", + "kingfisher_version": "1.68.0" + } +} diff --git a/docs/access-map-viewer/viewer.css b/docs/access-map-viewer/viewer.css new file mode 100644 index 0000000..0b4a82f --- /dev/null +++ b/docs/access-map-viewer/viewer.css @@ -0,0 +1,106 @@ +:root { + --bg: #0f172a; + --panel: #111827; + --text: #e5e7eb; + --muted: #9ca3af; + --accent: #38bdf8; + --border: #1f2937; + --good: #34d399; + --warn: #f59e0b; + font-family: "Inter", system-ui, -apple-system, sans-serif; +} + +* { box-sizing: border-box; } + +body { + margin: 0; + background: radial-gradient(circle at 20% 20%, rgba(56,189,248,0.08), transparent 25%), + radial-gradient(circle at 80% 0%, rgba(52,211,153,0.12), transparent 30%), + var(--bg); + color: var(--text); + min-height: 100vh; + padding: 24px; +} + +header { + display: flex; + align-items: center; + justify-content: space-between; + gap: 12px; + margin-bottom: 18px; +} + +h1 { margin: 0; font-size: 22px; } +h1 span { color: var(--accent); } + +.page { + display: grid; + 
grid-template-columns: 1fr 1.2fr; + gap: 16px; +} + +.panel { + background: var(--panel); + border: 1px solid var(--border); + border-radius: 14px; + padding: 16px; + box-shadow: 0 15px 30px rgba(0,0,0,0.35); +} + +.controls { display: flex; gap: 10px; align-items: center; flex-wrap: wrap; } +input[type="file"] { display: none; } + +.btn { + border: 1px solid var(--border); + background: rgba(255,255,255,0.04); + color: var(--text); + padding: 10px 14px; + border-radius: 10px; + cursor: pointer; + font-weight: 700; +} + +.btn-primary { + background: linear-gradient(135deg, #0ea5e9, #22d3ee); + color: #0b1224; + border: none; + box-shadow: 0 12px 28px rgba(56,189,248,0.35); +} + +.stat-grid { + display: grid; + grid-template-columns: repeat(auto-fit, minmax(140px,1fr)); + gap: 10px; +} + +.stat { + background: rgba(255,255,255,0.02); + border: 1px solid var(--border); + padding: 12px; + border-radius: 12px; +} + +.stat-label { color: var(--muted); font-size: 13px; } +.stat-value { font-size: 24px; font-weight: 800; } + +.table { width: 100%; border-collapse: collapse; margin-top: 10px; } +.table th, .table td { padding: 8px; border-bottom: 1px solid var(--border); text-align: left; } +.table th { color: var(--muted); font-weight: 700; } +.table tbody tr:hover { background: rgba(255,255,255,0.03); } + +.badge { padding: 4px 8px; border-radius: 999px; font-weight: 700; font-size: 12px; } +.badge-good { background: rgba(52,211,153,0.16); color: #6ee7b7; } +.badge-warn { background: rgba(245,158,11,0.15); color: #fbbf24; } + +pre { + background: rgba(255,255,255,0.02); + border: 1px dashed var(--border); + padding: 12px; + border-radius: 12px; + color: var(--muted); + overflow: auto; +} + +@media (max-width: 1000px) { + .page { grid-template-columns: 1fr; } +} diff --git a/docs/access-map-viewer/viewer.js b/docs/access-map-viewer/viewer.js new file mode 100644 index 0000000..5d97ed7 --- /dev/null +++ b/docs/access-map-viewer/viewer.js @@ -0,0 +1,195 @@ +const state = { 
+ findings: [], + accessMap: [], +}; + +const fileInput = document.getElementById('file'); +const uploadBtn = document.getElementById('upload-btn'); +const sampleBtn = document.getElementById('sample-btn'); +const stats = { + findings: document.getElementById('stat-findings'), + access: document.getElementById('stat-access'), + providers: document.getElementById('stat-providers'), +}; +const findingsTable = document.getElementById('findings'); +const accessTable = document.getElementById('access-map'); +const payloadPreview = document.getElementById('payload-preview'); + +uploadBtn.addEventListener('click', () => fileInput.click()); +fileInput.addEventListener('change', () => { + if (fileInput.files?.[0]) { + loadFile(fileInput.files[0]); + } +}); + +sampleBtn.addEventListener('click', async () => { + const resp = await fetch('sample-report.json'); + const data = await resp.json(); + render(normalizePayload(data)); +}); + +async function loadFile(file) { + const text = await file.text(); + render(parseReport(text)); +} + +function parseReport(text) { + try { + const parsed = JSON.parse(text); + return normalizePayload(parsed); + } catch (_) { + return parseJsonl(text); + } +} + +function parseJsonl(text) { + const lines = text.split(/\r?\n/).filter(Boolean); + const findings = []; + let accessMap = []; + lines.forEach((line) => { + try { + const row = JSON.parse(line); + if (row.rule && row.finding) { + findings.push(row); + } + if (row.access_map) { + accessMap = accessMap.concat(row.access_map); + } + } catch (_) { + /* ignore */ + } + }); + return { findings, access_map: accessMap }; +} + +function normalizePayload(data) { + if (Array.isArray(data)) { + return { findings: data, access_map: [] }; + } + return { + findings: data.findings || [], + access_map: normalizeAccessMap(data.access_map || []), + }; +} + +function render(payload) { + state.findings = payload.findings || []; + state.accessMap = normalizeAccessMap(payload.access_map || []); + + const flattened 
= flattenAccessMap(state.accessMap); + + stats.findings.textContent = state.findings.length; + stats.access.textContent = flattened.length; + stats.providers.textContent = new Set(state.accessMap.map((e) => e.provider || '')).size; + + renderFindings(); + renderAccessMap(flattened); + payloadPreview.textContent = JSON.stringify({ ...payload, access_map: state.accessMap }, null, 2); +} + +function renderFindings() { + const tbody = findingsTable.querySelector('tbody'); + tbody.innerHTML = ''; + if (state.findings.length === 0) { + tbody.innerHTML = 'No findings yet.'; + return; + } + + state.findings.slice(0, 50).forEach((f) => { + const tr = document.createElement('tr'); + tr.innerHTML = ` + ${escapeHtml(f.rule?.name || '')} + ${escapeHtml(f.rule?.id || '')} + ${escapeHtml(f.finding?.path || '')} + ${escapeHtml( + f.finding?.confidence || '' + )} + `; + tbody.appendChild(tr); + }); +} + +function renderAccessMap(rows) { + const tbody = accessTable.querySelector('tbody'); + tbody.innerHTML = ''; + if (rows.length === 0) { + tbody.innerHTML = 'No access-map entries yet.'; + return; + } + + rows.forEach((row) => { + const tr = document.createElement('tr'); + tr.innerHTML = ` + ${escapeHtml(row.provider || '')} + ${escapeHtml(row.account || '')} + ${escapeHtml(row.resource || '')} + ${escapeHtml(row.permissions.join(', ') || '')} + `; + tbody.appendChild(tr); + }); +} + +function escapeHtml(str = '') { + const div = document.createElement('div'); + div.textContent = str; + return div.innerHTML; +} + +function classForConfidence(conf = '') { + const c = conf.toLowerCase(); + if (c === 'high') return 'badge-warn'; + if (c === 'medium') return 'badge'; + if (c === 'low') return 'badge-good'; + return 'badge'; +} + +function normalizeAccessMap(entries = []) { + if (!Array.isArray(entries)) return []; + + // Already in new schema + if (entries.some((e) => Array.isArray(e.groups))) { + return entries.map((entry) => ({ + provider: entry.provider, + account: entry.account, + 
groups: (entry.groups || []).map((group) => ({ + resources: Array.isArray(group.resources) ? group.resources : [], + permissions: Array.isArray(group.permissions) ? group.permissions : [], + })), + })); + } + + // Fallback for legacy flat entries + return entries.map((entry) => { + const permissions = Array.isArray(entry.permissions) + ? entry.permissions + : entry.permission + ? String(entry.permission) + .split(',') + .map((p) => p.trim()) + .filter(Boolean) + : []; + const resource = entry.resource ? [entry.resource] : []; + return { + provider: entry.provider, + account: entry.account, + groups: [{ resources: resource, permissions }], + }; + }); +} + +function flattenAccessMap(entries = []) { + const rows = []; + entries.forEach((entry) => { + (entry.groups || []).forEach((group) => { + (group.resources || []).forEach((resource) => { + rows.push({ + provider: entry.provider, + account: entry.account, + resource, + permissions: group.permissions || [], + }); + }); + }); + }); + return rows; +} diff --git a/src/access_map.rs b/src/access_map.rs new file mode 100644 index 0000000..3e2114f --- /dev/null +++ b/src/access_map.rs @@ -0,0 +1,240 @@ +use anyhow::{bail, Result}; +use serde::Serialize; + +use crate::cli::commands::access_map::{AccessMapArgs, AccessMapProvider}; + +mod aws; +mod azure; +mod gcp; +mod report; + +/// Run the identity mapping workflow for the selected cloud provider. 
+pub async fn run(args: AccessMapArgs) -> Result<()> { + let result = match args.provider { + AccessMapProvider::Gcp => gcp::map_access(args.credential_path.as_deref()).await?, + AccessMapProvider::Aws => aws::map_access(&args).await?, + AccessMapProvider::Azure => azure::map_access(&args).await?, + }; + + let json = serde_json::to_string_pretty(&result)?; + if let Some(path) = args.json_out { + std::fs::write(path, json)?; + } else { + println!("{json}"); + } + + if let Some(path) = args.html_out { + report::generate_html_report_multi(&[result], &path)?; + } + + Ok(()) +} + +/// A validated credential that can be mapped to an identity. +#[derive(Clone, Debug)] +pub enum AccessMapRequest { + /// AWS access key credentials. + Aws { access_key: String, secret_key: String, session_token: Option }, + /// A GCP service account JSON document. + Gcp { credential_json: String }, +} + +/// Structured output describing the resolved identity and its risk profile. +#[derive(Debug, Serialize, Clone)] +pub struct AccessMapResult { + /// Cloud name such as "gcp", "aws", or "azure". + pub cloud: String, + + /// Summary of the resolved identity. + pub identity: AccessSummary, + + /// Roles or bindings directly associated with the identity. + pub roles: Vec, + /// Aggregated permission findings. + pub permissions: PermissionSummary, + + /// Resources impacted by the credential. + pub resources: Vec, + + /// Overall severity score. + pub severity: Severity, + /// Guidance for remediation. + pub recommendations: Vec, + /// Additional risk notes derived from permissions and impersonation exposure. + pub risk_notes: Vec, +} + +/// Identity details such as email or ARN. +#[derive(Debug, Serialize, Clone)] +pub struct AccessSummary { + /// A stable identifier for the identity (email, ARN, or SPN). + pub id: String, + /// Identity type such as service account or user. + pub access_type: String, + /// Optional project or subscription identifier. 
+ pub project: Option, + /// Optional tenant identifier. + pub tenant: Option, + /// Optional AWS-style account identifier. + pub account_id: Option, +} + +/// A single role or binding and its permissions. +#[derive(Debug, Serialize, Clone)] +pub struct RoleBinding { + /// Name of the role (for example, `roles/editor`). + pub name: String, + /// Source of the role (direct, inherited, etc.). + pub source: String, + /// Expanded permissions associated with the role. + pub permissions: Vec, +} + +/// Summarized permissions grouped by risk profile. +#[derive(Debug, Serialize, Default, Clone)] +pub struct PermissionSummary { + /// Administrator or owner-level permissions. + pub admin: Vec, + /// Permissions that allow privilege escalation. + pub privilege_escalation: Vec, + /// Risky permissions with broad or sensitive access. + pub risky: Vec, + /// Lower-risk read-only permissions. + pub read_only: Vec, +} + +/// Exposed resources and their assessed risk. +#[derive(Debug, Serialize, Clone)] +pub struct ResourceExposure { + /// Resource type such as project or bucket. + pub resource_type: String, + /// Resource name. + pub name: String, + /// Permissions that grant visibility or access to the resource. + pub permissions: Vec, + /// Risk level. + pub risk: String, + /// Human-readable justification. + pub reason: String, +} + +/// Severity classification for the credential. +#[derive(Debug, Serialize, Clone, Copy)] +pub enum Severity { + /// Low risk. + Low, + /// Medium risk. + Medium, + /// High risk. + High, + /// Critical risk. + Critical, +} + +/// Map a batch of credentials to their effective identities. 
+pub async fn map_requests(requests: Vec) -> Vec { + let mut results = Vec::new(); + + for request in requests { + let mapped = match request { + AccessMapRequest::Aws { access_key, secret_key, session_token } => { + aws::map_access_with_credentials(&access_key, &secret_key, session_token.as_deref()) + .await + .unwrap_or_else(|err| build_failed_result("aws", &access_key, err)) + } + AccessMapRequest::Gcp { credential_json } => { + gcp::map_access_from_json(&credential_json) + .await + .unwrap_or_else(|err| build_failed_result("gcp", "service_account", err)) + } + }; + + results.push(mapped); + } + + results +} + +/// Write HTML/JSON outputs for a collection of identity map results. +pub fn write_reports(results: &[AccessMapResult], html_out: &std::path::Path) -> Result<()> { + report::generate_html_report_multi(results, html_out)?; + Ok(()) +} + +fn severity_to_str(severity: Severity) -> &'static str { + match severity { + Severity::Low => "low", + Severity::Medium => "medium", + Severity::High => "high", + Severity::Critical => "critical", + } +} + +fn build_failed_result(cloud: &str, identity_label: &str, err: anyhow::Error) -> AccessMapResult { + AccessMapResult { + cloud: cloud.to_string(), + identity: AccessSummary { + id: identity_label.to_string(), + access_type: "unknown".into(), + project: None, + tenant: None, + account_id: None, + }, + roles: Vec::new(), + permissions: PermissionSummary::default(), + resources: vec![build_default_resource(None, Severity::Medium)], + severity: Severity::Medium, + recommendations: build_recommendations(Severity::Medium), + risk_notes: vec![format!("Identity mapping failed: {err}")], + } +} + +pub(crate) fn build_default_resource( + project_id: Option<&str>, + severity: Severity, +) -> ResourceExposure { + ResourceExposure { + resource_type: "project".into(), + name: project_id.unwrap_or_default().into(), + permissions: Vec::new(), + risk: severity_to_str(severity).to_string(), + reason: "Project containing the provided 
credential".into(), + } +} + +pub(crate) fn build_default_account_resource( + account_id: Option<&str>, + severity: Severity, +) -> ResourceExposure { + ResourceExposure { + resource_type: "account".into(), + name: account_id.unwrap_or_default().into(), + permissions: Vec::new(), + risk: severity_to_str(severity).to_string(), + reason: "AWS account linked to the provided credential".into(), + } +} + +pub(crate) fn build_recommendations(severity: Severity) -> Vec { + let mut recs = vec![ + "Rotate the credential and audit recent usage".to_string(), + "Apply the principle of least privilege to attached roles".to_string(), + ]; + + match severity { + Severity::Critical | Severity::High => { + recs.push("Investigate blast radius and revoke unused bindings".to_string()) + } + Severity::Medium => { + recs.push("Review write-level permissions and tighten scopes".to_string()) + } + Severity::Low => recs.push("Maintain monitoring for anomalous access".to_string()), + } + + recs +} + +/// Fallback handler for unsupported providers. 
+async fn unsupported_provider(provider: &AccessMapProvider) -> Result { + bail!("Identity mapping for {:?} is not implemented", provider) +} diff --git a/src/access_map/aws.rs b/src/access_map/aws.rs new file mode 100644 index 0000000..de74e70 --- /dev/null +++ b/src/access_map/aws.rs @@ -0,0 +1,798 @@ +use std::collections::BTreeSet; +use std::path::Path; + +use anyhow::{anyhow, Context, Result}; +use aws_config::{BehaviorVersion, SdkConfig}; +use aws_credential_types::Credentials; +use aws_sdk_dynamodb::Client as DynamoClient; +use aws_sdk_ec2::Client as Ec2Client; +use aws_sdk_iam::{error::SdkError, Client as IamClient}; +use aws_sdk_kms::Client as KmsClient; +use aws_sdk_lambda::Client as LambdaClient; +use aws_sdk_s3::Client as S3Client; +use aws_sdk_secretsmanager::Client as SecretsManagerClient; +use aws_sdk_sts::Client as StsClient; +use percent_encoding::percent_decode_str; +use serde_json::Value; +use tracing::warn; + +use crate::cli::commands::access_map::AccessMapArgs; + +use super::{ + build_default_account_resource, build_recommendations, AccessMapResult, AccessSummary, + PermissionSummary, ResourceExposure, RoleBinding, Severity, +}; + +pub async fn map_access(args: &AccessMapArgs) -> Result { + let config = load_config_from_path(args.credential_path.as_deref()).await?; + map_access_with_config(config).await +} + +fn permissions_for_prefix(summary: &PermissionSummary, prefix: &str) -> Vec { + let mut matches = BTreeSet::new(); + for perm in summary + .admin + .iter() + .chain(&summary.privilege_escalation) + .chain(&summary.risky) + .chain(&summary.read_only) + { + if perm == "*" || perm.starts_with(prefix) { + matches.insert(perm.clone()); + } + } + + matches.into_iter().collect() +} + +pub async fn map_access_with_credentials( + access_key: &str, + secret_key: &str, + session_token: Option<&str>, +) -> Result { + let credentials = match session_token { + Some(token) => { + Credentials::new(access_key, secret_key, Some(token.to_string()), None, 
"access_map") + } + None => Credentials::new(access_key, secret_key, None, None, "access_map"), + }; + + let config = load_config(Some(credentials)).await?; + map_access_with_config(config).await +} + +async fn map_access_with_config(config: SdkConfig) -> Result { + let sts = StsClient::new(&config); + let iam = IamClient::new(&config); + + let caller = + sts.get_caller_identity().send().await.context("Failed to call sts:GetCallerIdentity")?; + + let arn = caller + .arn() + .ok_or_else(|| anyhow!("AWS GetCallerIdentity response missing ARN"))? + .to_string(); + let account_id = caller.account().map(|s| s.to_string()); + + let identity = AccessSummary { + id: arn.clone(), + access_type: classify_identity(&arn).into(), + project: None, + tenant: None, + account_id: account_id.clone(), + }; + + let mut roles = derive_roles_from_arn(&arn); + let mut risk_notes = Vec::new(); + + let permissions = + expand_permissions(&iam, &arn, &mut roles, &mut risk_notes).await.unwrap_or_else(|err| { + warn!("AWS access-map: failed to enumerate IAM permissions: {err}"); + risk_notes.push(format!("IAM enumeration failed: {err}")); + PermissionSummary::default() + }); + let mut resources = + enumerate_resources(&config, &permissions, account_id.as_deref(), &mut risk_notes) + .await + .unwrap_or_else(|err| { + warn!("AWS access-map: resource enumeration failed: {err}"); + risk_notes.push(format!("AWS enumeration failed: {err}")); + Vec::new() + }); + + let severity = derive_severity(&permissions, !resources.is_empty()); + + if roles.is_empty() { + roles.push(RoleBinding { + name: identity.access_type.clone(), + source: "sts".into(), + permissions: Vec::new(), + }); + } + + if resources.is_empty() { + resources.push(build_default_account_resource(account_id.as_deref(), severity)); + } + + if arn.contains(":assumed-role/") { + risk_notes.push( + "Credential represents an assumed role session; review the role trust policy and session duration".into(), + ); + } + if 
permissions.admin.is_empty() + && permissions.privilege_escalation.is_empty() + && permissions.risky.is_empty() + && permissions.read_only.is_empty() + { + risk_notes.push("IAM permissions could not be enumerated for this identity.".into()); + } + + let recommendations = build_recommendations(severity); + + Ok(AccessMapResult { + cloud: "aws".into(), + identity, + roles, + permissions, + resources, + severity, + recommendations, + risk_notes, + }) +} + +fn classify_identity(arn: &str) -> &'static str { + if arn.contains(":assumed-role/") { + "assumed_role" + } else if arn.contains(":role/") { + "role" + } else if arn.contains(":user/") { + "user" + } else if arn.contains(":root") { + "root" + } else { + "unknown" + } +} + +fn derive_roles_from_arn(arn: &str) -> Vec { + let resource = arn.split(':').nth(5).unwrap_or_default(); + let mut parts = resource.split('/'); + let kind = parts.next().unwrap_or_default(); + let name = parts.next().unwrap_or_default(); + + let role_name = match kind { + "assumed-role" | "role" => Some(name.to_string()), + _ => None, + }; + + if let Some(name) = role_name { + vec![RoleBinding { name, source: "iam".into(), permissions: Vec::new() }] + } else { + Vec::new() + } +} + +async fn expand_permissions( + iam: &IamClient, + arn: &str, + roles: &mut Vec, + risk_notes: &mut Vec, +) -> Result { + let access_type = classify_identity(arn); + let resource = arn.split(':').nth(5).unwrap_or_default(); + let mut parts = resource.split('/'); + let _kind = parts.next(); + let name = parts.next().unwrap_or_default(); + + if arn.contains(":assumed-role/AWSReservedSSO_") { + risk_notes.push( + "This is an AWS IAM Identity Center session. These sessions cannot enumerate role policies. 
IAM permission mapping skipped.".into(), + ); + return Ok(PermissionSummary::default()); + } + + let mut actions = match access_type { + "role" | "assumed_role" => collect_role_actions(iam, name, risk_notes).await, + "user" => collect_user_actions(iam, name, risk_notes).await, + _ => Ok(Vec::new()), + } + .unwrap_or_else(|err| { + if err.to_string().contains("AccessDenied") { + risk_notes.push( + "IAM policy enumeration blocked: the caller does not have iam:Get* or iam:List* permissions. Permissions incomplete.".into(), + ); + } + risk_notes.push(format!("IAM enumeration failed: {err}")); + warn!("AWS access-map: IAM enumeration failed: {err}"); + Vec::new() + }); + + actions.sort(); + actions.dedup(); + + for role in roles.iter_mut() { + if role.permissions.is_empty() { + role.permissions = actions.clone(); + } + } + + Ok(classify_permissions(&actions)) +} + +async fn collect_role_actions( + iam: &IamClient, + role_name: &str, + risk_notes: &mut Vec, +) -> Result> { + let mut actions = Vec::new(); + + let attached = + iam.list_attached_role_policies().role_name(role_name).send().await.map_err(|err| { + map_iam_error( + err, + risk_notes, + &format!("list_attached_role_policies failed for role {role_name}"), + ) + })?; + + for policy in attached.attached_policies() { + if let Some(arn) = policy.policy_arn() { + collect_managed_policy_actions(iam, arn, &mut actions, risk_notes).await?; + } + } + + let inline = iam.list_role_policies().role_name(role_name).send().await.map_err(|err| { + map_iam_error(err, risk_notes, &format!("list_role_policies failed for role {role_name}")) + })?; + + for name in inline.policy_names() { + let policy = + iam.get_role_policy().role_name(role_name).policy_name(name).send().await.map_err( + |err| { + map_iam_error( + err, + risk_notes, + &format!("get_role_policy failed for role {role_name} policy {name}"), + ) + }, + )?; + + extract_actions_from_document(policy.policy_document(), &mut actions)?; + } + + Ok(actions) +} + +async fn 
collect_user_actions( + iam: &IamClient, + user_name: &str, + risk_notes: &mut Vec, +) -> Result> { + let mut actions = Vec::new(); + + let attached = + iam.list_attached_user_policies().user_name(user_name).send().await.map_err(|err| { + map_iam_error( + err, + risk_notes, + &format!("list_attached_user_policies failed for user {user_name}"), + ) + })?; + + for policy in attached.attached_policies() { + if let Some(arn) = policy.policy_arn() { + collect_managed_policy_actions(iam, arn, &mut actions, risk_notes).await?; + } + } + + let inline = iam.list_user_policies().user_name(user_name).send().await.map_err(|err| { + map_iam_error(err, risk_notes, &format!("list_user_policies failed for user {user_name}")) + })?; + + for name in inline.policy_names() { + let policy = + iam.get_user_policy().user_name(user_name).policy_name(name).send().await.map_err( + |err| { + map_iam_error( + err, + risk_notes, + &format!("get_user_policy failed for user {user_name} policy {name}"), + ) + }, + )?; + + extract_actions_from_document(policy.policy_document(), &mut actions)?; + } + + Ok(actions) +} + +async fn collect_managed_policy_actions( + iam: &IamClient, + policy_arn: &str, + actions: &mut Vec, + risk_notes: &mut Vec, +) -> Result<()> { + let policy = iam.get_policy().policy_arn(policy_arn).send().await.map_err(|err| { + map_iam_error(err, risk_notes, &format!("get_policy failed for {policy_arn}")) + })?; + let version = policy + .policy() + .and_then(|p| p.default_version_id()) + .ok_or_else(|| anyhow!("Managed policy {policy_arn} missing default version"))?; + + let document = + iam.get_policy_version().policy_arn(policy_arn).version_id(version).send().await.map_err( + |err| { + map_iam_error( + err, + risk_notes, + &format!("get_policy_version failed for {policy_arn} version {version}"), + ) + }, + )?; + + if let Some(doc) = document.policy_version().and_then(|v| v.document()) { + extract_actions_from_document(doc, actions)?; + } + + Ok(()) +} + +fn 
extract_actions_from_document(doc: &str, actions: &mut Vec) -> Result<()> { + let decoded = percent_decode_str(doc).decode_utf8()?.into_owned(); + let decoded = if decoded.starts_with('"') { + serde_json::from_str::(&decoded).unwrap_or(decoded) + } else { + decoded + }; + + let json: Value = serde_json::from_str(&decoded) + .map_err(|err| anyhow!("Failed to parse IAM policy document: {err}"))?; + + if let Some(statements) = json.get("Statement") { + if let Some(array) = statements.as_array() { + for stmt in array { + collect_actions_from_statement(stmt, actions); + } + } else { + collect_actions_from_statement(statements, actions); + } + } + + Ok(()) +} + +fn collect_actions_from_statement(statement: &Value, actions: &mut Vec) { + if statement.get("Effect").and_then(|e| e.as_str()) == Some("Deny") { + return; + } + + if let Some(action) = statement.get("Action") { + collect_action_values(action, actions); + } + + if let Some(not_action) = statement.get("NotAction") { + collect_action_values(not_action, actions); + } +} + +fn collect_action_values(value: &Value, actions: &mut Vec) { + match value { + Value::String(s) => actions.push(s.to_lowercase().replace(':', ".")), + Value::Array(arr) => { + for v in arr { + if let Some(s) = v.as_str() { + actions.push(s.to_lowercase().replace(':', ".")); + } + } + } + _ => {} + } +} + +fn classify_permissions(actions: &[String]) -> PermissionSummary { + let mut admin = Vec::new(); + let mut privilege_escalation = Vec::new(); + let mut risky = Vec::new(); + let mut read_only = Vec::new(); + + for action in actions { + let a = action.to_lowercase(); + if a == "*" || a.ends_with(".*") { + admin.push(action.clone()); + continue; + } + + if a.contains("iam.passrole") + || a.contains("iam.create") + || a.contains("iam.putrolepolicy") + || a.contains("iam.updaterolepolicy") + || a.contains("iam.updaterole") + || a.contains("sts.assumerole") + || a.contains("organizations.attachpolicy") + { + privilege_escalation.push(action.clone()); 
+ continue; + } + + if a.contains(".get") || a.contains(".list") || a.contains(".describe") { + read_only.push(action.clone()); + continue; + } + + risky.push(action.clone()); + } + + PermissionSummary { admin, privilege_escalation, risky, read_only } +} + +fn derive_severity(permissions: &PermissionSummary, has_resources: bool) -> Severity { + if !permissions.admin.is_empty() || !permissions.privilege_escalation.is_empty() { + Severity::Critical + } else if !permissions.risky.is_empty() { + Severity::High + } else if !permissions.read_only.is_empty() || has_resources { + Severity::Medium + } else { + Severity::Low + } +} + +fn can_read(permissions: &PermissionSummary, service_prefix: &str) -> bool { + let prefix = service_prefix.to_lowercase(); + + permissions + .admin + .iter() + .chain(&permissions.privilege_escalation) + .chain(&permissions.risky) + .chain(&permissions.read_only) + .any(|action| action == "*" || action.starts_with(&prefix)) +} + +async fn enumerate_resources( + config: &SdkConfig, + permissions: &PermissionSummary, + account_id: Option<&str>, + risk_notes: &mut Vec, +) -> Result> { + let mut resources = Vec::new(); + let no_permissions = permissions.admin.is_empty() + && permissions.privilege_escalation.is_empty() + && permissions.risky.is_empty() + && permissions.read_only.is_empty(); + + if no_permissions { + risk_notes.push( + "IAM permissions unavailable; attempting best-effort resource discovery without permission gating.".into(), + ); + } + + if no_permissions || can_read(permissions, "s3.") { + let client = S3Client::new(config); + match client.list_buckets().send().await { + Ok(resp) => { + for bucket in resp.buckets() { + if let Some(name) = bucket.name() { + resources.push(ResourceExposure { + resource_type: "s3_bucket".into(), + name: format!("arn:aws:s3:::{name}"), + permissions: permissions_for_prefix(permissions, "s3."), + risk: "medium".into(), + reason: "S3 bucket visible to the identity".into(), + }); + } + } + } + Err(err) => 
{ + if !handle_access_denied("s3", &err, risk_notes) { + warn!("AWS access-map: failed to enumerate s3 buckets: {err}"); + risk_notes.push(format!("AWS enumeration failed for s3: {err}")); + } + } + } + } + + if no_permissions || can_read(permissions, "ec2.") { + let ec2 = Ec2Client::new(config); + match ec2.describe_instances().send().await { + Ok(resp) => { + let region = config + .region() + .map(|r| r.as_ref().to_string()) + .unwrap_or_else(|| "unknown-region".into()); + let account = account_id.unwrap_or("unknown-account"); + + for reservation in resp.reservations() { + for instance in reservation.instances() { + if let Some(id) = instance.instance_id() { + resources.push(ResourceExposure { + resource_type: "ec2_instance".into(), + name: format!("arn:aws:ec2:{}:{}:instance/{}", region, account, id), + permissions: permissions_for_prefix(permissions, "ec2."), + risk: "medium".into(), + reason: "EC2 instance readable by the identity".into(), + }); + } + } + } + } + Err(err) => { + if !handle_access_denied("ec2", &err, risk_notes) { + warn!("AWS access-map: failed to enumerate ec2 instances: {err}"); + risk_notes.push(format!("AWS enumeration failed for ec2: {err}")); + } + } + } + } + + if no_permissions || can_read(permissions, "iam.") { + let iam = IamClient::new(config); + match iam.list_roles().send().await { + Ok(resp) => { + for role in resp.roles() { + let arn = role.arn(); + resources.push(ResourceExposure { + resource_type: "iam_role".into(), + name: arn.to_string(), + permissions: permissions_for_prefix(permissions, "iam."), + risk: "high".into(), + reason: "Identity can view IAM roles; may indicate privilege escalation potential".into(), + }); + } + } + Err(err) => { + if !handle_access_denied("iam", &err, risk_notes) { + warn!("AWS access-map: failed to enumerate iam roles: {err}"); + risk_notes.push(format!("AWS enumeration failed for iam: {err}")); + } + } + } + } + + if no_permissions || can_read(permissions, "lambda.") { + let lambda = 
LambdaClient::new(config); + match lambda.list_functions().send().await { + Ok(resp) => { + for function in resp.functions() { + if let Some(arn) = function.function_arn() { + resources.push(ResourceExposure { + resource_type: "lambda_function".into(), + name: arn.to_string(), + permissions: permissions_for_prefix(permissions, "lambda."), + risk: "medium".into(), + reason: "Lambda visible; may imply code execution pathways".into(), + }); + } + } + } + Err(err) => { + if !handle_access_denied("lambda", &err, risk_notes) { + warn!("AWS access-map: failed to enumerate lambda functions: {err}"); + risk_notes.push(format!("AWS enumeration failed for lambda: {err}")); + } + } + } + } + + if no_permissions || can_read(permissions, "dynamodb.") { + let dynamo = DynamoClient::new(config); + match dynamo.list_tables().send().await { + Ok(resp) => { + for table in resp.table_names() { + resources.push(ResourceExposure { + resource_type: "dynamodb_table".into(), + name: table.to_string(), + permissions: permissions_for_prefix(permissions, "dynamodb."), + risk: "medium".into(), + reason: "DynamoDB table visible to the identity".into(), + }); + } + } + Err(err) => { + if !handle_access_denied("dynamodb", &err, risk_notes) { + warn!("AWS access-map: failed to enumerate dynamodb tables: {err}"); + risk_notes.push(format!("AWS enumeration failed for dynamodb: {err}")); + } + } + } + } + + if no_permissions || can_read(permissions, "kms.") { + let kms = KmsClient::new(config); + match kms.list_keys().send().await { + Ok(resp) => { + let region = config.region().map(|r| r.as_ref().to_string()); + let account = account_id.unwrap_or(""); + + for key in resp.keys() { + if let Some(id) = key.key_id() { + let arn = region + .as_ref() + .filter(|r| !r.is_empty()) + .and_then(|r| { + if account.is_empty() { + None + } else { + Some(format!("arn:aws:kms:{r}:{account}:key/{id}")) + } + }) + .unwrap_or_else(|| id.to_string()); + + resources.push(ResourceExposure { + resource_type: 
"kms_key".into(), + name: arn, + permissions: permissions_for_prefix(permissions, "kms."), + risk: "high".into(), + reason: + "Identity can view KMS keys; possible cryptographic privilege paths" + .into(), + }); + } + } + } + Err(err) => { + if !handle_access_denied("kms", &err, risk_notes) { + warn!("AWS access-map: failed to enumerate kms keys: {err}"); + risk_notes.push(format!("AWS enumeration failed for kms: {err}")); + } + } + } + } + + if no_permissions || can_read(permissions, "secretsmanager.") { + let sm = SecretsManagerClient::new(config); + match sm.list_secrets().send().await { + Ok(resp) => { + for secret in resp.secret_list() { + if let Some(arn) = secret.arn() { + resources.push(ResourceExposure { + resource_type: "secret".into(), + name: arn.to_string(), + permissions: permissions_for_prefix(permissions, "secretsmanager."), + risk: "high".into(), + reason: "Secret visible to the identity".into(), + }); + } + } + } + Err(err) => { + if !handle_access_denied("secretsmanager", &err, risk_notes) { + warn!("AWS access-map: failed to enumerate secretsmanager secrets: {err}"); + risk_notes.push(format!("AWS enumeration failed for secretsmanager: {err}")); + } + } + } + } + + Ok(resources) +} + +async fn load_config_from_path(path: Option<&Path>) -> Result { + if let Some(path) = path { + let creds = load_credentials_from_file(path)?; + load_config(Some(creds)).await + } else { + load_config(None).await + } +} + +async fn load_config(credentials: Option) -> Result { + let mut loader = aws_config::defaults(BehaviorVersion::latest()); + + if let Some(creds) = credentials { + loader = loader.credentials_provider(creds); + } + + Ok(loader.load().await) +} + +fn load_credentials_from_file(path: &Path) -> Result { + let raw = std::fs::read_to_string(path).context("Failed to read AWS credential file")?; + + if let Ok(value) = serde_json::from_str::(&raw) { + return credentials_from_json(&value); + } + + credentials_from_kv(&raw) +} + +fn 
credentials_from_json(value: &Value) -> Result { + let map = value.as_object().ok_or_else(|| anyhow!("Credential JSON must be an object"))?; + let access_key = get_case_insensitive( + map, + &["access_key_id", "accessKeyId", "aws_access_key_id", "AccessKeyId"], + ) + .ok_or_else(|| anyhow!("Missing access_key_id in credential JSON"))?; + let secret_key = get_case_insensitive( + map, + &["secret_access_key", "secretAccessKey", "aws_secret_access_key", "SecretAccessKey"], + ) + .ok_or_else(|| anyhow!("Missing secret_access_key in credential JSON"))?; + let session_token = get_case_insensitive( + map, + &["session_token", "sessionToken", "aws_session_token", "SessionToken"], + ); + + Ok(match session_token { + Some(token) => Credentials::new(&access_key, &secret_key, Some(token), None, "access_map"), + None => Credentials::new(&access_key, &secret_key, None, None, "access_map"), + }) +} + +fn get_case_insensitive(map: &serde_json::Map, keys: &[&str]) -> Option { + keys.iter().find_map(|key| { + map.iter() + .find(|(existing, _)| existing.eq_ignore_ascii_case(key)) + .and_then(|(_, v)| v.as_str().map(|s| s.to_string())) + }) +} + +fn credentials_from_kv(raw: &str) -> Result { + let mut access_key = None; + let mut secret_key = None; + let mut session_token = None; + + for line in raw.lines() { + let trimmed = line.trim(); + if trimmed.starts_with('#') || trimmed.is_empty() { + continue; + } + if let Some((key, value)) = trimmed.split_once('=') { + let key_lower = key.trim().to_ascii_lowercase(); + let val = value.trim().to_string(); + match key_lower.as_str() { + "aws_access_key_id" | "access_key_id" => access_key = Some(val), + "aws_secret_access_key" | "secret_access_key" => secret_key = Some(val), + "aws_session_token" | "session_token" => session_token = Some(val), + _ => {} + } + } + } + + let access_key = + access_key.ok_or_else(|| anyhow!("Missing aws_access_key_id in credential file"))?; + let secret_key = + secret_key.ok_or_else(|| anyhow!("Missing 
aws_secret_access_key in credential file"))?; + + Ok(match session_token { + Some(token) => Credentials::new(&access_key, &secret_key, Some(token), None, "access_map"), + None => Credentials::new(&access_key, &secret_key, None, None, "access_map"), + }) +} + +fn handle_access_denied( + service: &str, + err: &SdkError, + risk_notes: &mut Vec, +) -> bool { + let message = err.to_string(); + if is_access_denied(&message) { + warn!("AWS access-map: access denied while enumerating {service}: {message}"); + risk_notes.push(format!("AWS enumeration incomplete: AccessDenied for {service}")); + return true; + } + + false +} + +fn is_access_denied(message: &str) -> bool { + message.contains("AccessDenied") || message.contains("AccessDeniedException") +} + +fn map_iam_error( + err: SdkError, + risk_notes: &mut Vec, + context: &str, +) -> anyhow::Error { + let message = err.to_string(); + if err.as_service_error().is_some() && is_access_denied(&message) { + risk_notes.push( + "IAM policy enumeration blocked: the caller does not have iam:Get* or iam:List* permissions. 
Permissions incomplete.".into(), + ); + } + warn!("AWS access-map IAM error: {context}: {message}"); + anyhow!("{context}: {message}") +} diff --git a/src/access_map/azure copy.rs b/src/access_map/azure copy.rs new file mode 100644 index 0000000..29c762e --- /dev/null +++ b/src/access_map/azure copy.rs @@ -0,0 +1,9 @@ +use anyhow::Result; + +use crate::cli::commands::access_map::AccessMapArgs; + +use super::AccessMapResult; + +pub async fn map_access(args: &AccessMapArgs) -> Result { + super::unsupported_provider(&args.provider).await +} diff --git a/src/access_map/azure.rs b/src/access_map/azure.rs new file mode 100644 index 0000000..29c762e --- /dev/null +++ b/src/access_map/azure.rs @@ -0,0 +1,9 @@ +use anyhow::Result; + +use crate::cli::commands::access_map::AccessMapArgs; + +use super::AccessMapResult; + +pub async fn map_access(args: &AccessMapArgs) -> Result { + super::unsupported_provider(&args.provider).await +} diff --git a/src/access_map/gcp.rs b/src/access_map/gcp.rs new file mode 100644 index 0000000..517142e --- /dev/null +++ b/src/access_map/gcp.rs @@ -0,0 +1,1321 @@ +use std::collections::HashSet; +use std::path::Path; + +use anyhow::{anyhow, Context, Result}; +use percent_encoding::{utf8_percent_encode, NON_ALPHANUMERIC}; +use reqwest::{Client, StatusCode}; +use serde_json::Value; +use tracing::warn; + +macro_rules! 
verbose_warn { + ($($arg:tt)*) => { + if tracing::level_enabled!(tracing::Level::DEBUG) { + warn!($($arg)*); + } + }; +} + +use crate::validation::gcp::GcpValidator; + +#[derive(Debug, Clone)] +struct Ancestor { + kind: String, + id: String, +} + +use super::{ + build_default_resource, build_recommendations, AccessMapResult, AccessSummary, + PermissionSummary, ResourceExposure, RoleBinding, Severity, +}; + +pub async fn map_access(credential_path: Option<&Path>) -> Result { + let path = credential_path.ok_or_else(|| anyhow!("GCP access-map requires a key.json path"))?; + let data = std::fs::read_to_string(path).context("Failed to read credential file")?; + map_access_from_json(&data).await +} + +pub async fn map_access_from_json(data: &str) -> Result { + let validator = GcpValidator::global()?; + let token_context = validator + .get_access_token_from_sa_json(data) + .await + .context("Failed to mint GCP access token")?; + let http_client = validator.client().clone(); + + let access_token = token_context.access_token; + let client_email = token_context.client_email; + let mut project_id = + if token_context.project_id.is_empty() { None } else { Some(token_context.project_id) }; + + if project_id.is_none() { + project_id = match fetch_service_account_project(&http_client, &access_token, &client_email) + .await + { + Ok(value) => value, + Err(err) => { + verbose_warn!( + "GCP access-map: failed to fetch service account metadata for project discovery: {err}" + ); + None + } + }; + } + + let mut roles = Vec::new(); + let mut role_entries: Vec<(String, String)> = Vec::new(); + + let policy = fetch_project_policy(&http_client, &access_token, project_id.as_deref()).await?; + if let Some(policy) = policy.as_ref() { + collect_roles(policy, &client_email, "project", &mut role_entries); + } + + if let Some(project) = project_id.as_deref() { + let ancestors = fetch_project_ancestry(&http_client, &access_token, project) + .await + .unwrap_or_else(|e| { + verbose_warn!("GCP 
access-map: failed to fetch project ancestry: {e}"); + Vec::new() + }); + + for ancestor in ancestors { + if let Some(policy) = + fetch_ancestor_policy(&http_client, &access_token, &ancestor).await? + { + let source = match ancestor.kind.as_str() { + "organization" => format!("org:{}", ancestor.id), + "folder" => format!("folder:{}", ancestor.id), + _ => ancestor.kind.clone(), + }; + collect_roles(&policy, &client_email, &source, &mut role_entries); + } + } + } + + let mut seen = HashSet::new(); + for (role_name, source) in role_entries { + if !seen.insert((role_name.clone(), source.clone())) { + continue; + } + + let permissions = fetch_role_permissions(&http_client, &access_token, &role_name) + .await + .unwrap_or_else(|e| { + verbose_warn!("Failed to expand permissions for {role_name}: {e}"); + Vec::new() + }); + + roles.push(RoleBinding { name: role_name, source, permissions }); + } + + if roles.is_empty() { + if let Some(project) = project_id.as_deref() { + let mut tested_permissions = + test_project_permissions(&http_client, &access_token, project) + .await + .unwrap_or_else(|e| { + verbose_warn!("GCP access-map: failed testIamPermissions fallback: {e}"); + Vec::new() + }); + + if tested_permissions.is_empty() { + tested_permissions = test_service_account_permissions( + &http_client, + &access_token, + project, + &client_email, + ) + .await + .unwrap_or_else(|e| { + verbose_warn!( + "GCP access-map: failed serviceAccount testIamPermissions fallback: {e}" + ); + Vec::new() + }); + } + + if !tested_permissions.is_empty() { + roles.push(RoleBinding { + name: "testIamPermissions".into(), + source: "project".into(), + permissions: tested_permissions, + }); + } + } + } + + let impersonation_notes = if let Some(project) = project_id.as_deref() { + match fetch_service_account_iam_policy(&http_client, &access_token, project, &client_email) + .await + { + Ok(Some(policy)) => extract_impersonation_notes(&policy), + Ok(None) => Vec::new(), + Err(err) => { + 
verbose_warn!("GCP access-map: failed to fetch service account IAM policy: {err}"); + Vec::new() + } + } + } else { + Vec::new() + }; + + let permissions = classify_permissions(&roles); + let severity = derive_severity(&permissions); + + let mut resources = Vec::new(); + if let Some(project) = project_id.as_deref() { + let mut enumerated = + enumerate_resources(&http_client, &access_token, project, &permissions, &roles) + .await + .unwrap_or_else(|e| { + verbose_warn!("GCP access-map: failed resource enumeration: {e}"); + Vec::new() + }); + resources.append(&mut enumerated); + } + + if resources.is_empty() { + resources.push(build_default_resource(project_id.as_deref(), severity)); + } + + let identity = AccessSummary { + id: client_email, + access_type: "service_account".into(), + project: project_id.clone(), + tenant: None, + account_id: None, + }; + + let mut risk_notes = derive_risk_notes(&roles, &permissions); + risk_notes.extend(impersonation_notes); + + let recommendations = build_recommendations(severity); + + Ok(AccessMapResult { + cloud: "gcp".into(), + identity, + roles, + permissions, + resources, + severity, + recommendations, + risk_notes, + }) +} + +async fn fetch_project_policy( + client: &Client, + token: &str, + project_id: Option<&str>, +) -> Result> { + let project = project_id.ok_or_else(|| anyhow!("Missing project_id"))?; + let url = + format!("https://cloudresourcemanager.googleapis.com/v1/projects/{}:getIamPolicy", project); + + let resp_v3 = client + .post(&url) + .bearer_auth(token) + .json(&serde_json::json!({ "options": { "requestedPolicyVersion": 3 } })) + .send() + .await?; + let status_v3 = resp_v3.status(); + let body_v3 = resp_v3.bytes().await?; + + if status_v3.is_success() { + let json = serde_json::from_slice(&body_v3)?; + return Ok(Some(json)); + } + + if let Some(disabled) = service_disabled_message(&body_v3)? 
{ + verbose_warn!( + "GCP access-map: Cloud Resource Manager API disabled for project {project}: {disabled}" + ); + return Ok(None); + } + + if status_v3 == StatusCode::FORBIDDEN || status_v3 == StatusCode::BAD_REQUEST { + let resp_v1 = + client.post(&url).bearer_auth(token).json(&serde_json::json!({})).send().await?; + let status_v1 = resp_v1.status(); + let body_v1 = resp_v1.bytes().await?; + + if status_v1.is_success() { + let json = serde_json::from_slice(&body_v1)?; + return Ok(Some(json)); + } + + if let Some(disabled) = service_disabled_message(&body_v1)? { + verbose_warn!("GCP access-map: Cloud Resource Manager API disabled for project {project}: {disabled}"); + return Ok(None); + } + + if status_v1 == StatusCode::FORBIDDEN { + verbose_warn!( + "GCP access-map: insufficient permissions to read IAM policy (v1 and v3)" + ); + return Ok(None); + } + + return Err(anyhow!( + "Failed to fetch project IAM policy (v1): HTTP {} {}", + status_v1, + String::from_utf8_lossy(&body_v1) + )); + } + + Err(anyhow!( + "Failed to fetch project IAM policy (v3): HTTP {} {}", + status_v3, + String::from_utf8_lossy(&body_v3) + )) +} + +async fn fetch_project_ancestry( + client: &Client, + token: &str, + project_id: &str, +) -> Result> { + let url = format!( + "https://cloudresourcemanager.googleapis.com/v1/projects/{}:getAncestry", + project_id + ); + + let resp = client.post(url).bearer_auth(token).json(&serde_json::json!({})).send().await?; + let status = resp.status(); + let body = resp.bytes().await?; + + if let Some(disabled) = service_disabled_message(&body)? 
{ + verbose_warn!("GCP access-map: Cloud Resource Manager API disabled for project {project_id}: {disabled}"); + return Ok(Vec::new()); + } + + if status == StatusCode::FORBIDDEN { + verbose_warn!("GCP access-map: ancestry lookup forbidden for project {project_id}"); + return Ok(Vec::new()); + } + + if !status.is_success() { + return Err(anyhow!( + "Failed to fetch project ancestry: HTTP {} {}", + status, + String::from_utf8_lossy(&body) + )); + } + + let json: Value = serde_json::from_slice(&body)?; + let mut ancestors = Vec::new(); + if let Some(arr) = json.get("ancestor").and_then(|a| a.as_array()) { + for item in arr { + if let Some(resource) = item.get("resourceId") { + if let (Some(kind), Some(id)) = (resource.get("type"), resource.get("id")) { + if let (Some(kind), Some(id)) = (kind.as_str(), id.as_str()) { + ancestors.push(Ancestor { kind: kind.to_string(), id: id.to_string() }); + } + } + } + } + } + + Ok(ancestors) +} + +async fn fetch_ancestor_policy( + client: &Client, + token: &str, + ancestor: &Ancestor, +) -> Result> { + let url = match ancestor.kind.as_str() { + "organization" => format!( + "https://cloudresourcemanager.googleapis.com/v1/organizations/{}:getIamPolicy", + ancestor.id + ), + "folder" => format!( + "https://cloudresourcemanager.googleapis.com/v1/folders/{}:getIamPolicy", + ancestor.id + ), + _ => return Ok(None), + }; + + let resp_v3 = client + .post(&url) + .bearer_auth(token) + .json(&serde_json::json!({ "options": { "requestedPolicyVersion": 3 } })) + .send() + .await?; + let status_v3 = resp_v3.status(); + let body_v3 = resp_v3.bytes().await?; + + if status_v3.is_success() { + let json = serde_json::from_slice(&body_v3)?; + return Ok(Some(json)); + } + + if let Some(disabled) = service_disabled_message(&body_v3)? 
{ + verbose_warn!( + "GCP access-map: Cloud Resource Manager API disabled for {} {}: {disabled}", + ancestor.kind, + ancestor.id + ); + return Ok(None); + } + + if status_v3 == StatusCode::FORBIDDEN || status_v3 == StatusCode::BAD_REQUEST { + let resp_v1 = + client.post(&url).bearer_auth(token).json(&serde_json::json!({})).send().await?; + let status_v1 = resp_v1.status(); + let body_v1 = resp_v1.bytes().await?; + + if status_v1.is_success() { + let json = serde_json::from_slice(&body_v1)?; + return Ok(Some(json)); + } + + if let Some(disabled) = service_disabled_message(&body_v1)? { + verbose_warn!( + "GCP access-map: Cloud Resource Manager API disabled for {} {}: {disabled}", + ancestor.kind, + ancestor.id + ); + return Ok(None); + } + + if status_v1 == StatusCode::FORBIDDEN { + verbose_warn!( + "GCP access-map: insufficient permissions to read {} IAM policy (v1 and v3)", + ancestor.kind + ); + return Ok(None); + } + + return Err(anyhow!( + "Failed to fetch {} IAM policy (v1): HTTP {} {}", + ancestor.kind, + status_v1, + String::from_utf8_lossy(&body_v1) + )); + } + + Err(anyhow!( + "Failed to fetch {} IAM policy (v3): HTTP {} {}", + ancestor.kind, + status_v3, + String::from_utf8_lossy(&body_v3) + )) +} + +async fn fetch_service_account_project( + client: &Client, + token: &str, + client_email: &str, +) -> Result> { + // Try to pull the service account resource; this works even when IAM policy access is blocked. + let encoded_email = utf8_percent_encode(client_email, NON_ALPHANUMERIC); + let url = format!("https://iam.googleapis.com/v1/projects/-/serviceAccounts/{}", encoded_email); + + let resp = client.get(url).bearer_auth(token).send().await?; + let status = resp.status(); + let body = resp.bytes().await?; + + if let Some(disabled) = service_disabled_message(&body)? 
{ + verbose_warn!("GCP access-map: IAM API disabled when fetching metadata for {client_email}: {disabled}"); + return Ok(None); + } + + if status == StatusCode::FORBIDDEN { + verbose_warn!("GCP access-map: service account metadata forbidden for {client_email}"); + return Ok(None); + } + + if !status.is_success() { + return Err(anyhow!( + "Failed to fetch service account metadata: HTTP {} {}", + status, + String::from_utf8_lossy(&body) + )); + } + + let json: Value = serde_json::from_slice(&body)?; + Ok(json.get("projectId").and_then(|p| p.as_str()).map(|s| s.to_string())) +} + +fn extract_roles(policy: &Value, client_email: &str) -> Vec { + let email_member = format!("serviceAccount:{client_email}"); + let mut role_bindings = Vec::new(); + if let Some(bindings) = policy["bindings"].as_array() { + for binding in bindings { + if let Some(role_name) = binding["role"].as_str() { + if let Some(members) = binding["members"].as_array() { + if members.iter().any(|m| m.as_str() == Some(&email_member)) { + role_bindings.push(role_name.to_string()); + } + } + } + } + } + role_bindings +} + +fn collect_roles( + policy: &Value, + client_email: &str, + source: &str, + out: &mut Vec<(String, String)>, +) { + for role in extract_roles(policy, client_email) { + out.push((role, source.to_string())); + } +} + +async fn fetch_role_permissions( + client: &Client, + token: &str, + role_name: &str, +) -> Result> { + let url = if role_name.starts_with("roles/") { + format!("https://iam.googleapis.com/v1/{role_name}") + } else if role_name.starts_with("projects/") || role_name.starts_with("organizations/") { + format!("https://iam.googleapis.com/v1/{role_name}") + } else { + format!("https://iam.googleapis.com/v1/roles/{role_name}") + }; + + let resp = client.get(url).bearer_auth(token).send().await?; + let status = resp.status(); + let body = resp.bytes().await?; + + if let Some(disabled) = service_disabled_message(&body)? 
{ + verbose_warn!("GCP access-map: IAM API disabled while expanding {role_name}: {disabled}"); + return Ok(Vec::new()); + } + + if !status.is_success() { + return Err(anyhow!( + "Failed to expand permissions for {role_name}: HTTP {} {}", + status, + String::from_utf8_lossy(&body) + )); + } + + let json: Value = serde_json::from_slice(&body)?; + let permissions = json + .get("includedPermissions") + .and_then(|p| p.as_array()) + .map(|arr| arr.iter().filter_map(|p| p.as_str().map(|s| s.to_string())).collect()) + .unwrap_or_default(); + Ok(permissions) +} + +fn classify_permissions(roles: &[RoleBinding]) -> PermissionSummary { + let mut admin = HashSet::new(); + let mut privilege_escalation = HashSet::new(); + let mut risky = HashSet::new(); + let mut read_only = HashSet::new(); + + for role in roles { + let role_lower = role.name.to_lowercase(); + if role_lower.contains("owner") + || role_lower.contains("admin") + || role.name == "roles/editor" + { + admin.insert(role.name.clone()); + } + + for perm in &role.permissions { + if perm.contains("*") { + risky.insert(perm.clone()); + continue; + } + + if perm.contains("setIamPolicy") + || perm.contains("serviceAccountTokenCreator") + || perm.contains("serviceAccounts.actAs") + || perm.contains("roles.create") + || perm.contains("roles.update") + { + privilege_escalation.insert(perm.clone()); + continue; + } + + if perm.contains(".get") || perm.contains(".list") { + read_only.insert(perm.clone()); + continue; + } + + risky.insert(perm.clone()); + } + } + + PermissionSummary { + admin: sorted(admin), + privilege_escalation: sorted(privilege_escalation), + risky: sorted(risky), + read_only: sorted(read_only), + } +} + +fn derive_severity(permissions: &PermissionSummary) -> Severity { + if !permissions.admin.is_empty() || !permissions.privilege_escalation.is_empty() { + Severity::Critical + } else if !permissions.risky.is_empty() { + Severity::High + } else if !permissions.read_only.is_empty() { + Severity::Medium + } else { 
+ Severity::Low + } +} + +fn collect_permission_set(roles: &[RoleBinding]) -> HashSet { + let mut perms = HashSet::new(); + for role in roles { + for perm in &role.permissions { + perms.insert(perm.clone()); + } + } + perms +} + +fn matching_permissions(perm_set: &HashSet, prefixes: &[&str]) -> Vec { + let mut matches: Vec = perm_set + .iter() + .filter(|perm| *perm == "*" || prefixes.iter().any(|prefix| perm.starts_with(prefix))) + .cloned() + .collect(); + + matches.sort(); + matches +} + +async fn enumerate_resources( + client: &Client, + token: &str, + project_id: &str, + permissions: &PermissionSummary, + roles: &[RoleBinding], +) -> Result> { + let mut resources = Vec::new(); + let perm_set = collect_permission_set(roles); + let mut add_storage = false; + let mut add_bigquery = false; + let mut add_secret_manager = false; + let mut add_compute = false; + let mut add_cloud_sql = false; + let mut add_pubsub = false; + let mut add_cloud_run = false; + let mut add_artifact_registry = false; + let mut add_gke = false; + + for perm in permissions + .risky + .iter() + .chain(permissions.read_only.iter()) + .chain(permissions.privilege_escalation.iter()) + { + if perm.starts_with("storage.buckets.list") || perm.starts_with("storage.buckets.get") { + add_storage = true; + } + if perm.starts_with("bigquery.datasets.list") || perm.starts_with("bigquery.datasets.get") { + add_bigquery = true; + } + if perm.starts_with("secretmanager.secrets.list") { + add_secret_manager = true; + } + if perm.starts_with("compute.instances.list") || perm.starts_with("compute.instances.get") { + add_compute = true; + } + if perm.starts_with("cloudsql.instances.list") || perm.starts_with("sql.instances.list") { + add_cloud_sql = true; + } + if perm.starts_with("pubsub.topics.list") || perm.starts_with("pubsub.subscriptions.list") { + add_pubsub = true; + } + if perm.starts_with("run.services.list") { + add_cloud_run = true; + } + if perm.starts_with("artifactregistry.repositories.list") { + 
add_artifact_registry = true; + } + if perm.starts_with("container.clusters.list") { + add_gke = true; + } + } + + if add_storage { + let url = format!("https://storage.googleapis.com/storage/v1/b?project={}", project_id); + let resp = client.get(&url).bearer_auth(token).send().await?; + let status = resp.status(); + let body = resp.bytes().await?; + + if let Some(disabled) = service_disabled_message(&body)? { + verbose_warn!( + "GCP access-map: Cloud Storage API disabled for project {project_id}: {disabled}" + ); + } else if status.is_success() { + let json: Value = serde_json::from_slice(&body)?; + if let Some(items) = json.get("items").and_then(|i| i.as_array()) { + let writable = perm_set.iter().any(|p| { + p.starts_with("storage.objects.create") + || p.starts_with("storage.objects.update") + || p.starts_with("storage.objects.delete") + }); + for bucket in items { + if let Some(name) = bucket.get("name").and_then(|n| n.as_str()) { + resources.push(ResourceExposure { + resource_type: "storage_bucket".into(), + name: format!("projects/{project_id}/buckets/{name}"), + permissions: matching_permissions( + &perm_set, + &["storage.buckets.", "storage.objects."], + ), + risk: if writable { "high".into() } else { "medium".into() }, + reason: if writable { + "Service account can list and write bucket objects".into() + } else { + "Service account can list bucket contents".into() + }, + }); + } + } + } + } else if status != StatusCode::FORBIDDEN { + verbose_warn!( + "GCP access-map: storage enumeration failed: HTTP {} {}", + status, + String::from_utf8_lossy(&body) + ); + } + } + + if add_compute { + let url = format!( + "https://compute.googleapis.com/compute/v1/projects/{}/aggregated/instances", + project_id + ); + let resp = client.get(&url).bearer_auth(token).send().await?; + let status = resp.status(); + let body = resp.bytes().await?; + + if let Some(disabled) = service_disabled_message(&body)? 
{ + verbose_warn!( + "GCP access-map: Compute Engine API disabled for project {project_id}: {disabled}" + ); + } else if status.is_success() { + let json: Value = serde_json::from_slice(&body)?; + if let Some(items) = json.get("items").and_then(|i| i.as_object()) { + let writable = perm_set.iter().any(|p| { + p.starts_with("compute.instances.insert") + || p.starts_with("compute.instances.update") + || p.starts_with("compute.instances.delete") + }); + + for zone in items.values() { + if let Some(instances) = zone.get("instances").and_then(|i| i.as_array()) { + for instance in instances { + if let Some(name) = instance.get("name").and_then(|n| n.as_str()) { + resources.push(ResourceExposure { + resource_type: "compute_instance".into(), + name: format!("projects/{project_id}/instances/{name}"), + permissions: matching_permissions( + &perm_set, + &["compute.instances."], + ), + risk: if writable { "high".into() } else { "medium".into() }, + reason: if writable { + "Service account can manage Compute Engine instances".into() + } else { + "Service account can list Compute Engine instances".into() + }, + }); + } + } + } + } + } + } else if status != StatusCode::FORBIDDEN { + verbose_warn!( + "GCP access-map: Compute Engine enumeration failed: HTTP {} {}", + status, + String::from_utf8_lossy(&body) + ); + } + } + + if add_cloud_sql { + let url = format!( + "https://sqladmin.googleapis.com/sql/v1beta4/projects/{}/instances", + project_id + ); + let resp = client.get(&url).bearer_auth(token).send().await?; + let status = resp.status(); + let body = resp.bytes().await?; + + if let Some(disabled) = service_disabled_message(&body)? 
{ + verbose_warn!( + "GCP access-map: Cloud SQL Admin API disabled for project {project_id}: {disabled}" + ); + } else if status.is_success() { + let json: Value = serde_json::from_slice(&body)?; + if let Some(items) = json.get("items").and_then(|i| i.as_array()) { + let writable = perm_set.iter().any(|p| { + p.starts_with("cloudsql.instances.update") + || p.starts_with("cloudsql.instances.create") + || p.starts_with("cloudsql.instances.delete") + || p.starts_with("sql.instances.update") + || p.starts_with("sql.instances.create") + || p.starts_with("sql.instances.delete") + }); + + for instance in items { + if let Some(name) = instance.get("name").and_then(|n| n.as_str()) { + resources.push(ResourceExposure { + resource_type: "cloudsql_instance".into(), + name: format!("projects/{project_id}/instances/{name}"), + permissions: matching_permissions( + &perm_set, + &["cloudsql.instances.", "sql.instances."], + ), + risk: if writable { "high".into() } else { "medium".into() }, + reason: if writable { + "Service account can manage Cloud SQL instances".into() + } else { + "Service account can list Cloud SQL instances".into() + }, + }); + } + } + } + } else if status != StatusCode::FORBIDDEN { + verbose_warn!( + "GCP access-map: Cloud SQL enumeration failed: HTTP {} {}", + status, + String::from_utf8_lossy(&body) + ); + } + } + + if add_pubsub { + let topics_url = format!("https://pubsub.googleapis.com/v1/projects/{project_id}/topics"); + let subs_url = + format!("https://pubsub.googleapis.com/v1/projects/{project_id}/subscriptions"); + + let writable = perm_set.iter().any(|p| { + p.starts_with("pubsub.topics.publish") + || p.starts_with("pubsub.topics.create") + || p.starts_with("pubsub.subscriptions.create") + }); + + for (url, resource_type) in + [(topics_url, "pubsub_topic"), (subs_url, "pubsub_subscription")] + { + let resp = client.get(&url).bearer_auth(token).send().await?; + let status = resp.status(); + let body = resp.bytes().await?; + + if let Some(disabled) = 
service_disabled_message(&body)? { + verbose_warn!( + "GCP access-map: Pub/Sub API disabled for project {project_id}: {disabled}" + ); + continue; + } + + if status.is_success() { + let json: Value = serde_json::from_slice(&body)?; + let key = if resource_type == "pubsub_topic" { "topics" } else { "subscriptions" }; + if let Some(items) = json.get(key).and_then(|i| i.as_array()) { + for item in items { + if let Some(name) = item.get("name").and_then(|n| n.as_str()) { + resources.push(ResourceExposure { + resource_type: resource_type.into(), + name: name.to_string(), + permissions: matching_permissions( + &perm_set, + &["pubsub.topics.", "pubsub.subscriptions."], + ), + risk: if writable { "high".into() } else { "medium".into() }, + reason: if writable { + "Service account can publish to or manage Pub/Sub resources" + .into() + } else { + "Service account can list Pub/Sub resources".into() + }, + }); + } + } + } + } else if status != StatusCode::FORBIDDEN { + verbose_warn!( + "GCP access-map: Pub/Sub enumeration failed for {resource_type}: HTTP {} {}", + status, + String::from_utf8_lossy(&body) + ); + } + } + } + + if add_cloud_run { + let url = + format!("https://run.googleapis.com/v2/projects/{}/locations/-/services", project_id); + let resp = client.get(&url).bearer_auth(token).send().await?; + let status = resp.status(); + let body = resp.bytes().await?; + + if let Some(disabled) = service_disabled_message(&body)? 
{ + verbose_warn!( + "GCP access-map: Cloud Run API disabled for project {project_id}: {disabled}" + ); + } else if status.is_success() { + let json: Value = serde_json::from_slice(&body)?; + if let Some(items) = json.get("services").and_then(|i| i.as_array()) { + let writable = perm_set.iter().any(|p| { + p.starts_with("run.services.update") || p.starts_with("run.services.create") + }); + + for service in items { + if let Some(name) = service.get("name").and_then(|n| n.as_str()) { + resources.push(ResourceExposure { + resource_type: "cloud_run_service".into(), + name: name.to_string(), + permissions: matching_permissions(&perm_set, &["run.services."]), + risk: if writable { "high".into() } else { "medium".into() }, + reason: if writable { + "Service account can deploy or modify Cloud Run services".into() + } else { + "Service account can list Cloud Run services".into() + }, + }); + } + } + } + } else if status != StatusCode::FORBIDDEN { + verbose_warn!( + "GCP access-map: Cloud Run enumeration failed: HTTP {} {}", + status, + String::from_utf8_lossy(&body) + ); + } + } + + if add_artifact_registry { + let url = format!( + "https://artifactregistry.googleapis.com/v1/projects/{}/locations/-/repositories", + project_id + ); + let resp = client.get(&url).bearer_auth(token).send().await?; + let status = resp.status(); + let body = resp.bytes().await?; + + if let Some(disabled) = service_disabled_message(&body)? 
{ + verbose_warn!("GCP access-map: Artifact Registry API disabled for project {project_id}: {disabled}"); + } else if status.is_success() { + let json: Value = serde_json::from_slice(&body)?; + if let Some(items) = json.get("repositories").and_then(|i| i.as_array()) { + let writable = perm_set.iter().any(|p| { + p.starts_with("artifactregistry.repositories.uploadArtifacts") + || p.starts_with("artifactregistry.repositories.create") + || p.starts_with("artifactregistry.repositories.update") + }); + + for repo in items { + if let Some(name) = repo.get("name").and_then(|n| n.as_str()) { + resources.push(ResourceExposure { + resource_type: "artifact_registry_repository".into(), + name: name.to_string(), + permissions: matching_permissions(&perm_set, &["artifactregistry."]), + risk: if writable { "high".into() } else { "medium".into() }, + reason: if writable { + "Service account can push or modify Artifact Registry repositories" + .into() + } else { + "Service account can list Artifact Registry repositories".into() + }, + }); + } + } + } + } else if status != StatusCode::FORBIDDEN { + verbose_warn!( + "GCP access-map: Artifact Registry enumeration failed: HTTP {} {}", + status, + String::from_utf8_lossy(&body) + ); + } + } + + if add_gke { + let url = format!( + "https://container.googleapis.com/v1/projects/{}/locations/-/clusters", + project_id + ); + let resp = client.get(&url).bearer_auth(token).send().await?; + let status = resp.status(); + let body = resp.bytes().await?; + + if let Some(disabled) = service_disabled_message(&body)? 
{ + verbose_warn!("GCP access-map: Kubernetes Engine API disabled for project {project_id}: {disabled}"); + } else if status.is_success() { + let json: Value = serde_json::from_slice(&body)?; + if let Some(items) = json.get("clusters").and_then(|i| i.as_array()) { + let writable = perm_set.iter().any(|p| { + p.starts_with("container.clusters.update") + || p.starts_with("container.clusters.create") + }); + + for cluster in items { + if let Some(name) = cluster.get("name").and_then(|n| n.as_str()) { + resources.push(ResourceExposure { + resource_type: "gke_cluster".into(), + name: name.to_string(), + permissions: matching_permissions(&perm_set, &["container.clusters."]), + risk: if writable { "high".into() } else { "medium".into() }, + reason: if writable { + "Service account can modify or create GKE clusters".into() + } else { + "Service account can list GKE clusters".into() + }, + }); + } + } + } + } else if status != StatusCode::FORBIDDEN { + verbose_warn!( + "GCP access-map: GKE enumeration failed: HTTP {} {}", + status, + String::from_utf8_lossy(&body) + ); + } + } + + if add_bigquery { + let url = + format!("https://bigquery.googleapis.com/bigquery/v2/projects/{}/datasets", project_id); + let resp = client.get(&url).bearer_auth(token).send().await?; + let status = resp.status(); + let body = resp.bytes().await?; + + if let Some(disabled) = service_disabled_message(&body)? 
{ + verbose_warn!( + "GCP access-map: BigQuery API disabled for project {project_id}: {disabled}" + ); + } else if status.is_success() { + let json: Value = serde_json::from_slice(&body)?; + if let Some(items) = json.get("datasets").and_then(|i| i.as_array()) { + for dataset in items { + if let Some(ds_id) = dataset + .get("datasetReference") + .and_then(|r| r.get("datasetId")) + .and_then(|n| n.as_str()) + { + resources.push(ResourceExposure { + resource_type: "bigquery_dataset".into(), + name: format!("projects/{project_id}/datasets/{ds_id}"), + permissions: matching_permissions( + &perm_set, + &["bigquery.datasets.", "bigquery.tables."], + ), + risk: "medium".into(), + reason: "Service account can list BigQuery datasets".into(), + }); + } + } + } + } else if status != StatusCode::FORBIDDEN { + verbose_warn!( + "GCP access-map: BigQuery enumeration failed: HTTP {} {}", + status, + String::from_utf8_lossy(&body) + ); + } + } + + if add_secret_manager { + let url = + format!("https://secretmanager.googleapis.com/v1/projects/{}/secrets", project_id); + let resp = client.get(&url).bearer_auth(token).send().await?; + let status = resp.status(); + let body = resp.bytes().await?; + + if let Some(disabled) = service_disabled_message(&body)? 
{ + verbose_warn!( + "GCP access-map: Secret Manager API disabled for project {project_id}: {disabled}" + ); + } else if status.is_success() { + let json: Value = serde_json::from_slice(&body)?; + if let Some(items) = json.get("secrets").and_then(|i| i.as_array()) { + let write_access = + perm_set.iter().any(|p| p.contains("secretmanager.secrets.create")); + for secret in items { + if let Some(name) = secret.get("name").and_then(|n| n.as_str()) { + resources.push(ResourceExposure { + resource_type: "secretmanager_secret".into(), + name: name.to_string(), + permissions: matching_permissions( + &perm_set, + &["secretmanager.secrets.", "secretmanager.versions."], + ), + risk: if write_access { "high".into() } else { "medium".into() }, + reason: "Service account can list secrets".into(), + }); + } + } + } + } else if status != StatusCode::FORBIDDEN { + verbose_warn!( + "GCP access-map: Secret Manager enumeration failed: HTTP {} {}", + status, + String::from_utf8_lossy(&body) + ); + } + } + + Ok(resources) +} + +async fn fetch_service_account_iam_policy( + client: &Client, + token: &str, + project_id: &str, + client_email: &str, +) -> Result> { + let encoded_email = utf8_percent_encode(client_email, NON_ALPHANUMERIC); + let url = format!( + "https://iam.googleapis.com/v1/projects/{}/serviceAccounts/{}:getIamPolicy", + project_id, encoded_email + ); + + let resp = client + .post(&url) + .bearer_auth(token) + .json(&serde_json::json!({ "options": { "requestedPolicyVersion": 3 } })) + .send() + .await?; + let status = resp.status(); + let body = resp.bytes().await?; + + if let Some(disabled) = service_disabled_message(&body)? 
{ + verbose_warn!( + "GCP access-map: IAM API disabled when fetching service account policy: {disabled}" + ); + return Ok(None); + } + + if status == StatusCode::FORBIDDEN { + verbose_warn!("GCP access-map: service account IAM policy forbidden for {client_email}"); + return Ok(None); + } + + if !status.is_success() { + return Err(anyhow!( + "Failed to fetch service account IAM policy: HTTP {} {}", + status, + String::from_utf8_lossy(&body) + )); + } + + let policy: Value = serde_json::from_slice(&body)?; + Ok(Some(policy)) +} + +fn extract_impersonation_notes(policy: &Value) -> Vec { + let mut notes = Vec::new(); + if let Some(bindings) = policy.get("bindings").and_then(|b| b.as_array()) { + for binding in bindings { + let role = binding.get("role").and_then(|r| r.as_str()).unwrap_or(""); + if !(role.contains("serviceAccountTokenCreator") + || role.contains("serviceAccountUser") + || role.contains("ServiceAccountUser")) + { + continue; + } + + if let Some(members) = binding.get("members").and_then(|m| m.as_array()) { + for member in members { + if let Some(m) = member.as_str() { + notes.push(format!("{m} can impersonate this service account via {role}")); + } + } + } + } + } + notes +} + +fn derive_risk_notes(roles: &[RoleBinding], permissions: &PermissionSummary) -> Vec { + let mut notes = Vec::new(); + if !permissions.admin.is_empty() { + notes.push(format!("Admin-level roles attached: {}", permissions.admin.join(", "))); + } + if !permissions.privilege_escalation.is_empty() { + notes.push(format!( + "Privilege escalation permissions detected: {}", + permissions.privilege_escalation.join(", ") + )); + } + + let perm_set = collect_permission_set(roles); + if perm_set.iter().any(|p| p.contains("serviceAccounts.actAs")) { + notes.push("Can impersonate other service accounts (iam.serviceAccounts.actAs)".into()); + } + if perm_set.iter().any(|p| p.contains("resourcemanager.projects.setIamPolicy")) { + notes.push("Can modify project IAM policies".into()); + } + if 
perm_set.iter().any(|p| { + p.starts_with("storage.") && (p.contains("objects.create") || p.contains("buckets.update")) + }) { + notes.push("Has write access to Cloud Storage resources".into()); + } + if perm_set.iter().any(|p| p.contains("secretmanager.secrets.addVersion")) { + notes.push("Can write new versions into Secret Manager".into()); + } + + if roles.iter().any(|r| r.source.starts_with("org:")) { + notes.push("Inherited organization-level roles detected".into()); + } + if roles.iter().any(|r| r.source.starts_with("folder:")) { + notes.push("Inherited folder-level roles detected".into()); + } + + notes +} + +fn sorted(items: HashSet) -> Vec { + let mut v: Vec<_> = items.into_iter().collect(); + v.sort(); + v +} + +async fn test_project_permissions( + client: &Client, + token: &str, + project_id: &str, +) -> Result> { + let candidates = vec![ + "resourcemanager.projects.getIamPolicy", + "resourcemanager.projects.setIamPolicy", + "resourcemanager.projects.testIamPermissions", + "iam.serviceAccounts.actAs", + "iam.serviceAccounts.get", + "iam.serviceAccounts.getAccessToken", + "iam.serviceAccountKeys.list", + "iam.serviceAccountTokenCreator", + "storage.buckets.list", + "storage.objects.list", + "compute.instances.list", + "compute.instances.create", + "bigquery.datasets.get", + "bigquery.tables.list", + "secretmanager.secrets.list", + "cloudsql.instances.list", + "pubsub.topics.list", + "pubsub.subscriptions.list", + "run.services.list", + "artifactregistry.repositories.list", + "container.clusters.list", + ]; + + let url = format!( + "https://cloudresourcemanager.googleapis.com/v1/projects/{}:testIamPermissions", + project_id + ); + + let resp = client + .post(url) + .bearer_auth(token) + .json(&serde_json::json!({ "permissions": candidates })) + .send() + .await?; + + if resp.status() == StatusCode::FORBIDDEN { + verbose_warn!("GCP access-map: testIamPermissions forbidden for project {project_id}"); + return Ok(Vec::new()); + } + + let resp = 
resp.error_for_status()?; + let json: Value = resp.json().await?; + let permissions = json + .get("permissions") + .and_then(|p| p.as_array()) + .map(|arr| arr.iter().filter_map(|p| p.as_str().map(|s| s.to_string())).collect()) + .unwrap_or_default(); + + Ok(permissions) +} + +async fn test_service_account_permissions( + client: &Client, + token: &str, + project_id: &str, + client_email: &str, +) -> Result> { + let candidates = vec![ + "iam.serviceAccounts.get", + "iam.serviceAccounts.getIamPolicy", + "iam.serviceAccounts.actAs", + "iam.serviceAccounts.signBlob", + "iam.serviceAccounts.signJwt", + "iam.serviceAccounts.implicitDelegation", + "iam.serviceAccountKeys.list", + ]; + + let encoded_email = utf8_percent_encode(client_email, NON_ALPHANUMERIC); + let resource = format!("projects/{}/serviceAccounts/{}", project_id, encoded_email); + + let url = format!("https://iam.googleapis.com/v1/{}:testIamPermissions", resource); + + let resp = client + .post(url) + .bearer_auth(token) + .json(&serde_json::json!({ "permissions": candidates })) + .send() + .await?; + + if resp.status() == StatusCode::FORBIDDEN { + verbose_warn!( + "GCP access-map: testIamPermissions forbidden for service account {client_email}" + ); + return Ok(Vec::new()); + } + + let resp = resp.error_for_status()?; + let json: Value = resp.json().await?; + let permissions = json + .get("permissions") + .and_then(|p| p.as_array()) + .map(|arr| arr.iter().filter_map(|p| p.as_str().map(|s| s.to_string())).collect()) + .unwrap_or_default(); + + Ok(permissions) +} + +fn service_disabled_message(body: &[u8]) -> Result> { + let parsed: Value = match serde_json::from_slice(body) { + Ok(v) => v, + Err(_) => return Ok(None), + }; + + let Some(error) = parsed.get("error") else { + return Ok(None); + }; + + if let Some(details) = error.get("details").and_then(|d| d.as_array()) { + for detail in details { + let reason = detail.get("reason").and_then(|r| r.as_str()); + if reason == Some("SERVICE_DISABLED") { + let 
metadata = detail.get("metadata"); + let service_title = metadata + .and_then(|m| m.get("serviceTitle")) + .and_then(|s| s.as_str()) + .or_else(|| metadata.and_then(|m| m.get("service")).and_then(|s| s.as_str())) + .unwrap_or("unknown service"); + let activation_url = metadata + .and_then(|m| m.get("activationUrl")) + .and_then(|s| s.as_str()) + .unwrap_or("https://console.developers.google.com/apis/dashboard"); + + return Ok(Some(format!( + "{service_title} is disabled; enable it at {activation_url}" + ))); + } + } + } + + Ok(None) +} diff --git a/src/access_map/graph.rs b/src/access_map/graph.rs new file mode 100644 index 0000000..519fd15 --- /dev/null +++ b/src/access_map/graph.rs @@ -0,0 +1,48 @@ +use super::AccessMapResult; + +/// Convert an identity map result into a Graphviz DOT representation. +pub fn to_dot(result: &AccessMapResult) -> String { + let mut out = String::new(); + out.push_str("digraph G {\n rankdir=LR;\n"); + + out.push_str(&format!( + " identity [label=\"{} ({})\"];\n", + result.identity.id, result.identity.access_type + )); + + for role in &result.roles { + let safe_role = sanitize(&role.name); + out.push_str(&format!( + " role_{safe} [label=\"{}\"];\n identity -> role_{safe};\n", + role.name, + safe = safe_role + )); + + for perm in &role.permissions { + let safe_perm = sanitize(perm); + out.push_str(&format!( + " perm_{safe} [label=\"{}\"];\n role_{role_safe} -> perm_{safe};\n", + perm, + role_safe = safe_role, + safe = safe_perm + )); + } + } + + for res in &result.resources { + let safe = sanitize(&res.name); + out.push_str(&format!( + " res_{safe} [label=\"{} ({})\"];\n identity -> res_{safe};\n", + res.name, + res.risk, + safe = safe + )); + } + + out.push_str("}\n"); + out +} + +fn sanitize(name: &str) -> String { + name.chars().map(|c| if c.is_alphanumeric() { c } else { '_' }).collect() +} diff --git a/src/access_map/report.rs b/src/access_map/report.rs new file mode 100644 index 0000000..832897a --- /dev/null +++ 
b/src/access_map/report.rs @@ -0,0 +1,1190 @@ +use std::path::Path; + +use anyhow::Result; +use base64::engine::general_purpose::STANDARD as BASE64_STANDARD; +use base64::Engine; +use flate2::{write::GzEncoder, Compression}; +use std::io::Write; + +use super::AccessMapResult; + +/// Generate a standalone HTML report with a simple, collapsible tree view (no D3 dependency). +pub fn generate_html_report_multi(results: &[AccessMapResult], path: &Path) -> Result<()> { + let json = serde_json::to_string(results)?; + let compressed = gzip_base64(&json)?; + let html = build_html(&json, &compressed); + std::fs::write(path, html)?; + Ok(()) +} + +fn gzip_base64(json_str: &str) -> Result { + let mut encoder = GzEncoder::new(Vec::new(), Compression::default()); + encoder.write_all(json_str.as_bytes())?; + let compressed = encoder.finish()?; + Ok(BASE64_STANDARD.encode(compressed)) +} + +fn build_html(json_str: &str, compressed_json_b64: &str) -> String { + const TEMPLATE: &str = r#" + + + + + Kingfisher Access Map + + + +
+

Access Map

+
Unified Access Map Report
+
+ +
+
+ +
+
+
+ + +"#; + let mut template = TEMPLATE.replace("REPLACE_COMPRESSED_JSON", compressed_json_b64); + let uncompressed_len = json_str.len().to_string(); + template = template.replace("REPLACE_UNCOMPRESSED_LEN", &uncompressed_len); + template +} diff --git a/src/baseline.rs b/src/baseline.rs index 68f5d93..2c0193a 100644 --- a/src/baseline.rs +++ b/src/baseline.rs @@ -89,7 +89,7 @@ pub fn apply_baseline( let entry = BaselineFinding { filepath: normalized, fingerprint: hash, - linenum: m.location.source_span.start.line, + linenum: m.location.resolved_source_span().start.line, lastupdated: Local::now().to_rfc2822(), }; new_entries.push(entry); @@ -159,18 +159,18 @@ mod tests { let mut store = FindingsStore::new(PathBuf::from(".")); let rule = test_rule(); let match_item = Match { - location: Location { - offset_span: OffsetSpan { start: 0, end: 1 }, - source_span: SourceSpan { + location: Location::with_source_span( + OffsetSpan { start: 0, end: 1 }, + Some(SourceSpan { start: SourcePoint { line: 1, column: 0 }, end: SourcePoint { line: 1, column: 1 }, - }, - }, + }), + ), groups: empty_captures(), blob_id: BlobId::default(), finding_fingerprint: fingerprint, rule: Arc::clone(&rule), - validation_response_body: String::new(), + validation_response_body: None, validation_response_status: 0, validation_success: false, calculated_entropy: 0.0, diff --git a/src/blob.rs b/src/blob.rs index 9b1aff1..f1cbe0e 100644 --- a/src/blob.rs +++ b/src/blob.rs @@ -251,7 +251,8 @@ impl BlobId { let mut hasher = Sha1::new(); write!(&mut hasher, "blob {}\0", bytes.len()).unwrap(); hasher.update(bytes); - BlobId(hasher.finalize().as_slice().try_into().expect("SHA-1 output size mismatch")) + let digest: [u8; 20] = hasher.finalize().into(); + BlobId(digest) } } impl<'de> Deserialize<'de> for BlobId { @@ -303,7 +304,8 @@ impl BlobId { hasher.update(&input[..CHUNK]); hasher.update(&input[input.len() - CHUNK..]); } - BlobId(hasher.finalize().as_slice().try_into().expect("SHA-1 output size 
mismatch")) + let digest: [u8; 20] = hasher.finalize().into(); + BlobId(digest) } #[inline] diff --git a/src/cli/commands/access_map.rs b/src/cli/commands/access_map.rs new file mode 100644 index 0000000..94b8a0f --- /dev/null +++ b/src/cli/commands/access_map.rs @@ -0,0 +1,34 @@ +use std::path::PathBuf; + +use clap::{Args, ValueEnum}; + +/// Inspect a cloud credential and derive the effective identity and blast radius. +#[derive(Args, Debug)] +pub struct AccessMapArgs { + /// Cloud provider: aws | gcp | azure + #[clap(value_parser, value_name = "PROVIDER")] + pub provider: AccessMapProvider, + + /// Path to a credential artifact (e.g. GCP service account key JSON) + #[clap(value_parser, value_name = "CREDENTIAL", required = false)] + pub credential_path: Option, + + /// Optional path to write an interactive D3.js HTML report + #[clap(long, value_name = "PATH")] + pub html_out: Option, + + /// Optional path to write JSON output (otherwise JSON goes to stdout) + #[clap(long, value_name = "PATH")] + pub json_out: Option, +} + +/// Supported cloud providers for identity mapping. 
+#[derive(Clone, Debug, ValueEnum)] +pub enum AccessMapProvider { + /// Amazon Web Services + Aws, + /// Google Cloud Platform + Gcp, + /// Microsoft Azure + Azure, +} diff --git a/src/cli/commands/mod.rs b/src/cli/commands/mod.rs index 4b25b89..3c969c9 100644 --- a/src/cli/commands/mod.rs +++ b/src/cli/commands/mod.rs @@ -1,3 +1,4 @@ +pub mod access_map; pub mod azure; pub mod bitbucket; pub mod gitea; diff --git a/src/cli/commands/scan.rs b/src/cli/commands/scan.rs index f30d028..26d6e4f 100644 --- a/src/cli/commands/scan.rs +++ b/src/cli/commands/scan.rs @@ -82,6 +82,14 @@ pub struct ScanArgs { #[arg(long, short = 'n', default_value_t = false)] pub no_validate: bool, + /// Map validated cloud credentials to their effective identities + #[arg(long, default_value_t = false)] + pub access_map: bool, + + /// Optional path to write a consolidated access-map HTML report + #[arg(long, value_name = "PATH")] + pub access_map_html: Option, + /// Display only validated findings #[arg(long, default_value_t = false)] pub only_valid: bool, @@ -424,6 +432,14 @@ impl ScanCommandArgs { self.scan_args.no_dedup = true; } + if self.scan_args.access_map_html.is_some() { + self.scan_args.access_map = true; + } + + if self.scan_args.access_map && self.scan_args.no_validate { + bail!("--access-map cannot be used with --no-validate"); + } + Ok(ScanOperation::Scan(self.scan_args)) } } diff --git a/src/cli/global.rs b/src/cli/global.rs index a7e1ec5..ea44dad 100644 --- a/src/cli/global.rs +++ b/src/cli/global.rs @@ -6,7 +6,7 @@ use strum::Display; use sysinfo::{MemoryRefreshKind, RefreshKind, System}; use tracing::Level; -use crate::cli::commands::{rules::RulesArgs, scan::ScanCommandArgs}; +use crate::cli::commands::{access_map::AccessMapArgs, rules::RulesArgs, scan::ScanCommandArgs}; #[deny(missing_docs)] #[derive(Parser, Debug)] @@ -62,6 +62,10 @@ pub enum Command { #[command(alias = "rule")] Rules(RulesArgs), + /// Map a cloud credential to its identity, permissions, and blast radius + 
#[command(name = "access-map", alias = "access_map")] + AccessMap(AccessMapArgs), + /// Update the Kingfisher binary #[command(name = "self-update")] SelfUpdate, diff --git a/src/finding_data.rs b/src/finding_data.rs index 23252be..01ed934 100644 --- a/src/finding_data.rs +++ b/src/finding_data.rs @@ -3,6 +3,7 @@ use serde::{Deserialize, Serialize}; use crate::{ blob::BlobMetadata, findings_store, matcher::Match, origin::OriginSet, rules::rule::Confidence, + validation_body::ValidationResponseBody, }; // ------------------------------------------------------------------------------------------------- // FindingData @@ -23,7 +24,7 @@ pub struct FindingDataEntry { pub match_confidence: Confidence, pub visible: bool, /// Validation Body - pub validation_response_body: String, + pub validation_response_body: ValidationResponseBody, /// Validation Status Code pub validation_response_status: u16, diff --git a/src/findings_store.rs b/src/findings_store.rs index e51a881..6345e91 100644 --- a/src/findings_store.rs +++ b/src/findings_store.rs @@ -11,6 +11,7 @@ use rustc_hash::{FxHashMap, FxHashSet, FxHasher}; use xxhash_rust::xxh3::xxh3_64; use crate::{ + access_map::AccessMapResult, blob::{BlobId, BlobMetadata}, finding_data, git_url::GitUrl, @@ -58,7 +59,9 @@ pub struct FindingsStore { confluence_links: FxHashMap, s3_buckets: FxHashMap, repo_links: FxHashMap, + access_map_results: Vec, } + impl FindingsStore { pub fn new(clone_dir: PathBuf) -> Self { let expected_items = 10_000_000; // tune to your largest scan @@ -80,6 +83,7 @@ impl FindingsStore { confluence_links: FxHashMap::default(), s3_buckets: FxHashMap::default(), repo_links: FxHashMap::default(), + access_map_results: Vec::new(), } } @@ -127,6 +131,14 @@ impl FindingsStore { &mut self.matches } + pub fn set_access_map_results(&mut self, results: Vec) { + self.access_map_results = results; + } + + pub fn access_map_results(&self) -> &[AccessMapResult] { + &self.access_map_results + } + pub fn record_rules(&mut 
self, rules: &[Arc]) { // Clear existing data and extend in place self.rules.clear(); @@ -283,7 +295,7 @@ impl FindingsStore { self.matches .iter() .filter(|msg| { - let (_, _, match_item) = &***msg; + let (_, _, match_item) = msg.as_ref(); match_item.visible }) .count() @@ -348,6 +360,39 @@ impl FindingsStore { &self.s3_buckets } + pub fn merge_from(&mut self, other: &FindingsStore, dedup: bool) { + for (dir, link) in other.repo_links() { + self.repo_links.entry(dir.clone()).or_insert_with(|| link.clone()); + } + + for (dir, bucket) in other.s3_buckets() { + self.s3_buckets.entry(dir.clone()).or_insert_with(|| bucket.clone()); + } + + for (dir, image) in other.docker_images() { + self.docker_images.entry(dir.clone()).or_insert_with(|| image.clone()); + } + + for (dir, link) in other.slack_links() { + self.slack_links.entry(dir.clone()).or_insert_with(|| link.clone()); + } + + for (dir, link) in other.confluence_links() { + self.confluence_links.entry(dir.clone()).or_insert_with(|| link.clone()); + } + + let batch: Vec<_> = other + .get_matches() + .iter() + .map(|msg| { + let (origin, blob_md, m) = msg.as_ref(); + (origin.clone(), blob_md.clone(), m.clone()) + }) + .collect(); + + self.record(batch, dedup); + } + pub fn get_finding_data_iter( &self, ) -> impl Iterator + '_ { @@ -373,7 +418,7 @@ impl FindingsStore { self.matches .iter() .filter(|msg| { - let (_, _, match_item) = &***msg; + let (_, _, match_item) = msg.as_ref(); match_item.rule.name() == metadata.rule_name }) .map(|msg| { diff --git a/src/lib.rs b/src/lib.rs index fcbff87..a2bb426 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,4 @@ +pub mod access_map; pub mod azure; pub mod baseline; pub mod binary; @@ -44,6 +45,7 @@ pub mod snippet; pub mod update; pub mod util; pub mod validation; +pub mod validation_body; use std::path::{Path, PathBuf}; diff --git a/src/location.rs b/src/location.rs index a50602a..69245df 100644 --- a/src/location.rs +++ b/src/location.rs @@ -141,9 +141,112 @@ impl<'a> 
LocationMapping<'a> { } } +/// Compact representation of a source span to reduce per-match footprint while +/// still being able to materialize full line/column data on demand. +#[derive(Debug, Clone, Copy, Deserialize, Serialize, JsonSchema)] +pub struct CompactSourceSpan { + pub start_line: u32, + pub start_column: u32, + pub end_line: u32, + pub end_column: u32, +} + +impl CompactSourceSpan { + #[inline] + fn zero() -> Self { + Self { start_line: 0, start_column: 0, end_line: 0, end_column: 0 } + } + + #[inline] + fn from_source_span(span: &SourceSpan) -> Self { + Self { + start_line: span.start.line.try_into().unwrap_or(0), + start_column: span.start.column.try_into().unwrap_or(0), + end_line: span.end.line.try_into().unwrap_or(0), + end_column: span.end.column.try_into().unwrap_or(0), + } + } + + #[inline] + fn to_source_span(self) -> SourceSpan { + SourceSpan { + start: SourcePoint { + line: usize::try_from(self.start_line).unwrap_or(0), + column: usize::try_from(self.start_column).unwrap_or(0), + }, + end: SourcePoint { + line: usize::try_from(self.end_line).unwrap_or(0), + column: usize::try_from(self.end_column).unwrap_or(0), + }, + } + } +} + /// Combined byte‑ and source‑span. 
-#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema)] +#[derive(Debug, Clone, Deserialize, JsonSchema)] pub struct Location { pub offset_span: OffsetSpan, - pub source_span: SourceSpan, + #[serde( + default, + serialize_with = "serialize_compact_source_span", + deserialize_with = "deserialize_compact_source_span" + )] + #[schemars(with = "SourceSpan")] + pub source_span: Option, +} + +impl serde::Serialize for Location { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + use serde::ser::SerializeStruct; + + let mut state = serializer.serialize_struct("Location", 2)?; + state.serialize_field("offset_span", &self.offset_span)?; + let source_span = self.source_span().unwrap_or_else(CompactSourceSpan::zero); + state.serialize_field("source_span", &source_span.to_source_span())?; + state.end() + } +} + +impl Location { + #[inline] + pub fn with_source_span(offset_span: OffsetSpan, source_span: Option) -> Self { + Self { + offset_span, + source_span: source_span.as_ref().map(CompactSourceSpan::from_source_span), + } + } + + #[inline] + pub fn source_span(&self) -> Option { + self.source_span + } + + #[inline] + pub fn resolved_source_span(&self) -> SourceSpan { + self.source_span.unwrap_or_else(CompactSourceSpan::zero).to_source_span() + } +} + +fn serialize_compact_source_span( + span: &Option, + serializer: S, +) -> Result +where + S: serde::Serializer, +{ + let source_span = span.unwrap_or_else(CompactSourceSpan::zero).to_source_span(); + source_span.serialize(serializer) +} + +fn deserialize_compact_source_span<'de, D>( + deserializer: D, +) -> Result, D::Error> +where + D: serde::Deserializer<'de>, +{ + let span = SourceSpan::deserialize(deserializer)?; + Ok(Some(CompactSourceSpan::from_source_span(&span))) } diff --git a/src/main.rs b/src/main.rs index 187bd2c..cdf019d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -33,7 +33,7 @@ use std::{ use anyhow::{Context, Result}; use kingfisher::{ - azure, bitbucket, + access_map, 
azure, bitbucket, cli::{ self, commands::{ @@ -54,7 +54,7 @@ use kingfisher::{ rule_loader::RuleLoader, rules_database::RulesDatabase, scanner::{load_and_record_rules, run_scan}, - update::check_for_update, + update::check_for_update_async, validation::set_user_agent_suffix, }; use serde_json::json; @@ -79,15 +79,18 @@ use crate::cli::commands::{ fn main() -> anyhow::Result<()> { color_backtrace::install(); // Parse command-line arguments - let args = CommandLineArgs::parse_args(); + let CommandLineArgs { command, global_args } = CommandLineArgs::parse_args(); - set_user_agent_suffix(args.global_args.user_agent_suffix.clone()); + set_user_agent_suffix(global_args.user_agent_suffix.clone()); + + let args = CommandLineArgs { command, global_args }; // Determine the number of jobs, defaulting to the number of CPUs let num_jobs = match &args.command { Command::Scan(scan_args) => scan_args.scan_args.num_jobs, Command::SelfUpdate => 1, // Self-update doesn't need a thread pool Command::Rules(_) => num_cpus::get(), // Default for Rules commands + Command::AccessMap(_) => 1, }; // Set up the Tokio runtime with the specified number of threads @@ -186,15 +189,16 @@ async fn async_main(args: CommandLineArgs) -> Result<()> { let mut g = global_args; g.self_update = true; g.no_update_check = false; - check_for_update(&g, None); + let _ = check_for_update_async(&g, None).await; Ok(()) } + Command::AccessMap(identity_args) => access_map::run(identity_args).await, command => { let temp_dir = TempDir::new().context("Failed to create temporary directory")?; let clone_dir = temp_dir.path().to_path_buf(); let datastore = Arc::new(Mutex::new(FindingsStore::new(clone_dir))); - let update_status = check_for_update(&global_args, None); + let update_status = check_for_update_async(&global_args, None).await; match command { Command::Scan(scan_command) => match scan_command.into_operation()? 
{ ScanOperation::Scan(mut scan_args) => { @@ -331,6 +335,9 @@ async fn async_main(args: CommandLineArgs) -> Result<()> { run_rules_list(&list_args)?; } }, + Command::AccessMap(_) => { + anyhow::bail!("AccessMap command should not reach this branch") + } Command::SelfUpdate => { anyhow::bail!("SelfUpdate command should not reach this branch") } @@ -442,6 +449,8 @@ fn create_default_scan_args() -> cli::commands::scan::ScanArgs { }, confidence: ConfidenceLevel::Medium, no_validate: true, + access_map: false, + access_map_html: None, rule_stats: false, only_valid: false, min_entropy: None, diff --git a/src/matcher.rs b/src/matcher.rs index b193f29..b41d7b9 100644 --- a/src/matcher.rs +++ b/src/matcher.rs @@ -36,6 +36,7 @@ use crate::{ snippet::Base64BString, util::intern, validation::{is_parseable_mongodb_uri, is_parseable_mysql_uri, is_parseable_postgres_uri}, + validation_body::{self, ValidationResponseBody}, }; const MAX_CHUNK_SIZE: usize = 1 << 30; // 1 GiB per scan segment @@ -65,7 +66,7 @@ pub struct OwnedBlobMatch { pub finding_fingerprint: u64, pub matching_input_offset_span: OffsetSpan, pub captures: SerializableCaptures, - pub validation_response_body: String, + pub validation_response_body: ValidationResponseBody, pub validation_response_status: StatusCode, pub validation_success: bool, pub calculated_entropy: f32, @@ -156,7 +157,7 @@ pub struct BlobMatch<'a> { /// The capture groups from the match pub captures: SerializableCaptures, // regex::bytes::Captures<'a>, - pub validation_response_body: String, + pub validation_response_body: ValidationResponseBody, pub validation_response_status: StatusCode, pub validation_success: bool, @@ -475,7 +476,7 @@ impl<'a> Matcher<'a> { rule_id_usize, &mut seen_matches, origin, - Some(item.decoded.as_bytes()), + Some(item.decoded.as_slice()), true, redact, &filename, @@ -485,10 +486,9 @@ impl<'a> Matcher<'a> { ); } if depth + 1 < MAX_B64_DEPTH { - for nested in get_base64_strings(item.decoded.as_bytes()) { + for nested in 
get_base64_strings(item.decoded.as_slice()) { b64_stack.push(( DecodedData { - original: nested.original, decoded: nested.decoded, pos_start: item.pos_start, pos_end: item.pos_end, @@ -761,7 +761,7 @@ fn filter_match<'b>( matching_input: only_matching_input, matching_input_offset_span, captures: groups, - validation_response_body: String::new(), + validation_response_body: None, validation_response_status: StatusCode::from_u16(0).unwrap_or(StatusCode::CONTINUE), validation_success: false, calculated_entropy, @@ -870,7 +870,7 @@ impl JsonSchema for Groups { // } #[derive(Debug, Clone, JsonSchema)] pub struct SerializableCapture { - pub name: Option, + pub name: Option<&'static str>, pub match_number: i32, pub start: usize, pub end: usize, @@ -919,8 +919,8 @@ impl SerializableCaptures { pub fn from_captures(captures: ®ex::bytes::Captures, _input: &[u8], re: &Regex) -> Self { let mut serialized_captures: SmallVec<[SerializableCapture; 2]> = SmallVec::new(); - let capture_names: SmallVec<[Option; 4]> = - re.capture_names().map(|name| name.map(str::to_string)).collect(); + let capture_names: SmallVec<[Option<&'static str>; 4]> = + re.capture_names().map(|name| name.map(intern)).collect(); // If there are explicit capture groups (e.g., group 1, 2, ...), // only serialize those. @@ -928,9 +928,9 @@ impl SerializableCaptures { for i in 1..captures.len() { // Start from 1 if let Some(cap) = captures.get(i) { - let raw_value = String::from_utf8_lossy(cap.as_bytes()).to_string(); - let raw_interned = intern(&raw_value); - let name = capture_names.get(i).and_then(|opt| opt.as_ref()).cloned(); + let raw_value = String::from_utf8_lossy(cap.as_bytes()); + let raw_interned = intern(raw_value.as_ref()); + let name = capture_names.get(i).and_then(|opt| *opt); serialized_captures.push(SerializableCapture { name, @@ -945,9 +945,9 @@ impl SerializableCaptures { // ELSE, if there is ONLY the full match (len == 1), // serialize just that full match (group 0) as the fallback. 
if let Some(cap) = captures.get(0) { - let raw_value = String::from_utf8_lossy(cap.as_bytes()).to_string(); - let raw_interned = intern(&raw_value); - let name = capture_names.get(0).and_then(|opt| opt.as_ref()).cloned(); + let raw_value = String::from_utf8_lossy(cap.as_bytes()); + let raw_interned = intern(raw_value.as_ref()); + let name = capture_names.get(0).and_then(|opt| *opt); serialized_captures.push(SerializableCapture { name, @@ -986,7 +986,13 @@ pub struct Match { pub rule: Arc, /// Validation Body - pub validation_response_body: String, + #[serde( + default, + serialize_with = "validation_body::serialize", + deserialize_with = "validation_body::deserialize" + )] + #[schemars(schema_with = "validation_body::schema")] + pub validation_response_body: ValidationResponseBody, /// Validation Status Code pub validation_response_status: u16, @@ -1042,7 +1048,7 @@ impl Match { Match { rule: owned_blob_match.rule.clone(), visible: owned_blob_match.rule.visible().to_owned(), - location: Location { offset_span, source_span: source_span.clone() }, + location: Location::with_source_span(offset_span, Some(source_span.clone())), groups: owned_blob_match.captures.clone(), blob_id: owned_blob_match.blob_id, finding_fingerprint, @@ -1074,8 +1080,7 @@ impl Match { } #[derive(Debug, Clone)] pub struct DecodedData { - pub original: String, - pub decoded: String, + pub decoded: Vec, pub pos_start: usize, pub pos_end: usize, } @@ -1115,15 +1120,8 @@ pub fn get_base64_strings(input: &[u8]) -> Vec { .or_else(|_| general_purpose::URL_SAFE_NO_PAD.decode(base64_slice)); if let Ok(decoded) = decode_result { - if let Ok(decoded_str) = std::str::from_utf8(&decoded) { - if decoded_str.is_ascii() { - results.push(DecodedData { - original: String::from_utf8_lossy(base64_slice).into_owned(), - decoded: decoded_str.to_string(), - pos_start: start, - pos_end: end, - }); - } + if decoded.is_ascii() { + results.push(DecodedData { decoded, pos_start: start, pos_end: end }); } } } @@ -1438,15 
+1436,17 @@ mod test { /// and report correct byte-offsets. #[test] fn test_get_base64_strings_basic() { - let raw = b"foo MDEyMzQ1Njc4OWFiY2RlZjAxMjM0NTY3ODlhYmNkZWY= bar"; + let base64_payload = b"MDEyMzQ1Njc4OWFiY2RlZjAxMjM0NTY3ODlhYmNkZWY="; + let mut raw = b"foo ".to_vec(); + raw.extend_from_slice(base64_payload); + raw.extend_from_slice(b" bar"); // decodes to "0123456789abcdef0123456789abcdef" - let hits = get_base64_strings(raw); + let hits = get_base64_strings(&raw); assert_eq!(hits.len(), 1); let item = &hits[0]; - assert_eq!(item.decoded, "0123456789abcdef0123456789abcdef"); - assert_eq!(item.original, "MDEyMzQ1Njc4OWFiY2RlZjAxMjM0NTY3ODlhYmNkZWY="); + assert_eq!(std::str::from_utf8(&item.decoded).unwrap(), "0123456789abcdef0123456789abcdef"); // "foo␠" is 4 bytes, so the start offset is 4 - assert_eq!((item.pos_start, item.pos_end), (4, 4 + item.original.len())); + assert_eq!((item.pos_start, item.pos_end), (4, 4 + base64_payload.len())); } /// `compute_finding_fingerprint` must be stable (same input β‡’ same output) diff --git a/src/reporter.rs b/src/reporter.rs index 4cabba4..36539b9 100644 --- a/src/reporter.rs +++ b/src/reporter.rs @@ -1,4 +1,5 @@ use std::{ + collections::{BTreeMap, BTreeSet}, fmt::Write, sync::{Arc, Mutex}, }; @@ -11,6 +12,7 @@ use serde::Serialize; use url::Url; use crate::{ + access_map::{AccessSummary, ResourceExposure}, blob::BlobMetadata, bstring_escape::Escaped, cli, @@ -19,6 +21,7 @@ use crate::{ matcher::Match, origin::{Origin, OriginSet}, rules::rule::Confidence, + validation_body::{self, ValidationResponseBody}, }; mod bson_format; mod json_format; @@ -396,14 +399,18 @@ impl DetailsReporter { path_a .cmp(&path_b) .then_with(|| { - a.m.location.source_span.start.line.cmp(&b.m.location.source_span.start.line) + a.m.location + .resolved_source_span() + .start + .line + .cmp(&b.m.location.resolved_source_span().start.line) }) .then_with(|| { a.m.location - .source_span + .resolved_source_span() .start .column - 
.cmp(&b.m.location.source_span.start.column) + .cmp(&b.m.location.resolved_source_span().start.column) }) }); Ok(matches) @@ -414,7 +421,7 @@ impl DetailsReporter { rm: &ReportMatch, args: &cli::commands::scan::ScanArgs, ) -> FindingReporterRecord { - let source_span = &rm.m.location.source_span; + let source_span = rm.m.location.resolved_source_span(); let line_num = source_span.start.line; // --- FIX IS HERE --- @@ -438,10 +445,9 @@ impl DetailsReporter { }; const MAX_RESPONSE_LENGTH: usize = 512; - let truncated_body: String = - rm.validation_response_body.chars().take(MAX_RESPONSE_LENGTH).collect(); - let ellipsis = - if rm.validation_response_body.len() > MAX_RESPONSE_LENGTH { "..." } else { "" }; + let validation_body = validation_body::as_str(&rm.validation_response_body); + let truncated_body: String = validation_body.chars().take(MAX_RESPONSE_LENGTH).collect(); + let ellipsis = if validation_body.len() > MAX_RESPONSE_LENGTH { "..." } else { "" }; let response_body = format!("{}{}", truncated_body, ellipsis); let git_metadata_val = rm @@ -449,7 +455,7 @@ impl DetailsReporter { .iter() .filter_map(|origin| { if let Origin::GitRepo(e) = origin { - self.extract_git_metadata(e, source_span) + self.extract_git_metadata(e, &source_span) } else { None } @@ -557,6 +563,66 @@ impl DetailsReporter { Ok(matches.iter().map(|rm| self.build_finding_record(rm, args)).collect()) } + pub fn build_report_envelope( + &self, + args: &cli::commands::scan::ScanArgs, + ) -> Result { + let findings = self.build_finding_records(args)?; + let access_map = self.build_access_map_records(args); + + Ok(ReportEnvelope { findings, access_map }) + } + + fn build_access_map_records( + &self, + args: &cli::commands::scan::ScanArgs, + ) -> Option> { + if !args.access_map { + return None; + } + + let ds = self.datastore.lock().unwrap(); + let raw_results = ds.access_map_results(); + + if raw_results.is_empty() { + return None; + } + + let mut entries = Vec::new(); + for result in raw_results { 
+ let account = summarize_account(&result.identity); + let mut grouped: BTreeMap, Vec> = BTreeMap::new(); + + if result.resources.is_empty() { + grouped.insert(Vec::new(), vec![result.identity.id.clone()]); + } else { + for resource in &result.resources { + let resource_name = format_resource(resource); + let permissions = normalize_permissions(&result.cloud, &resource.permissions); + grouped.entry(permissions).or_default().push(resource_name); + } + } + + let mut groups: Vec = grouped + .into_iter() + .map(|(permissions, mut resources)| { + resources.sort(); + AccessMapResourceGroup { resources, permissions } + }) + .collect(); + + groups.sort_by(|a, b| a.resources.cmp(&b.resources)); + + entries.push(AccessMapEntry { + provider: result.cloud.clone(), + account: account.clone(), + groups, + }); + } + + Some(entries) + } + fn style_finding_heading(&self, val: D) -> StyledObject { self.styles.style_finding_heading.apply_to(val) } @@ -587,6 +653,46 @@ impl DetailsReporter { self.styles.style_active_creds.apply_to(val) } } + +fn normalize_permissions(cloud: &str, permissions: &[String]) -> Vec { + if cloud.eq_ignore_ascii_case("aws") { + return Vec::new(); + } + + let mut set = BTreeSet::new(); + for perm in permissions { + let normalized = perm.trim(); + if !normalized.is_empty() { + set.insert(normalized.to_string()); + } + } + + set.into_iter().collect() +} + +fn summarize_account(identity: &AccessSummary) -> Option { + identity + .account_id + .clone() + .filter(|s| !s.trim().is_empty()) + .or_else(|| identity.project.clone().filter(|s| !s.trim().is_empty())) + .or_else(|| identity.tenant.clone().filter(|s| !s.trim().is_empty())) + .or_else(|| Some(identity.id.clone()).filter(|s| !s.trim().is_empty())) +} + +fn format_resource(resource: &ResourceExposure) -> String { + let name = resource.name.trim(); + if name.is_empty() { + return resource.resource_type.clone(); + } + + let resource_type = resource.resource_type.trim(); + if resource_type.is_empty() { + 
name.to_string() + } else { + format!("{}:{}", resource_type, name) + } +} /// A trait for things that can be output as a document. /// /// This trait is used to factor output-related code, such as friendly handling @@ -641,7 +747,13 @@ pub struct ReportMatch { pub visible: bool, /// Validation Body - pub validation_response_body: String, + #[serde( + default, + serialize_with = "validation_body::serialize", + deserialize_with = "validation_body::deserialize" + )] + #[schemars(schema_with = "validation_body::schema")] + pub validation_response_body: ValidationResponseBody, /// Validation Status Code pub validation_response_status: u16, @@ -656,6 +768,28 @@ pub struct FindingReporterRecord { pub finding: FindingRecordData, } +#[derive(Serialize, JsonSchema, Clone, Debug)] +pub struct AccessMapEntry { + pub provider: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub account: Option, + pub groups: Vec, +} + +#[derive(Serialize, JsonSchema, Clone, Debug)] +pub struct AccessMapResourceGroup { + pub resources: Vec, + #[serde(skip_serializing_if = "Vec::is_empty")] + pub permissions: Vec, +} + +#[derive(Serialize, JsonSchema, Clone, Debug)] +pub struct ReportEnvelope { + pub findings: Vec, + #[serde(skip_serializing_if = "Option::is_none")] + pub access_map: Option>, +} + #[derive(Serialize, JsonSchema, Clone, Debug)] pub struct RuleMetadata { pub name: String, @@ -794,6 +928,8 @@ mod tests { }, confidence: ConfidenceLevel::Medium, no_validate: false, + access_map: false, + access_map_html: None, only_valid: false, min_entropy: None, rule_stats: false, @@ -847,7 +983,7 @@ mod tests { })); let blob_id = BlobId::new(b"blob-data"); - let validation_body_owned = validation_body.to_string(); + let validation_body_stored = validation_body::from_string(validation_body); let report_match = ReportMatch { origin, blob_metadata: BlobMetadata { @@ -857,20 +993,20 @@ mod tests { language: Some("Unknown".into()), }, m: Match { - location: Location { - offset_span: 
OffsetSpan { start: 0, end: 10 }, - source_span: SourceSpan { + location: Location::with_source_span( + OffsetSpan { start: 0, end: 10 }, + Some(SourceSpan { start: SourcePoint { line: 19, column: 0 }, end: SourcePoint { line: 19, column: 10 }, - }, - }, + }), + ), groups: SerializableCaptures { captures: SmallVec::<[SerializableCapture; 2]>::new(), }, blob_id, finding_fingerprint: 123, rule: Arc::clone(&rule), - validation_response_body: validation_body_owned.clone(), + validation_response_body: validation_body_stored.clone(), validation_response_status: validation_status, validation_success, calculated_entropy: 5.29, @@ -880,7 +1016,7 @@ mod tests { comment: None, match_confidence: Confidence::Medium, visible: true, - validation_response_body: validation_body_owned, + validation_response_body: validation_body_stored, validation_response_status: validation_status, validation_success, }; diff --git a/src/reporter/bson_format.rs b/src/reporter/bson_format.rs index 6691c3b..0b13750 100644 --- a/src/reporter/bson_format.rs +++ b/src/reporter/bson_format.rs @@ -7,11 +7,16 @@ impl DetailsReporter { mut writer: W, args: &cli::commands::scan::ScanArgs, ) -> Result<()> { - let records = self.build_finding_records(args)?; - for record in records { + let envelope = self.build_report_envelope(args)?; + for record in envelope.findings { let doc = bson::to_document(&record)?; doc.to_writer(&mut writer)?; } + + if let Some(access_map) = envelope.access_map { + let doc = bson::to_document(&serde_json::json!({ "access_map": access_map }))?; + doc.to_writer(&mut writer)?; + } Ok(()) } } diff --git a/src/reporter/json_format.rs b/src/reporter/json_format.rs index f9fe2bf..4be0701 100644 --- a/src/reporter/json_format.rs +++ b/src/reporter/json_format.rs @@ -6,9 +6,9 @@ impl DetailsReporter { mut writer: W, args: &cli::commands::scan::ScanArgs, ) -> Result<()> { - let records = self.build_finding_records(args)?; - if !records.is_empty() { - serde_json::to_writer_pretty(&mut writer, 
&records)?; + let envelope = self.build_report_envelope(args)?; + if !envelope.findings.is_empty() || envelope.access_map.is_some() { + serde_json::to_writer_pretty(&mut writer, &envelope)?; writeln!(writer)?; } Ok(()) @@ -19,11 +19,17 @@ impl DetailsReporter { mut writer: W, args: &cli::commands::scan::ScanArgs, ) -> Result<()> { - let records = self.build_finding_records(args)?; - for record in records { + let envelope = self.build_report_envelope(args)?; + for record in envelope.findings { serde_json::to_writer(&mut writer, &record)?; writeln!(writer)?; } + + if let Some(access_map) = envelope.access_map { + let payload = serde_json::json!({ "access_map": access_map }); + serde_json::to_writer(&mut writer, &payload)?; + writeln!(writer)?; + } Ok(()) } } @@ -52,6 +58,7 @@ mod tests { matcher::Match, origin::Origin, reporter::styles::Styles, + validation_body, }; use smallvec::smallvec; use std::{ @@ -166,6 +173,8 @@ mod tests { }, confidence: ConfidenceLevel::Medium, no_validate: false, + access_map: false, + access_map_html: None, rule_stats: false, only_valid: false, min_entropy: None, @@ -201,16 +210,16 @@ mod tests { }; let rule = Arc::new(Rule::new(syntax)); Match { - location: Location { - offset_span: OffsetSpan { start: 10, end: 20 }, - source_span: SourceSpan { + location: Location::with_source_span( + OffsetSpan { start: 10, end: 20 }, + Some(SourceSpan { start: SourcePoint { line: 5, column: 10 }, end: SourcePoint { line: 5, column: 20 }, - }, - }, + }), + ), groups: SerializableCaptures { captures: smallvec![SerializableCapture { - name: Some("token".to_string()), + name: Some("token"), match_number: 1, start: 10, end: 20, @@ -220,7 +229,7 @@ mod tests { blob_id: BlobId::new(b"mock_blob"), finding_fingerprint: 0123, rule, - validation_response_body: "validation response".to_string(), + validation_response_body: validation_body::from_string("validation response"), validation_response_status: 200, validation_success, calculated_entropy: 4.5, @@ -275,16 
+284,18 @@ mod tests { comment: None, match_confidence: Confidence::Medium, visible: true, - validation_response_body: "validation response".to_string(), + validation_response_body: validation_body::from_string("validation response"), validation_response_status: 200, validation_success: true, }]; let reporter = setup_mock_reporter(matches); let mut output = Cursor::new(Vec::new()); reporter.json_format(&mut output, &create_default_args())?; - let json_output: Vec = serde_json::from_slice(&output.into_inner())?; - assert!(!json_output.is_empty(), "JSON output should not be empty"); - let first = &json_output[0]; + let json_output: serde_json::Value = serde_json::from_slice(&output.into_inner())?; + let findings = + json_output.get("findings").and_then(|v| v.as_array()).cloned().unwrap_or_default(); + assert!(!findings.is_empty(), "JSON output should not be empty"); + let first = &findings[0]; assert_eq!(first["rule"]["name"], "MockRule"); assert_eq!(first["finding"]["language"], "Rust"); Ok(()) @@ -310,16 +321,18 @@ mod tests { comment: None, match_confidence: Confidence::Medium, visible: true, - validation_response_body: "validation response".to_string(), + validation_response_body: validation_body::from_string("validation response"), validation_response_status: 200, validation_success, }]; let reporter = setup_mock_reporter(matches); let mut output = Cursor::new(Vec::new()); reporter.json_format(&mut output, &create_default_args())?; - let json_output: Vec = serde_json::from_slice(&output.into_inner())?; - assert!(!json_output.is_empty(), "JSON output should not be empty"); - let first = &json_output[0]; + let json_output: serde_json::Value = serde_json::from_slice(&output.into_inner())?; + let findings = + json_output.get("findings").and_then(|v| v.as_array()).cloned().unwrap_or_default(); + assert!(!findings.is_empty(), "JSON output should not be empty"); + let first = &findings[0]; let validation_status = 
first["finding"]["validation"]["status"].as_str().unwrap(); assert_eq!(validation_status, expected_status); } diff --git a/src/reporter/pretty_format.rs b/src/reporter/pretty_format.rs index ea31fc5..8ff3253 100644 --- a/src/reporter/pretty_format.rs +++ b/src/reporter/pretty_format.rs @@ -10,10 +10,17 @@ impl DetailsReporter { mut writer: W, args: &cli::commands::scan::ScanArgs, ) -> Result<()> { - let records = self.build_finding_records(args)?; - let num_findings = records.len(); - for (index, record) in records.iter().enumerate() { + let envelope = self.build_report_envelope(args)?; + let num_findings = envelope.findings.len(); + for (index, record) in envelope.findings.iter().enumerate() { self.write_finding_record(&mut writer, record, index + 1, num_findings)?; + if index + 1 != num_findings { + writeln!(writer)?; + } + } + + if let Some(access_map) = envelope.access_map { + self.write_access_map(&mut writer, &access_map)?; } Ok(()) } @@ -39,7 +46,36 @@ impl DetailsReporter { writeln!(writer, "{}", self.style_finding_heading(formatted_heading))?; } writeln!(writer, "{}", PrettyFindingRecord(self, record))?; - writeln!(writer)?; + Ok(()) + } + + fn write_access_map( + &self, + writer: &mut W, + entries: &[AccessMapEntry], + ) -> Result<()> { + if entries.is_empty() { + return Ok(()); + } + + writeln!(writer, " |{}", self.style_heading("ACCESS MAP"))?; + for entry in entries { + for group in &entry.groups { + writeln!(writer, " |_service.......: {}", entry.provider.to_uppercase())?; + if let Some(account) = &entry.account { + writeln!(writer, " |__account.....: {}", account)?; + } + for resource in &group.resources { + writeln!(writer, " |____resource....: {}", resource)?; + } + if !group.permissions.is_empty() { + writeln!(writer, " |____permission..: {}", group.permissions.join(","))?; + } + } + + writeln!(writer)?; + } + Ok(()) } @@ -101,7 +137,7 @@ impl<'a> Display for PrettyFindingRecord<'a> { let finding = &record.finding; writeln!(f, " |Finding.......: 
{}", style_fn(&finding.snippet))?; if let Some(enc) = &finding.encoding { - writeln!(f, " |Encoding.....: {}", enc)?; + writeln!(f, " |Encoding......: {}", enc)?; } writeln!(f, " |Fingerprint...: {}", finding.fingerprint)?; writeln!(f, " |Confidence....: {}", finding.confidence)?; diff --git a/src/reporter/sarif_format.rs b/src/reporter/sarif_format.rs index ff771dd..9f552ec 100644 --- a/src/reporter/sarif_format.rs +++ b/src/reporter/sarif_format.rs @@ -61,8 +61,9 @@ impl DetailsReporter { _no_dedup: bool, args: &cli::commands::scan::ScanArgs, ) -> Result<()> { - let records = self.build_finding_records(args)?; - let finding_rule_ids: HashSet<_> = records.iter().map(|r| r.rule.name.clone()).collect(); + let envelope = self.build_report_envelope(args)?; + let finding_rule_ids: HashSet<_> = + envelope.findings.iter().map(|r| r.rule.name.clone()).collect(); let rules: Vec = get_builtin_rules(None)? .iter_rules() .par_bridge() @@ -106,9 +107,21 @@ impl DetailsReporter { .build()?; let sarif_results: Vec = - records.iter().filter_map(|r| self.record_to_sarif_result(r).ok()).collect(); + envelope.findings.iter().filter_map(|r| self.record_to_sarif_result(r).ok()).collect(); - let run = sarif::RunBuilder::default().tool(tool).results(sarif_results).build()?; + let mut run_builder = sarif::RunBuilder::default(); + run_builder.tool(tool); + run_builder.results(sarif_results); + + if let Some(access_map) = envelope.access_map { + let mut props = BTreeMap::new(); + props.insert("access_map".to_string(), serde_json::to_value(access_map)?); + let property_bag = + sarif::PropertyBagBuilder::default().additional_properties(props).build()?; + run_builder.properties(property_bag); + } + + let run = run_builder.build()?; let sarif = sarif::SarifBuilder::default() .version(sarif::Version::V2_1_0.to_string()) .schema(sarif::SCHEMA_URL) diff --git a/src/scanner/enumerate.rs b/src/scanner/enumerate.rs index 1a354ae..eb32f7f 100644 --- a/src/scanner/enumerate.rs +++ 
b/src/scanner/enumerate.rs @@ -237,11 +237,14 @@ pub fn enumerate_filesystem_inputs( // nothing to record } Ok(Some((origin_set, blob_metadata, vec_of_matches))) => { + let origin_set = Arc::new(origin_set); + let blob_metadata = Arc::new(blob_metadata); + for (_, single_match) in vec_of_matches { // Send each match send_ds.send(( - Arc::new(origin_set.clone()), - Arc::new(blob_metadata.clone()), + origin_set.clone(), + blob_metadata.clone(), single_match, ))?; } diff --git a/src/scanner/mod.rs b/src/scanner/mod.rs index 6d98b34..4a819f1 100644 --- a/src/scanner/mod.rs +++ b/src/scanner/mod.rs @@ -2,11 +2,11 @@ pub(crate) use docker::save_docker_images; pub(crate) use enumerate::enumerate_filesystem_inputs; pub(crate) use repos::{ - clone_or_update_git_repos, enumerate_azure_repos, enumerate_bitbucket_repos, + clone_or_update_git_repos_streaming, enumerate_azure_repos, enumerate_bitbucket_repos, enumerate_github_repos, enumerate_huggingface_repos, }; pub use runner::{load_and_record_rules, run_async_scan, run_scan}; -pub(crate) use validation::run_secret_validation; +pub(crate) use validation::{run_secret_validation, AccessMapCollector}; mod docker; mod enumerate; diff --git a/src/scanner/repos.rs b/src/scanner/repos.rs index 331f610..9339fa6 100644 --- a/src/scanner/repos.rs +++ b/src/scanner/repos.rs @@ -4,7 +4,9 @@ use std::{ }; use anyhow::{Context, Result}; +use crossbeam_channel; use indicatif::{HumanCount, ProgressBar, ProgressStyle}; +use rayon::ThreadPoolBuilder; use tokio::time::Duration; use tracing::{debug, error, info}; use url::Url; @@ -32,20 +34,25 @@ use crate::{ pub type DatastoreMessage = (OriginSet, BlobMetadata, Vec<(Option, Match)>); -pub fn clone_or_update_git_repos( +pub fn clone_or_update_git_repos_streaming( args: &scan::ScanArgs, global_args: &global::GlobalArgs, repo_urls: &[GitUrl], datastore: &Arc>, -) -> Result> { - let mut input_roots = args.input_specifier_args.path_inputs.clone(); + mut on_repo_ready: F, +) -> Result<()> +where + F: 
FnMut(PathBuf) + Send, +{ if repo_urls.is_empty() { - return Ok(input_roots); + return Ok(()); } + info!("{} Git URLs to fetch", repo_urls.len()); for repo_url in repo_urls { debug!("Need to fetch {repo_url}") } + let clone_mode = if args.input_specifier_args.git_history == GitHistoryMode::None { CloneMode::Checkout } else { @@ -54,7 +61,6 @@ pub fn clone_or_update_git_repos( GitCloneMode::Bare => CloneMode::Bare, } }; - let git = Git::new(global_args.ignore_certs); let progress = if global_args.use_progress() { let style = ProgressStyle::with_template( @@ -70,56 +76,89 @@ pub fn clone_or_update_git_repos( ProgressBar::hidden() }; - for repo_url in repo_urls { - let output_dir = { - let datastore = datastore.lock().unwrap(); - datastore.clone_destination(repo_url) - }; - if output_dir.is_dir() { - progress.suspend(|| info!("Updating clone of {repo_url}...")); - match git.update_clone(repo_url, &output_dir) { - Ok(()) => { - input_roots.push(output_dir); - progress.inc(1); - continue; - } - Err(e) => { - progress.suspend(|| { - debug!( - "Failed to update clone of {repo_url} at {}: {e}", - output_dir.display() - ) - }); - if let Err(e) = std::fs::remove_dir_all(&output_dir) { - progress.suspend(|| { - debug!( - "Failed to remove clone directory at {}: {e}", - output_dir.display() - ) - }); + let (ready_tx, ready_rx) = crossbeam_channel::unbounded(); + let clone_concurrency = std::cmp::max(1, args.num_jobs); + let ignore_certs = global_args.ignore_certs; + + ThreadPoolBuilder::new() + .num_threads(clone_concurrency) + .build() + .context("Failed to build git clone thread pool")? 
+ .scope(|scope| { + for repo_url in repo_urls { + let ready_tx = ready_tx.clone(); + let datastore = Arc::clone(datastore); + let repo_url = repo_url.clone(); + let progress = progress.clone(); + scope.spawn(move |_| { + let git = Git::new(ignore_certs); + let output_dir = { + let datastore = datastore.lock().unwrap(); + datastore.clone_destination(&repo_url) + }; + + if output_dir.is_dir() { + progress.suspend(|| info!("Updating clone of {repo_url}...")); + match git.update_clone(&repo_url, &output_dir) { + Ok(()) => { + let _ = ready_tx.send(output_dir); + progress.inc(1); + return; + } + Err(e) => { + progress.suspend(|| { + debug!( + "Failed to update clone of {repo_url} at {}: {e}", + output_dir.display() + ) + }); + if let Err(e) = std::fs::remove_dir_all(&output_dir) { + progress.suspend(|| { + debug!( + "Failed to remove clone directory at {}: {e}", + output_dir.display() + ) + }); + } + } + } } - } + + progress.suspend(|| info!("Cloning {repo_url}...")); + if let Err(e) = git.create_fresh_clone(&repo_url, &output_dir, clone_mode) { + progress.suspend(|| { + if repo_url.as_str().ends_with(".wiki.git") { + info!("Wiki repository not found for {repo_url}, skipping"); + debug!( + "Failed to clone {repo_url} to {}: {e}", + output_dir.display() + ); + } else { + error!( + "Failed to clone {repo_url} to {}: {e}", + output_dir.display() + ); + } + debug!("Skipping scan of {repo_url}"); + }); + progress.inc(1); + return; + } + + let _ = ready_tx.send(output_dir); + progress.inc(1); + }); } - } - progress.suspend(|| info!("Cloning {repo_url}...")); - if let Err(e) = git.create_fresh_clone(repo_url, &output_dir, clone_mode) { - progress.suspend(|| { - if repo_url.as_str().ends_with(".wiki.git") { - info!("Wiki repository not found for {repo_url}, skipping"); - debug!("Failed to clone {repo_url} to {}: {e}", output_dir.display()); - } else { - error!("Failed to clone {repo_url} to {}: {e}", output_dir.display()); - } - debug!("Skipping scan of {repo_url}"); - }); - 
progress.inc(1); - continue; - } - input_roots.push(output_dir); - progress.inc(1); - } + + drop(ready_tx); + + for repo_root in ready_rx.iter() { + on_repo_ready(repo_root); + } + }); + progress.finish(); - Ok(input_roots) + Ok(()) } pub async fn enumerate_github_repos( @@ -195,7 +234,7 @@ pub async fn enumerate_gitlab_repos( let mut repo_urls = args.input_specifier_args.git_url.clone(); if !repo_specifiers.is_empty() { - let mut progress = if global_args.use_progress() { + let progress = if global_args.use_progress() { let style = ProgressStyle::with_template("{spinner} {msg} {human_len} [{elapsed_precise}]") .expect("progress bar style template should compile"); diff --git a/src/scanner/runner.rs b/src/scanner/runner.rs index a8b51a8..5e2215d 100644 --- a/src/scanner/runner.rs +++ b/src/scanner/runner.rs @@ -1,16 +1,22 @@ use std::{ fs, - sync::{Arc, Mutex}, + path::PathBuf, + sync::{ + atomic::{AtomicBool, Ordering}, + Arc, Mutex, + }, }; use anyhow::{bail, Context, Result}; +use crossbeam_channel; use crossbeam_skiplist::SkipMap; use indicatif::ProgressBar; +use tokio::runtime::Handle; use tokio::time::{Duration, Instant}; use tracing::{debug, error, error_span, info, trace}; use crate::{ - azure, bitbucket, + access_map, azure, bitbucket, cli::{commands::scan, global}, findings_store, findings_store::{FindingsStore, FindingsStoreMessage}, @@ -20,10 +26,11 @@ use crate::{ reporter::styles::Styles, rule_loader::RuleLoader, rule_profiling::ConcurrentRuleProfiler, + rules::rule::Validation, rules_database::RulesDatabase, safe_list, scanner::{ - clone_or_update_git_repos, enumerate_azure_repos, enumerate_bitbucket_repos, + clone_or_update_git_repos_streaming, enumerate_azure_repos, enumerate_bitbucket_repos, enumerate_filesystem_inputs, enumerate_github_repos, enumerate_huggingface_repos, repos::{ enumerate_gitea_repos, enumerate_gitlab_repos, fetch_confluence_pages, @@ -31,7 +38,8 @@ use crate::{ fetch_slack_messages, }, run_secret_validation, save_docker_images, 
- summary::print_scan_summary, + summary::{compute_scan_totals, print_scan_summary}, + AccessMapCollector, }, util::set_redaction_enabled, }; @@ -122,7 +130,31 @@ pub async fn run_async_scan( repo_urls.sort(); repo_urls.dedup(); - let mut input_roots = clone_or_update_git_repos(args, global_args, &repo_urls, &datastore)?; + let mut input_roots = args.input_specifier_args.path_inputs.clone(); + let (repo_tx, repo_rx) = crossbeam_channel::unbounded(); + let repo_clone_handle = if repo_urls.is_empty() { + None + } else { + let clone_args = args.clone(); + let clone_globals = global_args.clone(); + let clone_repo_urls = repo_urls.clone(); + let clone_datastore = Arc::clone(&datastore); + let clone_repo_tx = repo_tx.clone(); + Some(std::thread::spawn(move || { + if let Err(e) = clone_or_update_git_repos_streaming( + &clone_args, + &clone_globals, + &clone_repo_urls, + &clone_datastore, + |path| { + let _ = clone_repo_tx.send(path); + }, + ) { + error!("Failed to fetch one or more Git repositories: {e}"); + } + })) + }; + drop(repo_tx); // Fetch issues, gists, and wikis if enabled let bitbucket_auth = bitbucket::AuthConfig::from_env(); @@ -176,14 +208,14 @@ pub async fn run_async_scan( let shared_profiler = Arc::new(ConcurrentRuleProfiler::new()); let enable_profiling = args.rule_stats; - let matcher_stats = Mutex::new(MatcherStats::default()); + let matcher_stats = Arc::new(Mutex::new(MatcherStats::default())); // Fetch S3 objects if requested (scanned immediately) fetch_s3_objects( args, &datastore, rules_db, - &matcher_stats, + matcher_stats.as_ref(), enable_profiling, Arc::clone(&shared_profiler), progress_enabled, @@ -194,7 +226,7 @@ pub async fn run_async_scan( args, &datastore, rules_db, - &matcher_stats, + matcher_stats.as_ref(), enable_profiling, Arc::clone(&shared_profiler), progress_enabled, @@ -203,56 +235,21 @@ pub async fn run_async_scan( let has_remote_objects = args.input_specifier_args.s3_bucket.is_some() || 
args.input_specifier_args.gcs_bucket.is_some(); - if input_roots.is_empty() && !has_remote_objects { + if input_roots.is_empty() && repo_urls.is_empty() && !has_remote_objects { bail!("No inputs to scan"); } - if !input_roots.is_empty() { - let _inputs = enumerate_filesystem_inputs( - args, - datastore.clone(), - &input_roots, - progress_enabled, - rules_db, - enable_profiling, - Arc::clone(&shared_profiler), - &matcher_stats, - )?; - } - - if !args.no_dedup { - // Final deduplication step before validation (or before reporting) - let reporter = crate::reporter::DetailsReporter { - datastore: Arc::clone(&datastore), - styles: Styles::new(global_args.use_color(std::io::stdout())), - only_valid: args.only_valid, - }; - - // Retrieve all matches, regardless of filtering, from the datastore - let all_matches = reporter.get_unfiltered_matches(Some(false))?; - // Deduplicate the matches using the reporter’s helper - let deduped_matches = reporter.deduplicate_matches(all_matches, args.no_dedup); - - let deduped_arcs: Vec> = deduped_matches - .into_iter() - .map(|rm| Arc::new((Arc::new(rm.origin), Arc::new(rm.blob_metadata), rm.m))) - .collect(); - let mut ds = datastore.lock().unwrap(); - ds.replace_matches(deduped_arcs); - } - - // If baseline management is enabled, apply the baseline - if args.baseline_file.is_some() || args.manage_baseline { - let path = args - .baseline_file + let baseline_path = Arc::new( + args.baseline_file .clone() - .unwrap_or_else(|| std::path::PathBuf::from("baseline-file.yaml")); - let mut ds = datastore.lock().unwrap(); - crate::baseline::apply_baseline(&mut ds, &path, args.manage_baseline, &input_roots)?; - } + .unwrap_or_else(|| std::path::PathBuf::from("baseline-file.yaml")), + ); let mut skip_aws_accounts = args.skip_aws_account.clone(); + let mut access_map_collector = + if args.access_map { Some(AccessMapCollector::default()) } else { None }; + if let Some(path) = args.skip_aws_account_file.as_ref() { let contents = 
fs::read_to_string(path).with_context(|| { format!("Failed to read --skip-aws-account-file {}", path.display()) @@ -271,23 +268,349 @@ pub async fn run_async_scan( crate::validation::set_skip_aws_account_ids(skip_aws_accounts); - // If validation is enabled, run it as a second phase - if !args.no_validate { + let repo_roots = expand_repo_roots(&input_roots)?; + let git_repo_count = + repo_roots.iter().filter(|p| p.join(".git").is_dir()).count() + repo_urls.len(); + let use_parallel_repo_scan = git_repo_count > 10; + + let validation_deps = if !args.no_validate { info!("Starting secret validation phase..."); - // Create validation dependencies - let client = reqwest::Client::builder() - .danger_accept_invalid_certs(global_args.ignore_certs) - .timeout(Duration::from_secs(30)) - .build()?; - let parser = register_all(liquid::ParserBuilder::with_stdlib()).build()?; - let cache = Arc::new(SkipMap::new()); - // Run validation - run_secret_validation(Arc::clone(&datastore), &parser, &client, &cache, args.num_jobs) + Some(Arc::new(( + register_all(liquid::ParserBuilder::with_stdlib()).build()?, + reqwest::Client::builder() + .danger_accept_invalid_certs(global_args.ignore_certs) + .timeout(Duration::from_secs(30)) + .build()?, + Arc::new(SkipMap::new()), + ))) + } else { + None + }; + + if !use_parallel_repo_scan { + let mut streamed_roots = Vec::new(); + if !input_roots.is_empty() { + let _inputs = enumerate_filesystem_inputs( + args, + datastore.clone(), + &input_roots, + progress_enabled, + rules_db, + enable_profiling, + Arc::clone(&shared_profiler), + matcher_stats.as_ref(), + )?; + } + + for repo_root in repo_rx.clone().iter() { + enumerate_filesystem_inputs( + args, + datastore.clone(), + &[repo_root.clone()], + progress_enabled, + rules_db, + enable_profiling, + Arc::clone(&shared_profiler), + matcher_stats.as_ref(), + )?; + streamed_roots.push(repo_root); + } + input_roots.extend(streamed_roots); + + if let Some(handle) = repo_clone_handle { + let _ = 
handle.join(); + } + + if !args.no_dedup { + let reporter = crate::reporter::DetailsReporter { + datastore: Arc::clone(&datastore), + styles: Styles::new(global_args.use_color(std::io::stdout())), + only_valid: args.only_valid, + }; + + let all_matches = reporter.get_unfiltered_matches(Some(false))?; + let deduped_matches = reporter.deduplicate_matches(all_matches, args.no_dedup); + + let deduped_arcs: Vec> = deduped_matches + .into_iter() + .map(|rm| Arc::new((Arc::new(rm.origin), Arc::new(rm.blob_metadata), rm.m))) + .collect(); + let mut ds = datastore.lock().unwrap(); + ds.replace_matches(deduped_arcs); + } + + if args.baseline_file.is_some() || args.manage_baseline { + let mut ds = datastore.lock().unwrap(); + crate::baseline::apply_baseline( + &mut ds, + baseline_path.as_ref(), + args.manage_baseline, + &input_roots, + )?; + } + + if let Some(validation) = &validation_deps { + let (parser, client, cache) = (&validation.0, &validation.1, &validation.2); + run_secret_validation( + Arc::clone(&datastore), + parser, + client, + cache, + args.num_jobs, + None, + access_map_collector.clone(), + ) .await?; + } + + if let Some(collector) = access_map_collector.take() { + finalize_access_map(&datastore, collector, args).await?; + } + + crate::reporter::run(global_args, Arc::clone(&datastore), args) + .context("Failed to run report command")?; + print_scan_summary( + start_time, + scan_started_at, + &datastore, + global_args, + args, + rules_db, + matcher_stats.as_ref(), + if enable_profiling { Some(shared_profiler.as_ref()) } else { None }, + update_status, + None, + None, + ); + return Ok(()); } - // // Call cmd_report here - crate::reporter::run(global_args, Arc::clone(&datastore), args) - .context("Failed to run report command")?; + + let deduplicate_new_matches = + |store: &Arc>, start_index: usize| -> Result<()> { + if args.no_dedup { + return Ok(()); + } + + let reporter = crate::reporter::DetailsReporter { + datastore: Arc::clone(store), + styles: 
Styles::new(global_args.use_color(std::io::stdout())), + only_valid: args.only_valid, + }; + + let all_matches = reporter.get_unfiltered_matches(Some(false))?; + if start_index >= all_matches.len() { + return Ok(()); + } + + let deduped_matches = + reporter.deduplicate_matches(all_matches[start_index..].to_vec(), args.no_dedup); + + let deduped_arcs: Vec> = deduped_matches + .into_iter() + .map(|rm| Arc::new((Arc::new(rm.origin), Arc::new(rm.blob_metadata), rm.m))) + .collect(); + + let mut ds = store.lock().unwrap(); + let mut preserved = ds.get_matches()[..start_index].to_vec(); + preserved.extend(deduped_arcs); + ds.replace_matches(preserved); + Ok(()) + }; + + deduplicate_new_matches(&datastore, 0)?; + + if args.baseline_file.is_some() || args.manage_baseline { + let mut ds = datastore.lock().unwrap(); + crate::baseline::apply_baseline( + &mut ds, + baseline_path.as_ref(), + args.manage_baseline, + &repo_roots, + )?; + } + + if let Some(validation) = &validation_deps { + let (parser, client, cache) = (&validation.0, &validation.1, &validation.2); + let initial_match_count = { datastore.lock().unwrap().get_matches().len() }; + if initial_match_count > 0 { + run_secret_validation( + Arc::clone(&datastore), + parser, + client, + cache, + args.num_jobs, + Some(0..initial_match_count), + access_map_collector.clone(), + ) + .await?; + } + } + + let repo_concurrency = std::cmp::max(1, args.num_jobs); + let rt_handle = Handle::current(); + + let base_clone_root = { datastore.lock().unwrap().clone_root() }; + let repo_rules = datastore.lock().unwrap().get_rules()?; + + let ran_repo_scan = Arc::new(AtomicBool::new(false)); + let repo_errors: Arc>> = Arc::new(Mutex::new(Vec::new())); + + rayon::ThreadPoolBuilder::new() + .num_threads(repo_concurrency) + .build() + .context("Failed to build repo scan thread pool")? 
+ .scope(|scope| { + let spawn_repo_scan = |root: PathBuf| { + let repo_rules = repo_rules.clone(); + let base_clone_root = base_clone_root.clone(); + let baseline_path = Arc::clone(&baseline_path); + let shared_profiler = Arc::clone(&shared_profiler); + let args = args.clone(); + let root = root.clone(); + let validation_deps = validation_deps.clone(); + let matcher_stats = Arc::clone(&matcher_stats); + let rt_handle = rt_handle.clone(); + let ran_repo_scan = Arc::clone(&ran_repo_scan); + let repo_errors = Arc::clone(&repo_errors); + let datastore = Arc::clone(&datastore); + let access_map = access_map_collector.clone(); + + scope.spawn(move |_| { + let result: Result<()> = (|| { + let repo_datastore = + Arc::new(Mutex::new(FindingsStore::new(base_clone_root.clone()))); + { + let mut ds = repo_datastore.lock().unwrap(); + ds.record_rules(&repo_rules); + } + + let repo_matcher_stats = Mutex::new(MatcherStats::default()); + + enumerate_filesystem_inputs( + &args, + Arc::clone(&repo_datastore), + &[root.clone()], + progress_enabled, + rules_db, + enable_profiling, + Arc::clone(&shared_profiler), + &repo_matcher_stats, + ) + .and_then(|_| deduplicate_new_matches(&repo_datastore, 0))?; + + if args.baseline_file.is_some() || args.manage_baseline { + let mut ds = repo_datastore.lock().unwrap(); + crate::baseline::apply_baseline( + &mut ds, + baseline_path.as_ref(), + args.manage_baseline, + &[root.clone()], + )?; + } + + if let Some(validation) = validation_deps.clone() { + let (parser, client, cache) = + (&validation.0, &validation.1, &validation.2); + let match_count = + { repo_datastore.lock().unwrap().get_matches().len() }; + if match_count > 0 { + rt_handle.block_on(run_secret_validation( + Arc::clone(&repo_datastore), + parser, + client, + cache, + args.num_jobs, + Some(0..match_count), + access_map.clone(), + ))?; + } + } + + { + let mut global_stats = matcher_stats.lock().unwrap(); + global_stats.update(&repo_matcher_stats.lock().unwrap()); + } + + 
crate::reporter::run(global_args, Arc::clone(&repo_datastore), &args) + .context("Failed to run report command")?; + + { + let mut ds = datastore.lock().unwrap(); + ds.merge_from(&repo_datastore.lock().unwrap(), !args.no_dedup); + } + + ran_repo_scan.store(true, Ordering::Relaxed); + Ok(()) + })(); + + if let Err(e) = result { + error!("Repository scan failed: {e}"); + repo_errors.lock().unwrap().push(e); + } + }); + }; + + for root in repo_roots.clone() { + spawn_repo_scan(root); + } + + for root in repo_rx.clone().iter() { + spawn_repo_scan(root); + } + }); + + if let Some(handle) = repo_clone_handle { + let _ = handle.join(); + } + + if let Some(err) = repo_errors.lock().unwrap().pop() { + return Err(err); + } + + if !ran_repo_scan.load(Ordering::Relaxed) { + deduplicate_new_matches(&datastore, 0)?; + + if args.baseline_file.is_some() || args.manage_baseline { + let mut ds = datastore.lock().unwrap(); + crate::baseline::apply_baseline( + &mut ds, + baseline_path.as_ref(), + args.manage_baseline, + &repo_roots, + )?; + } + + if let Some(validation) = &validation_deps { + let (parser, client, cache) = (&validation.0, &validation.1, &validation.2); + run_secret_validation( + Arc::clone(&datastore), + parser, + client, + cache, + args.num_jobs, + None, + access_map_collector.clone(), + ) + .await?; + } + + if let Some(collector) = access_map_collector.take() { + finalize_access_map(&datastore, collector, args).await?; + } + + crate::reporter::run(global_args, Arc::clone(&datastore), args) + .context("Failed to run report command")?; + } + + let aggregate_summary = if ran_repo_scan.load(Ordering::Relaxed) { + let totals = compute_scan_totals(&datastore, args, matcher_stats.as_ref()); + let mut sorted: Vec<_> = datastore.lock().unwrap().get_summary().into_iter().collect(); + sorted.sort_by(|a, b| b.1.cmp(&a.1)); + Some((totals, sorted)) + } else { + None + }; + print_scan_summary( start_time, scan_started_at, @@ -295,13 +618,116 @@ pub async fn run_async_scan( 
global_args, args, rules_db, - &matcher_stats, + matcher_stats.as_ref(), if enable_profiling { Some(shared_profiler.as_ref()) } else { None }, update_status, + None, + aggregate_summary, ); + + if let Some(collector) = access_map_collector { + finalize_access_map(&datastore, collector, args).await?; + } else { + maybe_hint_access_map(&datastore, args); + } Ok(()) } +async fn finalize_access_map( + datastore: &Arc>, + collector: AccessMapCollector, + args: &scan::ScanArgs, +) -> Result<()> { + let requests = collector.into_requests(); + + if requests.is_empty() { + debug!("access-map enabled but no validated AWS or GCP credentials were collected; skipping report output"); + let mut ds = datastore.lock().unwrap(); + ds.set_access_map_results(Vec::new()); + return Ok(()); + } + + let results = access_map::map_requests(requests).await; + + { + let mut ds = datastore.lock().unwrap(); + ds.set_access_map_results(results.clone()); + } + + if let Some(html_path) = &args.access_map_html { + access_map::write_reports(&results, html_path)?; + info!("wrote access-map HTML report to {}", html_path.display()); + } + + // if args.access_map_html.is_none() { + // eprintln!( + // "Tip: rerun with --access-map-html /path/to/report.html for an interactive access-map viewer." + // ); + // } + + Ok(()) +} + +fn expand_repo_roots(input_roots: &[PathBuf]) -> Result> { + let mut repo_roots = Vec::new(); + + for root in input_roots { + if root.join(".git").is_dir() { + repo_roots.push(root.clone()); + continue; + } + + if !root.is_dir() { + repo_roots.push(root.clone()); + continue; + } + + let mut child_roots = Vec::new(); + let mut non_repo_children = Vec::new(); + for entry in fs::read_dir(root).with_context(|| { + format!("Failed to read directory while expanding repo roots: {}", root.display()) + })? 
{ + let entry = entry?; + let child_path = entry.path(); + if child_path.join(".git").is_dir() { + child_roots.push(child_path); + } else { + non_repo_children.push(child_path); + } + } + + if child_roots.is_empty() { + repo_roots.push(root.clone()); + } else { + repo_roots.extend(child_roots); + repo_roots.extend(non_repo_children); + } + } + + Ok(repo_roots) +} + +fn maybe_hint_access_map(datastore: &Arc>, args: &scan::ScanArgs) { + if args.access_map || args.no_validate { + return; + } + + let has_mappable_identities = { + let ds = datastore.lock().unwrap(); + ds.get_matches().iter().any(|entry| { + let rule = &entry.2.rule; + entry.2.validation_success + && matches!(rule.syntax().validation, Some(Validation::AWS | Validation::GCP)) + }) + }; + + if has_mappable_identities { + eprintln!( + "Access map not requested. Rerun with --access-map to include resource-level permissions." + ); + } +} + fn initialize_environment() -> Result<()> { let init_progress = ProgressBar::new_spinner(); init_progress.set_message("Initializing thread pool..."); diff --git a/src/scanner/summary.rs b/src/scanner/summary.rs index 16d0798..99764c9 100644 --- a/src/scanner/summary.rs +++ b/src/scanner/summary.rs @@ -23,6 +23,29 @@ use crate::{ update::{UpdateCheckStatus, UpdateStatus}, }; +#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)] +pub struct ScanSummaryTotals { + pub findings: usize, + pub successful_validations: usize, + pub failed_validations: usize, + pub blobs_scanned: u64, + pub bytes_scanned: u64, +} + +impl ScanSummaryTotals { + pub fn delta_since(&self, baseline: &Self) -> Self { + Self { + findings: self.findings.saturating_sub(baseline.findings), + successful_validations: self + .successful_validations + .saturating_sub(baseline.successful_validations), + failed_validations: self.failed_validations.saturating_sub(baseline.failed_validations), + blobs_scanned: self.blobs_scanned.saturating_sub(baseline.blobs_scanned), + bytes_scanned: 
self.bytes_scanned.saturating_sub(baseline.bytes_scanned), + } + } +} + macro_rules! safe_println { ($($arg:tt)*) => { if let Err(e) = writeln!(io::stdout(), $($arg)*) { @@ -37,6 +60,59 @@ macro_rules! safe_println { }; } +pub fn compute_scan_totals( + datastore: &Arc>, + args: &scan::ScanArgs, + matcher_stats: &Mutex, +) -> ScanSummaryTotals { + let ds = datastore.lock().unwrap(); + + let all_matches = ds.get_matches(); + + let total_findings = if args.no_dedup { + all_matches.iter().fold(0, |count, msg| { + let (origin_set, _, match_item) = &**msg; + if match_item.validation_success { + count + origin_set.len() + } else { + count + 1 + } + }) + } else { + ds.get_num_matches() + }; + + let (successful_validations, failed_validations) = + all_matches.iter().fold((0, 0), |(success, fail), msg| { + let (origin_set, _, match_item) = &**msg; + if match_item.validation_success { + if match_item.validation_response_status != StatusCode::CONTINUE.as_u16() { + if args.no_dedup { + (success + origin_set.len(), fail) + } else { + (success + 1, fail) + } + } else { + (success, fail) + } + } else if match_item.validation_response_status != StatusCode::CONTINUE.as_u16() { + (success, fail + 1) + } else { + (success, fail) + } + }); + + let matcher_stats = matcher_stats.lock().unwrap(); + + ScanSummaryTotals { + findings: total_findings, + successful_validations, + failed_validations, + blobs_scanned: matcher_stats.blobs_scanned, + bytes_scanned: matcher_stats.bytes_scanned, + } +} + pub fn print_scan_summary( start_time: Instant, scan_started_at: chrono::DateTime, @@ -48,6 +124,8 @@ pub fn print_scan_summary( matcher_stats: &Mutex, profiler: Option<&ConcurrentRuleProfiler>, update_status: &UpdateStatus, + repo_context: Option<(&str, ScanSummaryTotals)>, + precomputed_summary: Option<(ScanSummaryTotals, Vec<(&'static str, usize)>)>, ) { if global_args.quiet { if args.rule_stats { @@ -86,71 +164,50 @@ pub fn print_scan_summary( return; } - let ds = datastore.lock().unwrap(); - - 
let num_rules = rules_db.num_rules(); - let findings_by_rule = ds.get_summary(); - let mut sorted_findings: Vec<_> = findings_by_rule.into_iter().collect(); - sorted_findings.sort_by(|a, b| b.1.cmp(&a.1)); + let (num_rules, sorted_findings) = if let Some((_, findings)) = &precomputed_summary { + (rules_db.num_rules(), findings.clone()) + } else { + let ds = datastore.lock().unwrap(); + let num_rules = rules_db.num_rules(); + let findings_by_rule = ds.get_summary(); + let mut sorted: Vec<_> = findings_by_rule.into_iter().collect(); + sorted.sort_by(|a, b| b.1.cmp(&a.1)); + (num_rules, sorted) + }; let duration = start_time.elapsed(); - let all_matches = ds.get_matches(); - - let total_findings = if args.no_dedup { - all_matches.iter().fold(0, |count, msg| { - let (origin_set, _, match_item) = &**msg; - if match_item.validation_success { - count + origin_set.len() - } else { - count + 1 - } - }) + let totals = if let Some((totals, _)) = &precomputed_summary { + *totals } else { - ds.get_num_matches() + compute_scan_totals(datastore, args, matcher_stats) }; + let delta_totals = repo_context.map(|(_, baseline)| totals.delta_since(&baseline)); - let (successful_validations, failed_validations) = - all_matches.iter().fold((0, 0), |(success, fail), msg| { - let (origin_set, _, match_item) = &**msg; - if match_item.validation_success { - if match_item.validation_response_status != StatusCode::CONTINUE.as_u16() { - if args.no_dedup { - (success + origin_set.len(), fail) - } else { - (success + 1, fail) - } - } else { - (success, fail) - } - } else if match_item.validation_response_status != StatusCode::CONTINUE.as_u16() { - (success, fail + 1) - } else { - (success, fail) - } - }); - let matcher_stats = matcher_stats.lock().unwrap(); + let should_print_overall = repo_context.is_none(); if args.output_args.format == ReportOutputFormat::Json || args.output_args.format == ReportOutputFormat::Jsonl { - let summary = json!({ - "findings": total_findings, - 
"successful_validations": successful_validations, - "failed_validations": failed_validations, - "rules_applied": num_rules, - "blobs_scanned": matcher_stats.blobs_scanned, - "bytes_scanned": matcher_stats.bytes_scanned, - "scan_duration": duration.as_secs_f64(), - "scan_date": scan_started_at.to_rfc3339(), - "kingfisher": { - "version_used": update_status.running_version.clone(), - "latest_version": update_status.latest_version.clone(), - "update_check_status": update_status.check_status.as_str(), - "update_check_message": update_status.message.clone(), - }, - "findings_by_rule": sorted_findings - }); - safe_println!("{}", summary.to_string()); + if should_print_overall { + let summary = json!({ + "findings": totals.findings, + "successful_validations": totals.successful_validations, + "failed_validations": totals.failed_validations, + "rules_applied": num_rules, + "blobs_scanned": totals.blobs_scanned, + "bytes_scanned": totals.bytes_scanned, + "scan_duration": duration.as_secs_f64(), + "scan_date": scan_started_at.to_rfc3339(), + "kingfisher": { + "version_used": update_status.running_version.clone(), + "latest_version": update_status.latest_version.clone(), + "update_check_status": update_status.check_status.as_str(), + "update_check_message": update_status.message.clone(), + }, + "findings_by_rule": sorted_findings + }); + safe_println!("{}", summary.to_string()); + } } else if args.output_args.format == ReportOutputFormat::Pretty || args.output_args.output.is_some() { @@ -163,34 +220,67 @@ pub fn print_scan_summary( } }; - safe_println!("\n=========================================="); - safe_println!("Scan Summary:"); - safe_println!("=========================================="); - safe_println!(" |Findings....................: {}", total_findings.separate_with_commas()); - safe_println!( - " |__Successful Validations....: {}", - successful_validations.separate_with_commas() - ); - safe_println!( - " |__Failed Validations........: {}", - 
failed_validations.separate_with_commas() - ); - safe_println!(" |Rules Applied...............: {}", num_rules.separate_with_commas()); - safe_println!( - " |__Blobs Scanned.............: {}", - matcher_stats.blobs_scanned.separate_with_commas() - ); - safe_println!( - " |Bytes Scanned...............: {}", - HumanBytes(matcher_stats.bytes_scanned) - ); - safe_println!(" |Scan Duration...............: {}", humantime::format_duration(duration)); - safe_println!(" |Scan Date...................: {}", scan_date); - safe_println!(" |Kingfisher Version..........: {}", &update_status.running_version); - safe_println!(" |__Latest Version............: {}", latest_version); + if let Some((repo_name, baseline)) = repo_context { + let delta = delta_totals.unwrap_or_default(); + safe_println!("\n=========================================="); + safe_println!("Repository Summary: {}", repo_name); + safe_println!("=========================================="); + safe_println!( + " |Findings added..............: {}", + delta.findings.separate_with_commas() + ); + safe_println!( + " |__Successful Validations....: {}", + delta.successful_validations.separate_with_commas() + ); + safe_println!( + " |__Failed Validations........: {}", + delta.failed_validations.separate_with_commas() + ); + safe_println!( + " |Blobs Scanned (delta)......: {}", + delta.blobs_scanned.separate_with_commas() + ); + safe_println!(" |Bytes Scanned (delta)......: {}", HumanBytes(delta.bytes_scanned)); + safe_println!( + " |Baseline findings...........: {}", + baseline.findings.separate_with_commas() + ); + } + + if should_print_overall { + safe_println!("\n=========================================="); + safe_println!("Scan Summary:"); + safe_println!("=========================================="); + safe_println!( + " |Findings....................: {}", + totals.findings.separate_with_commas() + ); + safe_println!( + " |__Successful Validations....: {}", + totals.successful_validations.separate_with_commas() + ); 
+ safe_println!( + " |__Failed Validations........: {}", + totals.failed_validations.separate_with_commas() + ); + safe_println!(" |Rules Applied...............: {}", num_rules.separate_with_commas()); + safe_println!( + " |__Blobs Scanned.............: {}", + totals.blobs_scanned.separate_with_commas() + ); + safe_println!(" |Bytes Scanned...............: {}", HumanBytes(totals.bytes_scanned)); + safe_println!( + " |Scan Duration...............: {}", + humantime::format_duration(duration) + ); + safe_println!(" |Scan Date...................: {}", scan_date); + safe_println!(" |Kingfisher Version..........: {}", &update_status.running_version); + safe_println!(" |__Latest Version............: {}", latest_version); + } } - if args.rule_stats { + if should_print_overall && args.rule_stats { if let Some(prof) = profiler { let stats = prof.generate_report(); if !stats.is_empty() { diff --git a/src/scanner/validation.rs b/src/scanner/validation.rs index 6544d40..46a0f4b 100644 --- a/src/scanner/validation.rs +++ b/src/scanner/validation.rs @@ -17,13 +17,45 @@ use rustc_hash::FxHashMap; use tokio::{sync::Notify, time::timeout}; use crate::{ + access_map::AccessMapRequest, blob::BlobId, findings_store::{FindingsStore, FindingsStoreMessage}, location::OffsetSpan, matcher::{Match, OwnedBlobMatch}, - validation::{collect_variables_and_dependencies, validate_single_match, CachedResponse}, + rules::rule::Validation, + validation::{ + collect_variables_and_dependencies, utils, validate_single_match, CachedResponse, + }, + validation_body, }; +#[derive(Clone, Default)] +pub struct AccessMapCollector { + inner: Arc>, +} + +impl AccessMapCollector { + pub fn record_aws(&self, access_key: &str, secret_key: &str) { + let key = xxhash_rust::xxh3::xxh3_64(format!("aws|{access_key}|{secret_key}").as_bytes()); + self.inner.entry(key).or_insert_with(|| AccessMapRequest::Aws { + access_key: access_key.to_string(), + secret_key: secret_key.to_string(), + session_token: None, + }); + } + + 
pub fn record_gcp(&self, credential_json: &str) { + let key = xxhash_rust::xxh3::xxh3_64(credential_json.as_bytes()); + self.inner.entry(key).or_insert_with(|| AccessMapRequest::Gcp { + credential_json: credential_json.to_string(), + }); + } + + pub fn into_requests(self) -> Vec { + self.inner.iter().map(|entry| entry.value().clone()).collect() + } +} + #[allow(clippy::too_many_arguments)] pub async fn run_secret_validation( datastore: Arc>, @@ -31,6 +63,8 @@ pub async fn run_secret_validation( client: &Client, cache: &Arc>, num_jobs: usize, + range: Option>, + access_map: Option, ) -> Result<()> { // ── 1. Concurrency & counters ─────────────────────────────────────────── let concurrency = if num_jobs > 0 { num_jobs } else { num_cpus::get() }; @@ -43,7 +77,13 @@ pub async fn run_secret_validation( let ds = datastore.lock().unwrap(); let rules = ds.get_rules()?; let mut map: FxHashMap>> = FxHashMap::default(); - for arc_msg in ds.get_matches().iter().map(Arc::clone) { + let matches = if let Some(r) = range.clone() { + ds.get_matches()[r].to_vec() + } else { + ds.get_matches().to_vec() + }; + + for arc_msg in matches.into_iter() { map.entry(arc_msg.1.id).or_default().push(arc_msg); } (rules, map) @@ -103,6 +143,7 @@ pub async fn run_secret_validation( let fail = fail_count.clone(); // *** FIX: Clone the progress bar for each concurrent task *** let pb = pb.clone(); + let access_map = access_map.clone(); async move { let secret = rep_arc @@ -119,7 +160,7 @@ pub async fn run_secret_validation( dashmap::mapref::entry::Entry::Vacant(entry) => { // *** FIX: Corrected placeholder to match struct definition *** entry.insert(CachedResponse { - body: String::new(), + body: validation_body::from_string(String::new()), status: StatusCode::ACCEPTED, is_valid: false, timestamp: Instant::now(), @@ -143,6 +184,7 @@ pub async fn run_secret_validation( &success, &fail, &cache_glob, + access_map.as_ref(), ) .await; @@ -215,6 +257,7 @@ pub async fn run_secret_validation( let success = 
success_count.clone(); let fail = fail_count.clone(); let cache_glob = cache.clone(); + let access_map = access_map.clone(); async move { let owned = matches_for_blob @@ -248,6 +291,7 @@ pub async fn run_secret_validation( let success = success.clone(); let fail = fail.clone(); let cache_glob = cache_glob.clone(); + let access_map = access_map.clone(); async move { validate_single( @@ -261,6 +305,7 @@ pub async fn run_secret_validation( &success, &fail, &cache_glob, + access_map.as_ref(), ) .await; for d in &mut dups { @@ -342,6 +387,7 @@ async fn validate_single( success_count: &AtomicUsize, fail_count: &AtomicUsize, cache2: &Arc>, + access_map: Option<&AccessMapCollector>, ) { // Build key let dep_vars_str = dep_vars @@ -364,6 +410,7 @@ async fn validate_single( } else if om.validation_response_status != http::StatusCode::CONTINUE { fail_count.fetch_add(1, Ordering::Relaxed); } + maybe_record_access_map(om, access_map); return; } @@ -384,6 +431,7 @@ async fn validate_single( } else if om.validation_response_status != http::StatusCode::CONTINUE { fail_count.fetch_add(1, Ordering::Relaxed); } + maybe_record_access_map(om, access_map); return; // Exit early if cached result is found } return; @@ -414,11 +462,12 @@ async fn validate_single( } Err(_) => { om.validation_success = false; - om.validation_response_body = "Validation timed out".to_string(); + om.validation_response_body = validation_body::from_string("Validation timed out"); om.validation_response_status = http::StatusCode::REQUEST_TIMEOUT; fail_count.fetch_add(1, Ordering::Relaxed); } } + maybe_record_access_map(om, access_map); // Remove from `in_progress` // in_progress.remove(&cache_key); in_progress.remove(&cache_key); @@ -446,3 +495,53 @@ fn build_cache_key( let capture0 = om.captures.captures.get(0).map_or(String::new(), |c| c.raw_value().to_string()); format!("{}|{}|{}", om.rule.name(), capture0, dep_vars_str) } + +fn maybe_record_access_map(om: &OwnedBlobMatch, collector: 
Option<&AccessMapCollector>) { + let collector = match collector { + Some(c) if om.validation_success => c, + _ => return, + }; + + let captures = utils::process_captures(&om.captures); + + match om.rule.syntax().validation { + Some(Validation::AWS) => { + let secret = captures + .iter() + .find(|(name, ..)| name == "TOKEN") + .map(|(_, value, ..)| value.clone()) + .unwrap_or_default(); + + let mut akid = utils::find_closest_variable(&captures, &secret, "TOKEN", "AKID") + .unwrap_or_default(); + + if akid.is_empty() { + akid = extract_akid_from_body(&om.validation_response_body).unwrap_or_default(); + } + + if !akid.is_empty() && !secret.is_empty() { + collector.record_aws(&akid, &secret); + } + } + Some(Validation::GCP) => { + if let Some((_, value, ..)) = captures.iter().find(|(name, ..)| name == "TOKEN") { + if !value.is_empty() { + collector.record_gcp(value); + } + } + } + _ => {} + } +} + +fn extract_akid_from_body(body: &validation_body::ValidationResponseBody) -> Option { + static AKID_RE: once_cell::sync::Lazy = once_cell::sync::Lazy::new(|| { + regex::Regex::new( + r"(?xi)\b(?:A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA)[0-9A-Z]{16}\b", + ) + .expect("valid regex") + }); + + let text = validation_body::clone_as_string(body); + AKID_RE.find(&text).map(|m| m.as_str().to_string()) +} diff --git a/src/update.rs b/src/update.rs index 615733f..62c0043 100644 --- a/src/update.rs +++ b/src/update.rs @@ -20,6 +20,9 @@ use std::io::{ErrorKind, Write}; use self_update::{backends::github::Update, cargo_crate_version, errors::Error as UpdError}; use semver::Version; use tracing::error; +use tracing::warn; + +use tokio::task; use crate::{cli::global::GlobalArgs, reporter::styles::Styles}; @@ -256,3 +259,21 @@ pub fn check_for_update(global_args: &GlobalArgs, base_url: Option<&str>) -> Upd check_status: UpdateCheckStatus::Ok, } } + +/// Run the update check on a blocking thread so it can safely be invoked from async +/// contexts without creating nested Tokio 
runtimes. +pub async fn check_for_update_async( + global_args: &GlobalArgs, + base_url: Option<&str>, +) -> UpdateStatus { + let args = global_args.clone(); + let base = base_url.map(str::to_owned); + + match task::spawn_blocking(move || check_for_update(&args, base.as_deref())).await { + Ok(status) => status, + Err(err) => { + warn!("Update check task cancelled: {err}"); + UpdateStatus::default() + } + } +} diff --git a/src/validation.rs b/src/validation.rs index 8ffcd66..423aec3 100644 --- a/src/validation.rs +++ b/src/validation.rs @@ -22,12 +22,13 @@ use crate::{ location::OffsetSpan, matcher::{OwnedBlobMatch, SerializableCaptures}, rules::rule::Validation, + validation_body::{self, ValidationResponseBody}, }; mod aws; mod azure; mod coinbase; -mod gcp; +pub mod gcp; mod httpvalidation; mod jdbc; mod jwt; @@ -36,7 +37,7 @@ mod mysql; mod postgres; pub use mysql::validate_mysql; pub use postgres::validate_postgres; -mod utils; +pub mod utils; const VALIDATION_CACHE_SECONDS: u64 = 1200; // 20 minutes const MAX_VALIDATION_BODY_LEN: usize = 2048; @@ -137,14 +138,14 @@ pub fn is_parseable_mysql_uri(uri: &str) -> bool { #[derive(Clone)] pub struct CachedResponse { - pub body: String, + pub body: ValidationResponseBody, pub status: StatusCode, pub is_valid: bool, pub timestamp: Instant, } impl CachedResponse { - pub fn new(body: String, status: StatusCode, is_valid: bool) -> Self { + pub fn new(body: ValidationResponseBody, status: StatusCode, is_valid: bool) -> Self { Self { body, status, is_valid, timestamp: Instant::now() } } @@ -268,7 +269,8 @@ pub async fn validate_single_match( if timeout_result.is_err() { m.validation_success = false; - m.validation_response_body = "Validation timed out after 60 seconds".to_string(); + m.validation_response_body = + validation_body::from_string("Validation timed out after 60 seconds"); m.validation_response_status = StatusCode::REQUEST_TIMEOUT; } } @@ -329,8 +331,10 @@ async fn timed_validate_single_match<'a>( if let 
Some(missing) = missing_dependencies.get(&m.rule.syntax().id) { if !missing.is_empty() { m.validation_success = false; - m.validation_response_body = - format!("Validation skipped - missing dependent rules: {}", missing.join(", ")); + m.validation_response_body = validation_body::from_string(format!( + "Validation skipped - missing dependent rules: {}", + missing.join(", ") + )); m.validation_response_status = StatusCode::PRECONDITION_REQUIRED; commit_and_return(m); return; @@ -343,7 +347,8 @@ async fn timed_validate_single_match<'a>( Ok(_) => utils::process_captures(&m.captures), Err(e) => { m.validation_success = false; - m.validation_response_body = format!("Regex error: {}", e); + m.validation_response_body = + validation_body::from_string(format!("Regex error: {}", e)); m.validation_response_status = StatusCode::INTERNAL_SERVER_ERROR; commit_and_return(m); return; @@ -390,7 +395,7 @@ async fn timed_validate_single_match<'a>( Ok(u) => u, Err(e) => { m.validation_success = false; - m.validation_response_body = e; + m.validation_response_body = validation_body::from_string(e); m.validation_response_status = StatusCode::BAD_REQUEST; commit_and_return(m); return; @@ -410,7 +415,7 @@ async fn timed_validate_single_match<'a>( Ok(rb) => rb, Err(e) => { m.validation_success = false; - m.validation_response_body = e; + m.validation_response_body = validation_body::from_string(e); m.validation_response_status = StatusCode::BAD_REQUEST; commit_and_return(m); return; @@ -559,7 +564,10 @@ async fn timed_validate_single_match<'a>( Ok(b) => b, Err(e) => { m.validation_success = false; - m.validation_response_body = format!("Error reading response: {}", e); + m.validation_response_body = validation_body::from_string(format!( + "Error reading response: {}", + e + )); m.validation_response_status = StatusCode::BAD_GATEWAY; commit_and_return(m); return; @@ -568,7 +576,8 @@ async fn timed_validate_single_match<'a>( truncate_to_char_boundary(&mut body, MAX_VALIDATION_BODY_LEN); 
m.validation_response_status = status; - m.validation_response_body = body.clone(); + let body_opt = validation_body::from_string(body.clone()); + m.validation_response_body = body_opt.clone(); let matchers = http_validation .request .response_matcher @@ -587,7 +596,7 @@ async fn timed_validate_single_match<'a>( cache.insert( cache_key, CachedResponse { - body, + body: body_opt, status, is_valid: m.validation_success, timestamp: Instant::now(), @@ -597,7 +606,8 @@ async fn timed_validate_single_match<'a>( } Err(e) => { m.validation_success = false; - m.validation_response_body = format!("HTTP error: {:?}", e); + m.validation_response_body = + validation_body::from_string(format!("HTTP error: {:?}", e)); m.validation_response_status = StatusCode::BAD_GATEWAY; } } @@ -613,7 +623,8 @@ async fn timed_validate_single_match<'a>( if uri.is_empty() { m.validation_success = false; - m.validation_response_body = "MongoDB URI not found.".to_string(); + m.validation_response_body = + validation_body::from_string("MongoDB URI not found.".to_string()); m.validation_response_status = StatusCode::BAD_REQUEST; commit_and_return(m); return; @@ -634,13 +645,14 @@ async fn timed_validate_single_match<'a>( match mongodb::validate_mongodb(&uri).await { Ok((ok, msg)) => { m.validation_success = ok; - m.validation_response_body = msg; + m.validation_response_body = validation_body::from_string(msg); m.validation_response_status = if ok { StatusCode::OK } else { StatusCode::UNAUTHORIZED }; } Err(e) => { m.validation_success = false; - m.validation_response_body = format!("MongoDB validation error: {}", e); + m.validation_response_body = + validation_body::from_string(format!("MongoDB validation error: {}", e)); m.validation_response_status = StatusCode::BAD_GATEWAY; } } @@ -656,7 +668,8 @@ async fn timed_validate_single_match<'a>( if mysql_url.is_empty() { m.validation_success = false; - m.validation_response_body = "MySQL URL not found.".to_string(); + m.validation_response_body = + 
validation_body::from_string("MySQL URL not found.".to_string()); m.validation_response_status = StatusCode::BAD_REQUEST; commit_and_return(m); return; @@ -677,17 +690,18 @@ async fn timed_validate_single_match<'a>( match mysql::validate_mysql(&mysql_url).await { Ok((ok, meta)) => { m.validation_success = ok; - m.validation_response_body = if ok { + m.validation_response_body = validation_body::from_string(if ok { format!("MySQL connection is valid. Metadata: {:?}", meta) } else { "MySQL connection failed.".to_string() - }; + }); m.validation_response_status = if ok { StatusCode::OK } else { StatusCode::UNAUTHORIZED }; } Err(e) => { m.validation_success = false; - m.validation_response_body = format!("MySQL error: {}", e); + m.validation_response_body = + validation_body::from_string(format!("MySQL error: {}", e)); m.validation_response_status = StatusCode::BAD_GATEWAY; } } @@ -716,7 +730,9 @@ async fn timed_validate_single_match<'a>( if storage_account.is_empty() || storage_key.is_empty() { m.validation_success = false; - m.validation_response_body = "Missing Azure Storage account or key.".to_string(); + m.validation_response_body = validation_body::from_string( + "Missing Azure Storage account or key.".to_string(), + ); m.validation_response_status = StatusCode::BAD_REQUEST; commit_and_return(m); return; @@ -748,7 +764,8 @@ async fn timed_validate_single_match<'a>( } Err(e) => { m.validation_success = false; - m.validation_response_body = format!("Azure Storage error: {}", e); + m.validation_response_body = + validation_body::from_string(format!("Azure Storage error: {}", e)); m.validation_response_status = StatusCode::BAD_GATEWAY; } } @@ -773,7 +790,8 @@ async fn timed_validate_single_match<'a>( if jdbc_conn.is_empty() { m.validation_success = false; - m.validation_response_body = "JDBC connection string not found.".to_string(); + m.validation_response_body = + validation_body::from_string("JDBC connection string not found.".to_string()); 
m.validation_response_status = StatusCode::BAD_REQUEST; commit_and_return(m); return; @@ -794,12 +812,13 @@ async fn timed_validate_single_match<'a>( match jdbc::validate_jdbc(&jdbc_conn).await { Ok(outcome) => { m.validation_success = outcome.valid; - m.validation_response_body = outcome.message; + m.validation_response_body = validation_body::from_string(outcome.message); m.validation_response_status = outcome.status; } Err(e) => { m.validation_success = false; - m.validation_response_body = format!("JDBC validation error: {}", e); + m.validation_response_body = + validation_body::from_string(format!("JDBC validation error: {}", e)); m.validation_response_status = StatusCode::BAD_GATEWAY; } } @@ -825,7 +844,8 @@ async fn timed_validate_single_match<'a>( if pg_url.is_empty() { m.validation_success = false; - m.validation_response_body = "Postgres URL not found.".to_string(); + m.validation_response_body = + validation_body::from_string("Postgres URL not found.".to_string()); m.validation_response_status = StatusCode::BAD_REQUEST; commit_and_return(m); return; @@ -846,17 +866,18 @@ async fn timed_validate_single_match<'a>( match postgres::validate_postgres(&pg_url).await { Ok((ok, meta)) => { m.validation_success = ok; - m.validation_response_body = if ok { + m.validation_response_body = validation_body::from_string(if ok { format!("Postgres connection is valid. 
Metadata: {:?}", meta) } else { "Postgres connection failed.".to_string() - }; + }); m.validation_response_status = if ok { StatusCode::OK } else { StatusCode::UNAUTHORIZED }; } Err(e) => { m.validation_success = false; - m.validation_response_body = format!("Postgres error: {}", e); + m.validation_response_body = + validation_body::from_string(format!("Postgres error: {}", e)); m.validation_response_status = StatusCode::BAD_GATEWAY; } } @@ -880,7 +901,8 @@ async fn timed_validate_single_match<'a>( if token.is_empty() { m.validation_success = false; - m.validation_response_body = "JWT token not found.".to_string(); + m.validation_response_body = + validation_body::from_string("JWT token not found.".to_string()); m.validation_response_status = StatusCode::BAD_REQUEST; commit_and_return(m); return; @@ -889,13 +911,14 @@ async fn timed_validate_single_match<'a>( match jwt::validate_jwt(&token).await { Ok((ok, msg)) => { m.validation_success = ok; - m.validation_response_body = msg; + m.validation_response_body = validation_body::from_string(msg); m.validation_response_status = if ok { StatusCode::OK } else { StatusCode::UNAUTHORIZED }; } Err(e) => { m.validation_success = false; - m.validation_response_body = format!("JWT validation error: {}", e); + m.validation_response_body = + validation_body::from_string(format!("JWT validation error: {}", e)); m.validation_response_status = StatusCode::BAD_REQUEST; } } @@ -912,7 +935,9 @@ async fn timed_validate_single_match<'a>( if akid.is_empty() || secret.is_empty() { m.validation_success = false; - m.validation_response_body = "Missing AWS access-key ID or secret.".to_string(); + m.validation_response_body = validation_body::from_string( + "Missing AWS access-key ID or secret.".to_string(), + ); m.validation_response_status = StatusCode::BAD_REQUEST; commit_and_return(m); return; @@ -932,10 +957,10 @@ async fn timed_validate_single_match<'a>( if let Some(account_id) = aws::should_skip_aws_validation(&akid) { 
m.validation_success = false; - m.validation_response_body = format!( + m.validation_response_body = validation_body::from_string(format!( "(skip list entry) AWS validation not attempted for account {}.", account_id - ); + )); m.validation_response_status = StatusCode::CONTINUE; cache.insert( cache_key, @@ -952,7 +977,10 @@ async fn timed_validate_single_match<'a>( if let Err(e) = aws::validate_aws_credentials_input(&akid, &secret) { m.validation_success = false; - m.validation_response_body = format!("Invalid AWS credentials ({}): {}", akid, e); + m.validation_response_body = validation_body::from_string(format!( + "Invalid AWS credentials ({}): {}", + akid, e + )); m.validation_response_status = StatusCode::BAD_REQUEST; commit_and_return(m); return; @@ -962,15 +990,17 @@ async fn timed_validate_single_match<'a>( Ok((ok, msg)) => { m.validation_success = ok; if ok { - m.validation_response_body = format!("{} --- ARN: {}", akid, msg); - m.validation_response_status = StatusCode::OK; + let mut body = format!("{} --- ARN: {}", akid, msg); if let Ok(acct) = aws::aws_key_to_account_number(&akid) { - m.validation_response_body - .push_str(&format!(" --- AWS Account Number: {:012}", acct)); + body.push_str(&format!(" --- AWS Account Number: {:012}", acct)); } + m.validation_response_body = validation_body::from_string(body); + m.validation_response_status = StatusCode::OK; } else { - m.validation_response_body = - format!("AWS validation error ({}): {}", akid, msg); + m.validation_response_body = validation_body::from_string(format!( + "AWS validation error ({}): {}", + akid, msg + )); m.validation_response_status = StatusCode::UNAUTHORIZED; } cache.insert( @@ -985,7 +1015,10 @@ async fn timed_validate_single_match<'a>( } Err(e) => { m.validation_success = false; - m.validation_response_body = format!("AWS validation error ({}): {}", akid, e); + m.validation_response_body = validation_body::from_string(format!( + "AWS validation error ({}): {}", + akid, e + )); 
m.validation_response_status = StatusCode::BAD_GATEWAY; } } @@ -1001,7 +1034,8 @@ async fn timed_validate_single_match<'a>( if gcp_json.is_empty() { m.validation_success = false; - m.validation_response_body = "GCP JSON not found.".to_string(); + m.validation_response_body = + validation_body::from_string("GCP JSON not found.".to_string()); m.validation_response_status = StatusCode::BAD_REQUEST; commit_and_return(m); return; @@ -1024,20 +1058,27 @@ async fn timed_validate_single_match<'a>( match validator.validate_gcp_credentials(&gcp_json.as_bytes()).await { Ok((ok, meta)) => { m.validation_success = ok; - m.validation_response_body = meta.join("\n"); + m.validation_response_body = + validation_body::from_string(meta.join("\n")); m.validation_response_status = if ok { StatusCode::OK } else { StatusCode::UNAUTHORIZED }; } Err(e) => { m.validation_success = false; - m.validation_response_body = format!("GCP validation error: {}", e); + m.validation_response_body = validation_body::from_string(format!( + "GCP validation error: {}", + e + )); m.validation_response_status = StatusCode::BAD_GATEWAY; } } } Err(e) => { m.validation_success = false; - m.validation_response_body = format!("Failed to create GCP validator: {}", e); + m.validation_response_body = validation_body::from_string(format!( + "Failed to create GCP validator: {}", + e + )); m.validation_response_status = StatusCode::INTERNAL_SERVER_ERROR; } } @@ -1066,7 +1107,8 @@ async fn timed_validate_single_match<'a>( if cred_name.is_empty() || private_key.is_empty() { m.validation_success = false; - m.validation_response_body = "Missing key name or private key.".to_string(); + m.validation_response_body = + validation_body::from_string("Missing key name or private key.".to_string()); m.validation_response_status = StatusCode::BAD_REQUEST; commit_and_return(m); return; @@ -1083,7 +1125,8 @@ async fn timed_validate_single_match<'a>( } Err(e) => { m.validation_success = false; - m.validation_response_body = 
format!("Coinbase validation error: {}", e); + m.validation_response_body = + validation_body::from_string(format!("Coinbase validation error: {}", e)); m.validation_response_status = StatusCode::BAD_GATEWAY; } } @@ -1092,7 +1135,8 @@ async fn timed_validate_single_match<'a>( Some(Validation::Raw(raw)) => { debug!("Raw validation not implemented: {}", raw); m.validation_success = false; - m.validation_response_body = "Validator not implemented".to_string(); + m.validation_response_body = + validation_body::from_string("Validator not implemented".to_string()); m.validation_response_status = StatusCode::NOT_IMPLEMENTED; } None => { /* no validation specified */ } diff --git a/src/validation/azure.rs b/src/validation/azure.rs index fe8d4ff..915ee6d 100644 --- a/src/validation/azure.rs +++ b/src/validation/azure.rs @@ -10,7 +10,10 @@ use reqwest::{header::HeaderValue, Client}; use serde_json::Value as JsonValue; use sha2::Sha256; -use crate::validation::{Cache, CachedResponse, VALIDATION_CACHE_SECONDS}; +use crate::{ + validation::{Cache, CachedResponse, ValidationResponseBody, VALIDATION_CACHE_SECONDS}, + validation_body, +}; pub fn generate_azure_cache_key(azure_json: &str) -> String { use sha1::{Digest, Sha1}; @@ -23,7 +26,7 @@ pub fn generate_azure_cache_key(azure_json: &str) -> String { pub async fn validate_azure_storage_credentials( azure_json: &str, cache: &Cache, -) -> Result<(bool, String)> { +) -> Result<(bool, ValidationResponseBody)> { let cache_key = generate_azure_cache_key(azure_json); /* ── short-circuit cached result ───────────────────────────── */ @@ -39,7 +42,8 @@ pub async fn validate_azure_storage_credentials( let storage_account = tok["storage_account"].as_str().unwrap_or(""); let storage_key = tok["storage_key"].as_str().unwrap_or(""); if storage_account.is_empty() || storage_key.is_empty() { - let msg = "Missing storage_account or storage_key".to_string(); + let msg = + validation_body::from_string("Missing storage_account or 
storage_key".to_string()); cache.insert(cache_key, CachedResponse::new(msg.clone(), StatusCode::BAD_REQUEST, false)); return Ok((false, msg)); } @@ -86,7 +90,8 @@ pub async fn validate_azure_storage_credentials( if !status.is_success() { let body = format!("Azure Storage validation failed (HTTP {}): {body_txt}", status); - cache.insert(cache_key, CachedResponse::new(body.clone(), status, false)); + let body_opt = validation_body::from_string(body.clone()); + cache.insert(cache_key, CachedResponse::new(body_opt, status, false)); return Err(anyhow!(body)); } @@ -111,6 +116,7 @@ pub async fn validate_azure_storage_credentials( /* ── success ─────────────────────────────────────────────── */ let body = format!("Account: {}; Containers: {:?}", storage_account, names); - cache.insert(cache_key, CachedResponse::new(body.clone(), StatusCode::OK, true)); - Ok((true, body)) + let body_opt = validation_body::from_string(body); + cache.insert(cache_key, CachedResponse::new(body_opt.clone(), StatusCode::OK, true)); + Ok((true, body_opt)) } diff --git a/src/validation/coinbase.rs b/src/validation/coinbase.rs index 926033d..33dc6e8 100644 --- a/src/validation/coinbase.rs +++ b/src/validation/coinbase.rs @@ -15,7 +15,12 @@ use rand::TryRngCore; use reqwest::{Client, StatusCode, Url}; use sha1::{Digest, Sha1}; -use crate::validation::{httpvalidation, Cache, CachedResponse, VALIDATION_CACHE_SECONDS}; +use crate::{ + validation::{ + httpvalidation, Cache, CachedResponse, ValidationResponseBody, VALIDATION_CACHE_SECONDS, + }, + validation_body, +}; pub fn generate_coinbase_cache_key(cred_name: &str, private_key: &str) -> String { let mut h = Sha1::new(); @@ -31,7 +36,7 @@ pub async fn validate_cdp_api_key( client: &Client, parser: &liquid::Parser, cache: &Cache, -) -> Result<(bool, String)> { +) -> Result<(bool, ValidationResponseBody)> { let cache_key = generate_coinbase_cache_key(cred_name, private_key_pem); if let Some(entry) = cache.get(&cache_key) { let c = entry.value(); @@ 
-61,7 +66,7 @@ pub async fn validate_cdp_api_key( let status = resp.status(); let body = resp.text().await.unwrap_or_default(); let ok = status == StatusCode::OK; - let msg = body; + let msg = validation_body::from_string(body); cache.insert(cache_key.clone(), CachedResponse::new(msg.clone(), status, ok)); diff --git a/src/validation/gcp.rs b/src/validation/gcp.rs index 87499dc..44fcaf4 100644 --- a/src/validation/gcp.rs +++ b/src/validation/gcp.rs @@ -19,10 +19,66 @@ pub struct GcpValidator { client: Client, } +/// Context returned after exchanging a service account key for an access token. +#[derive(Debug, Clone)] +pub struct GcpTokenContext { + pub access_token: String, + pub project_id: String, + pub client_email: String, +} + impl GcpValidator { pub fn global() -> Result<&'static Self> { GLOBAL_VALIDATOR.get_or_try_init(Self::new) } + + /// Retrieve a reference to the underlying HTTP client. + pub fn client(&self) -> &Client { + &self.client + } + + /// Given a service account key JSON blob, mint an OAuth2 access token and return + /// the token alongside basic identity details. + pub async fn get_access_token_from_sa_json(&self, gcp_json: &str) -> Result { + let _permit = self.semaphore.acquire().await?; + let token_info: JsonValue = serde_json::from_str(gcp_json)?; + + // Extract required fields. 
+ let project_id = token_info["project_id"].as_str().unwrap_or("").to_string(); + let client_email = token_info["client_email"].as_str().unwrap_or("").to_string(); + let private_key = token_info["private_key"].as_str().unwrap_or("").to_string(); + let token_uri = token_info["token_uri"].as_str().unwrap_or("").to_string(); + + if project_id.is_empty() + || client_email.is_empty() + || private_key.is_empty() + || token_uri.is_empty() + { + return Err(anyhow!( + "Missing required GCP fields: project_id/client_email/private_key/token_uri" + )); + } + + let jwt = self.create_jwt(&client_email, &private_key, &token_uri)?; + let response = self + .client + .post(&token_uri) + .form(&[ + ("grant_type", "urn:ietf:params:oauth:grant-type:jwt-bearer"), + ("assertion", &jwt), + ]) + .send() + .await? + .error_for_status()?; + + let json: JsonValue = response.json().await?; + let access_token = json["access_token"] + .as_str() + .ok_or_else(|| anyhow!("Missing access_token in GCP response"))? + .to_string(); + + Ok(GcpTokenContext { access_token, project_id, client_email }) + } } /// Generate a standardized cache key for GCP validation attempts. @@ -48,54 +104,22 @@ impl GcpValidator { } pub async fn validate_gcp_credentials(&self, gcp_json: &[u8]) -> Result<(bool, Vec)> { - let _permit = self.semaphore.acquire().await?; let gcp_json_str = String::from_utf8_lossy(gcp_json); - let token_info: JsonValue = serde_json::from_str(&gcp_json_str)?; + let ctx = match self.get_access_token_from_sa_json(&gcp_json_str).await { + Ok(ctx) => ctx, + Err(err) => { + debug!("Missing required GCP fields: {err}"); + return Ok((false, vec![])); + } + }; - // Extract required fields. 
- let project_id = token_info["project_id"].as_str().unwrap_or(""); - let client_email = token_info["client_email"].as_str().unwrap_or(""); - let private_key = token_info["private_key"].as_str().unwrap_or(""); - let token_uri = token_info["token_uri"].as_str().unwrap_or(""); - if project_id.is_empty() - || client_email.is_empty() - || private_key.is_empty() - || token_uri.is_empty() - { - debug!( - "Missing required GCP fields: project_id='{}', client_email='{}', private_key present={}, token_uri='{}'", - project_id, - client_email, - !private_key.is_empty(), - token_uri - ); - return Ok((false, vec![])); - } + let metadata = vec![ + "GCP Credential Type == service_account".to_string(), + format!("GCP Project ID == {}", ctx.project_id), + format!("GCP Client Email == {}", ctx.client_email), + ]; - // Generate JWT - let jwt = self.create_jwt(client_email, private_key, token_uri)?; - - // Request an access token - // let client = Client::new(); - let response = self - .client - .post(token_uri) - .form(&[ - ("grant_type", "urn:ietf:params:oauth:grant-type:jwt-bearer"), - ("assertion", &jwt), - ]) - .send() - .await?; - if response.status().is_success() { - let metadata = vec![ - "GCP Credential Type == service_account".to_string(), - format!("GCP Project ID == {}", project_id), - format!("GCP Client Email == {}", client_email), - ]; - Ok((true, metadata)) - } else { - Err(anyhow!("Failed to validate GCP credentials")) - } + Ok((true, metadata)) } fn create_jwt( diff --git a/src/validation/utils.rs b/src/validation/utils.rs index e15c7e2..1031436 100644 --- a/src/validation/utils.rs +++ b/src/validation/utils.rs @@ -156,7 +156,7 @@ mod tests { }, SerializableCapture { // This is group 2 (named "foo") - name: Some("foo".to_string()), + name: Some("foo"), match_number: 2, // Corrected match_number start: 1, end: 4, @@ -189,7 +189,7 @@ mod tests { // We only get the explicit captures ("foo" and group 2). 
SerializableCapture { // This is group 1 (named "foo") - name: Some("foo".to_string()), + name: Some("foo"), match_number: 1, // Corrected match_number start: 0, end: 2, diff --git a/src/validation_body.rs b/src/validation_body.rs new file mode 100644 index 0000000..96ce95f --- /dev/null +++ b/src/validation_body.rs @@ -0,0 +1,46 @@ +use schemars::{gen::SchemaGenerator, schema::Schema, JsonSchema}; +use serde::{Deserialize, Deserializer, Serializer}; +use std::borrow::Cow; + +/// Storage for validation response payloads. `None` avoids heap allocation when validation is +/// disabled or produces no body. +pub type ValidationResponseBody = Option>; + +#[inline] +pub fn from_string(body: impl Into) -> ValidationResponseBody { + let body = body.into(); + if body.is_empty() { + None + } else { + Some(body.into_boxed_str()) + } +} + +#[inline] +pub fn as_str(body: &ValidationResponseBody) -> &str { + body.as_deref().unwrap_or("") +} + +#[inline] +pub fn clone_as_string(body: &ValidationResponseBody) -> String { + as_str(body).to_string() +} + +pub fn serialize(body: &ValidationResponseBody, serializer: S) -> Result +where + S: Serializer, +{ + serializer.serialize_str(as_str(body)) +} + +pub fn deserialize<'de, D>(deserializer: D) -> Result +where + D: Deserializer<'de>, +{ + let body: Cow<'de, str> = Deserialize::deserialize(deserializer)?; + Ok(from_string(body)) +} + +pub fn schema(gen: &mut SchemaGenerator) -> Schema { + String::json_schema(gen) +} diff --git a/tests/fingerprint_dedup.rs b/tests/fingerprint_dedup.rs index 7a4c701..3186a48 100644 --- a/tests/fingerprint_dedup.rs +++ b/tests/fingerprint_dedup.rs @@ -37,13 +37,13 @@ fn make_match(fp: u64, rule_id: &str) -> Match { }; let rule = Arc::new(Rule::new(syntax)); Match { - location: Location { - offset_span: OffsetSpan { start: 0, end: 10 }, - source_span: SourceSpan { + location: Location::with_source_span( + OffsetSpan { start: 0, end: 10 }, + Some(SourceSpan { start: SourcePoint { line: 1, column: 0 }, end: 
SourcePoint { line: 1, column: 10 }, - }, - }, + }), + ), groups: SerializableCaptures { captures: smallvec![SerializableCapture { name: None, @@ -56,7 +56,7 @@ fn make_match(fp: u64, rule_id: &str) -> Match { blob_id: BlobId::new(b"dummy"), finding_fingerprint: fp, rule, - validation_response_body: String::new(), + validation_response_body: None, validation_response_status: 0, validation_success: false, calculated_entropy: 0.0, @@ -126,7 +126,7 @@ fn reporter_deduplicates_across_git_commits() -> Result<()> { comment: None, match_confidence: Confidence::Medium, visible: true, - validation_response_body: String::new(), + validation_response_body: None, validation_response_status: 0, validation_success: false, }, @@ -142,7 +142,7 @@ fn reporter_deduplicates_across_git_commits() -> Result<()> { comment: None, match_confidence: Confidence::Medium, visible: true, - validation_response_body: String::new(), + validation_response_body: None, validation_response_status: 0, validation_success: false, }, @@ -184,7 +184,7 @@ fn dedup_preserves_distinct_rules_with_same_fingerprint() -> Result<()> { comment: None, match_confidence: Confidence::Medium, visible: true, - validation_response_body: String::new(), + validation_response_body: None, validation_response_status: 0, validation_success: false, }, @@ -200,7 +200,7 @@ fn dedup_preserves_distinct_rules_with_same_fingerprint() -> Result<()> { comment: None, match_confidence: Confidence::Medium, visible: true, - validation_response_body: String::new(), + validation_response_body: None, validation_response_status: 0, validation_success: false, }, diff --git a/tests/int_allowlist.rs b/tests/int_allowlist.rs index ca813c7..be83f57 100644 --- a/tests/int_allowlist.rs +++ b/tests/int_allowlist.rs @@ -134,6 +134,8 @@ fn run_skiplist(skip_regex: Vec, skip_skipword: Vec) -> Result Result<()> { }, confidence: ConfidenceLevel::Medium, no_validate: false, + access_map: false, + access_map_html: None, rule_stats: false, only_valid: false, 
min_entropy: None, diff --git a/tests/int_dedup.rs b/tests/int_dedup.rs index 9607ca1..99e60d2 100644 --- a/tests/int_dedup.rs +++ b/tests/int_dedup.rs @@ -153,6 +153,8 @@ rules: }, confidence: ConfidenceLevel::Low, no_validate: true, + access_map: false, + access_map_html: None, rule_stats: false, only_valid: false, min_entropy: Some(0.0), diff --git a/tests/int_github.rs b/tests/int_github.rs index 71e2d3a..25ec8a1 100644 --- a/tests/int_github.rs +++ b/tests/int_github.rs @@ -140,6 +140,8 @@ fn test_github_remote_scan() -> Result<()> { }, confidence: ConfidenceLevel::Medium, no_validate: false, + access_map: false, + access_map_html: None, rule_stats: false, only_valid: false, min_entropy: None, diff --git a/tests/int_gitlab.rs b/tests/int_gitlab.rs index f850a83..02a6fe5 100644 --- a/tests/int_gitlab.rs +++ b/tests/int_gitlab.rs @@ -139,6 +139,8 @@ fn test_gitlab_remote_scan() -> Result<()> { }, confidence: ConfidenceLevel::Medium, no_validate: false, + access_map: false, + access_map_html: None, rule_stats: false, only_valid: false, min_entropy: None, @@ -291,6 +293,8 @@ fn test_gitlab_remote_scan_no_history() -> Result<()> { }, confidence: ConfidenceLevel::Medium, no_validate: false, + access_map: false, + access_map_html: None, rule_stats: false, only_valid: false, min_entropy: None, diff --git a/tests/int_local_path_validation.rs b/tests/int_local_path_validation.rs new file mode 100644 index 0000000..6287745 --- /dev/null +++ b/tests/int_local_path_validation.rs @@ -0,0 +1,32 @@ +use std::time::Duration; + +use anyhow::Result; +use assert_cmd::Command; +use tempfile::tempdir; + +#[test] +fn scan_local_path_finishes_without_repo_inputs() -> Result<()> { + let dir = tempdir()?; + let file_path = dir.path().join("sample.txt"); + std::fs::write(&file_path, "hello world")?; + + let mut cmd = Command::new(assert_cmd::cargo::cargo_bin!("kingfisher")); + cmd.args([ + "scan", + file_path.to_str().expect("temp path is valid UTF-8"), + "--no-update-check", + 
"--format", + "json", + "--only-valid", + ]); + // .timeout(Duration::from_secs(40)); + + let output = cmd.output()?; + if !output.status.success() { + eprintln!("stdout: {}", String::from_utf8_lossy(&output.stdout)); + eprintln!("stderr: {}", String::from_utf8_lossy(&output.stderr)); + } + assert!(output.status.success()); + + Ok(()) +} diff --git a/tests/int_redact.rs b/tests/int_redact.rs index 6da5516..5aefb26 100644 --- a/tests/int_redact.rs +++ b/tests/int_redact.rs @@ -116,6 +116,8 @@ async fn test_redact_hashes_finding_values() -> Result<()> { }, confidence: ConfidenceLevel::Low, no_validate: true, + access_map: false, + access_map_html: None, rule_stats: false, only_valid: false, min_entropy: Some(0.0), diff --git a/tests/int_slack.rs b/tests/int_slack.rs index b391c07..40f2d80 100644 --- a/tests/int_slack.rs +++ b/tests/int_slack.rs @@ -125,6 +125,8 @@ impl TestContext { }, confidence: ConfidenceLevel::Low, no_validate: true, + access_map: false, + access_map_html: None, rule_stats: false, only_valid: false, min_entropy: Some(0.0), @@ -264,6 +266,8 @@ async fn test_scan_slack_messages() -> Result<()> { }, confidence: ConfidenceLevel::Low, no_validate: true, + access_map: false, + access_map_html: None, rule_stats: false, only_valid: false, min_entropy: Some(0.0), diff --git a/tests/int_validation_cache.rs b/tests/int_validation_cache.rs index c509a7d..7cb7416 100644 --- a/tests/int_validation_cache.rs +++ b/tests/int_validation_cache.rs @@ -196,6 +196,8 @@ async fn test_validation_cache_and_depvars() -> Result<()> { }, confidence: ConfidenceLevel::Low, no_validate: false, + access_map: false, + access_map_html: None, rule_stats: false, only_valid: false, min_entropy: Some(0.0), diff --git a/tests/int_vulnerable_files.rs b/tests/int_vulnerable_files.rs index 5d67b43..86e5186 100644 --- a/tests/int_vulnerable_files.rs +++ b/tests/int_vulnerable_files.rs @@ -139,6 +139,8 @@ impl TestContext { }, confidence: ConfidenceLevel::Low, no_validate: true, + 
access_map: false, + access_map_html: None, rule_stats: false, only_valid: false, min_entropy: Some(0.0), @@ -268,6 +270,8 @@ impl TestContext { }, confidence: ConfidenceLevel::Low, no_validate: true, + access_map: false, + access_map_html: None, rule_stats: false, only_valid: false, min_entropy: Some(0.0), diff --git a/vendor/vectorscan-rs/vectorscan-rs/src/native.rs b/vendor/vectorscan-rs/vectorscan-rs/src/native.rs index bf75b4b..32c1c86 100644 --- a/vendor/vectorscan-rs/vectorscan-rs/src/native.rs +++ b/vendor/vectorscan-rs/vectorscan-rs/src/native.rs @@ -35,7 +35,7 @@ impl BlockDatabase { } /// Create a new scanner from this database - pub fn create_scanner(&self) -> Result { + pub fn create_scanner(&self) -> Result, Error> { BlockScanner::new(self) } @@ -126,7 +126,7 @@ impl StreamingDatabase { } /// Create a new scanner from this database - pub fn create_scanner(&self) -> Result { + pub fn create_scanner(&self) -> Result, Error> { StreamingScanner::new(self) }