diff --git a/CHANGELOG.md b/CHANGELOG.md index 1fc6900..f58095f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,10 @@ All notable changes to this project will be documented in this file. - Added/updated `pipedrive` and `amplitude` rules - Access Map: added Buildkite provider. Enumerates token scopes, user identity, organizations, and pipelines with severity classification based on scope risk. - Access Map: added Harness provider. Uses `x-api-key` authentication to enumerate organizations/projects when permitted (best-effort). -- Access Map CLI: added providers `buildkite`, `harness`. +- Access Map: added OpenAI provider. Supports standalone `access-map openai` and automatic mapping for validated `kingfisher.openai.*` findings. +- Access Map: added Anthropic provider. Supports standalone `access-map anthropic` and automatic mapping for validated `kingfisher.anthropic.*` findings. +- Access Map: added Salesforce provider. Supports standalone `access-map salesforce` (token + instance) and automatic mapping for validated `kingfisher.salesforce.*` findings. +- Access Map CLI: added providers `buildkite`, `harness`, `openai`, `anthropic`, `salesforce`. - Reports: omit `validate`/`revoke` command hints when required template vars are missing (prevents suggesting unrunnable commands, e.g. Harness `ACCOUNTIDENTIFIER`). - Access Map GCP: added resource enumeration for Cloud KMS key rings, Cloud Functions, Firestore databases, Cloud Spanner instances, and project service accounts. - Access Map GCP: populated `token_details` with service account metadata (display name, unique ID, disabled status). diff --git a/crates/kingfisher-rules/data/rules/salesforce.yml b/crates/kingfisher-rules/data/rules/salesforce.yml index 3da3a6a..4ddd3e2 100644 --- a/crates/kingfisher-rules/data/rules/salesforce.yml +++ b/crates/kingfisher-rules/data/rules/salesforce.yml @@ -1,5 +1,5 @@ rules: - - name: Salesforce Access / Refresh Token + - name: Salesforce Access Token id: kingfisher.salesforce.1 pattern: | (?xi) @@ -8,7 +8,7 @@ rules: 00 [A-Z0-9]{13} ! - [A-Z0-9._-]{90,120} + [A-Z0-9._-]{80,260} ) pattern_requirements: min_digits: 6 @@ -253,5 +253,65 @@ rules: true https://api.example.net/oauth/token + references: + - https://developer.salesforce.com/docs/atlas.en-us.api_rest.meta/api_rest/intro_oauth_and_connected_apps.htm + + - name: Salesforce Refresh Token + id: kingfisher.salesforce.6 + pattern: | + (?xi)(?s) + (?:salesforce|sforce|login\.salesforce\.com|test\.salesforce\.com|my\.salesforce\.com) + (?:.|[\n\r]){0,256}? + \brefresh(?:_|[\s-])token\b + (?:.|[\n\r]){0,24}? + (?: + [:=] + | + ["']\s*:\s*["'] + ) + \s* + ( + 5A[A-Z0-9._~-]{40,510} + ) + (?: + \b + | + ["'] + ) + pattern_requirements: + min_digits: 4 + min_entropy: 3.5 + confidence: medium + examples: + - | + { + "instance_url": "https://mydomain.my.salesforce.com", + "refresh_token": "5Aep861vGfRt9a8nT3qgV7wU1rYp3kL2mN8dQ6zX4cB7jH9sT1vW2xY3zA4bC5dE6fG7hI8jK9mN0pQ1rS2tU3vW4xY5z" + } + - | + salesforce: + token_endpoint: https://login.salesforce.com/services/oauth2/token + refresh_token: 5AefmTn2q8JdV4pP7xR1wY5zC9kL3mN6qS0uV2xY8bD1fG4hJ7kM9nQ2rT5vW8yZ1aC3eF6gH9jK2mP5sR8uV1xY4 + references: + - https://developer.salesforce.com/docs/atlas.en-us.api_rest.meta/api_rest/intro_understanding_web_server_oauth_flow.htm + - https://help.salesforce.com/s/articleView?id=sf.remoteaccess_oauth_refresh_token_flow.htm&type=5 + + - name: Salesforce Connected App Consumer Key (Prefixed) + id: kingfisher.salesforce.7 + pattern: | + (?xi)(?s) + (?:salesforce|sforce|connected(?:_|[\s-])app|consumer(?:_|[\s-])key|client(?:_|[\s-])id) + (?:.|[\n\r]){0,128}? + \b + ( + 3MVG9[A-Z0-9._~-]{20,180} + ) + \b + pattern_requirements: + min_digits: 4 + min_entropy: 3.6 + confidence: medium + examples: + - 3MVG9P8aWj9n4kT2xQ5mV7rY1bC3dF6gH8jK0mN2pR4tU6wX8zA1cE3gH5kM7qS9uV2xY4bD6fJ8nP1rT3vW5yZ7 references: - https://developer.salesforce.com/docs/atlas.en-us.api_rest.meta/api_rest/intro_oauth_and_connected_apps.htm \ No newline at end of file diff --git a/docs/ACCESS_MAP.md b/docs/ACCESS_MAP.md index b0db889..cb06c5f 100644 --- a/docs/ACCESS_MAP.md +++ b/docs/ACCESS_MAP.md @@ -260,8 +260,64 @@ kingfisher access-map harness ./harness.token --json-out harness.access-map.json - Access map uses `https://app.harness.io` as the API base. +### OpenAI (`openai`) + +- **Credential**: a single OpenAI API key string (read from a file for `kingfisher access-map openai `). +- **Token types supported**: OpenAI keys accepted by `Authorization: Bearer ` (for example `sk-...`, `sk-proj-...`, `sk-svcacct-...`). + +Kingfisher performs read-only enumeration via: + +- `GET https://api.openai.com/v1/models` to list accessible models and infer organization ownership. +- `GET https://api.openai.com/v1/me` for token identity metadata when available. +- `GET https://api.openai.com/v1/organization/projects` for project visibility when the key has permission (best-effort). + +#### Standalone example (OpenAI) + +```bash +printf '%s' 'sk-example...' > ./openai.token +kingfisher access-map openai ./openai.token --json-out openai.access-map.json +``` + +#### Notes (OpenAI) + +- Access map uses `https://api.openai.com/v1` as the API base. + +### Salesforce (`salesforce`) + +- **Credential**: Salesforce access token plus instance domain. +- **Supported standalone formats** for `kingfisher access-map salesforce `: + - JSON: + - `token` (or `access_token`) + - `instance_url` (or `instance`), such as `https://mydomain.my.salesforce.com` + - Free-form text containing both: + - a Salesforce access token (`00...!...`) + - an instance host (`.my.salesforce.com`) + +Kingfisher performs read-only enumeration via: + +- `GET /services/data/v60.0/limits` to confirm API access and gather account-level API context. +- `GET /services/oauth2/userinfo` for identity metadata when available. +- `GET /services/data/v60.0/sobjects` for visible object metadata (best-effort). + +#### Standalone example (Salesforce) + +```bash +cat > ./salesforce.json <<'EOF' +{ + "token": "00DE0X0A0M0PeLE!AQcAQH0dMHEXAMPLE...", + "instance_url": "https://mydomain.my.salesforce.com" +} +EOF + +kingfisher access-map salesforce ./salesforce.json --json-out salesforce.access-map.json +``` + +#### Notes (Salesforce) + +- Access map currently targets `https://.my.salesforce.com` and API version `v60.0`. + ## Notes on access-map generation during `scan --access-map` - Access-map entries are only recorded for **validated** findings. - Some providers require extra context that Kingfisher infers from the finding context or validation response (for example, Azure DevOps organization name). -- Validated Hugging Face, Gitea, Bitbucket, and Buildkite credentials discovered during scans with `--access-map` are automatically collected and mapped, matching the existing behavior for other platforms. +- Validated Hugging Face, Gitea, Bitbucket, Buildkite, Harness, OpenAI, Anthropic, and Salesforce credentials discovered during scans with `--access-map` are automatically collected and mapped, matching the existing behavior for other platforms. diff --git a/src/access_map.rs b/src/access_map.rs index e6df880..a9f8621 100644 --- a/src/access_map.rs +++ b/src/access_map.rs @@ -4,6 +4,7 @@ use serde::Serialize; use crate::cli::commands::access_map::{AccessMapArgs, AccessMapProvider}; +mod anthropic; mod aws; mod azure; mod azure_devops; @@ -16,8 +17,10 @@ mod gitlab; mod harness; mod huggingface; pub(crate) mod mongodb; +mod openai; pub(crate) mod postgres; mod report; +mod salesforce; mod slack; /// Trait for access map providers that map a single token to an access profile. @@ -52,6 +55,9 @@ pub async fn run(args: AccessMapArgs) -> Result<()> { AccessMapProvider::Bitbucket => bitbucket::map_access(&args).await?, AccessMapProvider::Buildkite => buildkite::map_access(&args).await?, AccessMapProvider::Harness => harness::map_access(&args).await?, + AccessMapProvider::Openai => openai::map_access(&args).await?, + AccessMapProvider::Anthropic => anthropic::map_access(&args).await?, + AccessMapProvider::Salesforce => salesforce::map_access(&args).await?, }; let json = serde_json::to_string_pretty(&result)?; @@ -104,6 +110,12 @@ pub enum AccessMapRequest { Buildkite { token: String, fingerprint: String }, /// A Harness API token (x-api-key). Harness { token: String, fingerprint: String }, + /// An OpenAI API token. + OpenAI { token: String, fingerprint: String }, + /// An Anthropic API token. + Anthropic { token: String, fingerprint: String }, + /// A Salesforce access token plus instance domain. + Salesforce { token: String, instance: String, fingerprint: String }, } /// Structured output describing the resolved identity and its risk profile. @@ -304,6 +316,18 @@ pub async fn map_requests(requests: Vec) -> Vec { (map_token(&HarnessMapper, &token).await, fingerprint) } + AccessMapRequest::OpenAI { token, fingerprint } => { + (map_token(&OpenAiMapper, &token).await, fingerprint) + } + AccessMapRequest::Anthropic { token, fingerprint } => { + (map_token(&AnthropicMapper, &token).await, fingerprint) + } + AccessMapRequest::Salesforce { token, instance, fingerprint } => ( + salesforce::map_access_from_token_and_instance(&token, &instance) + .await + .unwrap_or_else(|err| build_failed_result("salesforce", "token", err)), + fingerprint, + ), }; mapped.fingerprint = Some(fp); @@ -435,6 +459,32 @@ impl TokenAccessMapper for HarnessMapper { } } +/// OpenAI access mapper. +pub struct OpenAiMapper; + +impl TokenAccessMapper for OpenAiMapper { + fn cloud_name(&self) -> &'static str { + "openai" + } + + async fn map_access_from_token(&self, token: &str) -> Result { + openai::map_access_from_token(token).await + } +} + +/// Anthropic access mapper. +pub struct AnthropicMapper; + +impl TokenAccessMapper for AnthropicMapper { + fn cloud_name(&self) -> &'static str { + "anthropic" + } + + async fn map_access_from_token(&self, token: &str) -> Result { + anthropic::map_access_from_token(token).await + } +} + // ------------------------------------------------------------------------------------------------- // Helper functions // ------------------------------------------------------------------------------------------------- diff --git a/src/access_map/anthropic.rs b/src/access_map/anthropic.rs new file mode 100644 index 0000000..f07bd2b --- /dev/null +++ b/src/access_map/anthropic.rs @@ -0,0 +1,180 @@ +use anyhow::{anyhow, Context, Result}; +use reqwest::{header, Client}; +use serde::Deserialize; +use tracing::warn; + +use crate::{cli::commands::access_map::AccessMapArgs, validation::GLOBAL_USER_AGENT}; + +use super::{ + build_recommendations, AccessMapResult, AccessSummary, AccessTokenDetails, PermissionSummary, + ResourceExposure, RoleBinding, Severity, +}; + +const ANTHROPIC_API: &str = "https://api.anthropic.com/v1"; +const ANTHROPIC_VERSION: &str = "2023-06-01"; +const MAX_MODEL_RESOURCES: usize = 50; + +#[derive(Debug, Deserialize, Default, Clone)] +struct AnthropicModelsResponse { + #[serde(default)] + data: Vec, +} + +#[derive(Debug, Deserialize, Default, Clone)] +struct AnthropicModel { + #[serde(default)] + id: Option, + #[serde(default)] + display_name: Option, +} + +pub async fn map_access(args: &AccessMapArgs) -> Result { + let token = if let Some(path) = args.credential_path.as_deref() { + let raw = std::fs::read_to_string(path) + .with_context(|| format!("Failed to read Anthropic token from {}", path.display()))?; + raw.trim().to_string() + } else { + return Err(anyhow!("Anthropic access-map requires a validated token from scan results")); + }; + + map_access_from_token(&token).await +} + +pub async fn map_access_from_token(token: &str) -> Result { + let client = Client::builder() + .user_agent(GLOBAL_USER_AGENT.as_str()) + .build() + .context("Failed to build Anthropic HTTP client")?; + + let mut risk_notes = Vec::new(); + let mut roles = Vec::new(); + let mut permissions = PermissionSummary::default(); + let mut resources = Vec::new(); + + let models = list_models(&client, token).await.unwrap_or_else(|err| { + warn!("Anthropic access-map: model enumeration failed: {err}"); + risk_notes.push(format!("Model enumeration failed: {err}")); + Vec::new() + }); + + let token_kind = detect_token_type(token); + roles.push(RoleBinding { + name: format!("token_type:{token_kind}"), + source: "anthropic".into(), + permissions: vec![format!("token:{token_kind}")], + }); + permissions.read_only.push("models:list".to_string()); + + for model in models.iter().take(MAX_MODEL_RESOURCES) { + let model_name = model + .id + .clone() + .or_else(|| model.display_name.clone()) + .unwrap_or_else(|| "unknown_model".to_string()); + resources.push(ResourceExposure { + resource_type: "model".into(), + name: model_name, + permissions: vec!["model:read".to_string()], + risk: severity_to_str(Severity::Low).to_string(), + reason: "Model accessible to this Anthropic key".to_string(), + }); + } + + if models.len() > MAX_MODEL_RESOURCES { + risk_notes.push(format!( + "Model resource list truncated to first {MAX_MODEL_RESOURCES} entries ({} total models visible)", + models.len() + )); + } + + if resources.is_empty() { + resources.push(ResourceExposure { + resource_type: "account".into(), + name: "anthropic_api_key".into(), + permissions: Vec::new(), + risk: severity_to_str(Severity::Low).to_string(), + reason: "Anthropic account associated with this API key".to_string(), + }); + risk_notes.push("No models were enumerable for this key".to_string()); + } + + permissions.read_only.sort(); + permissions.read_only.dedup(); + + let severity = Severity::Low; + + Ok(AccessMapResult { + cloud: "anthropic".into(), + identity: AccessSummary { + id: "anthropic_api_key".into(), + access_type: "token".into(), + project: None, + tenant: None, + account_id: None, + }, + roles, + permissions, + resources, + severity, + recommendations: build_recommendations(severity), + risk_notes, + token_details: Some(AccessTokenDetails { + name: None, + username: None, + account_type: Some("api_key".into()), + company: None, + location: None, + email: None, + url: Some("https://console.anthropic.com/settings/keys".into()), + token_type: Some(token_kind.to_string()), + created_at: None, + last_used_at: None, + expires_at: None, + user_id: None, + scopes: Vec::new(), + }), + provider_metadata: None, + fingerprint: None, + }) +} + +async fn list_models(client: &Client, token: &str) -> Result> { + let resp = client + .get(format!("{ANTHROPIC_API}/models")) + .header("x-api-key", token) + .header("anthropic-version", ANTHROPIC_VERSION) + .header(header::ACCEPT, "application/json") + .send() + .await + .context("Anthropic access-map: failed to list models")?; + + if !resp.status().is_success() { + return Err(anyhow!( + "Anthropic access-map: model listing failed with HTTP {}", + resp.status() + )); + } + + let body: AnthropicModelsResponse = + resp.json().await.context("Anthropic access-map: invalid model list JSON")?; + Ok(body.data) +} + +fn detect_token_type(token: &str) -> &'static str { + if token.starts_with("sk-ant-admin") { + "admin_api_key" + } else if token.starts_with("sk-ant-api") { + "api_key" + } else { + "unknown_api_key" + } +} + +fn severity_to_str(severity: Severity) -> &'static str { + match severity { + Severity::Low => "low", + Severity::Medium => "medium", + Severity::High => "high", + Severity::Critical => "critical", + } +} diff --git a/src/access_map/gcp.rs b/src/access_map/gcp.rs index f81ae1e..a468a01 100644 --- a/src/access_map/gcp.rs +++ b/src/access_map/gcp.rs @@ -47,17 +47,14 @@ pub async fn map_access_from_json(data: &str) -> Result { let mut project_id = if token_context.project_id.is_empty() { None } else { Some(token_context.project_id) }; - let sa_metadata = match fetch_service_account_metadata(&http_client, &access_token, &client_email) - .await - { - Ok(meta) => meta, - Err(err) => { - verbose_warn!( - "GCP access-map: failed to fetch service account metadata: {err}" - ); - ServiceAccountMetadata::default() - } - }; + let sa_metadata = + match fetch_service_account_metadata(&http_client, &access_token, &client_email).await { + Ok(meta) => meta, + Err(err) => { + verbose_warn!("GCP access-map: failed to fetch service account metadata: {err}"); + ServiceAccountMetadata::default() + } + }; if project_id.is_none() { project_id = sa_metadata.project_id.clone(); @@ -1105,9 +1102,8 @@ async fn enumerate_resources( } else if status.is_success() { let json: Value = serde_json::from_slice(&body)?; if let Some(items) = json.get("secrets").and_then(|i| i.as_array()) { - let can_access_values = perm_set - .iter() - .any(|p| p.contains("secretmanager.versions.access")); + let can_access_values = + perm_set.iter().any(|p| p.contains("secretmanager.versions.access")); let can_write = perm_set.iter().any(|p| { p.contains("secretmanager.secrets.create") || p.contains("secretmanager.secrets.update") @@ -1197,7 +1193,11 @@ async fn enumerate_resources( name: name.to_string(), permissions: matching_permissions( &perm_set, - &["cloudkms.cryptoKeys.", "cloudkms.keyRings.", "cloudkms.cryptoKeyVersions."], + &[ + "cloudkms.cryptoKeys.", + "cloudkms.keyRings.", + "cloudkms.cryptoKeyVersions.", + ], ), risk: risk.into(), reason: reason.into(), @@ -1265,24 +1265,20 @@ async fn enumerate_resources( } if add_service_accounts { - let url = format!( - "https://iam.googleapis.com/v1/projects/{}/serviceAccounts", - project_id - ); + let url = format!("https://iam.googleapis.com/v1/projects/{}/serviceAccounts", project_id); let resp = client.get(&url).bearer_auth(token).send().await?; let status = resp.status(); let body = resp.bytes().await?; if let Some(disabled) = service_disabled_message(&body)? { - verbose_warn!( - "GCP access-map: IAM API disabled for project {project_id}: {disabled}" - ); + verbose_warn!("GCP access-map: IAM API disabled for project {project_id}: {disabled}"); } else if status.is_success() { let json: Value = serde_json::from_slice(&body)?; if let Some(accounts) = json.get("accounts").and_then(|a| a.as_array()) { - let can_impersonate = perm_set - .iter() - .any(|p| p.contains("serviceAccounts.actAs") || p.contains("serviceAccounts.getAccessToken")); + let can_impersonate = perm_set.iter().any(|p| { + p.contains("serviceAccounts.actAs") + || p.contains("serviceAccounts.getAccessToken") + }); for sa in accounts { if let Some(email) = sa.get("email").and_then(|e| e.as_str()) { @@ -1313,10 +1309,7 @@ async fn enumerate_resources( } if add_firestore { - let url = format!( - "https://firestore.googleapis.com/v1/projects/{}/databases", - project_id - ); + let url = format!("https://firestore.googleapis.com/v1/projects/{}/databases", project_id); let resp = client.get(&url).bearer_auth(token).send().await?; let status = resp.status(); let body = resp.bytes().await?; @@ -1363,10 +1356,7 @@ async fn enumerate_resources( } if add_spanner { - let url = format!( - "https://spanner.googleapis.com/v1/projects/{}/instances", - project_id - ); + let url = format!("https://spanner.googleapis.com/v1/projects/{}/instances", project_id); let resp = client.get(&url).bearer_auth(token).send().await?; let status = resp.status(); let body = resp.bytes().await?; diff --git a/src/access_map/openai.rs b/src/access_map/openai.rs new file mode 100644 index 0000000..a6b30b6 --- /dev/null +++ b/src/access_map/openai.rs @@ -0,0 +1,328 @@ +use std::collections::BTreeSet; + +use anyhow::{anyhow, Context, Result}; +use reqwest::{header, Client, StatusCode}; +use serde::Deserialize; +use tracing::warn; + +use crate::{cli::commands::access_map::AccessMapArgs, validation::GLOBAL_USER_AGENT}; + +use super::{ + build_recommendations, AccessMapResult, AccessSummary, AccessTokenDetails, PermissionSummary, + ResourceExposure, RoleBinding, Severity, +}; + +const OPENAI_API: &str = "https://api.openai.com/v1"; +const MAX_MODEL_RESOURCES: usize = 50; + +#[derive(Debug, Deserialize, Default, Clone)] +struct OpenAiMe { + #[serde(default)] + id: Option, + #[serde(default)] + name: Option, + #[serde(default)] + email: Option, +} + +#[derive(Debug, Deserialize, Default, Clone)] +struct OpenAiModelsResponse { + #[serde(default)] + data: Vec, +} + +#[derive(Debug, Deserialize, Default, Clone)] +struct OpenAiModel { + #[serde(default)] + id: Option, + #[serde(default, rename = "owned_by")] + owned_by: Option, +} + +#[derive(Debug, Deserialize, Default, Clone)] +struct OpenAiProjectsResponse { + #[serde(default)] + data: Vec, +} + +#[derive(Debug, Deserialize, Default, Clone)] +struct OpenAiProject { + #[serde(default)] + id: Option, + #[serde(default)] + name: Option, + #[serde(default)] + archived: bool, +} + +pub async fn map_access(args: &AccessMapArgs) -> Result { + let token = if let Some(path) = args.credential_path.as_deref() { + let raw = std::fs::read_to_string(path) + .with_context(|| format!("Failed to read OpenAI token from {}", path.display()))?; + raw.trim().to_string() + } else { + return Err(anyhow!("OpenAI access-map requires a validated token from scan results")); + }; + + map_access_from_token(&token).await +} + +pub async fn map_access_from_token(token: &str) -> Result { + let client = Client::builder() + .user_agent(GLOBAL_USER_AGENT.as_str()) + .build() + .context("Failed to build OpenAI HTTP client")?; + + let mut risk_notes = Vec::new(); + let mut roles = Vec::new(); + let mut permissions = PermissionSummary::default(); + let mut resources = Vec::new(); + + let models_result = list_models(&client, token).await; + let me_result = fetch_me(&client, token).await; + if models_result.is_err() && me_result.is_err() { + return Err(anyhow!( + "OpenAI access-map: both /models and /me lookups failed; token may not be valid for access mapping" + )); + } + + let models = models_result.unwrap_or_else(|err| { + warn!("OpenAI access-map: model enumeration failed: {err}"); + risk_notes.push(format!("Model enumeration failed: {err}")); + Vec::new() + }); + let me = me_result.unwrap_or_else(|err| { + warn!("OpenAI access-map: /me lookup failed: {err}"); + risk_notes.push(format!("Identity lookup failed: {err}")); + OpenAiMe::default() + }); + + let token_kind = detect_token_type(token); + roles.push(RoleBinding { + name: format!("token_type:{token_kind}"), + source: "openai".into(), + permissions: vec![format!("token:{token_kind}")], + }); + + permissions.read_only.push("models:list".to_string()); + + let projects = list_projects(&client, token).await.unwrap_or_else(|err| { + warn!("OpenAI access-map: project enumeration failed: {err}"); + risk_notes.push(format!("Project enumeration failed: {err}")); + Vec::new() + }); + + if !projects.is_empty() { + permissions.risky.push("projects:list".to_string()); + } + + let identity_id = me + .email + .clone() + .or_else(|| me.name.clone()) + .or_else(|| me.id.clone()) + .unwrap_or_else(|| "openai_api_key".to_string()); + + let mut owners = BTreeSet::new(); + for model in &models { + if let Some(owner) = model.owned_by.as_ref() { + if !owner.is_empty() { + owners.insert(owner.clone()); + } + } + } + + for owner in owners { + resources.push(ResourceExposure { + resource_type: "organization".into(), + name: owner, + permissions: vec!["models:list".to_string()], + risk: severity_to_str(Severity::Low).to_string(), + reason: "Organization inferred from accessible models".to_string(), + }); + } + + for project in &projects { + let project_name = project + .name + .clone() + .or_else(|| project.id.clone()) + .unwrap_or_else(|| "unknown_project".to_string()); + let risk = if project.archived { Severity::Low } else { Severity::Medium }; + resources.push(ResourceExposure { + resource_type: "project".into(), + name: project_name, + permissions: vec!["project:read".to_string()], + risk: severity_to_str(risk).to_string(), + reason: "Project visible to this OpenAI key".to_string(), + }); + } + + let mut model_count = 0usize; + for model in &models { + if model_count >= MAX_MODEL_RESOURCES { + break; + } + if let Some(model_id) = model.id.as_ref() { + resources.push(ResourceExposure { + resource_type: "model".into(), + name: model_id.clone(), + permissions: vec!["model:read".to_string()], + risk: severity_to_str(Severity::Low).to_string(), + reason: "Model accessible to this OpenAI key".to_string(), + }); + model_count += 1; + } + } + if models.len() > MAX_MODEL_RESOURCES { + risk_notes.push(format!( + "Model resource list truncated to first {MAX_MODEL_RESOURCES} entries ({} total models visible)", + models.len() + )); + } + + if resources.is_empty() { + resources.push(ResourceExposure { + resource_type: "account".into(), + name: identity_id.clone(), + permissions: Vec::new(), + risk: severity_to_str(Severity::Low).to_string(), + reason: "OpenAI account associated with this API key".to_string(), + }); + risk_notes.push("No projects, organizations, or models were enumerable".to_string()); + } + + permissions.admin.sort(); + permissions.admin.dedup(); + permissions.risky.sort(); + permissions.risky.dedup(); + permissions.read_only.sort(); + permissions.read_only.dedup(); + + let severity = derive_severity(&permissions, projects.len(), models.len()); + + Ok(AccessMapResult { + cloud: "openai".into(), + identity: AccessSummary { + id: identity_id, + access_type: "token".into(), + project: None, + tenant: None, + account_id: me.id.clone(), + }, + roles, + permissions, + resources, + severity, + recommendations: build_recommendations(severity), + risk_notes, + token_details: Some(AccessTokenDetails { + name: me.name, + username: None, + account_type: Some("api_key".into()), + company: None, + location: None, + email: me.email, + url: Some("https://platform.openai.com/".into()), + token_type: Some(token_kind.to_string()), + created_at: None, + last_used_at: None, + expires_at: None, + user_id: me.id, + scopes: Vec::new(), + }), + provider_metadata: None, + fingerprint: None, + }) +} + +async fn list_models(client: &Client, token: &str) -> Result> { + let resp = client + .get(format!("{OPENAI_API}/models")) + .header(header::AUTHORIZATION, format!("Bearer {token}")) + .header(header::ACCEPT, "application/json") + .send() + .await + .context("OpenAI access-map: failed to list models")?; + + if !resp.status().is_success() { + return Err(anyhow!("OpenAI access-map: model listing failed with HTTP {}", resp.status())); + } + + let body: OpenAiModelsResponse = + resp.json().await.context("OpenAI access-map: invalid model list JSON")?; + Ok(body.data) +} + +async fn fetch_me(client: &Client, token: &str) -> Result { + let resp = client + .get(format!("{OPENAI_API}/me")) + .header(header::AUTHORIZATION, format!("Bearer {token}")) + .header(header::ACCEPT, "application/json") + .send() + .await + .context("OpenAI access-map: failed to query /me")?; + + if !resp.status().is_success() { + return Err(anyhow!("OpenAI access-map: /me failed with HTTP {}", resp.status())); + } + + resp.json().await.context("OpenAI access-map: invalid /me JSON") +} + +async fn list_projects(client: &Client, token: &str) -> Result> { + let resp = client + .get(format!("{OPENAI_API}/organization/projects")) + .header(header::AUTHORIZATION, format!("Bearer {token}")) + .header(header::ACCEPT, "application/json") + .send() + .await + .context("OpenAI access-map: failed to list organization projects")?; + + match resp.status() { + StatusCode::OK => { + let body: OpenAiProjectsResponse = + resp.json().await.context("OpenAI access-map: invalid projects JSON")?; + Ok(body.data) + } + StatusCode::FORBIDDEN | StatusCode::NOT_FOUND => Ok(Vec::new()), + StatusCode::UNAUTHORIZED => { + Err(anyhow!("OpenAI access-map: project listing unauthorized (401)")) + } + status => Err(anyhow!("OpenAI access-map: project listing failed with HTTP {status}")), + } +} + +fn detect_token_type(token: &str) -> &'static str { + if token.starts_with("sk-proj-") { + "project_api_key" + } else if token.starts_with("sk-svcacct-") { + "service_account_api_key" + } else if token.starts_with("sk-None-") { + "legacy_api_key" + } else { + "api_key" + } +} + +fn derive_severity(permissions: &PermissionSummary, projects: usize, models: usize) -> Severity { + if !permissions.admin.is_empty() { + return Severity::High; + } + if !permissions.risky.is_empty() || projects > 0 { + return Severity::Medium; + } + if models > 0 { + return Severity::Low; + } + Severity::Low +} + +fn severity_to_str(severity: Severity) -> &'static str { + match severity { + Severity::Low => "low", + Severity::Medium => "medium", + Severity::High => "high", + Severity::Critical => "critical", + } +} diff --git a/src/access_map/salesforce.rs b/src/access_map/salesforce.rs new file mode 100644 index 0000000..4de578d --- /dev/null +++ b/src/access_map/salesforce.rs @@ -0,0 +1,306 @@ +use anyhow::{anyhow, Context, Result}; +use once_cell::sync::Lazy; +use regex::Regex; +use reqwest::{header, Client, StatusCode}; +use serde_json::Value; +use tracing::warn; + +use crate::{cli::commands::access_map::AccessMapArgs, validation::GLOBAL_USER_AGENT}; + +use super::{ + build_recommendations, AccessMapResult, AccessSummary, AccessTokenDetails, PermissionSummary, + ResourceExposure, RoleBinding, Severity, +}; + +const SALESFORCE_API_VERSION: &str = "v60.0"; +const MAX_OBJECT_RESOURCES: usize = 100; + +static TOKEN_RE: Lazy = Lazy::new(|| { + Regex::new(r"(?xi)\b(00[A-Z0-9]{13}![A-Z0-9._-]{80,260})\b") + .expect("valid salesforce token regex") +}); +static INSTANCE_RE: Lazy = Lazy::new(|| { + Regex::new(r"(?xi)\b([A-Z0-9-]{5,128})\.my\.salesforce\.com\b") + .expect("valid salesforce instance regex") +}); + +pub async fn map_access(args: &AccessMapArgs) -> Result { + let path = args.credential_path.as_deref().ok_or_else(|| { + anyhow!("Salesforce access-map requires a credential file with token and instance") + })?; + let raw = std::fs::read_to_string(path).with_context(|| { + format!("Failed to read Salesforce credential file from {}", path.display()) + })?; + let (token, instance) = parse_salesforce_credentials(&raw)?; + map_access_from_token_and_instance(&token, &instance).await +} + +pub async fn map_access_from_token_and_instance( + token: &str, + instance: &str, +) -> Result { + let instance = normalize_instance(instance) + .ok_or_else(|| anyhow!("Salesforce access-map requires a valid instance domain"))?; + let base_url = format!("https://{instance}.my.salesforce.com"); + + let client = Client::builder() + .user_agent(GLOBAL_USER_AGENT.as_str()) + .build() + .context("Failed to build Salesforce HTTP client")?; + + let mut risk_notes = Vec::new(); + let mut permissions = PermissionSummary::default(); + permissions.read_only.push("limits:read".to_string()); + + let limits = fetch_limits(&client, token, &base_url).await?; + let user_info = fetch_user_info(&client, token, &base_url).await.unwrap_or_else(|err| { + warn!("Salesforce access-map: userinfo lookup failed: {err}"); + risk_notes.push(format!("Identity lookup failed: {err}")); + Value::Null + }); + let objects = list_sobjects(&client, token, &base_url).await.unwrap_or_else(|err| { + warn!("Salesforce access-map: sobject enumeration failed: {err}"); + risk_notes.push(format!("Object enumeration failed: {err}")); + Vec::new() + }); + + if !objects.is_empty() { + permissions.read_only.push("sobjects:list".to_string()); + } + permissions.risky.push("rest_api:access".to_string()); + permissions.read_only.sort(); + permissions.read_only.dedup(); + permissions.risky.sort(); + permissions.risky.dedup(); + + let organization_id = + value_as_string(&user_info, &["organization_id", "organizationId", "org_id", "orgId"]); + let user_id = value_as_string(&user_info, &["user_id", "userId", "sub", "id"]); + let username = + value_as_string(&user_info, &["preferred_username", "preferredUsername", "email", "name"]); + + let identity_id = username + .clone() + .or_else(|| user_id.clone()) + .or_else(|| organization_id.clone()) + .unwrap_or_else(|| "salesforce_access_token".to_string()); + + let roles = vec![RoleBinding { + name: "token_type:access_token".into(), + source: "salesforce".into(), + permissions: vec!["rest_api:access".into()], + }]; + + let mut resources = vec![ResourceExposure { + resource_type: "salesforce_org".into(), + name: organization_id.clone().unwrap_or_else(|| instance.clone()), + permissions: vec!["limits:read".into()], + risk: severity_to_str(Severity::Medium).to_string(), + reason: "Salesforce org reachable with this access token".to_string(), + }]; + + for object_name in objects.iter().take(MAX_OBJECT_RESOURCES) { + resources.push(ResourceExposure { + resource_type: "sobject".into(), + name: object_name.clone(), + permissions: vec!["object:read_metadata".into()], + risk: severity_to_str(Severity::Low).to_string(), + reason: "Object metadata visible to this token".to_string(), + }); + } + if objects.len() > MAX_OBJECT_RESOURCES { + risk_notes.push(format!( + "Object resource list truncated to first {MAX_OBJECT_RESOURCES} entries ({} total objects visible)", + objects.len() + )); + } + + if !limits.is_object() { + risk_notes.push("Salesforce limits response was not a JSON object".to_string()); + } + + let severity = Severity::Medium; + Ok(AccessMapResult { + cloud: "salesforce".into(), + identity: AccessSummary { + id: identity_id, + access_type: "token".into(), + project: organization_id.clone(), + tenant: None, + account_id: organization_id.clone(), + }, + roles, + permissions, + resources, + severity, + recommendations: build_recommendations(severity), + risk_notes, + token_details: Some(AccessTokenDetails { + name: username, + username: None, + account_type: Some("access_token".into()), + company: None, + location: None, + email: None, + url: Some(base_url), + token_type: Some("access_token".into()), + created_at: None, + last_used_at: None, + expires_at: None, + user_id, + scopes: Vec::new(), + }), + provider_metadata: None, + fingerprint: None, + }) +} + +fn parse_salesforce_credentials(raw: &str) -> Result<(String, String)> { + if let Ok(json) = serde_json::from_str::(raw) { + let token = value_as_string(&json, &["token", "access_token", "salesforce_token"]); + let instance = + value_as_string(&json, &["instance", "instance_url", "instanceUrl", "domain", "host"]); + + if let (Some(token), Some(instance)) = (token, instance) { + let normalized = normalize_instance(&instance).ok_or_else(|| { + anyhow!("Credential JSON contains an invalid Salesforce instance") + })?; + return Ok((token, normalized)); + } + } + + let token = TOKEN_RE.captures(raw).and_then(|caps| caps.get(1).map(|m| m.as_str().to_string())); + let instance = + INSTANCE_RE.captures(raw).and_then(|caps| caps.get(1).map(|m| m.as_str().to_string())); + + if let (Some(token), Some(instance)) = (token, instance) { + return Ok((token, instance)); + } + + let lines: Vec<&str> = raw + .lines() + .map(str::trim) + .filter(|line| !line.is_empty() && !line.starts_with('#')) + .collect(); + if lines.len() >= 2 { + if let Some(instance) = normalize_instance(lines[1]) { + return Ok((lines[0].to_string(), instance)); + } + } + + Err(anyhow!( + "Salesforce credential format not recognized. Provide JSON with token + instance_url, or text containing both." + )) +} + +fn normalize_instance(raw: &str) -> Option { + let mut value = raw.trim().trim_matches('/').to_ascii_lowercase(); + if value.starts_with("https://") { + value = value.trim_start_matches("https://").to_string(); + } else if value.starts_with("http://") { + value = value.trim_start_matches("http://").to_string(); + } + if let Some(rest) = value.strip_suffix(".my.salesforce.com") { + value = rest.to_string(); + } + value = value.split('/').next().unwrap_or_default().to_string(); + + if value.len() < 5 || value.len() > 128 { + return None; + } + if !value.chars().all(|c| c.is_ascii_alphanumeric() || c == '-') { + return None; + } + Some(value) +} + +async fn fetch_limits(client: &Client, token: &str, base_url: &str) -> Result { + let resp = client + .get(format!("{base_url}/services/data/{SALESFORCE_API_VERSION}/limits")) + .header(header::AUTHORIZATION, format!("Bearer {token}")) + .header(header::ACCEPT, "application/json") + .send() + .await + .context("Salesforce access-map: failed to query limits endpoint")?; + + if resp.status() != StatusCode::OK { + return Err(anyhow!( + "Salesforce access-map: limits endpoint failed with HTTP {}", + resp.status() + )); + } + + resp.json().await.context("Salesforce access-map: invalid limits JSON") +} + +async fn fetch_user_info(client: &Client, token: &str, base_url: &str) -> Result { + let resp = client + .get(format!("{base_url}/services/oauth2/userinfo")) + .header(header::AUTHORIZATION, format!("Bearer {token}")) + .header(header::ACCEPT, "application/json") + .send() + .await + .context("Salesforce access-map: failed to query userinfo endpoint")?; + + if !resp.status().is_success() { + return Err(anyhow!( + "Salesforce access-map: userinfo lookup failed with HTTP {}", + resp.status() + )); + } + + resp.json().await.context("Salesforce access-map: invalid userinfo JSON") +} + +async fn list_sobjects(client: &Client, token: &str, base_url: &str) -> Result> { + let resp = client + .get(format!("{base_url}/services/data/{SALESFORCE_API_VERSION}/sobjects")) + .header(header::AUTHORIZATION, format!("Bearer {token}")) + .header(header::ACCEPT, "application/json") + .send() + .await + .context("Salesforce access-map: failed to query sobjects endpoint")?; + + if !resp.status().is_success() { + return Err(anyhow!( + "Salesforce access-map: sobjects listing failed with HTTP {}", + resp.status() + )); + } + + let body: Value = resp.json().await.context("Salesforce access-map: invalid sobjects JSON")?; + let mut names = Vec::new(); + if let Some(arr) = body.get("sobjects").and_then(|v| v.as_array()) { + for item in arr { + if let Some(name) = value_as_string(item, &["name", "label"]) { + if !name.is_empty() { + names.push(name); + } + } + } + } + names.sort(); + names.dedup(); + Ok(names) +} + +fn value_as_string(value: &Value, keys: &[&str]) -> Option { + for key in keys { + if let Some(s) = value.get(*key).and_then(|v| v.as_str()) { + let trimmed = s.trim(); + if !trimmed.is_empty() { + return Some(trimmed.to_string()); + } + } + } + None +} + +fn severity_to_str(severity: Severity) -> &'static str { + match severity { + Severity::Low => "low", + Severity::Medium => "medium", + Severity::High => "high", + Severity::Critical => "critical", + } +} diff --git a/src/cli/commands/access_map.rs b/src/cli/commands/access_map.rs index 9592822..18a0e73 100644 --- a/src/cli/commands/access_map.rs +++ b/src/cli/commands/access_map.rs @@ -5,7 +5,7 @@ use clap::{Args, ValueEnum}; /// Inspect a cloud credential and derive the effective identity and blast radius. #[derive(Args, Debug)] pub struct AccessMapArgs { - /// Cloud provider: aws | gcp | azure | github | gitlab | slack | postgres | mongodb | huggingface | gitea | bitbucket | buildkite | harness + /// Cloud provider: aws | gcp | azure | github | gitlab | slack | postgres | mongodb | huggingface | gitea | bitbucket | buildkite | harness | openai | anthropic | salesforce #[clap(value_parser, value_name = "PROVIDER")] pub provider: AccessMapProvider, @@ -53,4 +53,10 @@ pub enum AccessMapProvider { Buildkite, /// Harness Harness, + /// OpenAI + Openai, + /// Anthropic + Anthropic, + /// Salesforce + Salesforce, } diff --git a/src/scanner/validation.rs b/src/scanner/validation.rs index 7bd1272..3543cbe 100644 --- a/src/scanner/validation.rs +++ b/src/scanner/validation.rs @@ -152,6 +152,30 @@ impl AccessMapCollector { .or_insert_with(|| AccessMapRequest::Harness { token: token.to_string(), fingerprint }); } + pub fn record_openai(&self, token: &str, fingerprint: String) { + let key = xxhash_rust::xxh3::xxh3_64(format!("openai|{token}").as_bytes()); + self.inner + .entry(key) + .or_insert_with(|| AccessMapRequest::OpenAI { token: token.to_string(), fingerprint }); + } + + pub fn record_anthropic(&self, token: &str, fingerprint: String) { + let key = xxhash_rust::xxh3::xxh3_64(format!("anthropic|{token}").as_bytes()); + self.inner.entry(key).or_insert_with(|| AccessMapRequest::Anthropic { + token: token.to_string(), + fingerprint, + }); + } + + pub fn record_salesforce(&self, token: &str, instance: &str, fingerprint: String) { + let key = xxhash_rust::xxh3::xxh3_64(format!("salesforce|{instance}|{token}").as_bytes()); + self.inner.entry(key).or_insert_with(|| AccessMapRequest::Salesforce { + token: token.to_string(), + instance: instance.to_string(), + fingerprint, + }); + } + pub fn into_requests(self) -> Vec { self.inner.iter().map(|entry| entry.value().clone()).collect() } @@ -796,6 +820,36 @@ fn maybe_record_access_map(om: &OwnedBlobMatch, collector: Option<&AccessMapColl } } } + if om.rule.id().starts_with("kingfisher.openai.") { + if let Some((_, value, ..)) = captures.iter().find(|(name, ..)| name == "TOKEN") { + if !value.is_empty() { + collector.record_openai(value, fp.clone()); + } + } + } + if om.rule.id().starts_with("kingfisher.anthropic.") { + if let Some((_, value, ..)) = captures.iter().find(|(name, ..)| name == "TOKEN") { + if !value.is_empty() { + collector.record_anthropic(value, fp.clone()); + } + } + } + if om.rule.id().starts_with("kingfisher.salesforce.") { + let token = captures + .iter() + .find(|(name, ..)| name == "TOKEN") + .map(|(_, value, ..)| value.clone()) + .unwrap_or_default(); + let instance = captures + .iter() + .find(|(name, ..)| name == "INSTANCE") + .map(|(_, value, ..)| value.clone()) + .unwrap_or_default(); + + if !token.is_empty() && !instance.is_empty() { + collector.record_salesforce(&token, &instance, fp.clone()); + } + } } } }