diff --git a/CHANGELOG.md b/CHANGELOG.md index d217f01..7373afe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,8 +9,8 @@ All notable changes to this project will be documented in this file. - Access Map: added OpenAI provider. Supports standalone `access-map openai` and automatic mapping for validated `kingfisher.openai.*` findings. Enumerates organizations (from `/v1/me`), projects, and API key permission scopes by probing endpoints for restricted key detection. - Access Map: added Anthropic provider. Supports standalone `access-map anthropic` and automatic mapping for validated `kingfisher.anthropic.*` findings. - Access Map: added Salesforce provider. Supports standalone `access-map salesforce` (token + instance) and automatic mapping for validated `kingfisher.salesforce.*` findings. -- Access Map CLI: added providers `buildkite`, `harness`, `openai`, `anthropic`, `salesforce`. -- Reports: omit `validate`/`revoke` command hints when required template vars are missing (prevents suggesting unrunnable commands, e.g. Harness `ACCOUNTIDENTIFIER`). +- Added Weights & Biases support: new `kingfisher.wandb.2` rule for `wandb_v1_...` keys (legacy `kingfisher.wandb.1` retained), plus Access Map provider/CLI support (`weightsandbiases`, alias `wandb`). +- Reports: always emit `validate`/`revoke` command hints when supported by a rule (no suppression for missing template vars). - Access Map GCP: added resource enumeration for Cloud KMS key rings, Cloud Functions, Firestore databases, Cloud Spanner instances, and project service accounts. - Access Map GCP: populated `token_details` with service account metadata (display name, unique ID, disabled status). - Access Map GCP: fixed BigQuery and Secret Manager risk assessment to detect write permissions and `secretmanager.versions.access`. 
diff --git a/crates/kingfisher-rules/data/rules/weightsandbiases.yml b/crates/kingfisher-rules/data/rules/weightsandbiases.yml index b60f491..82637c6 100644 --- a/crates/kingfisher-rules/data/rules/weightsandbiases.yml +++ b/crates/kingfisher-rules/data/rules/weightsandbiases.yml @@ -37,3 +37,41 @@ rules: - '"username"' references: - https://docs.wandb.ai/ref/cli/wandb-login + - https://docs.wandb.ai/support/rotate_revoke_access + + - name: Weights and Biases API Key (v1) + id: kingfisher.wandb.2 + pattern: | + (?x) + \b + ( + wandb_v1_[A-Za-z0-9_]{77} + ) + \b + pattern_requirements: + min_digits: 2 + confidence: medium + min_entropy: 3.5 + examples: + - "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" + - "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" + validation: + type: Http + content: + request: + method: POST + url: "https://api.wandb.ai/graphql" + headers: + Authorization: "Basic {{ 'api:' | append: TOKEN | b64enc }}" + Content-Type: "application/json" + body: | + {"query":"query { viewer { email username } }"} + response_matcher: + - report_response: true + - type: JsonValid + - type: WordMatch + words: + - '"username"' + references: + - https://docs.wandb.ai/ref/cli/wandb-login + - https://docs.wandb.ai/support/rotate_revoke_access diff --git a/docs/ACCESS_MAP.md b/docs/ACCESS_MAP.md index cb06c5f..0a105ae 100644 --- a/docs/ACCESS_MAP.md +++ b/docs/ACCESS_MAP.md @@ -316,8 +316,31 @@ kingfisher access-map salesforce ./salesforce.json --json-out salesforce.access- - Access map currently targets `https://<instance>.my.salesforce.com` and API version `v60.0`. +### Weights & Biases (`weightsandbiases` / `wandb`) + +- **Credential**: a single Weights & Biases API key string (read from a file for `kingfisher access-map weightsandbiases <FILE>`). 
+- **Token types supported**: + - Legacy 40-character hex API keys + - New v1 keys (`wandb_v1_...`) + +Kingfisher performs read-only identity resolution via: + +- `POST https://api.wandb.ai/graphql` with a GraphQL `viewer` query. + +#### Standalone example (Weights & Biases) + +```bash +printf '%s' 'wandb_v1_example...' > ./wandb.token +kingfisher access-map weightsandbiases ./wandb.token --json-out wandb.access-map.json +``` + +#### Notes (Weights & Biases) + +- Access map uses `https://api.wandb.ai/graphql` as the API endpoint. +- W&B key introspection does not currently expose fine-grained scopes in this workflow, so risk is reported conservatively. + ## Notes on access-map generation during `scan --access-map` - Access-map entries are only recorded for **validated** findings. - Some providers require extra context that Kingfisher infers from the finding context or validation response (for example, Azure DevOps organization name). -- Validated Hugging Face, Gitea, Bitbucket, Buildkite, Harness, OpenAI, Anthropic, and Salesforce credentials discovered during scans with `--access-map` are automatically collected and mapped, matching the existing behavior for other platforms. +- Validated Hugging Face, Gitea, Bitbucket, Buildkite, Harness, OpenAI, Anthropic, Salesforce, and Weights & Biases credentials discovered during scans with `--access-map` are automatically collected and mapped, matching the existing behavior for other platforms. diff --git a/src/access_map.rs b/src/access_map.rs index a9f8621..bfb0074 100644 --- a/src/access_map.rs +++ b/src/access_map.rs @@ -22,6 +22,7 @@ pub(crate) mod postgres; mod report; mod salesforce; mod slack; +mod weightsandbiases; /// Trait for access map providers that map a single token to an access profile. 
/// @@ -58,6 +59,7 @@ pub async fn run(args: AccessMapArgs) -> Result<()> { AccessMapProvider::Openai => openai::map_access(&args).await?, AccessMapProvider::Anthropic => anthropic::map_access(&args).await?, AccessMapProvider::Salesforce => salesforce::map_access(&args).await?, + AccessMapProvider::Weightsandbiases => weightsandbiases::map_access(&args).await?, }; let json = serde_json::to_string_pretty(&result)?; @@ -116,6 +118,8 @@ pub enum AccessMapRequest { Anthropic { token: String, fingerprint: String }, /// A Salesforce access token plus instance domain. Salesforce { token: String, instance: String, fingerprint: String }, + /// A Weights & Biases API token. + WeightsAndBiases { token: String, fingerprint: String }, } /// Structured output describing the resolved identity and its risk profile. @@ -328,6 +332,9 @@ pub async fn map_requests(requests: Vec) -> Vec { + (map_token(&WeightsAndBiasesMapper, &token).await, fingerprint) + } }; mapped.fingerprint = Some(fp); @@ -485,6 +492,19 @@ impl TokenAccessMapper for AnthropicMapper { } } +/// Weights & Biases access mapper. 
+pub struct WeightsAndBiasesMapper; + +impl TokenAccessMapper for WeightsAndBiasesMapper { + fn cloud_name(&self) -> &'static str { + "weightsandbiases" + } + + async fn map_access_from_token(&self, token: &str) -> Result<AccessMapResult> { + weightsandbiases::map_access_from_token(token).await + } +} + // ------------------------------------------------------------------------------------------------- // Helper functions // ------------------------------------------------------------------------------------------------- diff --git a/src/access_map/weightsandbiases.rs b/src/access_map/weightsandbiases.rs new file mode 100644 index 0000000..95c2afe --- /dev/null +++ b/src/access_map/weightsandbiases.rs @@ -0,0 +1,192 @@ +use anyhow::{anyhow, Context, Result}; +use reqwest::{header, Client}; +use serde::Deserialize; +use serde_json::json; + +use crate::{cli::commands::access_map::AccessMapArgs, validation::GLOBAL_USER_AGENT}; + +use super::{ + build_recommendations, AccessMapResult, AccessSummary, AccessTokenDetails, PermissionSummary, + ResourceExposure, RoleBinding, Severity, +}; + +const WANDB_API: &str = "https://api.wandb.ai/graphql"; + +#[derive(Debug, Deserialize, Default, Clone)] +struct GraphQlError { + #[serde(default)] + message: Option<String>, +} + +#[derive(Debug, Deserialize, Default, Clone)] +struct GraphQlResponse { + #[serde(default)] + data: Option<ViewerData>, + #[serde(default)] + errors: Vec<GraphQlError>, +} + +#[derive(Debug, Deserialize, Default, Clone)] +struct ViewerData { + #[serde(default)] + viewer: Option<Viewer>, +} + +#[derive(Debug, Deserialize, Default, Clone)] +struct Viewer { + #[serde(default)] + id: Option<String>, + #[serde(default)] + username: Option<String>, + #[serde(default)] + email: Option<String>, + #[serde(default)] + name: Option<String>, +} + +pub async fn map_access(args: &AccessMapArgs) -> Result<AccessMapResult> { + let token = if let Some(path) = args.credential_path.as_deref() { + let raw = std::fs::read_to_string(path).with_context(|| { + format!("Failed to read Weights & Biases token from {}", path.display()) + })?; 
+ raw.trim().to_string() + } else { + return Err(anyhow!( + "Weights & Biases access-map requires a validated token from scan results" + )); + }; + + map_access_from_token(&token).await +} + +pub async fn map_access_from_token(token: &str) -> Result<AccessMapResult> { + let client = Client::builder() + .user_agent(GLOBAL_USER_AGENT.as_str()) + .build() + .context("Failed to build Weights & Biases HTTP client")?; + + let viewer = fetch_viewer(&client, token).await?; + let token_kind = detect_token_type(token).to_string(); + + let identity_id = viewer + .email + .clone() + .or_else(|| viewer.username.clone()) + .or_else(|| viewer.name.clone()) + .or_else(|| viewer.id.clone()) + .unwrap_or_else(|| "wandb_user".to_string()); + + let mut permissions = PermissionSummary::default(); + permissions.risky.push("workspace:api_access".to_string()); + permissions.read_only.push("viewer:read".to_string()); + + let mut roles = Vec::new(); + roles.push(RoleBinding { + name: format!("token_type:{token_kind}"), + source: "weightsandbiases".into(), + permissions: vec![format!("token:{token_kind}")], + }); + + let mut resources = Vec::new(); + resources.push(ResourceExposure { + resource_type: "account".into(), + name: identity_id.clone(), + permissions: vec!["viewer:read".to_string(), "workspace:api_access".to_string()], + risk: severity_to_str(Severity::Medium).to_string(), + reason: "W&B account reachable with this API key".to_string(), + }); + + let risk_notes = vec![ + "W&B does not expose fine-grained token scopes in this introspection path".to_string(), + ]; + let severity = Severity::Medium; + + Ok(AccessMapResult { + cloud: "weightsandbiases".into(), + identity: AccessSummary { + id: identity_id, + access_type: "token".into(), + project: None, + tenant: None, + account_id: viewer.id.clone(), + }, + roles, + permissions, + resources, + severity, + recommendations: build_recommendations(severity), + risk_notes, + token_details: Some(AccessTokenDetails { + name: viewer.name, + username: 
viewer.username, + account_type: Some("api_key".into()), + company: None, + location: None, + email: viewer.email, + url: Some("https://wandb.ai/settings".into()), + token_type: Some(token_kind), + created_at: None, + last_used_at: None, + expires_at: None, + user_id: viewer.id, + scopes: Vec::new(), + }), + provider_metadata: None, + fingerprint: None, + }) +} + +async fn fetch_viewer(client: &Client, token: &str) -> Result<Viewer> { + let resp = client + .post(WANDB_API) + .basic_auth("api", Some(token)) + .header(header::CONTENT_TYPE, "application/json") + .header(header::ACCEPT, "application/json") + .json(&json!({ + "query": "query { viewer { id username email name } }" + })) + .send() + .await + .context("Weights & Biases access-map: failed to query viewer")?; + + if !resp.status().is_success() { + return Err(anyhow!( + "Weights & Biases access-map: viewer lookup failed with HTTP {}", + resp.status() + )); + } + + let body: GraphQlResponse = + resp.json().await.context("Weights & Biases access-map: invalid GraphQL response JSON")?; + + if !body.errors.is_empty() { + let msg = + body.errors.iter().filter_map(|e| e.message.as_deref()).collect::<Vec<_>>().join("; "); + if body.data.as_ref().and_then(|d| d.viewer.as_ref()).is_none() { + return Err(anyhow!("Weights & Biases access-map: GraphQL returned errors: {msg}")); + } + } + + body.data + .and_then(|d| d.viewer) + .ok_or_else(|| anyhow!("Weights & Biases access-map: viewer data not present")) +} + +fn detect_token_type(token: &str) -> &'static str { + if token.starts_with("wandb_v1_") { + "wandb_v1" + } else if token.len() == 40 && token.chars().all(|c| c.is_ascii_hexdigit()) { + "legacy_api_key" + } else { + "api_key" + } +} + +fn severity_to_str(severity: Severity) -> &'static str { + match severity { + Severity::Low => "low", + Severity::Medium => "medium", + Severity::High => "high", + Severity::Critical => "critical", + } +} diff --git a/src/cli/commands/access_map.rs b/src/cli/commands/access_map.rs index 
18a0e73..e95d522 100644 --- a/src/cli/commands/access_map.rs +++ b/src/cli/commands/access_map.rs @@ -5,7 +5,7 @@ use clap::{Args, ValueEnum}; /// Inspect a cloud credential and derive the effective identity and blast radius. #[derive(Args, Debug)] pub struct AccessMapArgs { - /// Cloud provider: aws | gcp | azure | github | gitlab | slack | postgres | mongodb | huggingface | gitea | bitbucket | buildkite | harness | openai | anthropic | salesforce + /// Cloud provider: aws | gcp | azure | github | gitlab | slack | postgres | mongodb | huggingface | gitea | bitbucket | buildkite | harness | openai | anthropic | salesforce | weightsandbiases #[clap(value_parser, value_name = "PROVIDER")] pub provider: AccessMapProvider, @@ -59,4 +59,7 @@ pub enum AccessMapProvider { Anthropic, /// Salesforce Salesforce, + /// Weights & Biases + #[clap(alias = "wandb")] + Weightsandbiases, } diff --git a/src/reporter.rs b/src/reporter.rs index d8e2b7f..9f4219c 100644 --- a/src/reporter.rs +++ b/src/reporter.rs @@ -300,25 +300,6 @@ fn build_revoke_command( ) -> Option<String> { let required_vars = required_vars_for_revocation(revocation); - // Only generate a revoke command when the report can produce a *runnable* command line. - // If a revocation template references variables we can't populate from the finding data, - // omit the revoke command entirely (instead of suggesting a command that will fail at runtime). 
- let mut provided_vars: BTreeSet<String> = BTreeSet::new(); - provided_vars.insert("TOKEN".to_string()); - for (k, v) in dependent_captures { - if !v.trim().is_empty() { - provided_vars.insert(k.to_ascii_uppercase()); - } - } - if let Some(akid) = akid_from_captures.or(akid_from_validation_body) { - if !akid.trim().is_empty() { - provided_vars.insert("AKID".to_string()); - } - } - if required_vars.iter().any(|req| !provided_vars.contains(req)) { - return None; - } - let var_args = build_var_args( dependent_captures, akid_from_captures, @@ -386,23 +367,6 @@ fn build_validate_command( let required_vars = required_vars_for_validation(validation); - // Same as revoke: only emit a validate command if it's runnable from the report output. - let mut provided_vars: BTreeSet<String> = BTreeSet::new(); - provided_vars.insert("TOKEN".to_string()); - for (k, v) in dependent_captures { - if !v.trim().is_empty() { - provided_vars.insert(k.to_ascii_uppercase()); - } - } - if let Some(akid) = akid_from_captures.or(akid_from_validation_body) { - if !akid.trim().is_empty() { - provided_vars.insert("AKID".to_string()); - } - } - if required_vars.iter().any(|req| !provided_vars.contains(req)) { - return None; - } - let var_args = build_var_args( dependent_captures, akid_from_captures, @@ -1693,7 +1657,7 @@ mod tests { } #[test] - fn build_revoke_command_is_omitted_when_required_vars_missing() { + fn build_revoke_command_is_emitted_when_required_vars_missing() { // Revocation template requires ACCOUNTIDENTIFIER, but the finding doesn't have it. 
let revocation = Revocation::Http(crate::rules::HttpValidation { request: crate::rules::HttpRequest { @@ -1718,7 +1682,9 @@ mod tests { None, ); - assert!(cmd.is_none(), "command should be omitted when vars missing, got: {cmd:?}"); + let cmd = cmd.expect("command should still be emitted when vars are missing"); + assert!(cmd.contains("kingfisher revoke --rule kingfisher.example.1")); + assert!(cmd.contains("'secret'")); } fn sample_scan_args() -> ScanArgs { diff --git a/src/scanner/validation.rs b/src/scanner/validation.rs index 3543cbe..2b99084 100644 --- a/src/scanner/validation.rs +++ b/src/scanner/validation.rs @@ -176,6 +176,14 @@ impl AccessMapCollector { }); } + pub fn record_weightsandbiases(&self, token: &str, fingerprint: String) { + let key = xxhash_rust::xxh3::xxh3_64(format!("weightsandbiases|{token}").as_bytes()); + self.inner.entry(key).or_insert_with(|| AccessMapRequest::WeightsAndBiases { + token: token.to_string(), + fingerprint, + }); + } + pub fn into_requests(self) -> Vec<AccessMapRequest> { self.inner.iter().map(|entry| entry.value().clone()).collect() } @@ -850,6 +858,13 @@ fn maybe_record_access_map(om: &OwnedBlobMatch, collector: Option<&AccessMapColl collector.record_salesforce(&token, &instance, fp.clone()); } } + if om.rule.id().starts_with("kingfisher.wandb.") { + if let Some((_, value, ..)) = captures.iter().find(|(name, ..)| name == "TOKEN") { + if !value.is_empty() { + collector.record_weightsandbiases(value, fp.clone()); + } + } + } } } }