Added first-class Azure Repos support, including CLI commands, enumeration, and documentation updates

This commit is contained in:
Mick Grove 2025-10-04 23:12:28 -07:00
commit 69dc42f5bb
26 changed files with 1003 additions and 17 deletions

View file

@ -2,7 +2,8 @@
All notable changes to this project will be documented in this file.
## [Unreleased]
## [v1.55.0]
- Added first-class Azure Repos support, including CLI commands, enumeration, and documentation updates
- Improved performance of tree-sitter parsing
- Updated Windows build script to ensure static binary is produced

View file

@ -10,7 +10,7 @@ publish = false
[package]
name = "kingfisher"
version = "1.54.0"
version = "1.55.0"
description = "MongoDB's blazingly fast secret scanning and validation tool"
edition.workspace = true
rust-version.workspace = true

View file

@ -574,6 +574,47 @@ kingfisher gitlab repos list --group my-group --include-subgroups
kingfisher gitlab repos list --group my-group --gitlab-exclude my-group/**/legacy-*
```
## Scanning Azure Repos
### Scan Azure DevOps organization or collection (requires `KF_AZURE_TOKEN` or `KF_AZURE_PAT`)
```bash
kingfisher scan --azure-organization my-org
# Azure DevOps Server example
KF_AZURE_PAT="pat" kingfisher scan --azure-organization DefaultCollection --azure-base-url https://ado.internal.example/tfs/
```
### Scan specific Azure DevOps projects
Projects are specified as `ORGANIZATION/PROJECT`. Repeat the flag for multiple projects.
```bash
kingfisher scan --azure-project my-org/payments --azure-project my-org/core-platform
```
### Skip specific Azure repositories during enumeration
Repeat `--azure-exclude` to ignore repositories when scanning organizations or projects.
Use identifiers like `ORGANIZATION/PROJECT/REPOSITORY` or gitignore-style patterns such as
`my-org/*/archive-*`.
```bash
kingfisher scan --azure-organization my-org \
--azure-exclude my-org/payments/legacy-service \
--azure-exclude 'my-org/**/archive-*'
```
### List Azure repositories
```bash
kingfisher azure repos list --organization my-org
# list repositories for specific projects
kingfisher azure repos list --project my-org/app --project my-org/api
# skip specific repositories while listing (supports glob patterns)
kingfisher azure repos list --organization my-org --azure-exclude 'my-org/**/experimental-*'
```
## Scanning Gitea
### Scan Gitea organization (requires `KF_GITEA_TOKEN`)
@ -769,6 +810,8 @@ KF_SLACK_TOKEN="xoxp-1234..." kingfisher scan \
| `KF_GITLAB_TOKEN` | GitLab Personal Access Token |
| `KF_GITEA_TOKEN` | Gitea Personal Access Token |
| `KF_GITEA_USERNAME` | Username for private Gitea clones (used with `KF_GITEA_TOKEN`) |
| `KF_AZURE_TOKEN` / `KF_AZURE_PAT` | Azure DevOps Personal Access Token |
| `KF_AZURE_USERNAME` | Username to use with Azure DevOps PATs (defaults to `pat` when unset) |
| `KF_BITBUCKET_USERNAME` | Bitbucket username for basic authentication |
| `KF_BITBUCKET_APP_PASSWORD` / `KF_BITBUCKET_TOKEN` | Bitbucket app password or server token |
| `KF_BITBUCKET_OAUTH_TOKEN` | Bitbucket OAuth or PAT token |

View file

@ -1,13 +1,27 @@
rules:
- name: Azure DevOps Personal Access Token
- name: Azure DevOps Organization
id: kingfisher.azure.devops.1
pattern: |
(?xi)
\b
azure
(?:.|[\n\r]){0,32}?
dev\.azure\.com/
(
[a-z0-9]{75}AZDO[a-z0-9]{5}
[a-z0-9][a-z0-9-]{0,61}[a-z0-9]
)
confidence: medium
min_entropy: 2.5
visible: false
examples:
- https://dev.azure.com/contoso
- dev.azure.com/somebody123
- name: Azure DevOps Personal Access Token
id: kingfisher.azure.devops.2
pattern: |
(?xi)
\b
(
[a-z0-9]{75,76}AZDO[a-z0-9]{4,5}
)
\b
min_entropy: 3
@ -17,16 +31,20 @@ rules:
references:
- https://learn.microsoft.com/en-us/rest/api/azure/devops/profile/profiles/get?view=azure-devops-rest-7.1&tabs=HTTP
- https://learn.microsoft.com/en-us/azure/devops/release-notes/2024/general/sprint-241-update
depends_on_rule:
- rule_id: kingfisher.azure.devops.1
variable: AZURE_DEVOPS_ORG
validation:
type: Http
content:
request:
headers:
Authorization: 'Basic {{ ":" | append: TOKEN | b64enc }}'
Accept: application/json
method: GET
url: https://app.vssps.visualstudio.com/_apis/profile/profiles/me?api-version=7.1-preview.1
url: "https://dev.azure.com/{{ AZURE_DEVOPS_ORG | split: '/' | last }}/_apis/projects?api-version=7.1-preview.1"
response_matcher:
- report_response: true
- type: StatusMatch
status:
- 200
- 200

576
src/azure.rs Normal file
View file

@ -0,0 +1,576 @@
use std::{
collections::{HashMap, HashSet},
env,
path::{Path, PathBuf},
sync::{Arc, Mutex},
time::Duration,
};
// NOTE: We continue to issue the small number of Azure DevOps Git REST calls we need
// directly through `reqwest` instead of depending on the `azure_devops_rust_api`
// crate. The SDK does not yet expose stable coverage for wiki repositories or the
// preview API surfaces we rely on, while the raw requests keep the binary lean and
// let us opt into newer API versions as Microsoft rolls them out.
use anyhow::{anyhow, Context, Result};
use globset::{Glob, GlobSet, GlobSetBuilder};
use indicatif::{ProgressBar, ProgressStyle};
use serde::Deserialize;
use tracing::warn;
use url::{form_urlencoded, Url};
use crate::{findings_store, git_url::GitUrl};
const API_VERSION: &str = "7.1-preview.1";
/// Fork-status filter applied to repositories returned by Azure DevOps.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RepoType {
    /// Keep every repository regardless of fork status.
    All,
    /// Keep only repositories that are not forks.
    Source,
    /// Keep only repositories that are forks.
    Fork,
}

impl RepoType {
    /// Returns `true` when a repository with the given fork status passes this filter.
    fn allows(self, is_fork: bool) -> bool {
        match (self, is_fork) {
            (RepoType::All, _) => true,
            (RepoType::Source, fork) => !fork,
            (RepoType::Fork, fork) => fork,
        }
    }
}
/// Describes which Azure DevOps repositories should be enumerated.
#[derive(Debug, Clone)]
pub struct RepoSpecifiers {
    /// Organizations (or collections) whose repositories are requested.
    pub organization: Vec<String>,
    /// Project selectors; parsed elsewhere in this module as `ORGANIZATION/PROJECT`.
    pub project: Vec<String>,
    /// When set, project filtering is bypassed during enumeration.
    pub all_projects: bool,
    /// Fork-status filter applied to every repository.
    pub repo_filter: RepoType,
    /// Raw exclusion entries (identifiers, URLs, or glob patterns).
    pub exclude_repos: Vec<String>,
}

impl RepoSpecifiers {
    /// Returns `true` when neither an organization nor a project was specified.
    pub fn is_empty(&self) -> bool {
        let has_organizations = !self.organization.is_empty();
        let has_projects = !self.project.is_empty();
        !(has_organizations || has_projects)
    }
}
/// Set of repository exclusions: literal identifiers plus optional compiled glob patterns.
#[derive(Debug)]
struct ExcludeMatcher {
    /// Normalized identifiers that must match exactly.
    exact: HashSet<String>,
    /// Compiled glob patterns; `None` when no pattern was supplied or the set failed to build.
    globs: Option<GlobSet>,
}

impl ExcludeMatcher {
    /// Returns `true` when `name` equals an exact entry or matches any glob pattern.
    fn matches(&self, name: &str) -> bool {
        self.exact.contains(name) || self.globs.as_ref().map_or(false, |set| set.is_match(name))
    }

    /// Returns `true` when the matcher holds neither exact entries nor glob patterns.
    fn is_empty(&self) -> bool {
        self.globs.is_none() && self.exact.is_empty()
    }
}
/// Returns `true` when `pattern` contains a glob metacharacter (`*`, `?`, or `[`).
fn looks_like_glob(pattern: &str) -> bool {
    pattern.chars().any(|c| matches!(c, '*' | '?' | '['))
}
/// Percent-encodes `segment` so it can be embedded as a single URL path segment.
///
/// `form_urlencoded::byte_serialize` follows the `application/x-www-form-urlencoded` rules
/// and therefore writes a space as `+`. Inside a URL *path* a `+` is a literal plus sign,
/// so the space is rewritten to `%20`. This is unambiguous because the serializer emits a
/// literal `+` from the input as `%2B`: any `+` in its output came from a space.
fn encode_segment(segment: &str) -> String {
    form_urlencoded::byte_serialize(segment.as_bytes()).collect::<String>().replace('+', "%20")
}
/// Builds the canonical lowercase `owner…/project/repo` identifier from path parts.
///
/// The last element is the repository, the one before it the project, and everything
/// earlier the owner (organization / collection) segments. Each part is stripped of
/// surrounding whitespace and slashes; empty owner segments are dropped and any trailing
/// `.git` is removed from the repository name.
///
/// Returns `None` when fewer than three parts are supplied or when the project or
/// repository part is empty after cleaning.
fn normalize_repo_identifier(parts: &[String]) -> Option<String> {
    fn clean(segment: &str) -> &str {
        segment.trim().trim_matches('/')
    }

    let (repo_raw, rest) = parts.split_last()?;
    let (project_raw, owners) = rest.split_last()?;
    if owners.is_empty() {
        // Fewer than three parts in total.
        return None;
    }

    let project = clean(project_raw);
    let repo = clean(repo_raw);
    if project.is_empty() || repo.is_empty() {
        return None;
    }

    let mut pieces: Vec<String> = Vec::with_capacity(parts.len());
    for owner in owners {
        let owner = clean(owner);
        if !owner.is_empty() {
            pieces.push(owner.to_lowercase());
        }
    }
    pieces.push(project.to_lowercase());
    pieces.push(repo.trim_end_matches(".git").to_lowercase());

    Some(pieces.join("/"))
}
fn parse_repo_identifier_from_path(path: &str) -> Option<String> {
let segments: Vec<String> = path
.trim_matches('/')
.split('/')
.filter(|s| !s.is_empty())
.map(|s| s.to_string())
.collect();
if segments.len() < 3 {
return None;
}
// Case 1: Azure URL-style with "_git" marker: .../<project>/_git/<repo>
if segments[segments.len().saturating_sub(2)] == "_git" {
let mut trimmed = segments.clone();
let repo = trimmed.pop()?; // <repo>
trimmed.pop()?; // drop "_git"
trimmed.push(repo); // .../<project>/<repo>
return normalize_repo_identifier(&trimmed);
}
// Case 2: Simple path (and glob-friendly): .../<project>/<repo>
// Accept as-is so things like "org/*/repo" work.
normalize_repo_identifier(&segments)
}
/// Derives a normalized repository identifier from an absolute Azure Repos URL.
///
/// The URL path must end in `<project>/_git/<repo>`; everything before the project is
/// treated as owner segments. Returns `None` when the string is not a parseable URL, the
/// URL has no path segments, the path has fewer than three non-empty segments, or the
/// `_git` marker is missing.
fn parse_repo_identifier_from_url(remote_url: &str) -> Option<String> {
    let parsed = Url::parse(remote_url).ok()?;
    let mut parts: Vec<String> =
        parsed.path_segments()?.filter(|part| !part.is_empty()).map(String::from).collect();
    if parts.len() < 3 {
        return None;
    }

    let marker_index = parts.len() - 2;
    if parts[marker_index] != "_git" {
        return None;
    }
    // Drop the marker so the remaining parts read owner…/project/repo.
    parts.remove(marker_index);

    normalize_repo_identifier(&parts)
}
/// Converts one user-supplied exclusion entry into a normalized identifier.
///
/// Interpretations are tried in this order, and the first that succeeds wins:
/// 1. the entry as an absolute URL containing a `_git` marker;
/// 2. the text after the last `:` as a path;
/// 3. the whole entry as a path.
///
/// Returns `None` for blank input or when no interpretation yields an identifier.
fn parse_excluded_repo(raw: &str) -> Option<String> {
    let candidate = raw.trim();
    if candidate.is_empty() {
        return None;
    }

    parse_repo_identifier_from_url(candidate)
        .or_else(|| {
            candidate
                .rfind(':')
                .and_then(|colon| parse_repo_identifier_from_path(&candidate[colon + 1..]))
        })
        .or_else(|| parse_repo_identifier_from_path(candidate))
}
/// Compiles the raw exclusion entries into an [`ExcludeMatcher`].
///
/// Each entry is normalized with `parse_excluded_repo`. Entries containing glob
/// metacharacters are compiled as glob patterns; the rest become exact identifiers.
/// Entries that cannot be normalized are logged and dropped. A pattern that fails to
/// compile is logged and then kept as an exact identifier. If the final glob set cannot
/// be built, a warning is logged and only the exact identifiers remain active.
fn build_exclude_matcher(exclude_repos: &[String]) -> ExcludeMatcher {
    let mut exact = HashSet::new();
    let mut builder = GlobSetBuilder::new();
    let mut glob_count = 0usize;

    for raw in exclude_repos {
        let name = match parse_excluded_repo(raw) {
            Some(name) => name,
            None => {
                warn!("Ignoring invalid Azure exclusion '{raw}' (expected organization/project/repository)");
                continue;
            }
        };

        if !looks_like_glob(&name) {
            exact.insert(name);
            continue;
        }

        match Glob::new(&name) {
            Ok(glob) => {
                builder.add(glob);
                glob_count += 1;
            }
            Err(err) => {
                warn!("Ignoring invalid Azure exclusion pattern '{raw}': {err}");
                exact.insert(name);
            }
        }
    }

    // Only build a glob set when at least one pattern compiled.
    let globs = if glob_count == 0 {
        None
    } else {
        match builder.build() {
            Ok(set) => Some(set),
            Err(err) => {
                warn!("Failed to build Azure exclusion patterns: {err}");
                None
            }
        }
    };

    ExcludeMatcher { exact, globs }
}
/// Returns `true` when `repo_url` resolves to an identifier that `excludes` matches.
///
/// URLs that cannot be turned into an identifier are never excluded, and an empty
/// matcher short-circuits to `false` without parsing the URL.
fn should_exclude_repo(repo_url: &str, excludes: &ExcludeMatcher) -> bool {
    !excludes.is_empty()
        && parse_repo_identifier_from_url(repo_url).map_or(false, |name| excludes.matches(&name))
}
/// One repository entry deserialized from the Azure DevOps repository list response.
#[derive(Debug, Deserialize, Default)]
struct AzureRepository {
/// Value of the `remoteUrl` JSON field, when present.
#[serde(rename = "remoteUrl")]
remote_url: Option<String>,
/// Value of the `webUrl` JSON field; used as a fallback when `remoteUrl` is absent.
#[serde(rename = "webUrl")]
web_url: Option<String>,
/// Value of the `isFork` JSON field; defaults to `false` when the field is missing.
#[serde(rename = "isFork", default)]
is_fork: bool,
/// Owning project reference; defaults to an empty reference when the field is missing.
#[serde(default)]
project: AzureProjectRef,
}
/// Project reference embedded in an [`AzureRepository`]; only the name is read.
#[derive(Debug, Deserialize, Default)]
struct AzureProjectRef {
/// Project name, compared case-insensitively against the requested project filters.
name: Option<String>,
}
/// Envelope of a list response: the items are carried in the `value` array.
#[derive(Debug, Deserialize, Default)]
struct AzureListResponse<T> {
/// The listed items.
value: Vec<T>,
}
/// Credentials for Azure DevOps requests, applied as HTTP basic authentication.
struct AzureAuth {
/// Optional basic-auth username; `"pat"` is substituted when this is `None`.
username: Option<String>,
/// Optional access token; when `None`, requests are sent without credentials.
token: Option<String>,
}
impl AzureAuth {
    /// Reads credentials from the process environment.
    ///
    /// The token is taken from `KF_AZURE_TOKEN`, falling back to `KF_AZURE_PAT`. A variable
    /// that is set but blank is treated as unset, so an empty `KF_AZURE_TOKEN` can neither
    /// shadow a usable `KF_AZURE_PAT` nor cause requests to be sent with an empty password.
    /// The optional username is taken from `KF_AZURE_USERNAME` unchanged.
    fn from_environment() -> Self {
        let token = ["KF_AZURE_TOKEN", "KF_AZURE_PAT"]
            .iter()
            .filter_map(|name| env::var(*name).ok())
            .find(|value| !value.trim().is_empty());
        let username = env::var("KF_AZURE_USERNAME").ok();
        Self { username, token }
    }

    /// Attaches basic-auth credentials to `request` when a token is available.
    ///
    /// The username defaults to `"pat"` when none was configured. Without a token the
    /// request is returned untouched.
    fn apply(&self, request: reqwest::RequestBuilder) -> reqwest::RequestBuilder {
        match &self.token {
            Some(token) => {
                let username = self.username.as_deref().unwrap_or("pat");
                request.basic_auth(username, Some(token))
            }
            None => request,
        }
    }
}
/// Returns `raw` re-serialized with any embedded username and password removed.
///
/// Returns `None` when `raw` is not a parseable URL or when the credentials cannot be
/// cleared from it.
fn sanitize_remote_url(raw: &str) -> Option<String> {
    let mut parsed = Url::parse(raw).ok()?;

    let has_username = !parsed.username().is_empty();
    if has_username {
        parsed.set_username("").ok()?;
    }

    let has_password = parsed.password().is_some();
    if has_password {
        parsed.set_password(None).ok()?;
    }

    Some(parsed.as_str().to_owned())
}
async fn fetch_repositories_for_org(
client: &reqwest::Client,
base_url: &Url,
organization: &str,
auth: &AzureAuth,
) -> Result<Vec<AzureRepository>> {
let base = base_url.as_str().trim_end_matches('/');
let encoded_org = encode_segment(organization);
let url = format!("{base}/{encoded_org}/_apis/git/repositories?api-version={API_VERSION}");
let request = auth.apply(client.get(&url));
let response = request.send().await?;
if !response.status().is_success() {
let status = response.status();
let body = response.text().await.unwrap_or_default();
return Err(anyhow!(
"Azure Repos API request failed for organization '{organization}' ({status}): {body}"
));
}
let payload: AzureListResponse<AzureRepository> = response.json().await?;
Ok(payload.value)
}
/// Groups `ORGANIZATION/PROJECT` specifiers into a map of organization → project names.
///
/// The last non-empty path segment is the project; all preceding segments, joined with
/// `/`, form the organization. Both are lowercased. Blank entries are skipped silently;
/// entries with fewer than two segments are skipped with a warning.
fn parse_project_specifiers(projects: &[String]) -> HashMap<String, HashSet<String>> {
    let mut by_org: HashMap<String, HashSet<String>> = HashMap::new();

    for raw in projects.iter().filter(|entry| !entry.trim().is_empty()) {
        let segments: Vec<&str> =
            raw.trim().split('/').filter(|segment| !segment.is_empty()).collect();

        match segments.split_last() {
            Some((project, owners)) if !owners.is_empty() => {
                let organization = owners.join("/").to_lowercase();
                by_org.entry(organization).or_default().insert(project.to_lowercase());
            }
            _ => {
                warn!(
                    "Ignoring Azure project specifier '{raw}' (expected format ORGANIZATION/PROJECT)"
                );
            }
        }
    }

    by_org
}
/// Builds a lookup from lowercase organization key to the spelling used for API requests.
///
/// Explicitly listed organizations keep the casing of their first occurrence.
/// Organizations that only appear inside project specifiers are added in lowercase,
/// unless an explicit entry already claimed the key.
fn canonicalize_organizations(spec: &RepoSpecifiers) -> HashMap<String, String> {
    let mut lookup = HashMap::new();

    for explicit in &spec.organization {
        lookup.entry(explicit.to_lowercase()).or_insert_with(|| explicit.clone());
    }

    for derived in parse_project_specifiers(&spec.project).into_keys() {
        lookup.entry(derived.clone()).or_insert(derived);
    }

    lookup
}
/// Enumerates the clone URLs of all Azure repositories selected by `repo_specifiers`.
///
/// Every organization named directly or implied by a project specifier is queried once.
/// Each returned repository is then filtered by fork status, by project (unless
/// `all_projects` is set), and by the exclusion list. URLs are stripped of embedded
/// credentials, and the result is sorted and de-duplicated.
///
/// Repositories that report neither a remote nor a web URL, or whose URL cannot be
/// parsed, are skipped with a warning so that one malformed entry does not abort the
/// whole enumeration.
///
/// # Arguments
/// * `repo_specifiers` – organizations, projects, and filters to apply.
/// * `base_url` – Azure DevOps base URL the organization path is appended to.
/// * `ignore_certs` – when `true`, invalid TLS certificates are accepted.
/// * `progress` – optional progress bar whose message is updated per organization.
///
/// # Errors
/// Fails when the HTTP client cannot be built or when fetching the repositories of any
/// organization fails.
pub async fn enumerate_repo_urls(
    repo_specifiers: &RepoSpecifiers,
    base_url: Url,
    ignore_certs: bool,
    mut progress: Option<&mut ProgressBar>,
) -> Result<Vec<String>> {
    let auth = AzureAuth::from_environment();
    let client = reqwest::Client::builder()
        .danger_accept_invalid_certs(ignore_certs)
        .timeout(Duration::from_secs(30))
        .build()?;

    let exclude_matcher = build_exclude_matcher(&repo_specifiers.exclude_repos);
    let project_filters = parse_project_specifiers(&repo_specifiers.project);
    let has_project_filters = !project_filters.is_empty();

    let org_lookup = canonicalize_organizations(repo_specifiers);
    if org_lookup.is_empty() {
        return Ok(Vec::new());
    }

    // Sorted for a deterministic request order.
    let mut organizations: Vec<String> = org_lookup.values().cloned().collect();
    organizations.sort();
    organizations.dedup();

    let mut repo_urls = Vec::new();
    for org in organizations {
        if let Some(pb) = &mut progress {
            pb.set_message(format!("Fetching Azure repositories for {org}..."));
        }

        let repos =
            fetch_repositories_for_org(&client, &base_url, &org, &auth).await.with_context(
                || format!("Failed to fetch repositories for Azure organization '{org}'"),
            )?;

        let org_key = org.to_lowercase();
        let project_filter = project_filters.get(&org_key);

        for repo in repos {
            if !repo_specifiers.repo_filter.allows(repo.is_fork) {
                continue;
            }

            let project_name = repo
                .project
                .name
                .as_deref()
                .map(|s| s.trim())
                .filter(|s| !s.is_empty())
                .unwrap_or("");

            if !repo_specifiers.all_projects {
                if let Some(filters) = project_filter {
                    if project_name.is_empty() || !filters.contains(&project_name.to_lowercase()) {
                        continue;
                    }
                } else if has_project_filters
                    && !repo_specifiers
                        .organization
                        .iter()
                        .any(|candidate| candidate.eq_ignore_ascii_case(&org))
                {
                    // Organization derived solely from project filters without an explicit match
                    continue;
                }
            }

            // Prefer the clone URL; fall back to the web URL. A repository exposing neither
            // is skipped instead of failing the entire enumeration.
            let remote = match repo.remote_url.as_deref().or(repo.web_url.as_deref()) {
                Some(remote) => remote,
                None => {
                    warn!("Skipping Azure repository without a remote URL in organization '{org}'");
                    continue;
                }
            };

            let sanitized = match sanitize_remote_url(remote) {
                Some(url) => url,
                None => {
                    warn!("Skipping Azure repository with unparsable URL: {remote}");
                    continue;
                }
            };

            if should_exclude_repo(&sanitized, &exclude_matcher) {
                continue;
            }

            repo_urls.push(sanitized);
        }
    }

    repo_urls.sort_unstable();
    repo_urls.dedup();
    Ok(repo_urls)
}
/// Prints the clone URL of every matching Azure repository to stdout, one per line.
///
/// # Arguments
/// * `base_url` – Azure DevOps base URL.
/// * `ignore_certs` – when `true`, invalid TLS certificates are accepted.
/// * `progress_enabled` – when `true`, a spinner is shown while repositories are fetched.
/// * `organizations` / `projects` – what to enumerate; at least one must be non-empty.
/// * `all_projects` – bypass project filtering.
/// * `exclude_repos` – exclusion identifiers, URLs, or glob patterns.
/// * `repo_filter` – fork-status filter.
///
/// # Errors
/// Fails when neither an organization nor a project is given, or when enumeration fails.
pub async fn list_repositories(
    base_url: Url,
    ignore_certs: bool,
    progress_enabled: bool,
    organizations: &[String],
    projects: &[String],
    all_projects: bool,
    exclude_repos: &[String],
    repo_filter: RepoType,
) -> Result<()> {
    let repo_specifiers = RepoSpecifiers {
        organization: organizations.to_vec(),
        project: projects.to_vec(),
        all_projects,
        repo_filter,
        exclude_repos: exclude_repos.to_vec(),
    };
    if repo_specifiers.is_empty() {
        anyhow::bail!("Provide at least one --organization or --project to enumerate Azure Repos");
    }

    let mut progress = if progress_enabled {
        let style = ProgressStyle::with_template("{spinner} {msg} [{elapsed_precise}]")
            .expect("progress bar style template should compile");
        let pb = ProgressBar::new_spinner()
            .with_style(style)
            .with_message("Fetching Azure repositories");
        pb.enable_steady_tick(Duration::from_millis(500));
        pb
    } else {
        ProgressBar::hidden()
    };

    let enumeration =
        enumerate_repo_urls(&repo_specifiers, base_url, ignore_certs, Some(&mut progress)).await;

    // Stop the steady-tick redraws and erase the spinner before anything else is written,
    // on success and on failure alike, so it cannot interleave with the URL list or the
    // error report.
    progress.finish_and_clear();

    for url in enumeration? {
        println!("{url}");
    }
    Ok(())
}
/// Parses the textual form of `repo_url` into a [`Url`], returning `None` on failure.
fn parse_repo(repo_url: &GitUrl) -> Option<Url> {
    let text = repo_url.as_str();
    Url::parse(text).ok()
}
/// Derives the URL of the wiki repository that accompanies an Azure repository.
///
/// For a URL whose path ends in `<project>/_git/<repo>`, the result is the same URL with
/// `.wiki` appended to the repository name.
///
/// Returns `None` when the URL cannot be parsed, has fewer than three non-empty path
/// segments, lacks the `_git` marker, already names a `.wiki` repository, or cannot be
/// converted back into a [`GitUrl`].
pub fn wiki_url(repo_url: &GitUrl) -> Option<GitUrl> {
    let mut wiki = parse_repo(repo_url)?;

    let mut parts: Vec<String> = match wiki.path_segments() {
        Some(segments) => segments.filter(|part| !part.is_empty()).map(String::from).collect(),
        None => Vec::new(),
    };
    if parts.len() < 3 {
        return None;
    }

    let repo_index = parts.len() - 1;
    let marker_index = repo_index - 1;
    if parts[repo_index].ends_with(".wiki") || parts[marker_index] != "_git" {
        return None;
    }
    parts[repo_index].push_str(".wiki");

    // Rewrite the path from the cleaned segments; the mutator is released at the end of
    // the statement.
    wiki.path_segments_mut().ok()?.clear().extend(parts.iter().map(String::as_str));

    GitUrl::try_from(wiki).ok()
}
/// Placeholder for fetching additional per-repository artifacts from Azure DevOps.
///
/// All arguments are currently unused and the function always returns an empty list
/// of paths.
pub async fn fetch_repo_items(
_repo_url: &GitUrl,
_ignore_certs: bool,
_output_root: &Path,
_datastore: &Arc<Mutex<findings_store::FindingsStore>>,
) -> Result<Vec<PathBuf>> {
// Azure DevOps exposes work items and wiki content via additional APIs. For now we
// skip fetching extra artifacts and simply return an empty set so callers can rely
// on the function existing just like the other git host modules.
Ok(Vec::new())
}
// Unit tests for the URL sanitizing, identifier parsing, exclusion matching, and wiki URL helpers.
#[cfg(test)]
mod tests {
use super::*;
use std::str::FromStr;
// A username embedded in the URL is removed.
#[test]
fn sanitize_remote_url_strips_username() {
let raw = "https://example@dev.azure.com/example/project/_git/repo";
let sanitized = sanitize_remote_url(raw).expect("sanitize");
assert_eq!(sanitized, "https://dev.azure.com/example/project/_git/repo");
}
// The `_git` marker is dropped from the identifier.
#[test]
fn parse_repo_identifier_from_url_handles_basic_path() {
let remote = "https://dev.azure.com/org/project/_git/repo";
let ident = parse_repo_identifier_from_url(remote).expect("identifier");
assert_eq!(ident, "org/project/repo");
}
// Multiple owner segments before the project are all preserved.
#[test]
fn parse_repo_identifier_from_url_handles_nested_org() {
let remote = "https://ado.example.com/collection/team/project/_git/repo";
let ident = parse_repo_identifier_from_url(remote).expect("identifier");
assert_eq!(ident, "collection/team/project/repo");
}
// Exclusions may be given as full repository URLs.
#[test]
fn parse_excluded_repo_accepts_url() {
let raw = "https://dev.azure.com/org/project/_git/repo";
let ident = parse_excluded_repo(raw).expect("identifier");
assert_eq!(ident, "org/project/repo");
}
// Exclusions may be given as plain organization/project/repository paths.
#[test]
fn parse_excluded_repo_accepts_path() {
let raw = "org/project/repo";
let ident = parse_excluded_repo(raw).expect("identifier");
assert_eq!(ident, "org/project/repo");
}
// A glob exclusion matches a repository URL via its normalized identifier.
#[test]
fn exclude_matcher_matches_glob() {
let matcher = build_exclude_matcher(&["org/*/repo".to_string()]);
assert!(should_exclude_repo("https://dev.azure.com/org/project/_git/repo", &matcher));
}
// The wiki URL is the repository URL with `.wiki` appended to the repository name.
#[test]
fn wiki_url_appends_suffix() {
let url = GitUrl::from_str("https://dev.azure.com/org/project/_git/repo").unwrap();
let wiki = wiki_url(&url).expect("wiki url");
assert_eq!(wiki.as_str(), "https://dev.azure.com/org/project/_git/repo.wiki");
}
}

98
src/cli/commands/azure.rs Normal file
View file

@ -0,0 +1,98 @@
use clap::{Args, Subcommand, ValueEnum, ValueHint};
use strum_macros::Display;
use url::Url;
use crate::cli::commands::output::OutputArgs;
// Arguments of the `azure` command: the subcommand to run plus the base-URL override.
// Plain `//` comments are used here on purpose: `///` comments feed clap's help output.
#[derive(Args, Debug)]
pub struct AzureArgs {
// The nested subcommand to dispatch to.
#[command(subcommand)]
pub command: AzureCommand,
// Declared `global`, with `https://dev.azure.com/` as the default value.
/// Override Azure DevOps base URL (e.g. for Azure DevOps Server)
#[arg(global = true, long, default_value = "https://dev.azure.com/", value_hint = ValueHint::Url)]
pub azure_base_url: Url,
}
// Subcommands available under the `azure` command; currently only the repository group.
#[derive(Subcommand, Debug)]
pub enum AzureCommand {
/// Interact with Azure DevOps repositories
#[command(subcommand)]
Repos(AzureReposCommand),
}
// Repository-level subcommands; currently only listing.
#[derive(Subcommand, Debug)]
pub enum AzureReposCommand {
/// List repositories for organizations or projects
List(AzureReposListArgs),
}
// Arguments of the repository list subcommand: what to enumerate and the output options.
#[derive(Args, Debug, Clone)]
pub struct AzureReposListArgs {
// Organization / project / exclusion / repo-type selectors, flattened into this command.
#[command(flatten)]
pub repo_specifiers: AzureRepoSpecifiers,
// Shared output options, parameterized with the Azure output format enum.
#[command(flatten)]
pub output_args: OutputArgs<AzureOutputFormat>,
}
// Command-line selectors describing which Azure repositories to enumerate.
// Each `--azure-*` flag also carries an alias, declared on the field below.
#[derive(Args, Debug, Clone)]
pub struct AzureRepoSpecifiers {
/// Repositories belonging to these Azure DevOps organizations or collections
#[arg(long = "azure-organization", alias = "organization", value_name = "ORGANIZATION")]
pub organization: Vec<String>,
/// Repositories belonging to the specified Azure DevOps projects (format: ORGANIZATION/PROJECT)
#[arg(long = "azure-project", alias = "project", value_name = "ORGANIZATION/PROJECT")]
pub project: Vec<String>,
/// Include repositories from all projects within the specified organizations
#[arg(long = "azure-all-projects", alias = "all-azure-projects")]
pub all_projects: bool,
/// Skip repositories when enumerating Azure sources (format: ORGANIZATION/PROJECT/REPOSITORY)
#[arg(
long = "azure-exclude",
alias = "azure-exclude-repo",
value_name = "ORGANIZATION/PROJECT/REPOSITORY"
)]
pub exclude_repos: Vec<String>,
// Defaults to `source`, i.e. forks are left out unless requested.
/// Filter by repository type
#[arg(long = "azure-repo-type", default_value_t = AzureRepoType::Source)]
pub repo_type: AzureRepoType,
}
impl AzureRepoSpecifiers {
    /// Returns `true` when neither an organization nor a project was supplied.
    pub fn is_empty(&self) -> bool {
        [&self.organization, &self.project].iter().all(|list| list.is_empty())
    }
}
// Command-line values for the repository type filter, displayed in kebab-case.
// Converted into `crate::azure::RepoType` by the `From` impl in this file.
#[derive(Copy, Clone, Debug, Display, PartialEq, Eq, PartialOrd, Ord, ValueEnum)]
#[strum(serialize_all = "kebab-case")]
pub enum AzureRepoType {
// Maps to `RepoType::Source`.
Source,
// Maps to `RepoType::Fork`.
Fork,
// Maps to `RepoType::All`.
All,
}
/// Maps the command-line repository type onto the enumeration filter, variant for variant.
impl From<AzureRepoType> for crate::azure::RepoType {
    fn from(value: AzureRepoType) -> Self {
        match value {
            AzureRepoType::Source => Self::Source,
            AzureRepoType::Fork => Self::Fork,
            AzureRepoType::All => Self::All,
        }
    }
}
// Output format values accepted by the Azure commands, displayed in kebab-case.
#[derive(Copy, Clone, Debug, ValueEnum, Display)]
#[strum(serialize_all = "kebab-case")]
pub enum AzureOutputFormat {
Pretty,
Json,
Jsonl,
Bson,
Sarif,
}

View file

@ -5,6 +5,7 @@ use url::Url;
use crate::{
cli::commands::{
azure::AzureRepoType,
bitbucket::{BitbucketAuthArgs, BitbucketRepoType},
gitea::GiteaRepoType,
github::{GitCloneMode, GitHistoryMode, GitHubRepoType},
@ -30,11 +31,14 @@ pub struct InputSpecifierArgs {
"bitbucket_user",
"bitbucket_workspace",
"bitbucket_project",
"azure_organization",
"azure_project",
"git_url",
"all_github_organizations",
"all_gitlab_groups",
"all_gitea_organizations",
"all_bitbucket_workspaces",
"all_azure_projects",
"jira_url",
"confluence_url",
"docker_image",
@ -176,6 +180,38 @@ pub struct InputSpecifierArgs {
#[command(flatten)]
pub bitbucket_auth: BitbucketAuthArgs,
// Azure DevOps Options
/// Scan repositories belonging to the specified Azure DevOps organizations or collections
#[arg(long = "azure-organization")]
pub azure_organization: Vec<String>,
/// Scan repositories belonging to the specified Azure DevOps projects (format: ORGANIZATION/PROJECT)
#[arg(long = "azure-project", value_name = "ORGANIZATION/PROJECT")]
pub azure_project: Vec<String>,
/// Skip repositories when enumerating Azure Repos sources (format: ORGANIZATION/PROJECT/REPOSITORY)
#[arg(
long = "azure-exclude",
alias = "azure-exclude-repo",
value_name = "ORGANIZATION/PROJECT/REPOSITORY"
)]
pub azure_exclude: Vec<String>,
/// Include repositories from every project within the specified Azure organizations
#[arg(long = "all-azure-projects")]
pub all_azure_projects: bool,
/// Use the specified base URL for Azure DevOps (e.g. Azure DevOps Server)
#[arg(
long = "azure-base-url",
default_value = "https://dev.azure.com/",
value_hint = ValueHint::Url
)]
pub azure_base_url: Url,
#[arg(long = "azure-repo-type", default_value_t = AzureRepoType::Source)]
pub azure_repo_type: AzureRepoType,
/// Jira base URL (e.g. https://jira.example.com)
#[arg(long, value_hint = ValueHint::Url, requires = "jql")]
pub jira_url: Option<Url>,

View file

@ -1,3 +1,4 @@
pub mod azure;
pub mod bitbucket;
pub mod gitea;
pub mod github;

View file

@ -7,8 +7,8 @@ use sysinfo::{MemoryRefreshKind, RefreshKind, System};
use tracing::Level;
use crate::cli::commands::{
bitbucket::BitbucketArgs, gitea::GiteaArgs, github::GitHubArgs, gitlab::GitLabArgs,
rules::RulesArgs, scan::ScanArgs,
azure::AzureArgs, bitbucket::BitbucketArgs, gitea::GiteaArgs, github::GitHubArgs,
gitlab::GitLabArgs, rules::RulesArgs, scan::ScanArgs,
};
#[deny(missing_docs)]
@ -77,6 +77,10 @@ pub enum Command {
#[command(name = "bitbucket")]
Bitbucket(BitbucketArgs),
/// Interact with the Azure DevOps API
#[command(name = "azure")]
Azure(AzureArgs),
/// Manage rules
#[command(alias = "rule")]
Rules(RulesArgs),

View file

@ -1,3 +1,4 @@
pub mod azure;
pub mod baseline;
pub mod binary;
pub mod bitbucket;

View file

@ -33,7 +33,7 @@ use std::{
use anyhow::{Context, Result};
use kingfisher::{
bitbucket,
azure, bitbucket,
cli::{
self,
commands::{
@ -71,6 +71,7 @@ use tracing_subscriber::{
use url::Url;
use crate::cli::commands::{
azure::{AzureCommand, AzureRepoType, AzureReposCommand},
bitbucket::{BitbucketAuthArgs, BitbucketCommand, BitbucketRepoType, BitbucketReposCommand},
gitea::{GiteaCommand, GiteaRepoType, GiteaReposCommand},
gitlab::{GitLabCommand, GitLabRepoType, GitLabReposCommand},
@ -91,6 +92,7 @@ fn main() -> anyhow::Result<()> {
Command::GitLab(_) => num_cpus::get(), // Default for GitLab commands
Command::Bitbucket(_) => num_cpus::get(), // Default for Bitbucket commands
Command::Gitea(_) => num_cpus::get(), // Default for Gitea commands
Command::Azure(_) => num_cpus::get(), // Default for Azure commands
Command::Rules(_) => num_cpus::get(), // Default for Rules commands
};
@ -267,6 +269,23 @@ async fn async_main(args: CommandLineArgs) -> Result<()> {
}
},
},
Command::Azure(azure_args) => match azure_args.command {
AzureCommand::Repos(repos_command) => match repos_command {
AzureReposCommand::List(list_args) => {
azure::list_repositories(
azure_args.azure_base_url.clone(),
global_args.ignore_certs,
global_args.use_progress(),
&list_args.repo_specifiers.organization,
&list_args.repo_specifiers.project,
list_args.repo_specifiers.all_projects,
&list_args.repo_specifiers.exclude_repos,
list_args.repo_specifiers.repo_type.into(),
)
.await?;
}
},
},
Command::Gitea(gitea_args) => match gitea_args.command {
GiteaCommand::Repos(repos_command) => match repos_command {
GiteaReposCommand::List(list_args) => {
@ -364,6 +383,13 @@ fn create_default_scan_args() -> cli::commands::scan::ScanArgs {
bitbucket_repo_type: BitbucketRepoType::Source,
bitbucket_auth: BitbucketAuthArgs::default(),
azure_organization: Vec::new(),
azure_project: Vec::new(),
azure_exclude: Vec::new(),
all_azure_projects: false,
azure_base_url: Url::parse("https://dev.azure.com/").unwrap(),
azure_repo_type: AzureRepoType::Source,
jira_url: None,
jql: None,
confluence_url: None,

View file

@ -667,6 +667,7 @@ mod tests {
cli::commands::output::OutputArgs,
cli::commands::scan::{ConfidenceLevel, ScanArgs},
cli::commands::{
azure::AzureRepoType,
bitbucket::{BitbucketAuthArgs, BitbucketRepoType},
gitea::GiteaRepoType,
github::{GitCloneMode, GitHistoryMode, GitHubRepoType},
@ -789,6 +790,12 @@ mod tests {
bitbucket_api_url: Url::parse("https://api.bitbucket.org/2.0/").unwrap(),
bitbucket_repo_type: BitbucketRepoType::Source,
bitbucket_auth: BitbucketAuthArgs::default(),
azure_organization: Vec::new(),
azure_project: Vec::new(),
azure_exclude: Vec::new(),
all_azure_projects: false,
azure_base_url: Url::parse("https://dev.azure.com/").unwrap(),
azure_repo_type: AzureRepoType::Source,
jira_url: None,
jql: None,
confluence_url: None,

View file

@ -39,6 +39,7 @@ mod tests {
use crate::util::intern;
use crate::{
blob::BlobId,
cli::commands::azure::AzureRepoType,
cli::commands::bitbucket::{BitbucketAuthArgs, BitbucketRepoType},
cli::commands::gitea::GiteaRepoType,
cli::commands::github::GitHubRepoType,
@ -109,6 +110,13 @@ mod tests {
bitbucket_api_url: Url::parse("https://api.bitbucket.org/2.0/").unwrap(),
bitbucket_repo_type: BitbucketRepoType::Source,
bitbucket_auth: BitbucketAuthArgs::default(),
// Azure DevOps
azure_organization: Vec::new(),
azure_project: Vec::new(),
azure_exclude: Vec::new(),
all_azure_projects: false,
azure_base_url: Url::parse("https://dev.azure.com/").unwrap(),
azure_repo_type: AzureRepoType::Source,
// Jira options
jira_url: None,
jql: None,

View file

@ -2,7 +2,8 @@
pub(crate) use docker::save_docker_images;
pub(crate) use enumerate::enumerate_filesystem_inputs;
pub(crate) use repos::{
clone_or_update_git_repos, enumerate_bitbucket_repos, enumerate_github_repos,
clone_or_update_git_repos, enumerate_azure_repos, enumerate_bitbucket_repos,
enumerate_github_repos,
};
pub use runner::{load_and_record_rules, run_async_scan, run_scan};
pub(crate) use validation::run_secret_validation;

View file

@ -11,7 +11,7 @@ use url::Url;
use crate::blob::BlobIdMap;
use crate::{
bitbucket,
azure, bitbucket,
blob::BlobMetadata,
cli::{
commands::{github::GitCloneMode, github::GitHistoryMode, scan},
@ -370,6 +370,69 @@ pub async fn enumerate_bitbucket_repos(
Ok(repo_urls)
}
/// Collects the Git URLs to scan for the Azure DevOps inputs given on the command line.
///
/// The result starts from the explicitly supplied `git_url` values. When at least one
/// Azure organization or project was requested, the matching repositories are enumerated
/// via `azure::enumerate_repo_urls` and appended. Enumerated URLs that cannot be parsed
/// into a `GitUrl` are logged and skipped. The list is returned sorted and de-duplicated.
///
/// # Errors
/// Fails with the context "Failed to enumerate Azure repositories" when the enumeration
/// itself fails.
pub async fn enumerate_azure_repos(
    args: &scan::ScanArgs,
    global_args: &global::GlobalArgs,
) -> Result<Vec<GitUrl>> {
    let inputs = &args.input_specifier_args;
    let repo_specifiers = azure::RepoSpecifiers {
        organization: inputs.azure_organization.clone(),
        project: inputs.azure_project.clone(),
        all_projects: inputs.all_azure_projects,
        repo_filter: inputs.azure_repo_type.into(),
        exclude_repos: inputs.azure_exclude.clone(),
    };

    let mut repo_urls = inputs.git_url.clone();

    if !repo_specifiers.is_empty() {
        let mut progress = if global_args.use_progress() {
            let style =
                ProgressStyle::with_template("{spinner} {msg} {human_len} [{elapsed_precise}]")
                    .expect("progress bar style template should compile");
            let spinner = ProgressBar::new_spinner()
                .with_style(style)
                .with_message("Enumerating Azure Repos repositories...");
            spinner.enable_steady_tick(Duration::from_millis(500));
            spinner
        } else {
            ProgressBar::hidden()
        };

        let repo_strings = azure::enumerate_repo_urls(
            &repo_specifiers,
            inputs.azure_base_url.clone(),
            global_args.ignore_certs,
            Some(&mut progress),
        )
        .await
        .context("Failed to enumerate Azure repositories")?;

        // Counts only the URLs that parsed successfully.
        let mut num_found: u64 = 0;
        for repo_string in repo_strings {
            match GitUrl::from_str(&repo_string) {
                Ok(repo_url) => {
                    repo_urls.push(repo_url);
                    num_found += 1;
                }
                Err(e) => {
                    // Pause the spinner so the log line is not overdrawn.
                    progress.suspend(|| {
                        error!("Failed to parse repo URL from {repo_string}: {e}");
                    });
                }
            }
        }

        progress.finish_with_message(format!(
            "Found {} repositories from Azure Repos",
            HumanCount(num_found)
        ));
    }

    repo_urls.sort();
    repo_urls.dedup();
    Ok(repo_urls)
}
pub async fn fetch_jira_issues(
args: &scan::ScanArgs,
global_args: &global::GlobalArgs,
@ -519,6 +582,16 @@ pub async fn fetch_git_host_artifacts(
)
.await?,
);
} else if host.contains("dev.azure") || host.contains("visualstudio.com") {
dirs.extend(
azure::fetch_repo_items(
repo_url,
global_args.ignore_certs,
&output_root,
datastore,
)
.await?,
);
}
}
Ok(dirs)

View file

@ -7,7 +7,7 @@ use tokio::time::{Duration, Instant};
use tracing::{debug, error, error_span, info, trace};
use crate::{
bitbucket,
azure, bitbucket,
cli::{commands::scan, global},
findings_store,
findings_store::{FindingsStore, FindingsStoreMessage},
@ -20,8 +20,8 @@ use crate::{
rules_database::RulesDatabase,
safe_list,
scanner::{
clone_or_update_git_repos, enumerate_bitbucket_repos, enumerate_filesystem_inputs,
enumerate_github_repos,
clone_or_update_git_repos, enumerate_azure_repos, enumerate_bitbucket_repos,
enumerate_filesystem_inputs, enumerate_github_repos,
repos::{
enumerate_gitea_repos, enumerate_gitlab_repos, fetch_confluence_pages,
fetch_git_host_artifacts, fetch_jira_issues, fetch_s3_objects, fetch_slack_messages,
@ -75,11 +75,13 @@ pub async fn run_async_scan(
let gitlab_repo_urls = enumerate_gitlab_repos(args, global_args).await?;
let gitea_repo_urls = enumerate_gitea_repos(args, global_args).await?;
let bitbucket_repo_urls = enumerate_bitbucket_repos(args, global_args).await?;
let azure_repo_urls = enumerate_azure_repos(args, global_args).await?;
// Combine repository URLs
repo_urls.extend(gitlab_repo_urls);
repo_urls.extend(gitea_repo_urls);
repo_urls.extend(bitbucket_repo_urls);
repo_urls.extend(azure_repo_urls);
repo_urls.sort();
repo_urls.dedup();
@ -99,6 +101,9 @@ pub async fn run_async_scan(
if let Some(w) = bitbucket::wiki_url(url) {
wiki_urls.push(w);
}
if let Some(w) = azure::wiki_url(url) {
wiki_urls.push(w);
}
}
repo_urls.extend(wiki_urls);
repo_urls.sort();

View file

@ -7,6 +7,7 @@ use anyhow::Result;
use kingfisher::{
cli::{
commands::{
azure::AzureRepoType,
bitbucket::{BitbucketAuthArgs, BitbucketRepoType},
gitea::GiteaRepoType,
github::{GitCloneMode, GitHistoryMode, GitHubRepoType},
@ -85,6 +86,12 @@ fn run_skiplist(skip_regex: Vec<String>, skip_skipword: Vec<String>) -> Result<u
bitbucket_api_url: Url::parse("https://api.bitbucket.org/2.0/").unwrap(),
bitbucket_repo_type: BitbucketRepoType::Source,
bitbucket_auth: BitbucketAuthArgs::default(),
azure_organization: Vec::new(),
azure_project: Vec::new(),
azure_exclude: Vec::new(),
all_azure_projects: false,
azure_base_url: Url::parse("https://dev.azure.com/").unwrap(),
azure_repo_type: AzureRepoType::Source,
jira_url: None,
jql: None,
confluence_url: None,

View file

@ -75,7 +75,6 @@ fn detects_base64_in_code_with_tree_sitter() -> anyhow::Result<()> {
"scan",
dir.path().to_str().unwrap(),
"--no-binary",
"--no-base64",
"--confidence=low",
"--format",
"json",

View file

@ -7,6 +7,7 @@ use anyhow::{Context, Result};
use kingfisher::{
cli::{
commands::{
azure::AzureRepoType,
bitbucket::{BitbucketAuthArgs, BitbucketRepoType},
gitea::GiteaRepoType,
github::{GitCloneMode, GitHistoryMode, GitHubRepoType},
@ -83,6 +84,13 @@ fn test_bitbucket_remote_scan() -> Result<()> {
bitbucket_repo_type: BitbucketRepoType::Source,
bitbucket_auth: BitbucketAuthArgs::default(),
azure_organization: Vec::new(),
azure_project: Vec::new(),
azure_exclude: Vec::new(),
all_azure_projects: false,
azure_base_url: Url::parse("https://dev.azure.com/")?,
azure_repo_type: AzureRepoType::Source,
jira_url: None,
jql: None,
confluence_url: None,

View file

@ -11,6 +11,7 @@ use anyhow::Result;
use kingfisher::{
cli::{
commands::{
azure::AzureRepoType,
bitbucket::{BitbucketAuthArgs, BitbucketRepoType},
gitea::GiteaRepoType,
github::{GitCloneMode, GitHistoryMode, GitHubRepoType},
@ -100,6 +101,13 @@ rules:
bitbucket_repo_type: BitbucketRepoType::Source,
bitbucket_auth: BitbucketAuthArgs::default(),
azure_organization: Vec::new(),
azure_project: Vec::new(),
azure_exclude: Vec::new(),
all_azure_projects: false,
azure_base_url: Url::parse("https://dev.azure.com/").unwrap(),
azure_repo_type: AzureRepoType::Source,
jira_url: None,
jql: None,
confluence_url: None,

View file

@ -8,6 +8,7 @@ use anyhow::{Context, Result};
use kingfisher::{
cli::{
commands::{
azure::AzureRepoType,
bitbucket::{BitbucketAuthArgs, BitbucketRepoType},
gitea::GiteaRepoType,
github::{GitCloneMode, GitHistoryMode, GitHubRepoType},
@ -87,6 +88,13 @@ fn test_github_remote_scan() -> Result<()> {
bitbucket_repo_type: BitbucketRepoType::Source,
bitbucket_auth: BitbucketAuthArgs::default(),
azure_organization: Vec::new(),
azure_project: Vec::new(),
azure_exclude: Vec::new(),
all_azure_projects: false,
azure_base_url: Url::parse("https://dev.azure.com/").unwrap(),
azure_repo_type: AzureRepoType::Source,
jira_url: None,
jql: None,
confluence_url: None,

View file

@ -8,6 +8,7 @@ use anyhow::{Context, Result};
use kingfisher::{
cli::{
commands::{
azure::AzureRepoType,
bitbucket::{BitbucketAuthArgs, BitbucketRepoType},
gitea::GiteaRepoType,
github::{GitCloneMode, GitHistoryMode, GitHubRepoType},
@ -86,6 +87,13 @@ fn test_gitlab_remote_scan() -> Result<()> {
bitbucket_repo_type: BitbucketRepoType::Source,
bitbucket_auth: BitbucketAuthArgs::default(),
azure_organization: Vec::new(),
azure_project: Vec::new(),
azure_exclude: Vec::new(),
all_azure_projects: false,
azure_base_url: Url::parse("https://dev.azure.com/")?,
azure_repo_type: AzureRepoType::Source,
jira_url: None,
jql: None,
confluence_url: None,
@ -216,6 +224,13 @@ fn test_gitlab_remote_scan_no_history() -> Result<()> {
bitbucket_repo_type: BitbucketRepoType::Source,
bitbucket_auth: BitbucketAuthArgs::default(),
azure_organization: Vec::new(),
azure_project: Vec::new(),
azure_exclude: Vec::new(),
all_azure_projects: false,
azure_base_url: Url::parse("https://dev.azure.com/")?,
azure_repo_type: AzureRepoType::Source,
jira_url: None,
jql: None,
confluence_url: None,

View file

@ -8,6 +8,7 @@ use anyhow::Result;
use kingfisher::{
cli::{
commands::{
azure::AzureRepoType,
bitbucket::{BitbucketAuthArgs, BitbucketRepoType},
gitea::GiteaRepoType,
github::{GitCloneMode, GitHistoryMode, GitHubRepoType},
@ -68,6 +69,12 @@ async fn test_redact_hashes_finding_values() -> Result<()> {
bitbucket_api_url: Url::parse("https://api.bitbucket.org/2.0/").unwrap(),
bitbucket_repo_type: BitbucketRepoType::Source,
bitbucket_auth: BitbucketAuthArgs::default(),
azure_organization: Vec::new(),
azure_project: Vec::new(),
azure_exclude: Vec::new(),
all_azure_projects: false,
azure_base_url: Url::parse("https://dev.azure.com/").unwrap(),
azure_repo_type: AzureRepoType::Source,
jira_url: None,
jql: None,
confluence_url: None,

View file

@ -7,6 +7,7 @@ use anyhow::Result;
use kingfisher::{
cli::{
commands::{
azure::AzureRepoType,
bitbucket::{BitbucketAuthArgs, BitbucketRepoType},
gitea::GiteaRepoType,
github::{GitCloneMode, GitHistoryMode, GitHubRepoType},
@ -75,6 +76,12 @@ impl TestContext {
bitbucket_api_url: Url::parse("https://api.bitbucket.org/2.0/").unwrap(),
bitbucket_repo_type: BitbucketRepoType::Source,
bitbucket_auth: BitbucketAuthArgs::default(),
azure_organization: Vec::new(),
azure_project: Vec::new(),
azure_exclude: Vec::new(),
all_azure_projects: false,
azure_base_url: Url::parse("https://dev.azure.com/").unwrap(),
azure_repo_type: AzureRepoType::Source,
jira_url: None,
jql: None,
confluence_url: None,
@ -191,6 +198,12 @@ async fn test_scan_slack_messages() -> Result<()> {
bitbucket_api_url: Url::parse("https://api.bitbucket.org/2.0/").unwrap(),
bitbucket_repo_type: BitbucketRepoType::Source,
bitbucket_auth: BitbucketAuthArgs::default(),
azure_organization: Vec::new(),
azure_project: Vec::new(),
azure_exclude: Vec::new(),
all_azure_projects: false,
azure_base_url: Url::parse("https://dev.azure.com/").unwrap(),
azure_repo_type: AzureRepoType::Source,
jira_url: None,
jql: None,
confluence_url: None,

View file

@ -11,6 +11,7 @@ use anyhow::Result;
use kingfisher::{
cli::{
commands::{
azure::AzureRepoType,
bitbucket::{BitbucketAuthArgs, BitbucketRepoType},
gitea::GiteaRepoType,
github::{GitCloneMode, GitHistoryMode, GitHubRepoType},
@ -143,6 +144,13 @@ async fn test_validation_cache_and_depvars() -> Result<()> {
bitbucket_repo_type: BitbucketRepoType::Source,
bitbucket_auth: BitbucketAuthArgs::default(),
azure_organization: Vec::new(),
azure_project: Vec::new(),
azure_exclude: Vec::new(),
all_azure_projects: false,
azure_base_url: Url::parse("https://dev.azure.com/").unwrap(),
azure_repo_type: AzureRepoType::Source,
jira_url: None,
jql: None,
confluence_url: None,

View file

@ -9,6 +9,7 @@ use anyhow::{Context, Result};
use kingfisher::{
cli::{
commands::{
azure::AzureRepoType,
bitbucket::{BitbucketAuthArgs, BitbucketRepoType},
gitea::GiteaRepoType,
github::{GitCloneMode, GitHistoryMode, GitHubRepoType},
@ -86,6 +87,13 @@ impl TestContext {
bitbucket_repo_type: BitbucketRepoType::Source,
bitbucket_auth: BitbucketAuthArgs::default(),
azure_organization: Vec::new(),
azure_project: Vec::new(),
azure_exclude: Vec::new(),
all_azure_projects: false,
azure_base_url: Url::parse("https://dev.azure.com/").unwrap(),
azure_repo_type: AzureRepoType::Source,
jira_url: None,
jql: None,
confluence_url: None,
@ -189,6 +197,13 @@ impl TestContext {
bitbucket_repo_type: BitbucketRepoType::Source,
bitbucket_auth: BitbucketAuthArgs::default(),
azure_organization: Vec::new(),
azure_project: Vec::new(),
azure_exclude: Vec::new(),
all_azure_projects: false,
azure_base_url: Url::parse("https://dev.azure.com/").unwrap(),
azure_repo_type: AzureRepoType::Source,
jira_url: None,
jql: None,
confluence_url: None,