kingfisher/src/gitea.rs
2025-09-23 13:07:45 -07:00

440 lines
12 KiB
Rust

use std::{collections::HashSet, env, str::FromStr, time::Duration};
use anyhow::{anyhow, Result};
use globset::{Glob, GlobSet, GlobSetBuilder};
use indicatif::{ProgressBar, ProgressStyle};
use reqwest::StatusCode;
use serde::Deserialize;
use tracing::warn;
use url::Url;
use crate::{git_url::GitUrl, validation::GLOBAL_USER_AGENT};
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RepoType {
All,
Source,
Fork,
}
impl RepoType {
fn allows(self, is_fork: bool) -> bool {
match self {
RepoType::All => true,
RepoType::Source => !is_fork,
RepoType::Fork => is_fork,
}
}
}
#[derive(Debug, Clone)]
pub struct RepoSpecifiers {
pub user: Vec<String>,
pub organization: Vec<String>,
pub all_organizations: bool,
pub repo_filter: RepoType,
pub exclude_repos: Vec<String>,
}
impl RepoSpecifiers {
pub fn is_empty(&self) -> bool {
self.user.is_empty() && self.organization.is_empty() && !self.all_organizations
}
}
#[derive(Debug, Deserialize)]
struct GiteaRepository {
full_name: String,
clone_url: String,
#[serde(default)]
fork: bool,
}
#[derive(Debug, Deserialize)]
struct GiteaOrganization {
username: String,
}
struct ExcludeMatcher {
exact: HashSet<String>,
globs: Option<GlobSet>,
}
impl ExcludeMatcher {
fn matches(&self, name: &str) -> bool {
if self.exact.contains(name) {
return true;
}
if let Some(globs) = &self.globs {
return globs.is_match(name);
}
false
}
fn is_empty(&self) -> bool {
self.exact.is_empty() && self.globs.is_none()
}
}
fn looks_like_glob(pattern: &str) -> bool {
pattern.contains('*') || pattern.contains('?') || pattern.contains('[')
}
fn normalize_repo_identifier(raw: &str) -> Option<String> {
let trimmed = raw.trim().trim_matches('/');
if trimmed.is_empty() {
return None;
}
let without_git = trimmed.strip_suffix(".git").unwrap_or(trimmed);
let mut parts = without_git.split('/').filter(|segment| !segment.is_empty());
let owner = parts.next()?;
let repo = parts.next()?;
Some(format!("{}/{}", owner.to_lowercase(), repo.to_lowercase()))
}
fn parse_excluded_repo(raw: &str) -> Option<String> {
let trimmed = raw.trim();
if trimmed.is_empty() {
return None;
}
if let Ok(url) = Url::parse(trimmed) {
if let Some(name) = normalize_repo_identifier(url.path()) {
return Some(name);
}
}
if let Some(idx) = trimmed.rfind(':') {
if let Some(name) = normalize_repo_identifier(&trimmed[idx + 1..]) {
return Some(name);
}
}
normalize_repo_identifier(trimmed)
}
fn build_exclude_matcher(excludes: &[String]) -> ExcludeMatcher {
let mut exact = HashSet::new();
let mut glob_builder = GlobSetBuilder::new();
let mut has_glob = false;
for raw in excludes {
match parse_excluded_repo(raw) {
Some(name) => {
if looks_like_glob(&name) {
match Glob::new(&name) {
Ok(glob) => {
glob_builder.add(glob);
has_glob = true;
}
Err(err) => {
warn!("Ignoring invalid Gitea exclusion pattern '{raw}': {err}");
exact.insert(name);
}
}
} else {
exact.insert(name);
}
}
None => {
warn!("Ignoring invalid Gitea exclusion '{raw}' (expected owner/repo)");
}
}
}
let globs = if has_glob {
match glob_builder.build() {
Ok(set) => Some(set),
Err(err) => {
warn!("Failed to build Gitea exclusion patterns: {err}");
None
}
}
} else {
None
};
ExcludeMatcher { exact, globs }
}
fn should_exclude_repo(repo: &GiteaRepository, excludes: &ExcludeMatcher) -> bool {
if excludes.is_empty() {
return false;
}
excludes.matches(&repo.full_name.to_lowercase())
}
async fn fetch_paginated_repos(
client: &reqwest::Client,
token: Option<&str>,
mut url: Url,
repo_filter: RepoType,
excludes: &ExcludeMatcher,
progress: Option<&ProgressBar>,
) -> Result<Vec<String>> {
let mut page = 1u32;
let mut repos = Vec::new();
loop {
url.query_pairs_mut()
.clear()
.append_pair("page", &page.to_string())
.append_pair("limit", "50");
if let Some(pb) = progress {
pb.set_message(format!("Fetching Gitea repositories (page {page})"));
}
let mut req = client.get(url.clone()).header("User-Agent", GLOBAL_USER_AGENT.as_str());
if let Some(token) = token {
req = req.header("Authorization", format!("token {token}"));
}
let resp = req.send().await?;
match resp.status() {
StatusCode::OK => {}
StatusCode::NOT_FOUND => {
warn!("Gitea endpoint {} returned 404", url);
break;
}
status => {
return Err(anyhow!("Failed to fetch repositories from {} (status {status})", url));
}
}
let page_repos: Vec<GiteaRepository> = resp.json().await?;
if page_repos.is_empty() {
break;
}
for repo in page_repos {
if !repo_filter.allows(repo.fork) {
continue;
}
if should_exclude_repo(&repo, excludes) {
continue;
}
repos.push(repo.clone_url);
}
page += 1;
}
Ok(repos)
}
async fn fetch_user_repos(
client: &reqwest::Client,
token: Option<&str>,
api_url: &Url,
username: &str,
repo_filter: RepoType,
excludes: &ExcludeMatcher,
progress: Option<&ProgressBar>,
) -> Result<Vec<String>> {
let endpoint = format!("users/{}/repos", username);
let url = api_url.join(&endpoint)?;
fetch_paginated_repos(client, token, url, repo_filter, excludes, progress).await
}
async fn fetch_org_repos(
client: &reqwest::Client,
token: Option<&str>,
api_url: &Url,
org: &str,
repo_filter: RepoType,
excludes: &ExcludeMatcher,
progress: Option<&ProgressBar>,
) -> Result<Vec<String>> {
let endpoint = format!("orgs/{}/repos", org);
let url = api_url.join(&endpoint)?;
fetch_paginated_repos(client, token, url, repo_filter, excludes, progress).await
}
async fn fetch_authenticated_orgs(
client: &reqwest::Client,
token: Option<&str>,
api_url: &Url,
) -> Result<Vec<String>> {
let Some(token) = token else {
return Err(anyhow!("KF_GITEA_TOKEN must be set to enumerate all organizations"));
};
let url = api_url.join("user/orgs")?;
let resp = client
.get(url.clone())
.header("User-Agent", GLOBAL_USER_AGENT.as_str())
.header("Authorization", format!("token {token}"))
.send()
.await?;
match resp.status() {
StatusCode::OK => {}
StatusCode::NOT_FOUND => {
warn!("Gitea endpoint {} returned 404", url);
return Ok(Vec::new());
}
status => {
return Err(anyhow!(
"Failed to enumerate organizations from {} (status {status})",
url
));
}
}
let orgs: Vec<GiteaOrganization> = resp.json().await?;
Ok(orgs.into_iter().map(|org| org.username).collect())
}
pub async fn enumerate_repo_urls(
specifiers: &RepoSpecifiers,
api_url: Url,
ignore_certs: bool,
mut progress: Option<&mut ProgressBar>,
) -> Result<Vec<String>> {
let excludes = build_exclude_matcher(&specifiers.exclude_repos);
let client = reqwest::Client::builder()
.timeout(Duration::from_secs(30))
.danger_accept_invalid_certs(ignore_certs)
.build()?;
let token = env::var("KF_GITEA_TOKEN").ok().filter(|t| !t.is_empty());
let mut repos = Vec::new();
let mut seen = HashSet::new();
for user in &specifiers.user {
if let Some(pb) = progress.as_mut() {
pb.set_message(format!("Enumerating Gitea user {user}"));
}
match fetch_user_repos(
&client,
token.as_deref(),
&api_url,
user,
specifiers.repo_filter,
&excludes,
progress.as_deref(),
)
.await
{
Ok(mut urls) => {
for url in urls.drain(..) {
if seen.insert(url.clone()) {
repos.push(url);
}
}
}
Err(err) => {
warn!("Failed to enumerate Gitea repositories for user {user}: {err}");
}
}
}
let mut organizations = specifiers.organization.clone();
if specifiers.all_organizations {
match fetch_authenticated_orgs(&client, token.as_deref(), &api_url).await {
Ok(mut orgs) => organizations.append(&mut orgs),
Err(err) => warn!("Failed to enumerate Gitea organizations: {err}"),
}
}
organizations.sort();
organizations.dedup();
for org in organizations {
if let Some(pb) = progress.as_mut() {
pb.set_message(format!("Enumerating Gitea organization {org}"));
}
match fetch_org_repos(
&client,
token.as_deref(),
&api_url,
&org,
specifiers.repo_filter,
&excludes,
progress.as_deref(),
)
.await
{
Ok(mut urls) => {
for url in urls.drain(..) {
if seen.insert(url.clone()) {
repos.push(url);
}
}
}
Err(err) => {
warn!("Failed to enumerate Gitea repositories for organization {org}: {err}");
}
}
}
repos.sort();
repos.dedup();
Ok(repos)
}
pub async fn list_repositories(
api_url: Url,
ignore_certs: bool,
progress_enabled: bool,
users: &[String],
orgs: &[String],
all_orgs: bool,
exclude_repos: &[String],
repo_filter: RepoType,
) -> Result<()> {
let mut progress = if progress_enabled {
let style = ProgressStyle::with_template("{spinner} {msg} [{elapsed_precise}]")
.expect("progress bar style template should compile");
let pb = ProgressBar::new_spinner().with_style(style).with_message("Fetching repositories");
pb.enable_steady_tick(Duration::from_millis(500));
pb
} else {
ProgressBar::hidden()
};
let specifiers = RepoSpecifiers {
user: users.to_vec(),
organization: orgs.to_vec(),
all_organizations: all_orgs,
repo_filter,
exclude_repos: exclude_repos.to_vec(),
};
let urls = enumerate_repo_urls(&specifiers, api_url, ignore_certs, Some(&mut progress)).await?;
for url in urls {
println!("{}", url);
}
progress.finish_and_clear();
Ok(())
}
fn parse_repo(repo_url: &GitUrl) -> Option<(String, String, String)> {
let url = Url::parse(repo_url.as_str()).ok()?;
let host = url.host_str()?.to_string();
let mut segments = url.path_segments()?;
let owner = segments.next()?.to_string();
let mut repo = segments.next()?.to_string();
if let Some(stripped) = repo.strip_suffix(".git") {
repo = stripped.to_string();
}
Some((host, owner, repo))
}
pub fn wiki_url(repo_url: &GitUrl) -> Option<GitUrl> {
let (host, owner, repo) = parse_repo(repo_url)?;
let url = format!("https://{host}/{owner}/{repo}.wiki.git");
GitUrl::from_str(&url).ok()
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn parse_excluded_repo_variants() {
assert_eq!(parse_excluded_repo("Owner/Repo").as_deref(), Some("owner/repo"));
assert_eq!(
parse_excluded_repo("https://gitea.example.com/Owner/Repo.git").as_deref(),
Some("owner/repo")
);
assert_eq!(
parse_excluded_repo("ssh://git@example.com:3000/Owner/Repo.git").as_deref(),
Some("owner/repo")
);
}
#[test]
fn normalize_repo_identifier_handles_git_suffix() {
assert_eq!(normalize_repo_identifier("owner/repo.git"), Some("owner/repo".into()));
}
}