feat(gitea): add --clone-url-base flag for clone URL rewriting
Some checks failed
ClusterFuzzLite PR fuzzing / PR (address) (pull_request) Has been cancelled
CI Pull Request / Linux x64 (pull_request) Has been cancelled
CI Pull Request / Linux arm64 (pull_request) Has been cancelled
CI Pull Request / macOS arm64 (pull_request) Has been cancelled
CI Pull Request / Windows arm64 (pull_request) Has been cancelled
CI Pull Request / Windows x64 (pull_request) Has been cancelled

When scanning a self-hosted Gitea/Forgejo instance, the API may be
reachable at a different hostname than the git clone endpoint (e.g.,
internal API vs. public clone URL behind a reverse proxy). The
--clone-url-base flag rewrites the scheme, host, and port of clone
URLs returned by the API, preserving the path.

Example:
  kingfisher scan gitea \
    --api-url https://forge.internal.example.com/api/v1/ \
    --clone-url-base https://forge.internal.example.com/ \
    --user eblume

This avoids routing clone traffic through an external proxy when the
API and git endpoints share the same internal host but the instance's
ROOT_URL points to the public endpoint.

Includes unit tests for the URL rewriting function and an integration
test using wiremock to verify the full enumeration path.
This commit is contained in:
Erich Blume 2026-03-29 00:16:28 -07:00
commit 677c7a5d5f
9 changed files with 153 additions and 3 deletions

View file

@ -182,6 +182,10 @@ pub struct InputSpecifierArgs {
)]
pub gitea_api_url: Url,
/// Override base URL for cloning Gitea repositories
#[arg(long, value_hint = ValueHint::Url, hide = true)]
pub gitea_clone_url_base: Option<Url>,
#[arg(long, default_value_t = GiteaRepoType::Source, hide = true)]
pub gitea_repo_type: GiteaRepoType,

View file

@ -284,7 +284,7 @@ pub enum ScanOperation {
pub enum ListRepositoriesCommand {
Github { api_url: Url, specifiers: GitHubRepoSpecifiers },
Gitlab { api_url: Url, specifiers: GitLabRepoSpecifiers },
Gitea { api_url: Url, specifiers: GiteaRepoSpecifiers },
Gitea { api_url: Url, clone_url_base: Option<Url>, specifiers: GiteaRepoSpecifiers },
Bitbucket { api_url: Url, specifiers: BitbucketRepoSpecifiers },
Azure { base_url: Url, specifiers: AzureRepoSpecifiers },
Huggingface { specifiers: HuggingFaceRepoSpecifiers },
@ -396,6 +396,7 @@ impl ScanCommandArgs {
if args.list_only {
Some(ListRepositoriesCommand::Gitea {
api_url: args.api_url,
clone_url_base: args.clone_url_base,
specifiers: args.specifiers,
})
} else {
@ -408,6 +409,8 @@ impl ScanCommandArgs {
args.specifiers.all_organizations;
scan_args.input_specifier_args.gitea_repo_type = args.specifiers.repo_type;
scan_args.input_specifier_args.gitea_api_url = args.api_url;
scan_args.input_specifier_args.gitea_clone_url_base =
args.clone_url_base;
None
}
}
@ -741,6 +744,15 @@ pub struct GiteaScanArgs {
value_hint = ValueHint::Url
)]
pub api_url: Url,
/// Override the base URL used for cloning repositories.
///
/// By default, clone URLs returned by the Gitea/Forgejo API are used as-is.
/// When the API is reachable at a different hostname than the git clone
/// endpoint (e.g., internal API vs. public clone URL), use this flag to
/// rewrite the scheme, host, and port of clone URLs.
#[arg(long = "clone-url-base", value_hint = ValueHint::Url)]
pub clone_url_base: Option<Url>,
}
#[derive(Args, Debug, Clone)]

View file

@ -919,6 +919,7 @@ pub(crate) fn create_minimal_scan_args() -> crate::cli::commands::scan::ScanArgs
gitea_exclude: Vec::new(),
all_gitea_organizations: false,
gitea_api_url: Url::parse("https://gitea.com/api/v1/").unwrap(),
gitea_clone_url_base: None,
gitea_repo_type: GiteaRepoType::Source,
bitbucket_user: Vec::new(),
bitbucket_workspace: Vec::new(),

View file

@ -212,6 +212,7 @@ async fn fetch_authenticated_orgs(
pub async fn enumerate_repo_urls(
specifiers: &RepoSpecifiers,
api_url: Url,
clone_url_base: Option<&Url>,
ignore_certs: bool,
mut progress: Option<&mut ProgressBar>,
) -> Result<Vec<String>> {
@ -291,6 +292,14 @@ pub async fn enumerate_repo_urls(
}
}
// Rewrite clone URLs if a custom base was provided.
if let Some(base) = clone_url_base {
repos = repos
.into_iter()
.map(|raw| rewrite_clone_url(&raw, base).unwrap_or(raw))
.collect();
}
repos.sort();
repos.dedup();
Ok(repos)
@ -298,6 +307,7 @@ pub async fn enumerate_repo_urls(
pub async fn list_repositories(
api_url: Url,
clone_url_base: Option<&Url>,
ignore_certs: bool,
progress_enabled: bool,
users: &[String],
@ -324,7 +334,7 @@ pub async fn list_repositories(
exclude_repos: exclude_repos.to_vec(),
};
let urls = enumerate_repo_urls(&specifiers, api_url, ignore_certs, Some(&mut progress)).await?;
let urls = enumerate_repo_urls(&specifiers, api_url, clone_url_base, ignore_certs, Some(&mut progress)).await?;
for url in urls {
println!("{}", url);
}
@ -332,6 +342,15 @@ pub async fn list_repositories(
Ok(())
}
/// Returns `raw` with its scheme, host, and port replaced by those of `base`.
///
/// Only those three components are overwritten; every other component of
/// `raw` (the path included) is left as parsed. Nothing but the scheme, host,
/// and port of `base` is consulted.
///
/// Yields `None` when `raw` does not parse as a URL or when any of the three
/// replacements is rejected.
fn rewrite_clone_url(raw: &str, base: &Url) -> Option<String> {
    let mut target = match Url::parse(raw) {
        Ok(url) => url,
        Err(_) => return None,
    };

    // `&&` short-circuits, so the setters run in order and stop at the first
    // one that fails.
    let swapped = target.set_scheme(base.scheme()).is_ok()
        && target.set_host(base.host_str()).is_ok()
        && target.set_port(base.port()).is_ok();

    if swapped {
        Some(target.as_str().to_owned())
    } else {
        None
    }
}
fn parse_repo(repo_url: &GitUrl) -> Option<(String, String, String)> {
let url = Url::parse(repo_url.as_str()).ok()?;
let host = url.host_str()?.to_string();
@ -371,4 +390,28 @@ mod tests {
fn normalize_repo_identifier_handles_git_suffix() {
assert_eq!(normalize_repo_identifier("owner/repo.git"), Some("owner/repo".into()));
}
/// A base with a different hostname replaces the host and keeps the path.
#[test]
fn rewrite_clone_url_changes_host() {
    let base = Url::parse("https://forge.internal.example.com/").unwrap();
    let rewritten = rewrite_clone_url("https://forge.public.example.com/owner/repo.git", &base);
    assert_eq!(
        rewritten.as_deref(),
        Some("https://forge.internal.example.com/owner/repo.git")
    );
}
/// A base carrying an explicit port adds that port to the rewritten URL.
#[test]
fn rewrite_clone_url_changes_port() {
    let base = Url::parse("https://forge.example.com:3000/").unwrap();
    let rewritten = rewrite_clone_url("https://forge.example.com/owner/repo.git", &base);
    assert_eq!(
        rewritten.as_deref(),
        Some("https://forge.example.com:3000/owner/repo.git")
    );
}
/// Input that does not parse as a URL produces `None`.
#[test]
fn rewrite_clone_url_returns_none_for_invalid_url() {
    let base = Url::parse("https://forge.example.com/").unwrap();
    let rewritten = rewrite_clone_url("not-a-url", &base);
    assert!(rewritten.is_none());
}
}

View file

@ -368,9 +368,10 @@ async fn async_main(args: CommandLineArgs) -> Result<()> {
)
.await?;
}
ListRepositoriesCommand::Gitea { api_url, specifiers } => {
ListRepositoriesCommand::Gitea { api_url, clone_url_base, specifiers } => {
gitea::list_repositories(
api_url,
clone_url_base.as_ref(),
global_args.ignore_certs,
global_args.use_progress(),
&specifiers.user,
@ -506,6 +507,7 @@ fn create_default_scan_args() -> cli::commands::scan::ScanArgs {
gitea_exclude: Vec::new(),
all_gitea_organizations: false,
gitea_api_url: Url::parse("https://gitea.com/api/v1/").unwrap(),
gitea_clone_url_base: None,
gitea_repo_type: GiteaRepoType::Source,
bitbucket_user: Vec::new(),

View file

@ -1743,6 +1743,7 @@ mod tests {
gitea_exclude: Vec::new(),
all_gitea_organizations: false,
gitea_api_url: Url::parse("https://gitea.com/api/v1/").unwrap(),
gitea_clone_url_base: None,
gitea_repo_type: GiteaRepoType::Source,
bitbucket_user: Vec::new(),
bitbucket_workspace: Vec::new(),

View file

@ -119,6 +119,7 @@ mod tests {
gitea_exclude: Vec::new(),
all_gitea_organizations: false,
gitea_api_url: Url::parse("https://gitea.com/api/v1/").unwrap(),
gitea_clone_url_base: None,
gitea_repo_type: GiteaRepoType::Source,
// Bitbucket

View file

@ -423,9 +423,11 @@ pub async fn enumerate_gitea_repos(
let mut num_found: u64 = 0;
let api_url = args.input_specifier_args.gitea_api_url.clone();
let clone_url_base = args.input_specifier_args.gitea_clone_url_base.as_ref();
let repo_strings = gitea::enumerate_repo_urls(
&repo_specifiers,
api_url,
clone_url_base,
global_args.ignore_certs,
Some(&mut progress),
)

View file

@ -0,0 +1,84 @@
// tests/int_gitea_clone_url_base.rs
//
// Integration test: verify that --clone-url-base rewrites clone URLs
// returned by the Gitea API during repository enumeration.
//
// Uses wiremock to mock the Gitea API and assert_cmd to exercise the full
// CLI path: argument parsing → API enumeration → URL rewriting → output.
use assert_cmd::Command;
use predicates::str::contains;
use wiremock::{
matchers::{method, path, query_param},
Mock, MockServer, ResponseTemplate,
};
/// Exercises `kingfisher scan gitea --list-only` against a wiremock-backed
/// Gitea API twice, once with `--clone-url-base` and once without, and checks
/// the clone URL printed on stdout in each case.
#[tokio::test]
async fn clone_url_base_rewrites_listed_urls() {
    let public_host = "https://forge.public.example.com";
    let mock_server = MockServer::start().await;

    // Page 1 serves a single repository; page 2 serves an empty array to
    // terminate pagination.
    let pages = [
        (
            "1",
            serde_json::json!([{
                "full_name": "eblume/kingfisher",
                "clone_url": format!("{public_host}/eblume/kingfisher.git"),
                "fork": false
            }]),
        ),
        ("2", serde_json::json!([])),
    ];
    for (page, body) in pages {
        Mock::given(method("GET"))
            .and(path("/api/v1/users/eblume/repos"))
            .and(query_param("page", page))
            .respond_with(ResponseTemplate::new(200).set_body_json(&body))
            .mount(&mock_server)
            .await;
    }

    let api_url = format!("{}/api/v1/", mock_server.uri());

    // Runs the listing command, inserting `--clone-url-base <base>` right
    // after `--api-url` when a base is supplied, and requires a zero exit.
    let list_repos = |clone_url_base: Option<&str>| {
        let mut cmd = Command::new(assert_cmd::cargo::cargo_bin!("kingfisher"));
        cmd.args(["scan", "gitea", "--api-url", api_url.as_str()]);
        if let Some(base) = clone_url_base {
            cmd.args(["--clone-url-base", base]);
        }
        cmd.args(["--user", "eblume", "--list-only", "--no-update-check", "--quiet"]);
        cmd.assert().success()
    };

    // WITH --clone-url-base: URLs should be rewritten.
    list_repos(Some("https://forge.internal.example.com/"))
        .stdout(contains("https://forge.internal.example.com/eblume/kingfisher.git"));

    // WITHOUT --clone-url-base: URLs should be unchanged.
    let unchanged = format!("{public_host}/eblume/kingfisher.git");
    list_repos(None).stdout(contains(&unchanged));
}