Gitea: add a clone-URL base override that rewrites the scheme, host, and port
of clone URLs returned by the Gitea/Forgejo API during repository enumeration.

NOTE(review): this patch text was restored from a copy whose line breaks and
indentation had been collapsed and whose generic arguments had been stripped
(restored here: `Option<Url>`, `Option<String>`, `Result<Vec<String>>`).
Hunk line counts match the @@ headers. Indentation, and the position of the
blank context line in the second src/main.rs hunk and in the
src/scanner/repos.rs hunk, are inferred -- verify against the target tree
(or apply with whitespace-tolerant options) before relying on it.

diff --git a/src/cli/commands/inputs.rs b/src/cli/commands/inputs.rs
index 95d3cbd..0fa2d61 100644
--- a/src/cli/commands/inputs.rs
+++ b/src/cli/commands/inputs.rs
@@ -182,6 +182,10 @@ pub struct InputSpecifierArgs {
     )]
     pub gitea_api_url: Url,
 
+    /// Override base URL for cloning Gitea repositories
+    #[arg(long, value_hint = ValueHint::Url, hide = true)]
+    pub gitea_clone_url_base: Option<Url>,
+
     #[arg(long, default_value_t = GiteaRepoType::Source, hide = true)]
     pub gitea_repo_type: GiteaRepoType,
 
diff --git a/src/cli/commands/scan.rs b/src/cli/commands/scan.rs
index c61fc93..ab76967 100644
--- a/src/cli/commands/scan.rs
+++ b/src/cli/commands/scan.rs
@@ -284,7 +284,7 @@ pub enum ScanOperation {
 pub enum ListRepositoriesCommand {
     Github { api_url: Url, specifiers: GitHubRepoSpecifiers },
     Gitlab { api_url: Url, specifiers: GitLabRepoSpecifiers },
-    Gitea { api_url: Url, specifiers: GiteaRepoSpecifiers },
+    Gitea { api_url: Url, clone_url_base: Option<Url>, specifiers: GiteaRepoSpecifiers },
     Bitbucket { api_url: Url, specifiers: BitbucketRepoSpecifiers },
     Azure { base_url: Url, specifiers: AzureRepoSpecifiers },
     Huggingface { specifiers: HuggingFaceRepoSpecifiers },
@@ -396,6 +396,7 @@ impl ScanCommandArgs {
                 if args.list_only {
                     Some(ListRepositoriesCommand::Gitea {
                         api_url: args.api_url,
+                        clone_url_base: args.clone_url_base,
                         specifiers: args.specifiers,
                     })
                 } else {
@@ -408,6 +409,8 @@ impl ScanCommandArgs {
                         args.specifiers.all_organizations;
                     scan_args.input_specifier_args.gitea_repo_type = args.specifiers.repo_type;
                     scan_args.input_specifier_args.gitea_api_url = args.api_url;
+                    scan_args.input_specifier_args.gitea_clone_url_base =
+                        args.clone_url_base;
                     None
                 }
             }
@@ -741,6 +744,15 @@ pub struct GiteaScanArgs {
         value_hint = ValueHint::Url
     )]
     pub api_url: Url,
+
+    /// Override the base URL used for cloning repositories.
+    ///
+    /// By default, clone URLs returned by the Gitea/Forgejo API are used as-is.
+    /// When the API is reachable at a different hostname than the git clone
+    /// endpoint (e.g., internal API vs. public clone URL), use this flag to
+    /// rewrite the scheme, host, and port of clone URLs.
+    #[arg(long = "clone-url-base", value_hint = ValueHint::Url)]
+    pub clone_url_base: Option<Url>,
 }
 
 #[derive(Args, Debug, Clone)]
diff --git a/src/direct_validate.rs b/src/direct_validate.rs
index 5d68d22..e8ca71e 100644
--- a/src/direct_validate.rs
+++ b/src/direct_validate.rs
@@ -919,6 +919,7 @@ pub(crate) fn create_minimal_scan_args() -> crate::cli::commands::scan::ScanArgs
             gitea_exclude: Vec::new(),
             all_gitea_organizations: false,
             gitea_api_url: Url::parse("https://gitea.com/api/v1/").unwrap(),
+            gitea_clone_url_base: None,
             gitea_repo_type: GiteaRepoType::Source,
             bitbucket_user: Vec::new(),
             bitbucket_workspace: Vec::new(),
diff --git a/src/gitea.rs b/src/gitea.rs
index d748685..898b813 100644
--- a/src/gitea.rs
+++ b/src/gitea.rs
@@ -212,6 +212,7 @@ async fn fetch_authenticated_orgs(
 pub async fn enumerate_repo_urls(
     specifiers: &RepoSpecifiers,
     api_url: Url,
+    clone_url_base: Option<&Url>,
     ignore_certs: bool,
     mut progress: Option<&mut ProgressBar>,
 ) -> Result<Vec<String>> {
@@ -291,6 +292,14 @@ pub async fn enumerate_repo_urls(
         }
     }
 
+    // Rewrite clone URLs if a custom base was provided.
+    if let Some(base) = clone_url_base {
+        repos = repos
+            .into_iter()
+            .map(|raw| rewrite_clone_url(&raw, base).unwrap_or(raw))
+            .collect();
+    }
+
     repos.sort();
     repos.dedup();
     Ok(repos)
@@ -298,6 +307,7 @@ pub async fn enumerate_repo_urls(
 
 pub async fn list_repositories(
     api_url: Url,
+    clone_url_base: Option<&Url>,
     ignore_certs: bool,
     progress_enabled: bool,
     users: &[String],
@@ -324,7 +334,7 @@ pub async fn list_repositories(
         exclude_repos: exclude_repos.to_vec(),
     };
 
-    let urls = enumerate_repo_urls(&specifiers, api_url, ignore_certs, Some(&mut progress)).await?;
+    let urls = enumerate_repo_urls(&specifiers, api_url, clone_url_base, ignore_certs, Some(&mut progress)).await?;
     for url in urls {
         println!("{}", url);
     }
@@ -332,6 +342,15 @@ pub async fn list_repositories(
     Ok(())
 }
 
+/// Rewrite a clone URL to use a different base (scheme, host, port), preserving the path.
+fn rewrite_clone_url(raw: &str, base: &Url) -> Option<String> {
+    let mut parsed = Url::parse(raw).ok()?;
+    parsed.set_scheme(base.scheme()).ok()?;
+    parsed.set_host(base.host_str()).ok()?;
+    parsed.set_port(base.port()).ok()?;
+    Some(parsed.to_string())
+}
+
 fn parse_repo(repo_url: &GitUrl) -> Option<(String, String, String)> {
     let url = Url::parse(repo_url.as_str()).ok()?;
     let host = url.host_str()?.to_string();
@@ -371,4 +390,28 @@ mod tests {
     fn normalize_repo_identifier_handles_git_suffix() {
         assert_eq!(normalize_repo_identifier("owner/repo.git"), Some("owner/repo".into()));
     }
+
+    #[test]
+    fn rewrite_clone_url_changes_host() {
+        let base = Url::parse("https://forge.internal.example.com/").unwrap();
+        assert_eq!(
+            rewrite_clone_url("https://forge.public.example.com/owner/repo.git", &base),
+            Some("https://forge.internal.example.com/owner/repo.git".to_string())
+        );
+    }
+
+    #[test]
+    fn rewrite_clone_url_changes_port() {
+        let base = Url::parse("https://forge.example.com:3000/").unwrap();
+        assert_eq!(
+            rewrite_clone_url("https://forge.example.com/owner/repo.git", &base),
+            Some("https://forge.example.com:3000/owner/repo.git".to_string())
+        );
+    }
+
+    #[test]
+    fn rewrite_clone_url_returns_none_for_invalid_url() {
+        let base = Url::parse("https://forge.example.com/").unwrap();
+        assert_eq!(rewrite_clone_url("not-a-url", &base), None);
+    }
 }
diff --git a/src/main.rs b/src/main.rs
index 769d8fe..e62010b 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -368,9 +368,10 @@ async fn async_main(args: CommandLineArgs) -> Result<()> {
                     )
                     .await?;
                 }
-                ListRepositoriesCommand::Gitea { api_url, specifiers } => {
+                ListRepositoriesCommand::Gitea { api_url, clone_url_base, specifiers } => {
                     gitea::list_repositories(
                         api_url,
+                        clone_url_base.as_ref(),
                         global_args.ignore_certs,
                         global_args.use_progress(),
                         &specifiers.user,
@@ -506,6 +507,7 @@ fn create_default_scan_args() -> cli::commands::scan::ScanArgs {
             gitea_exclude: Vec::new(),
             all_gitea_organizations: false,
             gitea_api_url: Url::parse("https://gitea.com/api/v1/").unwrap(),
+            gitea_clone_url_base: None,
             gitea_repo_type: GiteaRepoType::Source,
 
             bitbucket_user: Vec::new(),
diff --git a/src/reporter.rs b/src/reporter.rs
index b01c6df..8734450 100644
--- a/src/reporter.rs
+++ b/src/reporter.rs
@@ -1743,6 +1743,7 @@ mod tests {
             gitea_exclude: Vec::new(),
             all_gitea_organizations: false,
             gitea_api_url: Url::parse("https://gitea.com/api/v1/").unwrap(),
+            gitea_clone_url_base: None,
             gitea_repo_type: GiteaRepoType::Source,
             bitbucket_user: Vec::new(),
             bitbucket_workspace: Vec::new(),
diff --git a/src/reporter/json_format.rs b/src/reporter/json_format.rs
index c5b3500..06cf949 100644
--- a/src/reporter/json_format.rs
+++ b/src/reporter/json_format.rs
@@ -119,6 +119,7 @@ mod tests {
             gitea_exclude: Vec::new(),
             all_gitea_organizations: false,
             gitea_api_url: Url::parse("https://gitea.com/api/v1/").unwrap(),
+            gitea_clone_url_base: None,
             gitea_repo_type: GiteaRepoType::Source,
 
             // Bitbucket
diff --git a/src/scanner/repos.rs b/src/scanner/repos.rs
index 3b354b0..8468f43 100644
--- a/src/scanner/repos.rs
+++ b/src/scanner/repos.rs
@@ -423,9 +423,11 @@ pub async fn enumerate_gitea_repos(
 
     let mut num_found: u64 = 0;
     let api_url = args.input_specifier_args.gitea_api_url.clone();
+    let clone_url_base = args.input_specifier_args.gitea_clone_url_base.as_ref();
     let repo_strings = gitea::enumerate_repo_urls(
         &repo_specifiers,
         api_url,
+        clone_url_base,
         global_args.ignore_certs,
         Some(&mut progress),
     )
diff --git a/tests/int_gitea_clone_url_base.rs b/tests/int_gitea_clone_url_base.rs
new file mode 100644
index 0000000..6ae968e
--- /dev/null
+++ b/tests/int_gitea_clone_url_base.rs
@@ -0,0 +1,84 @@
+// tests/int_gitea_clone_url_base.rs
+//
+// Integration test: verify that --clone-url-base rewrites clone URLs
+// returned by the Gitea API during repository enumeration.
+//
+// Uses wiremock to mock the Gitea API and assert_cmd to exercise the full
+// CLI path: argument parsing → API enumeration → URL rewriting → output.
+
+use assert_cmd::Command;
+use predicates::str::contains;
+use wiremock::{
+    matchers::{method, path, query_param},
+    Mock, MockServer, ResponseTemplate,
+};
+
+/// Run `kingfisher scan gitea --list-only` against a mock Gitea API with and
+/// without --clone-url-base, verifying that clone URLs are rewritten.
+#[tokio::test]
+async fn clone_url_base_rewrites_listed_urls() {
+    let mock_server = MockServer::start().await;
+
+    let public_host = "https://forge.public.example.com";
+    let repo_json = serde_json::json!([{
+        "full_name": "eblume/kingfisher",
+        "clone_url": format!("{public_host}/eblume/kingfisher.git"),
+        "fork": false
+    }]);
+
+    // Page 1: return one repo.
+    Mock::given(method("GET"))
+        .and(path("/api/v1/users/eblume/repos"))
+        .and(query_param("page", "1"))
+        .respond_with(ResponseTemplate::new(200).set_body_json(&repo_json))
+        .mount(&mock_server)
+        .await;
+
+    // Page 2: return empty array to terminate pagination.
+    Mock::given(method("GET"))
+        .and(path("/api/v1/users/eblume/repos"))
+        .and(query_param("page", "2"))
+        .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([])))
+        .mount(&mock_server)
+        .await;
+
+    let api_url = format!("{}/api/v1/", mock_server.uri());
+
+    // WITH --clone-url-base: URLs should be rewritten.
+    Command::new(assert_cmd::cargo::cargo_bin!("kingfisher"))
+        .args([
+            "scan",
+            "gitea",
+            "--api-url",
+            &api_url,
+            "--clone-url-base",
+            "https://forge.internal.example.com/",
+            "--user",
+            "eblume",
+            "--list-only",
+            "--no-update-check",
+            "--quiet",
+        ])
+        .assert()
+        .success()
+        .stdout(contains("https://forge.internal.example.com/eblume/kingfisher.git"));
+
+    // WITHOUT --clone-url-base: URLs should be unchanged.
+    Command::new(assert_cmd::cargo::cargo_bin!("kingfisher"))
+        .args([
+            "scan",
+            "gitea",
+            "--api-url",
+            &api_url,
+            "--user",
+            "eblume",
+            "--list-only",
+            "--no-update-check",
+            "--quiet",
+        ])
+        .assert()
+        .success()
+        .stdout(contains(&format!(
+            "{public_host}/eblume/kingfisher.git"
+        )));
+}