forked from mirrors/kingfisher
feat(gitea): add --clone-url-base flag for clone URL rewriting
Some checks are pending
ClusterFuzzLite PR fuzzing / PR (address) (pull_request) Waiting to run
CI Pull Request / Linux x64 (pull_request) Waiting to run
CI Pull Request / Linux arm64 (pull_request) Waiting to run
CI Pull Request / macOS arm64 (pull_request) Waiting to run
CI Pull Request / Windows arm64 (pull_request) Waiting to run
CI Pull Request / Windows x64 (pull_request) Waiting to run
Some checks are pending
ClusterFuzzLite PR fuzzing / PR (address) (pull_request) Waiting to run
CI Pull Request / Linux x64 (pull_request) Waiting to run
CI Pull Request / Linux arm64 (pull_request) Waiting to run
CI Pull Request / macOS arm64 (pull_request) Waiting to run
CI Pull Request / Windows arm64 (pull_request) Waiting to run
CI Pull Request / Windows x64 (pull_request) Waiting to run
When scanning a self-hosted Gitea/Forgejo instance, the API may be
reachable at a different hostname than the git clone endpoint (e.g.,
internal API vs. public clone URL behind a reverse proxy). The
--clone-url-base flag rewrites the scheme, host, and port of clone
URLs returned by the API, preserving the path.
Example:
kingfisher scan gitea \
--api-url https://forge.internal.example.com/api/v1/ \
--clone-url-base https://forge.internal.example.com/ \
--user eblume
This avoids routing clone traffic through an external proxy when the
API and git endpoints share the same internal host but the instance's
ROOT_URL points to the public endpoint.
Includes unit tests for the URL rewriting function and an integration
test using wiremock to verify the full enumeration path.
This commit is contained in:
parent
1d37d2983c
commit
c24dc0dc27
9 changed files with 150 additions and 3 deletions
|
|
@ -182,6 +182,10 @@ pub struct InputSpecifierArgs {
|
|||
)]
|
||||
pub gitea_api_url: Url,
|
||||
|
||||
/// Override base URL for cloning Gitea repositories
|
||||
#[arg(long, value_hint = ValueHint::Url, hide = true)]
|
||||
pub gitea_clone_url_base: Option<Url>,
|
||||
|
||||
#[arg(long, default_value_t = GiteaRepoType::Source, hide = true)]
|
||||
pub gitea_repo_type: GiteaRepoType,
|
||||
|
||||
|
|
|
|||
|
|
@ -284,7 +284,7 @@ pub enum ScanOperation {
|
|||
pub enum ListRepositoriesCommand {
|
||||
Github { api_url: Url, specifiers: GitHubRepoSpecifiers },
|
||||
Gitlab { api_url: Url, specifiers: GitLabRepoSpecifiers },
|
||||
Gitea { api_url: Url, specifiers: GiteaRepoSpecifiers },
|
||||
Gitea { api_url: Url, clone_url_base: Option<Url>, specifiers: GiteaRepoSpecifiers },
|
||||
Bitbucket { api_url: Url, specifiers: BitbucketRepoSpecifiers },
|
||||
Azure { base_url: Url, specifiers: AzureRepoSpecifiers },
|
||||
Huggingface { specifiers: HuggingFaceRepoSpecifiers },
|
||||
|
|
@ -396,6 +396,7 @@ impl ScanCommandArgs {
|
|||
if args.list_only {
|
||||
Some(ListRepositoriesCommand::Gitea {
|
||||
api_url: args.api_url,
|
||||
clone_url_base: args.clone_url_base,
|
||||
specifiers: args.specifiers,
|
||||
})
|
||||
} else {
|
||||
|
|
@ -408,6 +409,8 @@ impl ScanCommandArgs {
|
|||
args.specifiers.all_organizations;
|
||||
scan_args.input_specifier_args.gitea_repo_type = args.specifiers.repo_type;
|
||||
scan_args.input_specifier_args.gitea_api_url = args.api_url;
|
||||
scan_args.input_specifier_args.gitea_clone_url_base =
|
||||
args.clone_url_base;
|
||||
None
|
||||
}
|
||||
}
|
||||
|
|
@ -741,6 +744,15 @@ pub struct GiteaScanArgs {
|
|||
value_hint = ValueHint::Url
|
||||
)]
|
||||
pub api_url: Url,
|
||||
|
||||
/// Override the base URL used for cloning repositories.
|
||||
///
|
||||
/// By default, clone URLs returned by the Gitea/Forgejo API are used as-is.
|
||||
/// When the API is reachable at a different hostname than the git clone
|
||||
/// endpoint (e.g., internal API vs. public clone URL), use this flag to
|
||||
/// rewrite the scheme, host, and port of clone URLs.
|
||||
#[arg(long = "clone-url-base", value_hint = ValueHint::Url)]
|
||||
pub clone_url_base: Option<Url>,
|
||||
}
|
||||
|
||||
#[derive(Args, Debug, Clone)]
|
||||
|
|
|
|||
|
|
@ -919,6 +919,7 @@ pub(crate) fn create_minimal_scan_args() -> crate::cli::commands::scan::ScanArgs
|
|||
gitea_exclude: Vec::new(),
|
||||
all_gitea_organizations: false,
|
||||
gitea_api_url: Url::parse("https://gitea.com/api/v1/").unwrap(),
|
||||
gitea_clone_url_base: None,
|
||||
gitea_repo_type: GiteaRepoType::Source,
|
||||
bitbucket_user: Vec::new(),
|
||||
bitbucket_workspace: Vec::new(),
|
||||
|
|
|
|||
45
src/gitea.rs
45
src/gitea.rs
|
|
@ -212,6 +212,7 @@ async fn fetch_authenticated_orgs(
|
|||
pub async fn enumerate_repo_urls(
|
||||
specifiers: &RepoSpecifiers,
|
||||
api_url: Url,
|
||||
clone_url_base: Option<&Url>,
|
||||
ignore_certs: bool,
|
||||
mut progress: Option<&mut ProgressBar>,
|
||||
) -> Result<Vec<String>> {
|
||||
|
|
@ -291,6 +292,14 @@ pub async fn enumerate_repo_urls(
|
|||
}
|
||||
}
|
||||
|
||||
// Rewrite clone URLs if a custom base was provided.
|
||||
if let Some(base) = clone_url_base {
|
||||
repos = repos
|
||||
.into_iter()
|
||||
.map(|raw| rewrite_clone_url(&raw, base).unwrap_or(raw))
|
||||
.collect();
|
||||
}
|
||||
|
||||
repos.sort();
|
||||
repos.dedup();
|
||||
Ok(repos)
|
||||
|
|
@ -298,6 +307,7 @@ pub async fn enumerate_repo_urls(
|
|||
|
||||
pub async fn list_repositories(
|
||||
api_url: Url,
|
||||
clone_url_base: Option<&Url>,
|
||||
ignore_certs: bool,
|
||||
progress_enabled: bool,
|
||||
users: &[String],
|
||||
|
|
@ -324,7 +334,7 @@ pub async fn list_repositories(
|
|||
exclude_repos: exclude_repos.to_vec(),
|
||||
};
|
||||
|
||||
let urls = enumerate_repo_urls(&specifiers, api_url, ignore_certs, Some(&mut progress)).await?;
|
||||
let urls = enumerate_repo_urls(&specifiers, api_url, clone_url_base, ignore_certs, Some(&mut progress)).await?;
|
||||
for url in urls {
|
||||
println!("{}", url);
|
||||
}
|
||||
|
|
@ -332,6 +342,15 @@ pub async fn list_repositories(
|
|||
Ok(())
|
||||
}
|
||||
|
||||
/// Rewrite a clone URL to use a different base (scheme, host, port), preserving the path.
|
||||
fn rewrite_clone_url(raw: &str, base: &Url) -> Option<String> {
|
||||
let mut parsed = Url::parse(raw).ok()?;
|
||||
parsed.set_scheme(base.scheme()).ok()?;
|
||||
parsed.set_host(base.host_str()).ok()?;
|
||||
parsed.set_port(base.port()).ok()?;
|
||||
Some(parsed.to_string())
|
||||
}
|
||||
|
||||
fn parse_repo(repo_url: &GitUrl) -> Option<(String, String, String)> {
|
||||
let url = Url::parse(repo_url.as_str()).ok()?;
|
||||
let host = url.host_str()?.to_string();
|
||||
|
|
@ -371,4 +390,28 @@ mod tests {
|
|||
fn normalize_repo_identifier_handles_git_suffix() {
|
||||
assert_eq!(normalize_repo_identifier("owner/repo.git"), Some("owner/repo".into()));
|
||||
}
|
||||
|
||||
/// A base with a different hostname replaces the host and keeps the path.
#[test]
fn rewrite_clone_url_changes_host() {
    let base = Url::parse("https://forge.internal.example.com/").unwrap();
    let rewritten = rewrite_clone_url("https://forge.public.example.com/owner/repo.git", &base);
    assert_eq!(
        rewritten.as_deref(),
        Some("https://forge.internal.example.com/owner/repo.git")
    );
}
|
||||
|
||||
/// A base with an explicit port adds that port to the rewritten URL.
#[test]
fn rewrite_clone_url_changes_port() {
    let base = Url::parse("https://forge.example.com:3000/").unwrap();
    let rewritten = rewrite_clone_url("https://forge.example.com/owner/repo.git", &base);
    assert_eq!(
        rewritten.as_deref(),
        Some("https://forge.example.com:3000/owner/repo.git")
    );
}
|
||||
|
||||
/// Input that does not parse as a URL yields `None` rather than a rewrite.
#[test]
fn rewrite_clone_url_returns_none_for_invalid_url() {
    let base = Url::parse("https://forge.example.com/").unwrap();
    let rewritten = rewrite_clone_url("not-a-url", &base);
    assert!(rewritten.is_none());
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -368,9 +368,10 @@ async fn async_main(args: CommandLineArgs) -> Result<()> {
|
|||
)
|
||||
.await?;
|
||||
}
|
||||
ListRepositoriesCommand::Gitea { api_url, specifiers } => {
|
||||
ListRepositoriesCommand::Gitea { api_url, clone_url_base, specifiers } => {
|
||||
gitea::list_repositories(
|
||||
api_url,
|
||||
clone_url_base.as_ref(),
|
||||
global_args.ignore_certs,
|
||||
global_args.use_progress(),
|
||||
&specifiers.user,
|
||||
|
|
@ -506,6 +507,7 @@ fn create_default_scan_args() -> cli::commands::scan::ScanArgs {
|
|||
gitea_exclude: Vec::new(),
|
||||
all_gitea_organizations: false,
|
||||
gitea_api_url: Url::parse("https://gitea.com/api/v1/").unwrap(),
|
||||
gitea_clone_url_base: None,
|
||||
gitea_repo_type: GiteaRepoType::Source,
|
||||
|
||||
bitbucket_user: Vec::new(),
|
||||
|
|
|
|||
|
|
@ -1743,6 +1743,7 @@ mod tests {
|
|||
gitea_exclude: Vec::new(),
|
||||
all_gitea_organizations: false,
|
||||
gitea_api_url: Url::parse("https://gitea.com/api/v1/").unwrap(),
|
||||
gitea_clone_url_base: None,
|
||||
gitea_repo_type: GiteaRepoType::Source,
|
||||
bitbucket_user: Vec::new(),
|
||||
bitbucket_workspace: Vec::new(),
|
||||
|
|
|
|||
|
|
@ -119,6 +119,7 @@ mod tests {
|
|||
gitea_exclude: Vec::new(),
|
||||
all_gitea_organizations: false,
|
||||
gitea_api_url: Url::parse("https://gitea.com/api/v1/").unwrap(),
|
||||
gitea_clone_url_base: None,
|
||||
gitea_repo_type: GiteaRepoType::Source,
|
||||
|
||||
// Bitbucket
|
||||
|
|
|
|||
|
|
@ -423,9 +423,11 @@ pub async fn enumerate_gitea_repos(
|
|||
|
||||
let mut num_found: u64 = 0;
|
||||
let api_url = args.input_specifier_args.gitea_api_url.clone();
|
||||
let clone_url_base = args.input_specifier_args.gitea_clone_url_base.as_ref();
|
||||
let repo_strings = gitea::enumerate_repo_urls(
|
||||
&repo_specifiers,
|
||||
api_url,
|
||||
clone_url_base,
|
||||
global_args.ignore_certs,
|
||||
Some(&mut progress),
|
||||
)
|
||||
|
|
|
|||
81
tests/int_gitea_clone_url_base.rs
Normal file
81
tests/int_gitea_clone_url_base.rs
Normal file
|
|
@ -0,0 +1,81 @@
|
|||
// tests/int_gitea_clone_url_base.rs
|
||||
//
|
||||
// Integration test: verify that --clone-url-base rewrites clone URLs
|
||||
// returned by the Gitea API during repository enumeration.
|
||||
|
||||
use anyhow::Result;
|
||||
use kingfisher::gitea::{self, RepoSpecifiers, RepoType};
|
||||
use url::Url;
|
||||
use wiremock::{
|
||||
matchers::{method, path, query_param},
|
||||
Mock, MockServer, ResponseTemplate,
|
||||
};
|
||||
|
||||
/// Mock a Gitea API that returns repos with clone URLs on one host,
|
||||
/// then verify that enumerate_repo_urls rewrites them to a different host.
|
||||
#[tokio::test]
|
||||
async fn clone_url_base_rewrites_enumerated_urls() -> Result<()> {
|
||||
let mock_server = MockServer::start().await;
|
||||
|
||||
let public_host = "https://forge.public.example.com";
|
||||
let repo_json = serde_json::json!([{
|
||||
"full_name": "eblume/kingfisher",
|
||||
"clone_url": format!("{public_host}/eblume/kingfisher.git"),
|
||||
"fork": false
|
||||
}]);
|
||||
|
||||
// Page 1: return one repo.
|
||||
Mock::given(method("GET"))
|
||||
.and(path("/api/v1/users/eblume/repos"))
|
||||
.and(query_param("page", "1"))
|
||||
.respond_with(ResponseTemplate::new(200).set_body_json(&repo_json))
|
||||
.mount(&mock_server)
|
||||
.await;
|
||||
|
||||
// Page 2: return empty array to terminate pagination.
|
||||
Mock::given(method("GET"))
|
||||
.and(path("/api/v1/users/eblume/repos"))
|
||||
.and(query_param("page", "2"))
|
||||
.respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!([])))
|
||||
.mount(&mock_server)
|
||||
.await;
|
||||
|
||||
let api_url = Url::parse(&format!("{}/api/v1/", mock_server.uri()))?;
|
||||
let clone_base = Url::parse("https://forge.internal.example.com/")?;
|
||||
|
||||
let specifiers = RepoSpecifiers {
|
||||
user: vec!["eblume".into()],
|
||||
organization: vec![],
|
||||
all_organizations: false,
|
||||
repo_filter: RepoType::All,
|
||||
exclude_repos: vec![],
|
||||
};
|
||||
|
||||
// Call WITH clone_url_base — URLs should be rewritten.
|
||||
let urls = gitea::enumerate_repo_urls(
|
||||
&specifiers,
|
||||
api_url.clone(),
|
||||
Some(&clone_base),
|
||||
false,
|
||||
None,
|
||||
)
|
||||
.await?;
|
||||
|
||||
assert_eq!(urls.len(), 1);
|
||||
assert_eq!(
|
||||
urls[0],
|
||||
"https://forge.internal.example.com/eblume/kingfisher.git"
|
||||
);
|
||||
|
||||
// Call WITHOUT clone_url_base — URLs should be unchanged.
|
||||
let urls_no_rewrite =
|
||||
gitea::enumerate_repo_urls(&specifiers, api_url, None, false, None).await?;
|
||||
|
||||
assert_eq!(urls_no_rewrite.len(), 1);
|
||||
assert_eq!(
|
||||
urls_no_rewrite[0],
|
||||
format!("{public_host}/eblume/kingfisher.git")
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue