Updated kingfisher scan to accept Git repository URLs as positional targets (for example kingfisher scan github.com/org/repo or kingfisher scan https://gitlab.com/group/project.git) without requiring --git-url.

This commit is contained in:
Mick Grove 2026-02-26 23:14:18 -07:00
commit 0ae4e8445c
25 changed files with 333 additions and 87 deletions

View file

@ -31,8 +31,8 @@ pub struct InputSpecifierArgs {
#[arg(num_args = 0.., value_hint = ValueHint::AnyPath)]
pub path_inputs: Vec<PathBuf>,
/// Clone and scan the Git repository at the given URL
#[arg(long, value_hint = ValueHint::Url)]
/// Deprecated: clone and scan a Git repository URL. Prefer positional targets: `kingfisher scan github.com/org/repo`
#[arg(long = "git-url", value_hint = ValueHint::Url)]
pub git_url: Vec<GitUrl>,
/// Parent directory for cloned Git repositories and scan artifacts
@ -421,7 +421,14 @@ impl InputSpecifierArgs {
}
/// Emit deprecation warnings for legacy top-level provider flags.
pub fn emit_deprecated_warnings(&self) {
pub fn emit_deprecated_warnings(&self, used_legacy_git_url_flag: bool) {
if used_legacy_git_url_flag {
warn_deprecated_provider(
"Git URL",
"Passing repository URLs with `--git-url` is deprecated. Pass the URL as a positional scan target instead, e.g. `kingfisher scan github.com/org/repo`.",
);
}
if self.using_legacy_github_flags() {
warn_deprecated_provider(
"GitHub",

View file

@ -1,6 +1,10 @@
use anyhow::bail;
use clap::{Args, Subcommand, ValueEnum, ValueHint};
use std::path::{Path, PathBuf};
use std::{
net::IpAddr,
path::{Path, PathBuf},
str::FromStr,
};
use strum::Display;
use tracing::debug;
use url::Url;
@ -17,6 +21,7 @@ use crate::{
inputs::{ContentFilteringArgs, InputSpecifierArgs},
output::{OutputArgs, ReportOutputFormat},
rules::RuleSpecifierArgs,
view,
},
global::RAM_GB,
},
@ -202,6 +207,11 @@ pub struct ScanArgs {
/// Disable rule-level `ignore_if_contains` filtering for pattern requirements
#[arg(global = true, long = "no-ignore-if-contains", default_value_t = false)]
pub no_ignore_if_contains: bool,
#[arg(skip)]
pub view_report_port: u16,
#[arg(skip)]
pub view_report_address: String,
}
/// Confidence levels for findings
@ -232,6 +242,24 @@ pub struct ScanCommandArgs {
#[arg(global = true, long = "view-report", default_value_t = false)]
pub view_report: bool,
/// Port for the report viewer when using --view-report (default 7890)
#[arg(
global = true,
long = "view-report-port",
default_value_t = view::DEFAULT_PORT,
value_name = "PORT"
)]
pub view_report_port: u16,
/// Bind address for the report viewer when using --view-report (default 127.0.0.1). Use 0.0.0.0 to allow access from Docker or other hosts.
#[arg(
global = true,
long = "view-report-address",
default_value = view::DEFAULT_ADDRESS,
value_name = "ADDRESS"
)]
pub view_report_address: String,
#[command(subcommand)]
pub provider: Option<ScanInputCommand>,
}
@ -253,11 +281,34 @@ pub enum ListRepositoriesCommand {
}
impl ScanCommandArgs {
/// Reclassify positional targets that look like Git repository URLs.
///
/// Each positional input is kept as a path when it is the `-` placeholder or
/// when it exists on disk. Every other input is offered to
/// `parse_git_url_target`; on success it is appended to `git_url`, otherwise
/// it stays in `path_inputs`. Relative order is preserved in both lists.
fn infer_positional_git_urls(&mut self) {
    let inputs = &mut self.scan_args.input_specifier_args;

    // Take ownership of the current targets so the list can be rebuilt in place.
    let candidates = std::mem::take(&mut inputs.path_inputs);

    for candidate in candidates {
        // `-` and anything that exists locally are never treated as URLs.
        let is_local = candidate.as_path() == Path::new("-") || candidate.exists();
        let inferred = if is_local { None } else { parse_git_url_target(&candidate) };

        match inferred {
            Some(url) => inputs.git_url.push(url),
            None => inputs.path_inputs.push(candidate),
        }
    }
}
/// Convert CLI arguments into a scan or repository-listing operation.
pub fn into_operation(mut self) -> anyhow::Result<ScanOperation> {
let mut used_provider_subcommand = false;
self.scan_args.view_report = self.view_report;
self.scan_args.view_report_port = self.view_report_port;
self.scan_args.view_report_address = self.view_report_address.clone();
if let Some(provider) = self.provider.take() {
used_provider_subcommand = true;
@ -466,9 +517,12 @@ impl ScanCommandArgs {
}
}
let used_legacy_git_url_flag = !self.scan_args.input_specifier_args.git_url.is_empty();
self.infer_positional_git_urls();
if !self.scan_args.input_specifier_args.has_any_input() {
bail!(
"Specify a path, --git-url, or use a provider subcommand such as 'kingfisher scan github'"
"Specify a path or Git URL (for example: 'kingfisher scan github.com/org/repo'), or use a provider subcommand such as 'kingfisher scan github'"
);
}
@ -483,7 +537,7 @@ impl ScanCommandArgs {
}
if !used_provider_subcommand {
self.scan_args.input_specifier_args.emit_deprecated_warnings();
self.scan_args.input_specifier_args.emit_deprecated_warnings(used_legacy_git_url_flag);
}
if self.scan_args.manage_baseline {
@ -503,6 +557,44 @@ impl ScanCommandArgs {
}
}
/// Try to interpret a positional scan target as a Git repository URL.
///
/// Returns `Some` when `path` is either accepted verbatim by
/// `GitUrl::from_str`, or has the scheme-less shape `host/owner/repo[/...]`
/// and parses successfully once `https://` is prepended. Returns `None` for
/// everything else so the caller can keep treating the value as a filesystem
/// path.
///
/// This function never touches the filesystem; callers are expected to have
/// already ruled out targets that exist locally.
fn parse_git_url_target(path: &Path) -> Option<GitUrl> {
    // Non-UTF-8 targets cannot be URLs.
    let raw = path.to_str()?.trim();

    // Backslashes never appear in the URL shapes handled here
    // (presumably they indicate a Windows-style path).
    if raw.is_empty() || raw == "-" || raw.contains('\\') {
        return None;
    }

    // Anything `GitUrl` already understands needs no guessing.
    if let Ok(url) = GitUrl::from_str(raw) {
        return Some(url);
    }

    // An explicit scheme that `GitUrl` rejected, or an obviously local path
    // prefix, must not be rewritten into an https URL.
    if raw.contains("://")
        || raw.starts_with('/')
        || raw.starts_with("./")
        || raw.starts_with("../")
        || raw.starts_with('~')
    {
        return None;
    }

    let (host, suffix) = raw.split_once('/')?;
    if !host_looks_like_remote(host) {
        return None;
    }

    // Require at least `owner/repo` after the host.
    let path_segments = suffix.split('/').filter(|segment| !segment.is_empty()).count();
    if path_segments < 2 {
        return None;
    }

    GitUrl::from_str(&format!("https://{raw}")).ok()
}

/// Decide whether the first segment of a scheme-less target plausibly names a remote host.
///
/// Accepts IP addresses, `localhost`, and dotted names, each optionally
/// followed by a numeric `:port`. Rejects empty names and names starting with
/// `.`, so hidden-directory paths such as `.github/workflows/ci.yml` are not
/// mistaken for hosts.
fn host_looks_like_remote(host: &str) -> bool {
    // Check the unmodified text first so bare IPv6 literals (which contain
    // colons) are not mangled by the port stripping below.
    if host.parse::<IpAddr>().is_ok() {
        return true;
    }

    // Strip a trailing `:port` only when it is a valid port number; this keeps
    // values such as `C:` intact so they continue to be rejected.
    let name = match host.rsplit_once(':') {
        Some((name, port)) if port.parse::<u16>().is_ok() => name,
        _ => host,
    };

    name == "localhost"
        || name.parse::<IpAddr>().is_ok()
        || (name.contains('.') && !name.starts_with('.'))
}
#[derive(Subcommand, Debug, Clone)]
pub enum ScanInputCommand {
/// Scan local files, directories, or Git repositories
@ -552,7 +644,7 @@ pub struct FilesystemScanArgs {
#[arg(value_name = "PATH", value_hint = ValueHint::AnyPath)]
pub paths: Vec<PathBuf>,
/// Git repository URLs to clone and scan
/// Deprecated: Git repository URLs to clone and scan. Prefer positional targets.
#[arg(long = "git-url", value_hint = ValueHint::Url)]
pub git_url: Vec<GitUrl>,
}

View file

@ -22,6 +22,9 @@ pub const DEFAULT_PORT: u16 = 7890;
// Embedded viewer assets - force rebuild
static VIEWER_ASSETS: Dir<'_> = include_dir!("$CARGO_MANIFEST_DIR/docs/access-map-viewer");
/// Default bind address for the report viewer (localhost only for security).
pub const DEFAULT_ADDRESS: &str = "127.0.0.1";
/// View a Kingfisher access-map report locally.
#[derive(clap::Args, Debug)]
pub struct ViewArgs {
@ -33,6 +36,10 @@ pub struct ViewArgs {
#[arg(long, default_value_t = DEFAULT_PORT)]
pub port: u16,
/// Bind address for the report viewer (default 127.0.0.1). Use 0.0.0.0 to allow access from Docker or other hosts.
#[arg(long, default_value = DEFAULT_ADDRESS, value_name = "ADDRESS")]
pub address: String,
#[arg(skip)]
pub open_browser: bool,
@ -45,8 +52,10 @@ struct AppState {
report: Option<Vec<u8>>,
}
pub fn ensure_port_available(port: u16) -> Result<()> {
StdTcpListener::bind(("127.0.0.1", port)).map_err(|err| match err.kind() {
pub fn ensure_port_available(port: u16, address: &str) -> Result<()> {
let addr: std::net::IpAddr =
address.parse().context("Invalid bind address for report viewer")?;
StdTcpListener::bind((addr, port)).map_err(|err| match err.kind() {
std::io::ErrorKind::AddrInUse => anyhow!(
"Port {} is already in use. Re-run with --port <PORT> to choose a different port.",
port
@ -81,14 +90,15 @@ pub async fn run(args: ViewArgs) -> Result<()> {
None
};
let listener =
TcpListener::bind(("127.0.0.1", args.port)).await.map_err(|err| match err.kind() {
std::io::ErrorKind::AddrInUse => anyhow!(
"Port {} is already in use. Re-run with --port <PORT> to choose a different port.",
args.port
),
_ => err.into(),
})?;
let addr: std::net::IpAddr =
args.address.parse().context("Invalid bind address for report viewer")?;
let listener = TcpListener::bind((addr, args.port)).await.map_err(|err| match err.kind() {
std::io::ErrorKind::AddrInUse => anyhow!(
"Port {} is already in use. Re-run with --port <PORT> to choose a different port.",
args.port
),
_ => err.into(),
})?;
let address: SocketAddr =
listener.local_addr().context("Failed to read local listener address")?;