changes in response to PR review

This commit is contained in:
Mick Grove 2026-02-28 12:16:08 -07:00
commit 4f2738b957
4 changed files with 75 additions and 41 deletions

View file

@ -9,8 +9,8 @@ All notable changes to this project will be documented in this file.
- Updated README/integration/usage/install/demo examples and CLI tests to use positional Git URL scanning syntax.
- Jira scanning: added `kingfisher scan jira --include-comments` and `--include-changelog` to scan per-issue comments and changelog entries, with paginated Jira comment fetching and ADF text normalization preserved for issue/comment content.
- Added `--turbo` mode: sets `--commit-metadata=false`, `--no-base64`, disables language detection, and disables tree-sitter parsing for maximum scan speed. Findings will omit Git commit context (author, date, commit hash) and will not include Base64-decoded secrets.
- SQLite database scanning: kingfisher now detects and extracts SQLite files (`.db`, `.sqlite`, `.sqlite3`, etc.), dumping each table as SQL text with named columns so secrets stored in database rows are scannable. Controlled by the existing `--extract-archives` flag.
- Python bytecode (.pyc) scanning: extracts string constants from compiled Python (`.pyc`, `.pyo`) files via marshal parsing so secrets embedded in bytecode are scannable. Controlled by `--extract-archives`.
- SQLite database scanning: kingfisher now detects and extracts SQLite files (`.db`, `.sqlite`, `.sqlite3`, etc.), dumping each table as SQL text with named columns so secrets stored in database rows are scannable. Extraction is enabled by default and can be disabled with `--no-extract-archives`.
- Python bytecode (.pyc) scanning: extracts string constants from compiled Python (`.pyc`, `.pyo`) files via marshal parsing so secrets embedded in bytecode are scannable. Extraction is enabled by default and can be disabled with `--no-extract-archives`.
- Performance: pipelined ODB enumeration — scanning now begins while blob OIDs are still being discovered, overlapping I/O with pattern matching.
- Performance: skip blobs smaller than 20 bytes during enumeration (too small to contain any secret).
- Performance: preserve pack-ascending blob order in the metadata path for better I/O locality when Rayon splits work.

View file

@ -281,6 +281,19 @@ fn jira_auth_header() -> Option<String> {
std::env::var("KF_JIRA_TOKEN").ok().map(|token| format!("Bearer {}", token))
}
/// Returns a copy of `jira_url` whose path is guaranteed to end with `/`.
///
/// A URL whose path already ends with a slash is returned unchanged (as a
/// clone). An empty path becomes `/`; any other path gets a single `/`
/// appended. Callers join relative endpoint paths onto the result so that a
/// deployment prefix such as `/jira` is kept rather than replaced.
fn jira_relative_base_url(jira_url: &Url) -> Url {
    let path = jira_url.path();
    if path.ends_with('/') {
        return jira_url.clone();
    }

    // Build the new path before cloning so the borrow of `jira_url` stays simple.
    let slashed = if path.is_empty() { String::from("/") } else { format!("{path}/") };

    let mut base_url = jira_url.clone();
    base_url.set_path(&slashed);
    base_url
}
fn normalize_issue(issue: &JiraIssue) -> Result<serde_json::Value> {
let mut issue_value = serde_json::to_value(issue)?;
flatten_adf_fields(&mut issue_value);
@ -340,11 +353,12 @@ pub async fn fetch_comments(
let client = build_http_client(ignore_certs)?;
let mut start_at = 0;
let mut all_comments = Vec::new();
let base_url = jira_relative_base_url(jira_url);
loop {
let url = jira_url
let url = base_url
.join(&format!(
"/rest/api/latest/issue/{issue_key}/comment?startAt={start_at}&maxResults={JIRA_COMMENTS_PAGE_SIZE}"
"rest/api/latest/issue/{issue_key}/comment?startAt={start_at}&maxResults={JIRA_COMMENTS_PAGE_SIZE}"
))
.context("Failed to construct Jira comments URL")?;
@ -789,4 +803,31 @@ mod tests {
assert_eq!(comments[0].pointer("/body"), Some(&json!("first")));
assert_eq!(comments[2].pointer("/body"), Some(&json!("third")));
}
/// Verifies that `fetch_comments` keeps a path prefix on the Jira base URL.
///
/// The mock only answers on `/jira/rest/api/latest/issue/TEST-1/comment`, and
/// the client is handed `<server>/jira` (no trailing slash), so the request
/// only matches if the `/jira` prefix survives URL construction.
#[tokio::test]
async fn fetch_comments_preserves_base_path() {
    let server = MockServer::start().await;
    let page_size = JIRA_COMMENTS_PAGE_SIZE.to_string();

    // A single page that already contains every comment (total == 1).
    let single_page = json!({
        "comments": [
            {"id": "1", "body": "first"}
        ],
        "startAt": 0,
        "maxResults": JIRA_COMMENTS_PAGE_SIZE,
        "total": 1
    });

    Mock::given(method("GET"))
        .and(path("/jira/rest/api/latest/issue/TEST-1/comment"))
        .and(query_param("startAt", "0"))
        .and(query_param("maxResults", &page_size))
        .respond_with(ResponseTemplate::new(200).set_body_json(single_page))
        .mount(&server)
        .await;

    let prefixed = format!("{}/jira", server.uri());
    let jira_url = Url::parse(&prefixed).expect("server URL");

    let comments =
        fetch_comments(&jira_url, "TEST-1", false).await.expect("comments should be fetched");

    assert_eq!(comments.len(), 1);
    assert_eq!(comments[0].pointer("/body"), Some(&json!("first")));
}
}

View file

@ -92,13 +92,11 @@ fn dump_table(
return Ok(out);
}
let columns_fragment = col_names
.iter()
.map(|c| format!("\"{}\"", c.replace('"', "\"\"")))
.collect::<Vec<_>>()
.join(",");
let columns_fragment =
col_names.iter().map(|c| sqlite_quoted_identifier(c)).collect::<Vec<_>>().join(",");
let query = format!("SELECT * FROM \"{}\"", table_name.replace('"', "\"\""));
let quoted_table_name = sqlite_quoted_identifier(table_name);
let query = format!("SELECT * FROM {quoted_table_name}");
let mut stmt = conn.prepare(&query)?;
let col_count = col_names.len();
@ -115,7 +113,7 @@ fn dump_table(
break;
}
write!(out, "INSERT INTO \"{table_name}\" ({columns_fragment}) VALUES (")?;
write!(out, "INSERT INTO {quoted_table_name} ({columns_fragment}) VALUES (")?;
for i in 0..col_count {
if i > 0 {
@ -132,7 +130,7 @@ fn dump_table(
}
fn column_names(conn: &Connection, table_name: &str) -> Result<Vec<String>> {
let query = format!("PRAGMA table_info(\"{}\")", table_name.replace('"', "\"\""));
let query = format!("PRAGMA table_info({})", sqlite_quoted_identifier(table_name));
let mut stmt = conn.prepare(&query)?;
let names = stmt
.query_map([], |row| {
@ -143,6 +141,10 @@ fn column_names(conn: &Connection, table_name: &str) -> Result<Vec<String>> {
Ok(names)
}
/// Wraps `identifier` in double quotes for use as an SQL identifier.
///
/// Every embedded `"` is doubled (`"` becomes `""`) so the name cannot
/// terminate the quoted identifier early. An empty input yields `""`.
fn sqlite_quoted_identifier(identifier: &str) -> String {
    // Two bytes for the surrounding quotes; grows further only if escaping is needed.
    let mut quoted = String::with_capacity(identifier.len() + 2);
    quoted.push('"');
    for ch in identifier.chars() {
        if ch == '"' {
            quoted.push('"');
        }
        quoted.push(ch);
    }
    quoted.push('"');
    quoted
}
fn write_value(out: &mut String, row: &rusqlite::Row<'_>, idx: usize) -> Result<()> {
use rusqlite::types::ValueRef;
match row.get_ref(idx)? {
@ -246,4 +248,23 @@ mod tests {
assert!(sql.contains("'it''s a test'"));
assert!(sql.contains("NULL"));
}
/// Checks that a table name containing a double quote is escaped in the dump.
///
/// Creates a table literally named `odd"name`, runs the extractor on the
/// database file, and asserts that the generated `INSERT` statement carries
/// the doubled-quote form of the table name, the quoted `val` column, and the
/// stored row value.
#[test]
fn escapes_quoted_table_names_in_generated_sql() {
    let db_file = NamedTempFile::new().unwrap();
    let db_path = db_file.path().to_path_buf();

    let setup_sql = "CREATE TABLE \"odd\"\"name\" (id INTEGER PRIMARY KEY, val TEXT);\nINSERT INTO \"odd\"\"name\" VALUES (1, 'secret');";
    // The connection stays open for the whole test, as in the original.
    let conn = Connection::open(&db_path).unwrap();
    conn.execute_batch(setup_sql).unwrap();

    let results = extract_sqlite_contents(&db_path).unwrap();
    let dumped = &results[0].1;
    let sql = String::from_utf8_lossy(dumped);

    assert!(sql.contains("INSERT INTO \"odd\"\"name\""));
    assert!(sql.contains("\"val\""));
    assert!(sql.contains("'secret'"));
}
}

View file

@ -96,10 +96,7 @@ fn positional_git_url_examples_parse() -> anyhow::Result<()> {
("github.com/kubernetes/kubernetes", "https://github.com/kubernetes/kubernetes"),
("https://github.com/org/repo", "https://github.com/org/repo"),
("gitlab.com/gitlab-org/gitlab", "https://gitlab.com/gitlab-org/gitlab"),
(
"https://gitlab.com/namespace/project.git",
"https://gitlab.com/namespace/project.git",
),
("https://gitlab.com/namespace/project.git", "https://gitlab.com/namespace/project.git"),
];
for (input, expected) in examples {
@ -150,28 +147,3 @@ fn turbo_mode_applies_speed_first_defaults() -> anyhow::Result<()> {
Ok(())
}
/// Parses a `scan` invocation with `--turbo` and asserts that the resulting
/// scan arguments have `turbo` set.
///
/// NOTE(review): the test name refers to a "fast" alias, but the arguments
/// below pass `--turbo` directly — confirm whether an alias flag was intended.
#[test]
fn fast_alias_still_enables_turbo_mode() -> anyhow::Result<()> {
    let argv = ["kingfisher", "scan", ".", "--turbo", "--no-update-check"];
    let parsed = CommandLineArgs::try_parse_from(argv)?;

    // Only the `scan` subcommand is acceptable here; anything else is a test failure.
    let operation = match parsed.command {
        Command::Scan(scan_command) => scan_command.into_operation()?,
        other => panic!("unexpected command parsed: {:?}", other),
    };

    match operation {
        ScanOperation::Scan(scan_args) => assert!(scan_args.turbo),
        op => panic!("expected scan operation, got {:?}", op),
    }

    Ok(())
}