From 4f2738b95755092763381a89ddc1d5391cd608ed Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Sat, 28 Feb 2026 12:16:08 -0700 Subject: [PATCH] changes in response to PR review --- CHANGELOG.md | 4 ++-- src/jira.rs | 45 ++++++++++++++++++++++++++++++++++-- src/sqlite.rs | 37 ++++++++++++++++++++++------- tests/cli_git_clone_flags.rs | 30 +----------------------- 4 files changed, 75 insertions(+), 41 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 62a5dca..2d62afd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,8 +9,8 @@ All notable changes to this project will be documented in this file. - Updated README/integration/usage/install/demo examples and CLI tests to use positional Git URL scanning syntax. - Jira scanning: added `kingfisher scan jira --include-comments` and `--include-changelog` to scan per-issue comments and changelog entries, with paginated Jira comment fetching and ADF text normalization preserved for issue/comment content. - Added `--turbo` mode: sets `--commit-metadata=false`, `--no-base64`, disables language detection, and disables tree-sitter parsing...for maximum scan speed. Findings will omit Git commit context (author, date, commit hash) and will not include Base64-decoded secrets. -- SQLite database scanning: kingfisher now detects and extracts SQLite files (`.db`, `.sqlite`, `.sqlite3`, etc.), dumping each table as SQL text with named columns so secrets stored in database rows are scannable. Controlled by the existing `--extract-archives` flag. -- Python bytecode (.pyc) scanning: extracts string constants from compiled Python (`.pyc`, `.pyo`) files via marshal parsing so secrets embedded in bytecode are scannable. Controlled by `--extract-archives`. +- SQLite database scanning: kingfisher now detects and extracts SQLite files (`.db`, `.sqlite`, `.sqlite3`, etc.), dumping each table as SQL text with named columns so secrets stored in database rows are scannable. Extraction is enabled by default and can be disabled with `--no-extract-archives`. +- Python bytecode (.pyc) scanning: extracts string constants from compiled Python (`.pyc`, `.pyo`) files via marshal parsing so secrets embedded in bytecode are scannable. Extraction is enabled by default and can be disabled with `--no-extract-archives`. - Performance: pipelined ODB enumeration — scanning now begins while blob OIDs are still being discovered, overlapping I/O with pattern matching. - Performance: skip blobs smaller than 20 bytes during enumeration (too small to contain any secret). - Performance: preserve pack-ascending blob order in the metadata path for better I/O locality when Rayon splits work. diff --git a/src/jira.rs b/src/jira.rs index 6466ea7..5051a4c 100644 --- a/src/jira.rs +++ b/src/jira.rs @@ -281,6 +281,19 @@ fn jira_auth_header() -> Option { std::env::var("KF_JIRA_TOKEN").ok().map(|token| format!("Bearer {}", token)) } +fn jira_relative_base_url(jira_url: &Url) -> Url { + let mut base_url = jira_url.clone(); + if !base_url.path().ends_with('/') { + let new_path = if base_url.path().is_empty() { + "/".to_string() + } else { + format!("{}/", base_url.path()) + }; + base_url.set_path(&new_path); + } + base_url +} + fn normalize_issue(issue: &JiraIssue) -> Result { let mut issue_value = serde_json::to_value(issue)?; flatten_adf_fields(&mut issue_value); @@ -340,11 +353,12 @@ pub async fn fetch_comments( let client = build_http_client(ignore_certs)?; let mut start_at = 0; let mut all_comments = Vec::new(); + let base_url = jira_relative_base_url(jira_url); loop { - let url = jira_url + let url = base_url .join(&format!( - "/rest/api/latest/issue/{issue_key}/comment?startAt={start_at}&maxResults={JIRA_COMMENTS_PAGE_SIZE}" + "rest/api/latest/issue/{issue_key}/comment?startAt={start_at}&maxResults={JIRA_COMMENTS_PAGE_SIZE}" )) .context("Failed to construct Jira comments URL")?; @@ -789,4 +803,31 @@ mod tests { assert_eq!(comments[0].pointer("/body"), Some(&json!("first"))); assert_eq!(comments[2].pointer("/body"), Some(&json!("third"))); } + + #[tokio::test] + async fn fetch_comments_preserves_base_path() { + let server = MockServer::start().await; + + Mock::given(method("GET")) + .and(path("/jira/rest/api/latest/issue/TEST-1/comment")) + .and(query_param("startAt", "0")) + .and(query_param("maxResults", &JIRA_COMMENTS_PAGE_SIZE.to_string())) + .respond_with(ResponseTemplate::new(200).set_body_json(json!({ + "comments": [ + {"id": "1", "body": "first"} + ], + "startAt": 0, + "maxResults": JIRA_COMMENTS_PAGE_SIZE, + "total": 1 + }))) + .mount(&server) + .await; + + let jira_url = Url::parse(&format!("{}/jira", server.uri())).expect("server URL"); + let comments = + fetch_comments(&jira_url, "TEST-1", false).await.expect("comments should be fetched"); + + assert_eq!(comments.len(), 1); + assert_eq!(comments[0].pointer("/body"), Some(&json!("first"))); + } } diff --git a/src/sqlite.rs b/src/sqlite.rs index c2b968f..00a534f 100644 --- a/src/sqlite.rs +++ b/src/sqlite.rs @@ -92,13 +92,11 @@ fn dump_table( return Ok(out); } - let columns_fragment = col_names - .iter() - .map(|c| format!("\"{}\"", c.replace('"', "\"\""))) - .collect::>() - .join(","); + let columns_fragment = + col_names.iter().map(|c| sqlite_quoted_identifier(c)).collect::>().join(","); - let query = format!("SELECT * FROM \"{}\"", table_name.replace('"', "\"\"")); + let quoted_table_name = sqlite_quoted_identifier(table_name); + let query = format!("SELECT * FROM {quoted_table_name}"); let mut stmt = conn.prepare(&query)?; let col_count = col_names.len(); @@ -115,7 +113,7 @@ fn dump_table( break; } - write!(out, "INSERT INTO \"{table_name}\" ({columns_fragment}) VALUES (")?; + write!(out, "INSERT INTO {quoted_table_name} ({columns_fragment}) VALUES (")?; for i in 0..col_count { if i > 0 { @@ -132,7 +130,7 @@ fn dump_table( } fn column_names(conn: &Connection, table_name: &str) -> Result> { - let query = format!("PRAGMA table_info(\"{}\")", table_name.replace('"', "\"\"")); + let query = format!("PRAGMA table_info({})", sqlite_quoted_identifier(table_name)); let mut stmt = conn.prepare(&query)?; let names = stmt .query_map([], |row| { @@ -143,6 +141,10 @@ fn column_names(conn: &Connection, table_name: &str) -> Result> { Ok(names) } +fn sqlite_quoted_identifier(identifier: &str) -> String { + format!("\"{}\"", identifier.replace('"', "\"\"")) +} + fn write_value(out: &mut String, row: &rusqlite::Row<'_>, idx: usize) -> Result<()> { use rusqlite::types::ValueRef; match row.get_ref(idx)? { @@ -246,4 +248,23 @@ mod tests { assert!(sql.contains("'it''s a test'")); assert!(sql.contains("NULL")); } + + #[test] + fn escapes_quoted_table_names_in_generated_sql() { + let tmp = NamedTempFile::new().unwrap(); + let path = tmp.path().to_path_buf(); + let conn = Connection::open(&path).unwrap(); + conn.execute_batch( + "CREATE TABLE \"odd\"\"name\" (id INTEGER PRIMARY KEY, val TEXT); + INSERT INTO \"odd\"\"name\" VALUES (1, 'secret');", + ) + .unwrap(); + + let results = extract_sqlite_contents(&path).unwrap(); + let sql = String::from_utf8_lossy(&results[0].1); + + assert!(sql.contains("INSERT INTO \"odd\"\"name\"")); + assert!(sql.contains("\"val\"")); + assert!(sql.contains("'secret'")); + } } diff --git a/tests/cli_git_clone_flags.rs b/tests/cli_git_clone_flags.rs index c18b9f4..ec9935d 100644 --- a/tests/cli_git_clone_flags.rs +++ b/tests/cli_git_clone_flags.rs @@ -96,10 +96,7 @@ fn positional_git_url_examples_parse() -> anyhow::Result<()> { ("github.com/kubernetes/kubernetes", "https://github.com/kubernetes/kubernetes"), ("https://github.com/org/repo", "https://github.com/org/repo"), ("gitlab.com/gitlab-org/gitlab", "https://gitlab.com/gitlab-org/gitlab"), - ( - "https://gitlab.com/namespace/project.git", - "https://gitlab.com/namespace/project.git", - ), + ("https://gitlab.com/namespace/project.git", "https://gitlab.com/namespace/project.git"), ]; for (input, expected) in examples { @@ -150,28 +147,3 @@ fn turbo_mode_applies_speed_first_defaults() -> anyhow::Result<()> { Ok(()) } - -#[test] -fn fast_alias_still_enables_turbo_mode() -> anyhow::Result<()> { - let args = CommandLineArgs::try_parse_from([ - "kingfisher", - "scan", - ".", - "--turbo", - "--no-update-check", - ])?; - - let command = match args.command { - Command::Scan(scan_args) => scan_args, - other => panic!("unexpected command parsed: {:?}", other), - }; - - let scan_args = match command.into_operation()? { - ScanOperation::Scan(scan_args) => scan_args, - op => panic!("expected scan operation, got {:?}", op), - }; - - assert!(scan_args.turbo); - - Ok(()) -}