From d28bbafcf60cf306e8d5de733d258f914caabbdf Mon Sep 17 00:00:00 2001 From: Mick Grove Date: Thu, 26 Feb 2026 23:25:50 -0700 Subject: [PATCH] Fix ADF text flattening --- src/jira.rs | 128 ++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 104 insertions(+), 24 deletions(-) diff --git a/src/jira.rs b/src/jira.rs index a8c05ac..f196617 100644 --- a/src/jira.rs +++ b/src/jira.rs @@ -15,30 +15,43 @@ pub use gouqi::Issue as JiraIssue; fn extract_adf_text(node: &serde_json::Value) -> String { match node { serde_json::Value::Object(map) => { - if map.get("type").and_then(|v| v.as_str()) == Some("text") { + let node_type = map.get("type").and_then(|v| v.as_str()); + if node_type == Some("text") { return map .get("text") .and_then(|v| v.as_str()) .unwrap_or("") .to_string(); } - map.get("content") - .and_then(|v| v.as_array()) - .map(|arr| { - arr.iter() - .map(extract_adf_text) - .filter(|s| !s.is_empty()) - .collect::>() - .join(" ") - }) - .unwrap_or_default() + if node_type == Some("hardBreak") { + return "\n".to_string(); + } + + let mut text = String::new(); + if let Some(arr) = map.get("content").and_then(|v| v.as_array()) { + for child in arr { + text.push_str(&extract_adf_text(child)); + } + } + + if matches!( + node_type, + Some("paragraph" | "heading" | "blockquote" | "listItem" | "codeBlock" | "tableRow" | "table") + ) && !text.is_empty() + && !text.ends_with('\n') + { + text.push('\n'); + } + + text + } + serde_json::Value::Array(arr) => { + let mut text = String::new(); + for child in arr { + text.push_str(&extract_adf_text(child)); + } + text } - serde_json::Value::Array(arr) => arr - .iter() - .map(extract_adf_text) - .filter(|s| !s.is_empty()) - .collect::>() - .join(" "), _ => String::new(), } } @@ -95,9 +108,9 @@ pub async fn download_issues_to_dir( // Jira Cloud API v3 returns descriptions as Atlassian Document Format (ADF), // a nested JSON tree whose leaf text nodes contain the actual content. // Flatten ADF to a plain string so the secret scanner can match against it. - if let Some(desc) = issue_value.pointer("/fields/description").cloned() { - if is_adf(&desc) { - let plain_text = extract_adf_text(&desc); + if let Some(desc) = issue_value.pointer("/fields/description") { + if is_adf(desc) { + let plain_text = extract_adf_text(desc); if let Some(fields) = issue_value.pointer_mut("/fields") { fields["description"] = serde_json::Value::String(plain_text); } @@ -108,11 +121,15 @@ pub async fn download_issues_to_dir( if let Some(comments) = issue_value.pointer_mut("/fields/comment/comments") { if let Some(arr) = comments.as_array_mut() { for comment in arr.iter_mut() { - if let Some(body) = comment.get("body").cloned() { - if is_adf(&body) { - let plain_text = extract_adf_text(&body); - comment["body"] = serde_json::Value::String(plain_text); + let plain_text = comment.get("body").and_then(|body| { + if is_adf(body) { + Some(extract_adf_text(body)) + } else { + None } + }); + if let Some(plain_text) = plain_text { + comment["body"] = serde_json::Value::String(plain_text); } } } @@ -124,3 +141,66 @@ pub async fn download_issues_to_dir( } Ok(paths) } + +#[cfg(test)] +mod tests { + use super::{extract_adf_text, is_adf}; + use serde_json::json; + + #[test] + fn is_adf_detects_doc_root() { + let doc = json!({"type": "doc", "version": 1, "content": []}); + assert!(is_adf(&doc)); + assert!(!is_adf(&json!({"type": "paragraph"}))); + assert!(!is_adf(&json!("not-a-doc"))); + } + + #[test] + fn extract_adf_text_concatenates_adjacent_text_nodes() { + let value = json!({ + "type": "doc", + "version": 1, + "content": [{ + "type": "paragraph", + "content": [ + {"type": "text", "text": "sk-"}, + {"type": "text", "text": "proj-123"} + ] + }] + }); + let text = extract_adf_text(&value); + assert_eq!(text.trim_end(), "sk-proj-123"); + } + + #[test] + fn extract_adf_text_preserves_hard_breaks() { + let value = json!({ + "type": "doc", + "version": 1, + "content": [{ + "type": "paragraph", + "content": [ + {"type": "text", "text": "foo"}, + {"type": "hardBreak"}, + {"type": "text", "text": "bar"} + ] + }] + }); + let text = extract_adf_text(&value); + assert_eq!(text.trim_end(), "foo\nbar"); + } + + #[test] + fn extract_adf_text_adds_paragraph_separator() { + let value = json!({ + "type": "doc", + "version": 1, + "content": [ + {"type": "paragraph", "content": [{"type": "text", "text": "first"}]}, + {"type": "paragraph", "content": [{"type": "text", "text": "second"}]} + ] + }); + let text = extract_adf_text(&value); + assert_eq!(text.trim_end(), "first\nsecond"); + } +}