From d28bbafcf60cf306e8d5de733d258f914caabbdf Mon Sep 17 00:00:00 2001
From: Mick Grove <mick.grove@mongodb.com>
Date: Thu, 26 Feb 2026 23:25:50 -0700
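Subject: [PATCH] Fix ADF text flattening

extract_adf_text joined the text of sibling ADF nodes with a single
space. A value split across adjacent text nodes (the new test uses
"sk-" followed by "proj-123") was therefore flattened to "sk- proj-123",
breaking up the token that the secret scanner is meant to match.

Concatenate child nodes without a separator instead. Emit "\n" for
hardBreak nodes, and terminate non-empty block-level nodes (paragraph,
heading, blockquote, listItem, codeBlock, tableRow, table) with a
newline so that separate blocks do not run together.

Also stop cloning the description and comment bodies before flattening
them, and add unit tests for is_adf and extract_adf_text.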
---
 src/jira.rs | 128 ++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 104 insertions(+), 24 deletions(-)

diff --git a/src/jira.rs b/src/jira.rs
index a8c05ac..f196617 100644
--- a/src/jira.rs
+++ b/src/jira.rs
@@ -15,30 +15,43 @@ pub use gouqi::Issue as JiraIssue;
 fn extract_adf_text(node: &serde_json::Value) -> String {
     match node {
         serde_json::Value::Object(map) => {
-            if map.get("type").and_then(|v| v.as_str()) == Some("text") {
+            let node_type = map.get("type").and_then(|v| v.as_str());
+            if node_type == Some("text") {
                 return map
                     .get("text")
                     .and_then(|v| v.as_str())
                     .unwrap_or("")
                     .to_string();
             }
-            map.get("content")
-                .and_then(|v| v.as_array())
-                .map(|arr| {
-                    arr.iter()
-                        .map(extract_adf_text)
-                        .filter(|s| !s.is_empty())
-                        .collect::<Vec<_>>()
-                        .join(" ")
-                })
-                .unwrap_or_default()
+            if node_type == Some("hardBreak") {
+                return "\n".to_string();
+            }
+
+            let mut text = String::new();
+            if let Some(arr) = map.get("content").and_then(|v| v.as_array()) {
+                for child in arr {
+                    text.push_str(&extract_adf_text(child));
+                }
+            }
+
+            if matches!(
+                node_type,
+                Some("paragraph" | "heading" | "blockquote" | "listItem" | "codeBlock" | "tableRow" | "table")
+            ) && !text.is_empty()
+                && !text.ends_with('\n')
+            {
+                text.push('\n');
+            }
+
+            text
+        }
+        serde_json::Value::Array(arr) => {
+            let mut text = String::new();
+            for child in arr {
+                text.push_str(&extract_adf_text(child));
+            }
+            text
         }
-        serde_json::Value::Array(arr) => arr
-            .iter()
-            .map(extract_adf_text)
-            .filter(|s| !s.is_empty())
-            .collect::<Vec<_>>()
-            .join(" "),
         _ => String::new(),
     }
 }
@@ -95,9 +108,9 @@ pub async fn download_issues_to_dir(
         // Jira Cloud API v3 returns descriptions as Atlassian Document Format (ADF),
         // a nested JSON tree whose leaf text nodes contain the actual content.
         // Flatten ADF to a plain string so the secret scanner can match against it.
-        if let Some(desc) = issue_value.pointer("/fields/description").cloned() {
-            if is_adf(&desc) {
-                let plain_text = extract_adf_text(&desc);
+        if let Some(desc) = issue_value.pointer("/fields/description") {
+            if is_adf(desc) {
+                let plain_text = extract_adf_text(desc);
                 if let Some(fields) = issue_value.pointer_mut("/fields") {
                     fields["description"] = serde_json::Value::String(plain_text);
                 }
@@ -108,11 +121,15 @@ pub async fn download_issues_to_dir(
         if let Some(comments) = issue_value.pointer_mut("/fields/comment/comments") {
             if let Some(arr) = comments.as_array_mut() {
                 for comment in arr.iter_mut() {
-                    if let Some(body) = comment.get("body").cloned() {
-                        if is_adf(&body) {
-                            let plain_text = extract_adf_text(&body);
-                            comment["body"] = serde_json::Value::String(plain_text);
+                    let plain_text = comment.get("body").and_then(|body| {
+                        if is_adf(body) {
+                            Some(extract_adf_text(body))
+                        } else {
+                            None
                         }
+                    });
+                    if let Some(plain_text) = plain_text {
+                        comment["body"] = serde_json::Value::String(plain_text);
                     }
                 }
             }
@@ -124,3 +141,66 @@ pub async fn download_issues_to_dir(
     }
     Ok(paths)
 }
+
+#[cfg(test)]
+mod tests {
+    use super::{extract_adf_text, is_adf};
+    use serde_json::json;
+
+    #[test]
+    fn is_adf_detects_doc_root() {
+        let doc = json!({"type": "doc", "version": 1, "content": []});
+        assert!(is_adf(&doc));
+        assert!(!is_adf(&json!({"type": "paragraph"})));
+        assert!(!is_adf(&json!("not-a-doc")));
+    }
+
+    #[test]
+    fn extract_adf_text_concatenates_adjacent_text_nodes() {
+        let value = json!({
+            "type": "doc",
+            "version": 1,
+            "content": [{
+                "type": "paragraph",
+                "content": [
+                    {"type": "text", "text": "sk-"},
+                    {"type": "text", "text": "proj-123"}
+                ]
+            }]
+        });
+        let text = extract_adf_text(&value);
+        assert_eq!(text.trim_end(), "sk-proj-123");
+    }
+
+    #[test]
+    fn extract_adf_text_preserves_hard_breaks() {
+        let value = json!({
+            "type": "doc",
+            "version": 1,
+            "content": [{
+                "type": "paragraph",
+                "content": [
+                    {"type": "text", "text": "foo"},
+                    {"type": "hardBreak"},
+                    {"type": "text", "text": "bar"}
+                ]
+            }]
+        });
+        let text = extract_adf_text(&value);
+        assert_eq!(text.trim_end(), "foo\nbar");
+    }
+
+    #[test]
+    fn extract_adf_text_adds_paragraph_separator() {
+        let value = json!({
+            "type": "doc",
+            "version": 1,
+            "content": [
+                {"type": "paragraph", "content": [{"type": "text", "text": "first"}]},
+                {"type": "paragraph", "content": [{"type": "text", "text": "second"}]}
+            ]
+        });
+        let text = extract_adf_text(&value);
+        assert_eq!(text.trim_end(), "first\nsecond");
+    }
+}