From f680f31e545999c90103a9f461c7564939f56b98 Mon Sep 17 00:00:00 2001
From: Sorevitnod <puduville@gmail.com>
Date: Sat, 21 Feb 2026 19:08:46 +0000
Subject: [PATCH 1/8] Fix Jira ADF description not being scanned on Jira Cloud
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Jira Cloud API v3 returns issue descriptions and comment bodies in
Atlassian Document Format (ADF) — a nested JSON tree — rather than as
plain strings. Previously, the raw ADF object was serialized to disk
and the scanner found 0 bytes of meaningful content, producing no
findings even when secrets were present.

This commit adds `extract_adf_text()`, which recursively walks the ADF
`content` tree and collects all leaf `"type":"text"` node values into a
single plain string. `download_issues_to_dir()` now replaces any ADF
`description` or comment `body` fields with their extracted plain text
before writing the issue JSON to disk.

Fixes #242

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 src/jira.rs | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 75 insertions(+), 1 deletion(-)

diff --git a/src/jira.rs b/src/jira.rs
index 2fe0ef4..a8c05ac 100644
--- a/src/jira.rs
+++ b/src/jira.rs
@@ -6,6 +6,52 @@ use url::Url;
 
 // Re-export the Issue type from gouqi so callers don't depend on the crate.
 pub use gouqi::Issue as JiraIssue;
+
+/// Recursively extracts plain text from an Atlassian Document Format (ADF) node.
+///
+/// Jira Cloud API v3 returns issue descriptions as ADF — a nested JSON structure
+/// rather than a plain string. This function walks the content tree and collects
+/// all leaf `"type": "text"` node values so that secret scanners can find them.
+fn extract_adf_text(node: &serde_json::Value) -> String {
+    match node {
+        serde_json::Value::Object(map) => {
+            if map.get("type").and_then(|v| v.as_str()) == Some("text") {
+                return map
+                    .get("text")
+                    .and_then(|v| v.as_str())
+                    .unwrap_or("")
+                    .to_string();
+            }
+            map.get("content")
+                .and_then(|v| v.as_array())
+                .map(|arr| {
+                    arr.iter()
+                        .map(extract_adf_text)
+                        .filter(|s| !s.is_empty())
+                        .collect::<Vec<_>>()
+                        .join(" ")
+                })
+                .unwrap_or_default()
+        }
+        serde_json::Value::Array(arr) => arr
+            .iter()
+            .map(extract_adf_text)
+            .filter(|s| !s.is_empty())
+            .collect::<Vec<_>>()
+            .join(" "),
+        _ => String::new(),
+    }
+}
+
+/// Returns true if the value looks like an ADF document root.
+fn is_adf(value: &serde_json::Value) -> bool {
+    value
+        .get("type")
+        .and_then(|v| v.as_str())
+        .map(|t| t == "doc")
+        .unwrap_or(false)
+}
+
 pub async fn fetch_issues(
     jira_url: Url,
     jql: &str,
@@ -44,8 +90,36 @@ pub async fn download_issues_to_dir(
     let issues = fetch_issues(jira_url, jql, max_results, ignore_certs).await?;
     let mut paths = Vec::new();
     for issue in issues {
+        let mut issue_value = serde_json::to_value(&issue)?;
+
+        // Jira Cloud API v3 returns descriptions as Atlassian Document Format (ADF),
+        // a nested JSON tree whose leaf text nodes contain the actual content.
+        // Flatten ADF to a plain string so the secret scanner can match against it.
+        if let Some(desc) = issue_value.pointer("/fields/description").cloned() {
+            if is_adf(&desc) {
+                let plain_text = extract_adf_text(&desc);
+                if let Some(fields) = issue_value.pointer_mut("/fields") {
+                    fields["description"] = serde_json::Value::String(plain_text);
+                }
+            }
+        }
+
+        // Apply the same ADF flattening to comment bodies.
+        if let Some(comments) = issue_value.pointer_mut("/fields/comment/comments") {
+            if let Some(arr) = comments.as_array_mut() {
+                for comment in arr.iter_mut() {
+                    if let Some(body) = comment.get("body").cloned() {
+                        if is_adf(&body) {
+                            let plain_text = extract_adf_text(&body);
+                            comment["body"] = serde_json::Value::String(plain_text);
+                        }
+                    }
+                }
+            }
+        }
+
         let file = output_dir.join(format!("{}.json", issue.key));
-        std::fs::write(&file, serde_json::to_vec(&issue)?)?;
+        std::fs::write(&file, serde_json::to_vec(&issue_value)?)?;
         paths.push(file);
     }
     Ok(paths)

From d28bbafcf60cf306e8d5de733d258f914caabbdf Mon Sep 17 00:00:00 2001
From: Mick Grove <mick.grove@mongodb.com>
Date: Thu, 26 Feb 2026 23:25:50 -0700
Subject: [PATCH 2/8] Fix ADF text flattening

---
 src/jira.rs | 128 ++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 104 insertions(+), 24 deletions(-)

diff --git a/src/jira.rs b/src/jira.rs
index a8c05ac..f196617 100644
--- a/src/jira.rs
+++ b/src/jira.rs
@@ -15,30 +15,43 @@ pub use gouqi::Issue as JiraIssue;
 fn extract_adf_text(node: &serde_json::Value) -> String {
     match node {
         serde_json::Value::Object(map) => {
-            if map.get("type").and_then(|v| v.as_str()) == Some("text") {
+            let node_type = map.get("type").and_then(|v| v.as_str());
+            if node_type == Some("text") {
                 return map
                     .get("text")
                     .and_then(|v| v.as_str())
                     .unwrap_or("")
                     .to_string();
             }
-            map.get("content")
-                .and_then(|v| v.as_array())
-                .map(|arr| {
-                    arr.iter()
-                        .map(extract_adf_text)
-                        .filter(|s| !s.is_empty())
-                        .collect::<Vec<_>>()
-                        .join(" ")
-                })
-                .unwrap_or_default()
+            if node_type == Some("hardBreak") {
+                return "\n".to_string();
+            }
+
+            let mut text = String::new();
+            if let Some(arr) = map.get("content").and_then(|v| v.as_array()) {
+                for child in arr {
+                    text.push_str(&extract_adf_text(child));
+                }
+            }
+
+            if matches!(
+                node_type,
+                Some("paragraph" | "heading" | "blockquote" | "listItem" | "codeBlock" | "tableRow" | "table")
+            ) && !text.is_empty()
+                && !text.ends_with('\n')
+            {
+                text.push('\n');
+            }
+
+            text
+        }
+        serde_json::Value::Array(arr) => {
+            let mut text = String::new();
+            for child in arr {
+                text.push_str(&extract_adf_text(child));
+            }
+            text
         }
-        serde_json::Value::Array(arr) => arr
-            .iter()
-            .map(extract_adf_text)
-            .filter(|s| !s.is_empty())
-            .collect::<Vec<_>>()
-            .join(" "),
         _ => String::new(),
     }
 }
@@ -95,9 +108,9 @@ pub async fn download_issues_to_dir(
         // Jira Cloud API v3 returns descriptions as Atlassian Document Format (ADF),
         // a nested JSON tree whose leaf text nodes contain the actual content.
         // Flatten ADF to a plain string so the secret scanner can match against it.
-        if let Some(desc) = issue_value.pointer("/fields/description").cloned() {
-            if is_adf(&desc) {
-                let plain_text = extract_adf_text(&desc);
+        if let Some(desc) = issue_value.pointer("/fields/description") {
+            if is_adf(desc) {
+                let plain_text = extract_adf_text(desc);
                 if let Some(fields) = issue_value.pointer_mut("/fields") {
                     fields["description"] = serde_json::Value::String(plain_text);
                 }
@@ -108,11 +121,15 @@ pub async fn download_issues_to_dir(
         if let Some(comments) = issue_value.pointer_mut("/fields/comment/comments") {
             if let Some(arr) = comments.as_array_mut() {
                 for comment in arr.iter_mut() {
-                    if let Some(body) = comment.get("body").cloned() {
-                        if is_adf(&body) {
-                            let plain_text = extract_adf_text(&body);
-                            comment["body"] = serde_json::Value::String(plain_text);
+                    let plain_text = comment.get("body").and_then(|body| {
+                        if is_adf(body) {
+                            Some(extract_adf_text(body))
+                        } else {
+                            None
                         }
+                    });
+                    if let Some(plain_text) = plain_text {
+                        comment["body"] = serde_json::Value::String(plain_text);
                     }
                 }
             }
@@ -124,3 +141,66 @@ pub async fn download_issues_to_dir(
     }
     Ok(paths)
 }
+
+#[cfg(test)]
+mod tests {
+    use super::{extract_adf_text, is_adf};
+    use serde_json::json;
+
+    #[test]
+    fn is_adf_detects_doc_root() {
+        let doc = json!({"type": "doc", "version": 1, "content": []});
+        assert!(is_adf(&doc));
+        assert!(!is_adf(&json!({"type": "paragraph"})));
+        assert!(!is_adf(&json!("not-a-doc")));
+    }
+
+    #[test]
+    fn extract_adf_text_concatenates_adjacent_text_nodes() {
+        let value = json!({
+            "type": "doc",
+            "version": 1,
+            "content": [{
+                "type": "paragraph",
+                "content": [
+                    {"type": "text", "text": "sk-"},
+                    {"type": "text", "text": "proj-123"}
+                ]
+            }]
+        });
+        let text = extract_adf_text(&value);
+        assert_eq!(text.trim_end(), "sk-proj-123");
+    }
+
+    #[test]
+    fn extract_adf_text_preserves_hard_breaks() {
+        let value = json!({
+            "type": "doc",
+            "version": 1,
+            "content": [{
+                "type": "paragraph",
+                "content": [
+                    {"type": "text", "text": "foo"},
+                    {"type": "hardBreak"},
+                    {"type": "text", "text": "bar"}
+                ]
+            }]
+        });
+        let text = extract_adf_text(&value);
+        assert_eq!(text.trim_end(), "foo\nbar");
+    }
+
+    #[test]
+    fn extract_adf_text_adds_paragraph_separator() {
+        let value = json!({
+            "type": "doc",
+            "version": 1,
+            "content": [
+                {"type": "paragraph", "content": [{"type": "text", "text": "first"}]},
+                {"type": "paragraph", "content": [{"type": "text", "text": "second"}]}
+            ]
+        });
+        let text = extract_adf_text(&value);
+        assert_eq!(text.trim_end(), "first\nsecond");
+    }
+}

From fafe89199614b086e83fb455edb9bb1187dc881b Mon Sep 17 00:00:00 2001
From: Mick Grove <mick.grove@mongodb.com>
Date: Thu, 26 Feb 2026 23:42:32 -0700
Subject: [PATCH 3/8] Add ADF extraction test coverage

---
 src/jira.rs | 180 +++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 150 insertions(+), 30 deletions(-)

diff --git a/src/jira.rs b/src/jira.rs
index f196617..eab4d63 100644
--- a/src/jira.rs
+++ b/src/jira.rs
@@ -65,6 +65,38 @@ fn is_adf(value: &serde_json::Value) -> bool {
         .unwrap_or(false)
 }
 
+fn flatten_adf_fields(issue_value: &mut serde_json::Value) {
+    // Jira Cloud API v3 returns descriptions as Atlassian Document Format (ADF),
+    // a nested JSON tree whose leaf text nodes contain the actual content.
+    // Flatten ADF to a plain string so the secret scanner can match against it.
+    if let Some(desc) = issue_value.pointer("/fields/description") {
+        if is_adf(desc) {
+            let plain_text = extract_adf_text(desc);
+            if let Some(fields) = issue_value.pointer_mut("/fields") {
+                fields["description"] = serde_json::Value::String(plain_text);
+            }
+        }
+    }
+
+    // Apply the same ADF flattening to comment bodies.
+    if let Some(comments) = issue_value.pointer_mut("/fields/comment/comments") {
+        if let Some(arr) = comments.as_array_mut() {
+            for comment in arr.iter_mut() {
+                let plain_text = comment.get("body").and_then(|body| {
+                    if is_adf(body) {
+                        Some(extract_adf_text(body))
+                    } else {
+                        None
+                    }
+                });
+                if let Some(plain_text) = plain_text {
+                    comment["body"] = serde_json::Value::String(plain_text);
+                }
+            }
+        }
+    }
+}
+
 pub async fn fetch_issues(
     jira_url: Url,
     jql: &str,
@@ -105,35 +137,7 @@ pub async fn download_issues_to_dir(
     for issue in issues {
         let mut issue_value = serde_json::to_value(&issue)?;
 
-        // Jira Cloud API v3 returns descriptions as Atlassian Document Format (ADF),
-        // a nested JSON tree whose leaf text nodes contain the actual content.
-        // Flatten ADF to a plain string so the secret scanner can match against it.
-        if let Some(desc) = issue_value.pointer("/fields/description") {
-            if is_adf(desc) {
-                let plain_text = extract_adf_text(desc);
-                if let Some(fields) = issue_value.pointer_mut("/fields") {
-                    fields["description"] = serde_json::Value::String(plain_text);
-                }
-            }
-        }
-
-        // Apply the same ADF flattening to comment bodies.
-        if let Some(comments) = issue_value.pointer_mut("/fields/comment/comments") {
-            if let Some(arr) = comments.as_array_mut() {
-                for comment in arr.iter_mut() {
-                    let plain_text = comment.get("body").and_then(|body| {
-                        if is_adf(body) {
-                            Some(extract_adf_text(body))
-                        } else {
-                            None
-                        }
-                    });
-                    if let Some(plain_text) = plain_text {
-                        comment["body"] = serde_json::Value::String(plain_text);
-                    }
-                }
-            }
-        }
+        flatten_adf_fields(&mut issue_value);
 
         let file = output_dir.join(format!("{}.json", issue.key));
         std::fs::write(&file, serde_json::to_vec(&issue_value)?)?;
@@ -144,7 +148,7 @@ pub async fn download_issues_to_dir(
 
 #[cfg(test)]
 mod tests {
-    use super::{extract_adf_text, is_adf};
+    use super::{extract_adf_text, flatten_adf_fields, is_adf};
     use serde_json::json;
 
     #[test]
@@ -203,4 +207,120 @@ mod tests {
         let text = extract_adf_text(&value);
         assert_eq!(text.trim_end(), "first\nsecond");
     }
+
+    #[test]
+    fn extract_adf_text_returns_empty_for_non_adf_values() {
+        let value = json!("plain description string");
+        let text = extract_adf_text(&value);
+        assert_eq!(text, "");
+
+        let number_value = json!(42);
+        let number_text = extract_adf_text(&number_value);
+        assert_eq!(number_text, "");
+
+        let null_value = json!(null);
+        let null_text = extract_adf_text(&null_value);
+        assert_eq!(null_text, "");
+    }
+
+    #[test]
+    fn extract_adf_text_handles_missing_content_fields() {
+        let doc_without_content = json!({
+            "type": "doc",
+            "version": 1
+        });
+        let text = extract_adf_text(&doc_without_content);
+        assert_eq!(text, "");
+
+        let paragraph_without_content = json!({
+            "type": "paragraph"
+        });
+        let para_text = extract_adf_text(&paragraph_without_content);
+        assert_eq!(para_text, "");
+    }
+
+    #[test]
+    fn extract_adf_text_handles_empty_doc() {
+        let empty_doc = json!({
+            "type": "doc",
+            "version": 1,
+            "content": []
+        });
+        let text = extract_adf_text(&empty_doc);
+        assert_eq!(text, "");
+    }
+
+    #[test]
+    fn extract_adf_text_handles_lists_and_code_blocks() {
+        let value = json!({
+            "type": "doc",
+            "version": 1,
+            "content": [
+                {
+                    "type": "bulletList",
+                    "content": [
+                        {
+                            "type": "listItem",
+                            "content": [{
+                                "type": "paragraph",
+                                "content": [{"type": "text", "text": "item1"}]
+                            }]
+                        },
+                        {
+                            "type": "listItem",
+                            "content": [{
+                                "type": "paragraph",
+                                "content": [{"type": "text", "text": "item2"}]
+                            }]
+                        }
+                    ]
+                },
+                {
+                    "type": "codeBlock",
+                    "content": [{"type": "text", "text": "code"}]
+                }
+            ]
+        });
+        let text = extract_adf_text(&value);
+        assert_eq!(text.trim_end(), "item1\nitem2\ncode");
+    }
+
+    #[test]
+    fn flatten_adf_fields_converts_comment_bodies() {
+        let mut issue_value = json!({
+            "fields": {
+                "comment": {
+                    "comments": [
+                        {
+                            "body": {
+                                "type": "doc",
+                                "version": 1,
+                                "content": [{
+                                    "type": "paragraph",
+                                    "content": [{"type": "text", "text": "secret"}]
+                                }]
+                            }
+                        }
+                    ]
+                }
+            }
+        });
+        flatten_adf_fields(&mut issue_value);
+        let body = issue_value
+            .pointer("/fields/comment/comments/0/body")
+            .and_then(|v| v.as_str())
+            .unwrap_or("");
+        assert_eq!(body, "secret");
+    }
+
+    #[test]
+    fn flatten_adf_fields_handles_missing_description() {
+        let mut issue_value = json!({
+            "fields": {
+                "summary": "no description here"
+            }
+        });
+        flatten_adf_fields(&mut issue_value);
+        assert!(issue_value.pointer("/fields/description").is_none());
+    }
 }

From 7340ae5051410caeb9edfeea068ad7eb8c095add Mon Sep 17 00:00:00 2001
From: Mick Grove <mick.grove@mongodb.com>
Date: Fri, 27 Feb 2026 21:23:19 -0700
Subject: [PATCH 4/8] Refine ADF flattening and tests

---
 src/jira.rs | 108 +++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 95 insertions(+), 13 deletions(-)

diff --git a/src/jira.rs b/src/jira.rs
index eab4d63..0884588 100644
--- a/src/jira.rs
+++ b/src/jira.rs
@@ -27,12 +27,15 @@ fn extract_adf_text(node: &serde_json::Value) -> String {
                 return "\n".to_string();
             }
 
-            let mut text = String::new();
-            if let Some(arr) = map.get("content").and_then(|v| v.as_array()) {
-                for child in arr {
-                    text.push_str(&extract_adf_text(child));
+            let text = if let Some(arr) = map.get("content").and_then(|v| v.as_array()) {
+                match node_type {
+                    Some("table") => join_children_with_separator(arr, "\n"),
+                    Some("tableRow") => join_children_with_separator(arr, " "),
+                    _ => concat_children(arr),
                 }
-            }
+            } else {
+                String::new()
+            };
 
             if matches!(
                 node_type,
@@ -46,16 +49,45 @@ fn extract_adf_text(node: &serde_json::Value) -> String {
             text
         }
         serde_json::Value::Array(arr) => {
-            let mut text = String::new();
-            for child in arr {
-                text.push_str(&extract_adf_text(child));
-            }
-            text
+            concat_children(arr)
         }
         _ => String::new(),
     }
 }
 
+fn concat_children(arr: &[serde_json::Value]) -> String {
+    let mut text = String::new();
+    for child in arr {
+        text.push_str(&extract_adf_text(child));
+    }
+    text
+}
+
+fn join_children_with_separator(arr: &[serde_json::Value], separator: &str) -> String {
+    let mut text = String::new();
+    for child in arr {
+        let child_text = extract_adf_text(child);
+        if child_text.is_empty() {
+            continue;
+        }
+        let needs_separator = text
+            .chars()
+            .last()
+            .map(|c| !c.is_whitespace())
+            .unwrap_or(false)
+            && child_text
+                .chars()
+                .next()
+                .map(|c| !c.is_whitespace())
+                .unwrap_or(false);
+        if needs_separator {
+            text.push_str(separator);
+        }
+        text.push_str(&child_text);
+    }
+    text
+}
+
 /// Returns true if the value looks like an ADF document root.
 fn is_adf(value: &serde_json::Value) -> bool {
     value
@@ -72,8 +104,14 @@ fn flatten_adf_fields(issue_value: &mut serde_json::Value) {
     if let Some(desc) = issue_value.pointer("/fields/description") {
         if is_adf(desc) {
             let plain_text = extract_adf_text(desc);
-            if let Some(fields) = issue_value.pointer_mut("/fields") {
-                fields["description"] = serde_json::Value::String(plain_text);
+            if let Some(fields) = issue_value
+                .pointer_mut("/fields")
+                .and_then(|value| value.as_object_mut())
+            {
+                fields.insert(
+                    "description".to_string(),
+                    serde_json::Value::String(plain_text.trim_end_matches('\n').to_string()),
+                );
             }
         }
     }
@@ -90,7 +128,14 @@ fn flatten_adf_fields(issue_value: &mut serde_json::Value) {
                     }
                 });
                 if let Some(plain_text) = plain_text {
-                    comment["body"] = serde_json::Value::String(plain_text);
+                    if let Some(comment_obj) = comment.as_object_mut() {
+                        comment_obj.insert(
+                            "body".to_string(),
+                            serde_json::Value::String(
+                                plain_text.trim_end_matches('\n').to_string(),
+                            ),
+                        );
+                    }
                 }
             }
         }
@@ -313,6 +358,43 @@ mod tests {
         assert_eq!(body, "secret");
     }
 
+    #[test]
+    fn flatten_adf_fields_converts_description() {
+        let mut issue_value = json!({
+            "fields": {
+                "description": {
+                    "type": "doc",
+                    "version": 1,
+                    "content": [{
+                        "type": "paragraph",
+                        "content": [{"type": "text", "text": "desc"}]
+                    }]
+                }
+            }
+        });
+        flatten_adf_fields(&mut issue_value);
+        let desc = issue_value
+            .pointer("/fields/description")
+            .and_then(|v| v.as_str())
+            .unwrap_or("");
+        assert_eq!(desc, "desc");
+    }
+
+    #[test]
+    fn flatten_adf_fields_leaves_plain_description() {
+        let mut issue_value = json!({
+            "fields": {
+                "description": "plain description"
+            }
+        });
+        flatten_adf_fields(&mut issue_value);
+        let desc = issue_value
+            .pointer("/fields/description")
+            .and_then(|v| v.as_str())
+            .unwrap_or("");
+        assert_eq!(desc, "plain description");
+    }
+
     #[test]
     fn flatten_adf_fields_handles_missing_description() {
         let mut issue_value = json!({

From 5d9a83ca72812f8b78759df2570f772616ab8480 Mon Sep 17 00:00:00 2001
From: Mick Grove <mick.grove@mongodb.com>
Date: Fri, 27 Feb 2026 21:31:31 -0700
Subject: [PATCH 5/8] Optimize ADF join separator

---
 src/jira.rs | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/jira.rs b/src/jira.rs
index 0884588..c170706 100644
--- a/src/jira.rs
+++ b/src/jira.rs
@@ -65,25 +65,25 @@ fn concat_children(arr: &[serde_json::Value]) -> String {
 
 fn join_children_with_separator(arr: &[serde_json::Value], separator: &str) -> String {
     let mut text = String::new();
+    let mut last_was_whitespace = true;
     for child in arr {
         let child_text = extract_adf_text(child);
         if child_text.is_empty() {
             continue;
         }
-        let needs_separator = text
+        let child_starts_non_whitespace = child_text
             .chars()
-            .last()
+            .next()
             .map(|c| !c.is_whitespace())
-            .unwrap_or(false)
-            && child_text
-                .chars()
-                .next()
-                .map(|c| !c.is_whitespace())
-                .unwrap_or(false);
+            .unwrap_or(false);
+        let needs_separator = !last_was_whitespace && child_starts_non_whitespace;
         if needs_separator {
             text.push_str(separator);
         }
         text.push_str(&child_text);
+        if let Some(last_char) = child_text.chars().rev().next() {
+            last_was_whitespace = last_char.is_whitespace();
+        }
     }
     text
 }

From b11a69610f46568bb3146b2dbddb467968d05627 Mon Sep 17 00:00:00 2001
From: Mick Grove <mick.grove@mongodb.com>
Date: Fri, 27 Feb 2026 21:33:20 -0700
Subject: [PATCH 6/8] Fix mutable text in ADF extractor

---
 src/jira.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/jira.rs b/src/jira.rs
index c170706..bc3037e 100644
--- a/src/jira.rs
+++ b/src/jira.rs
@@ -27,7 +27,7 @@ fn extract_adf_text(node: &serde_json::Value) -> String {
                 return "\n".to_string();
             }
 
-            let text = if let Some(arr) = map.get("content").and_then(|v| v.as_array()) {
+            let mut text = if let Some(arr) = map.get("content").and_then(|v| v.as_array()) {
                 match node_type {
                     Some("table") => join_children_with_separator(arr, "\n"),
                     Some("tableRow") => join_children_with_separator(arr, " "),

From e3c4d41695f9b207f90185000a329c0b771d5638 Mon Sep 17 00:00:00 2001
From: Mick Grove <mick.grove@mongodb.com>
Date: Sat, 28 Feb 2026 09:51:33 -0700
Subject: [PATCH 7/8] Make ADF extraction iterative

---
 src/jira.rs | 145 ++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 100 insertions(+), 45 deletions(-)

diff --git a/src/jira.rs b/src/jira.rs
index bc3037e..40af99e 100644
--- a/src/jira.rs
+++ b/src/jira.rs
@@ -13,61 +13,116 @@ pub use gouqi::Issue as JiraIssue;
 /// rather than a plain string. This function walks the content tree and collects
 /// all leaf `"type": "text"` node values so that secret scanners can find them.
 fn extract_adf_text(node: &serde_json::Value) -> String {
-    match node {
-        serde_json::Value::Object(map) => {
-            let node_type = map.get("type").and_then(|v| v.as_str());
-            if node_type == Some("text") {
-                return map
-                    .get("text")
-                    .and_then(|v| v.as_str())
-                    .unwrap_or("")
-                    .to_string();
-            }
-            if node_type == Some("hardBreak") {
-                return "\n".to_string();
-            }
+    enum FrameState {
+        Enter,
+        Exit {
+            node_type: Option<String>,
+            child_count: usize,
+        },
+    }
 
-            let mut text = if let Some(arr) = map.get("content").and_then(|v| v.as_array()) {
-                match node_type {
-                    Some("table") => join_children_with_separator(arr, "\n"),
-                    Some("tableRow") => join_children_with_separator(arr, " "),
-                    _ => concat_children(arr),
+    struct Frame<'a> {
+        node: &'a serde_json::Value,
+        state: FrameState,
+    }
+
+    let mut stack = vec![Frame {
+        node,
+        state: FrameState::Enter,
+    }];
+    let mut values: Vec<String> = Vec::new();
+
+    while let Some(frame) = stack.pop() {
+        match frame.state {
+            FrameState::Enter => match frame.node {
+                serde_json::Value::Object(map) => {
+                    let node_type = map.get("type").and_then(|v| v.as_str());
+                    if node_type == Some("text") {
+                        values.push(
+                            map.get("text")
+                                .and_then(|v| v.as_str())
+                                .unwrap_or("")
+                                .to_string(),
+                        );
+                        continue;
+                    }
+                    if node_type == Some("hardBreak") {
+                        values.push("\n".to_string());
+                        continue;
+                    }
+
+                    let child_count = map
+                        .get("content")
+                        .and_then(|v| v.as_array())
+                        .map(|arr| arr.len())
+                        .unwrap_or(0);
+                    stack.push(Frame {
+                        node: frame.node,
+                        state: FrameState::Exit {
+                            node_type: node_type.map(|value| value.to_string()),
+                            child_count,
+                        },
+                    });
+                    if let Some(arr) = map.get("content").and_then(|v| v.as_array()) {
+                        for child in arr.iter().rev() {
+                            stack.push(Frame {
+                                node: child,
+                                state: FrameState::Enter,
+                            });
+                        }
+                    }
                 }
-            } else {
-                String::new()
-            };
-
-            if matches!(
+                serde_json::Value::Array(arr) => {
+                    let child_count = arr.len();
+                    stack.push(Frame {
+                        node: frame.node,
+                        state: FrameState::Exit {
+                            node_type: None,
+                            child_count,
+                        },
+                    });
+                    for child in arr.iter().rev() {
+                        stack.push(Frame {
+                            node: child,
+                            state: FrameState::Enter,
+                        });
+                    }
+                }
+                _ => values.push(String::new()),
+            },
+            FrameState::Exit {
                 node_type,
-                Some("paragraph" | "heading" | "blockquote" | "listItem" | "codeBlock" | "tableRow" | "table")
-            ) && !text.is_empty()
-                && !text.ends_with('\n')
-            {
-                text.push('\n');
+                child_count,
+            } => {
+                let start = values.len().saturating_sub(child_count);
+                let child_texts = values.split_off(start);
+                let mut text = match node_type.as_deref() {
+                    Some("table") => join_texts_with_separator(child_texts, "\n"),
+                    Some("tableRow") => join_texts_with_separator(child_texts, " "),
+                    _ => child_texts.concat(),
+                };
+
+                if matches!(
+                    node_type.as_deref(),
+                    Some("paragraph" | "heading" | "blockquote" | "listItem" | "codeBlock" | "tableRow" | "table")
+                ) && !text.is_empty()
+                    && !text.ends_with('\n')
+                {
+                    text.push('\n');
+                }
+
+                values.push(text);
             }
-
-            text
         }
-        serde_json::Value::Array(arr) => {
-            concat_children(arr)
-        }
-        _ => String::new(),
     }
+
+    values.pop().unwrap_or_default()
 }
 
-fn concat_children(arr: &[serde_json::Value]) -> String {
-    let mut text = String::new();
-    for child in arr {
-        text.push_str(&extract_adf_text(child));
-    }
-    text
-}
-
-fn join_children_with_separator(arr: &[serde_json::Value], separator: &str) -> String {
+fn join_texts_with_separator(child_texts: Vec<String>, separator: &str) -> String {
     let mut text = String::new();
     let mut last_was_whitespace = true;
-    for child in arr {
-        let child_text = extract_adf_text(child);
+    for child_text in child_texts {
         if child_text.is_empty() {
             continue;
         }

From e24964343a1ab7d70306fd5f31f964cd32d4ebc1 Mon Sep 17 00:00:00 2001
From: Mick Grove <mick.grove@mongodb.com>
Date: Sat, 28 Feb 2026 10:24:14 -0700
Subject: [PATCH 8/8] Optimize Jira ADF text extraction

---
 src/jira.rs | 305 ++++++++++++++++++++++++++++++----------------------
 1 file changed, 177 insertions(+), 128 deletions(-)

diff --git a/src/jira.rs b/src/jira.rs
index 40af99e..95d0a96 100644
--- a/src/jira.rs
+++ b/src/jira.rs
@@ -10,146 +10,182 @@ pub use gouqi::Issue as JiraIssue;
 /// Recursively extracts plain text from an Atlassian Document Format (ADF) node.
 ///
 /// Jira Cloud API v3 returns issue descriptions as ADF — a nested JSON structure
-/// rather than a plain string. This function walks the content tree and collects
-/// all leaf `"type": "text"` node values so that secret scanners can find them.
+/// rather than a plain string. This function walks the content tree and appends
+/// leaf `"type": "text"` values, plus line breaks and table separators, to one
+/// shared buffer, so extraction stays linear in the size of the final text.
 fn extract_adf_text(node: &serde_json::Value) -> String {
-    enum FrameState {
-        Enter,
-        Exit {
-            node_type: Option<String>,
-            child_count: usize,
-        },
+    struct PendingSeparator<'a> {
+        separator: &'a str,
+        previous_ended_whitespace: bool,
     }
 
-    struct Frame<'a> {
-        node: &'a serde_json::Value,
-        state: FrameState,
+    struct TextAccumulator {
+        text: String,
+        last_char_is_whitespace: bool,
     }
 
-    let mut stack = vec![Frame {
-        node,
-        state: FrameState::Enter,
-    }];
-    let mut values: Vec<String> = Vec::new();
+    impl TextAccumulator {
+        fn new() -> Self {
+            Self { text: String::new(), last_char_is_whitespace: true }
+        }
 
-    while let Some(frame) = stack.pop() {
-        match frame.state {
-            FrameState::Enter => match frame.node {
-                serde_json::Value::Object(map) => {
-                    let node_type = map.get("type").and_then(|v| v.as_str());
-                    if node_type == Some("text") {
-                        values.push(
-                            map.get("text")
-                                .and_then(|v| v.as_str())
-                                .unwrap_or("")
-                                .to_string(),
-                        );
-                        continue;
-                    }
-                    if node_type == Some("hardBreak") {
-                        values.push("\n".to_string());
-                        continue;
-                    }
+        fn len(&self) -> usize {
+            self.text.len()
+        }
 
-                    let child_count = map
-                        .get("content")
-                        .and_then(|v| v.as_array())
-                        .map(|arr| arr.len())
-                        .unwrap_or(0);
-                    stack.push(Frame {
-                        node: frame.node,
-                        state: FrameState::Exit {
-                            node_type: node_type.map(|value| value.to_string()),
-                            child_count,
-                        },
-                    });
-                    if let Some(arr) = map.get("content").and_then(|v| v.as_array()) {
-                        for child in arr.iter().rev() {
-                            stack.push(Frame {
-                                node: child,
-                                state: FrameState::Enter,
-                            });
+        fn ends_with_newline(&self) -> bool {
+            self.text.ends_with('\n')
+        }
+
+        fn last_char_is_whitespace(&self) -> bool {
+            self.last_char_is_whitespace
+        }
+
+        fn write_text(
+            &mut self,
+            text: &str,
+            pending_separator: &mut Option<PendingSeparator<'_>>,
+        ) -> bool {
+            if text.is_empty() {
+                return false;
+            }
+
+            if let Some(pending_separator) = pending_separator.take() {
+                let starts_non_whitespace =
+                    text.chars().next().map(|ch| !ch.is_whitespace()).unwrap_or(false);
+                if !pending_separator.previous_ended_whitespace && starts_non_whitespace {
+                    self.text.push_str(pending_separator.separator);
+                    if let Some(last_char) = pending_separator.separator.chars().last() {
+                        self.last_char_is_whitespace = last_char.is_whitespace();
+                    }
+                }
+            }
+
+            self.text.push_str(text);
+            if let Some(last_char) = text.chars().last() {
+                self.last_char_is_whitespace = last_char.is_whitespace();
+            }
+            true
+        }
+
+        fn write_char(
+            &mut self,
+            ch: char,
+            pending_separator: &mut Option<PendingSeparator<'_>>,
+        ) -> bool {
+            if let Some(pending_separator) = pending_separator.take() {
+                if !pending_separator.previous_ended_whitespace && !ch.is_whitespace() {
+                    self.text.push_str(pending_separator.separator);
+                    if let Some(last_char) = pending_separator.separator.chars().last() {
+                        self.last_char_is_whitespace = last_char.is_whitespace();
+                    }
+                }
+            }
+
+            self.text.push(ch);
+            self.last_char_is_whitespace = ch.is_whitespace();
+            true
+        }
+    }
+
+    fn write_adf_text(
+        node: &serde_json::Value,
+        output: &mut TextAccumulator,
+        pending_separator: &mut Option<PendingSeparator<'_>>,
+    ) -> bool {
+        match node {
+            serde_json::Value::Object(map) => {
+                let node_type = map.get("type").and_then(|v| v.as_str());
+                if node_type == Some("text") {
+                    return output.write_text(
+                        map.get("text").and_then(|v| v.as_str()).unwrap_or(""),
+                        pending_separator,
+                    );
+                }
+                if node_type == Some("hardBreak") {
+                    return output.write_char('\n', pending_separator);
+                }
+
+                let start_len = output.len();
+                if let Some(children) = map.get("content").and_then(|v| v.as_array()) {
+                    let separator = match node_type {
+                        Some("table") => Some("\n"),
+                        Some("tableRow") => Some(" "),
+                        _ => None,
+                    };
+                    let mut wrote_child_text = false;
+                    let mut previous_ended_whitespace = true;
+                    for child in children {
+                        let mut child_pending_separator = if wrote_child_text {
+                            separator.map(|separator| PendingSeparator {
+                                separator,
+                                previous_ended_whitespace,
+                            })
+                        } else {
+                            pending_separator.take()
+                        };
+                        let child_wrote_text =
+                            write_adf_text(child, output, &mut child_pending_separator);
+                        if !wrote_child_text && !child_wrote_text {
+                            *pending_separator = child_pending_separator;
+                        }
+                        if child_wrote_text {
+                            wrote_child_text = true;
+                            previous_ended_whitespace = output.last_char_is_whitespace();
                         }
                     }
                 }
-                serde_json::Value::Array(arr) => {
-                    let child_count = arr.len();
-                    stack.push(Frame {
-                        node: frame.node,
-                        state: FrameState::Exit {
-                            node_type: None,
-                            child_count,
-                        },
-                    });
-                    for child in arr.iter().rev() {
-                        stack.push(Frame {
-                            node: child,
-                            state: FrameState::Enter,
-                        });
-                    }
-                }
-                _ => values.push(String::new()),
-            },
-            FrameState::Exit {
-                node_type,
-                child_count,
-            } => {
-                let start = values.len().saturating_sub(child_count);
-                let child_texts = values.split_off(start);
-                let mut text = match node_type.as_deref() {
-                    Some("table") => join_texts_with_separator(child_texts, "\n"),
-                    Some("tableRow") => join_texts_with_separator(child_texts, " "),
-                    _ => child_texts.concat(),
-                };
 
                 if matches!(
-                    node_type.as_deref(),
-                    Some("paragraph" | "heading" | "blockquote" | "listItem" | "codeBlock" | "tableRow" | "table")
-                ) && !text.is_empty()
-                    && !text.ends_with('\n')
+                    node_type,
+                    Some(
+                        "paragraph"
+                            | "heading"
+                            | "blockquote"
+                            | "listItem"
+                            | "codeBlock"
+                            | "tableRow"
+                            | "table"
+                    )
+                ) && output.len() > start_len
+                    && !output.ends_with_newline()
                 {
-                    text.push('\n');
+                    output.text.push('\n');
+                    output.last_char_is_whitespace = true;
                 }
 
-                values.push(text);
+                output.len() > start_len
             }
+            serde_json::Value::Array(arr) => {
+                let start_len = output.len();
+                let mut wrote_child_text = false;
+                for child in arr {
+                    let mut child_pending_separator =
+                        if wrote_child_text { None } else { pending_separator.take() };
+                    let child_wrote_text =
+                        write_adf_text(child, output, &mut child_pending_separator);
+                    if !wrote_child_text && !child_wrote_text {
+                        *pending_separator = child_pending_separator;
+                    }
+                    if child_wrote_text {
+                        wrote_child_text = true;
+                    }
+                }
+                output.len() > start_len
+            }
+            _ => false,
         }
     }
 
-    values.pop().unwrap_or_default()
-}
-
-fn join_texts_with_separator(child_texts: Vec<String>, separator: &str) -> String {
-    let mut text = String::new();
-    let mut last_was_whitespace = true;
-    for child_text in child_texts {
-        if child_text.is_empty() {
-            continue;
-        }
-        let child_starts_non_whitespace = child_text
-            .chars()
-            .next()
-            .map(|c| !c.is_whitespace())
-            .unwrap_or(false);
-        let needs_separator = !last_was_whitespace && child_starts_non_whitespace;
-        if needs_separator {
-            text.push_str(separator);
-        }
-        text.push_str(&child_text);
-        if let Some(last_char) = child_text.chars().rev().next() {
-            last_was_whitespace = last_char.is_whitespace();
-        }
-    }
-    text
+    let mut output = TextAccumulator::new();
+    let mut pending_separator = None;
+    write_adf_text(node, &mut output, &mut pending_separator);
+    output.text
 }
 
 /// Returns true if the value looks like an ADF document root.
 fn is_adf(value: &serde_json::Value) -> bool {
-    value
-        .get("type")
-        .and_then(|v| v.as_str())
-        .map(|t| t == "doc")
-        .unwrap_or(false)
+    value.get("type").and_then(|v| v.as_str()).map(|t| t == "doc").unwrap_or(false)
 }
 
 fn flatten_adf_fields(issue_value: &mut serde_json::Value) {
@@ -159,9 +195,8 @@ fn flatten_adf_fields(issue_value: &mut serde_json::Value) {
     if let Some(desc) = issue_value.pointer("/fields/description") {
         if is_adf(desc) {
             let plain_text = extract_adf_text(desc);
-            if let Some(fields) = issue_value
-                .pointer_mut("/fields")
-                .and_then(|value| value.as_object_mut())
+            if let Some(fields) =
+                issue_value.pointer_mut("/fields").and_then(|value| value.as_object_mut())
             {
                 fields.insert(
                     "description".to_string(),
@@ -385,6 +420,24 @@ mod tests {
         assert_eq!(text.trim_end(), "item1\nitem2\ncode");
     }
 
+    #[test]
+    fn extract_adf_text_preserves_table_row_whitespace_rules() {
+        let value = json!({
+            "type": "doc",
+            "version": 1,
+            "content": [{
+                "type": "tableRow",
+                "content": [
+                    {"type": "text", "text": "foo"},
+                    {"type": "text", "text": "bar"},
+                    {"type": "text", "text": " baz"}
+                ]
+            }]
+        });
+        let text = extract_adf_text(&value);
+        assert_eq!(text.trim_end(), "foo bar baz");
+    }
+
     #[test]
     fn flatten_adf_fields_converts_comment_bodies() {
         let mut issue_value = json!({
@@ -428,10 +481,8 @@ mod tests {
             }
         });
         flatten_adf_fields(&mut issue_value);
-        let desc = issue_value
-            .pointer("/fields/description")
-            .and_then(|v| v.as_str())
-            .unwrap_or("");
+        let desc =
+            issue_value.pointer("/fields/description").and_then(|v| v.as_str()).unwrap_or("");
         assert_eq!(desc, "desc");
     }
 
@@ -443,10 +494,8 @@ mod tests {
             }
         });
         flatten_adf_fields(&mut issue_value);
-        let desc = issue_value
-            .pointer("/fields/description")
-            .and_then(|v| v.as_str())
-            .unwrap_or("");
+        let desc =
+            issue_value.pointer("/fields/description").and_then(|v| v.as_str()).unwrap_or("");
         assert_eq!(desc, "plain description");
     }