generated from eblume/project-template
Some checks failed
Build / validate (pull_request) Failing after 3s
Backend (TDD):
- task.promote {container_id, item_ref, attention?, project?}: mint a committed
task from the item_ref-th `- [ ]` context item (1-based, document order via a
new extract::context_item_lines) and rewrite that source line into a [[link]]
to it. Unit + rpc_socket tests.
- resolve_id now excludes canonical-context docs, so [[Task Title]] resolves to
the task, not its identically-titled context doc (deterministic; a general fix
surfaced by promotion's ULID-tiebreak ambiguity).
Plugin: :Heph promote / promote_under_cursor (save-if-dirty → compute item index
with a code-fence-aware scanner mirroring extract.rs → task.promote → reload the
rewritten buffer). e2e spec (f): promote a context line, assert the new task in
next, the source line became a link, and the container backlinks the task.
CI via Dagger: a test_nvim function bakes a pinned, arch-detected Neovim
(v0.11.2 — Debian's is too old for vim.uv) onto rust:1-bookworm, builds hephd,
and runs the self-contained shim suite (cargo + target cache volumes);
build.yaml calls `dagger call test-nvim`. run.lua now fails on zero specs (no
false-green). Validated end-to-end: passing suite → exit 0, failing spec →
Dagger exit 1.
117 Rust tests + 7 nvim e2e specs green.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
256 lines
9.2 KiB
Rust
256 lines
9.2 KiB
Rust
//! Markdown derivation (tech-spec §5).
//!
//! From a node's body we derive two things, purely and deterministically:
//!
//! - **`[[wiki-links]]`** → `wiki` link targets (resolved to nodes later, via
//!   `aliases`/title; unresolved targets are allowed and recorded).
//! - **GFM task-list items** (`- [ ]` / `- [x]`) → the **local context-item
//!   index** (Fork A, [[design]] §6.3). The `[ ]`/`[x]` marker *is* the item's
//!   only state; this index is derived per replica, never synced.
//!
//! Derivation is **idempotent**: the same body always yields the same
//! [`Extraction`]. Code blocks are skipped (a `- [ ]` inside a fenced block is
//! not a task; a `[[link]]` inside one is not a link), which is why this goes
//! through a real CommonMark parser rather than a line scan.
|
|
|
|
use std::collections::HashSet;
|
|
use std::ops::Range;
|
|
|
|
use pulldown_cmark::{Event, Options, Parser, Tag, TagEnd};
|
|
|
|
/// One GFM task-list line (`- [ ]` / `- [x]`) derived from a body (Fork A).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ContextItem {
    /// Label shown for the item, with the checkbox marker and any surrounding
    /// whitespace removed.
    pub text: String,
    /// Checkbox state: `false` for `- [ ]` (outstanding), `true` for `- [x]`
    /// (not outstanding).
    pub checked: bool,
}
|
|
|
|
/// Everything derived from a single body.
|
|
#[derive(Debug, Clone, Default, PartialEq, Eq)]
|
|
pub struct Extraction {
|
|
/// Wiki-link targets, in first-seen document order, de-duplicated.
|
|
pub wiki_links: Vec<String>,
|
|
/// Context items, in document order.
|
|
pub context_items: Vec<ContextItem>,
|
|
}
|
|
|
|
/// Derive [`Extraction`] from a markdown body.
|
|
pub fn extract(body: &str) -> Extraction {
|
|
let mut options = Options::empty();
|
|
options.insert(Options::ENABLE_TASKLISTS);
|
|
|
|
let mut context_items: Vec<ContextItem> = Vec::new();
|
|
// Byte ranges covered by code (fenced/indented blocks and inline spans).
|
|
// Wiki-links found inside these are not links.
|
|
let mut code_ranges: Vec<Range<usize>> = Vec::new();
|
|
// Depth of nested code blocks; their inner text ranges are code.
|
|
let mut code_depth: u32 = 0;
|
|
// The task item currently being collected, if any: (checked, accumulated text).
|
|
let mut current: Option<(bool, String)> = None;
|
|
|
|
for (event, range) in Parser::new_ext(body, options).into_offset_iter() {
|
|
match event {
|
|
Event::Start(Tag::CodeBlock(_)) => code_depth += 1,
|
|
Event::End(TagEnd::CodeBlock) => code_depth = code_depth.saturating_sub(1),
|
|
|
|
Event::TaskListMarker(checked) => {
|
|
current = Some((checked, String::new()));
|
|
}
|
|
Event::End(TagEnd::Item) => {
|
|
if let Some((checked, text)) = current.take() {
|
|
context_items.push(ContextItem {
|
|
checked,
|
|
text: text.trim().to_string(),
|
|
});
|
|
}
|
|
}
|
|
|
|
Event::Text(text) => {
|
|
if code_depth > 0 {
|
|
code_ranges.push(range);
|
|
}
|
|
if let Some((_, label)) = current.as_mut() {
|
|
label.push_str(&text);
|
|
}
|
|
}
|
|
// Inline code is part of an item's visible label, but its contents
|
|
// are never a wiki-link source.
|
|
Event::Code(code) => {
|
|
code_ranges.push(range);
|
|
if let Some((_, label)) = current.as_mut() {
|
|
label.push_str(&code);
|
|
}
|
|
}
|
|
Event::SoftBreak | Event::HardBreak => {
|
|
if let Some((_, label)) = current.as_mut() {
|
|
label.push(' ');
|
|
}
|
|
}
|
|
_ => {}
|
|
}
|
|
}
|
|
|
|
// Scan the raw body for wiki-links (CommonMark mangles `[[ ]]` brackets, so
|
|
// we can't rely on Text events), excluding any that start inside code.
|
|
let wiki_links = scan_wiki_links(body, &code_ranges);
|
|
|
|
Extraction {
|
|
wiki_links,
|
|
context_items,
|
|
}
|
|
}
|
|
|
|
/// The 0-based body line index of each context item, in the **same document
|
|
/// order** as [`extract`]'s `context_items` (task markers never fire inside code
|
|
/// blocks, so the two lists align 1:1). Promotion uses this to locate the source
|
|
/// `- [ ]` line it must rewrite into a link (tech-spec §4.3, §6).
|
|
pub fn context_item_lines(body: &str) -> Vec<usize> {
|
|
let mut options = Options::empty();
|
|
options.insert(Options::ENABLE_TASKLISTS);
|
|
let mut lines = Vec::new();
|
|
for (event, range) in Parser::new_ext(body, options).into_offset_iter() {
|
|
if let Event::TaskListMarker(_) = event {
|
|
lines.push(body[..range.start].bytes().filter(|&b| b == b'\n').count());
|
|
}
|
|
}
|
|
lines
|
|
}
|
|
|
|
/// Find `[[target]]` (or `[[target|display]]`) spans in `body`, returning each
/// unique, non-empty target in first-seen order.
///
/// `code_ranges` are byte ranges of `body` that are code. A `[[` that starts
/// inside one is literal text: scanning resumes at the end of that range, so
/// an opener in code can never pair with (and swallow) the `]]` of a real link
/// that follows the code.
///
/// The `[` / `]` delimiters are ASCII, so the string slices taken here always
/// fall on char boundaries.
fn scan_wiki_links(body: &str, code_ranges: &[Range<usize>]) -> Vec<String> {
    let mut out: Vec<String> = Vec::new();
    let mut seen: HashSet<String> = HashSet::new();
    let bytes = body.as_bytes();
    let mut i = 0;

    while i + 1 < bytes.len() {
        if bytes[i] != b'[' || bytes[i + 1] != b'[' {
            i += 1;
            continue;
        }

        // Opener inside code: skip the rest of that code range. `contains`
        // guarantees `code.end > i`, so this always makes progress.
        if let Some(code) = code_ranges.iter().find(|r| r.contains(&i)) {
            i = code.end;
            continue;
        }

        let rest = &body[i + 2..];
        let close = match rest.find("]]") {
            Some(close) => close,
            // Unterminated `[[` — no `]]` remains, so nothing more to find.
            None => break,
        };

        // `[[target|display]]` — the target is the left side.
        let target = rest[..close].split('|').next().unwrap_or("").trim();
        if !target.is_empty() && seen.insert(target.to_string()) {
            out.push(target.to_string());
        }

        // Step over `[[`, the inner text, and `]]`.
        i += 2 + close + 2;
    }
    out
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand: only the wiki-link targets derived from `body`.
    fn links(body: &str) -> Vec<String> {
        let Extraction { wiki_links, .. } = extract(body);
        wiki_links
    }

    /// Shorthand: only the context items derived from `body`.
    fn items(body: &str) -> Vec<ContextItem> {
        let Extraction { context_items, .. } = extract(body);
        context_items
    }

    #[test]
    fn extracts_simple_wiki_links_in_order() {
        let found = links("See [[Roof]] then [[Contractor calls]].");
        assert_eq!(found, ["Roof", "Contractor calls"]);
    }

    #[test]
    fn wiki_link_target_is_left_of_pipe() {
        let found = links("[[borgmatic|Borgmatic backups]]");
        assert_eq!(found, ["borgmatic"]);
    }

    #[test]
    fn wiki_links_are_deduplicated_first_seen_order() {
        // Targets are case-sensitive: `a` is distinct from `A`.
        let found = links("[[A]] [[B]] [[A]] [[a]]");
        assert_eq!(found, ["A", "B", "a"]);
    }

    #[test]
    fn empty_and_unterminated_wiki_links_are_ignored() {
        let found = links("[[]] and [[ ]] and [[oops");
        assert_eq!(found, Vec::<String>::new());
    }

    #[test]
    fn wiki_links_inside_code_are_not_extracted() {
        let found = links("real [[Keep]]\n\n```\nnot [[Skip]] here\n```\n");
        assert_eq!(found, ["Keep"]);
    }

    #[test]
    fn extracts_checkbox_items_with_state() {
        let expected = vec![
            ContextItem {
                text: String::from("feed birds"),
                checked: false,
            },
            ContextItem {
                text: String::from("brush teeth"),
                checked: true,
            },
        ];
        assert_eq!(items("- [ ] feed birds\n- [x] brush teeth\n"), expected);
    }

    #[test]
    fn checkbox_inside_code_block_is_not_an_item() {
        let found = items("- [ ] real item\n\n```\n- [ ] not an item\n```\n");
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].text, "real item");
    }

    #[test]
    fn checkbox_item_can_carry_a_wiki_link() {
        // One checkbox line feeds both outputs: it is a context item and a
        // wiki-link source. The label keeps the raw `[[...]]` markdown so that
        // promotion can later find and rewrite the source line (Fork A, §6).
        let Extraction {
            wiki_links,
            context_items,
        } = extract("- [ ] call [[Contractor]] back");
        assert_eq!(wiki_links, ["Contractor"]);
        assert_eq!(context_items.len(), 1);
        let only = &context_items[0];
        assert_eq!(only.text, "call [[Contractor]] back");
        assert!(!only.checked);
    }

    #[test]
    fn context_item_lines_align_with_items_skipping_code() {
        let body = "# Notes\n\n- [ ] first\n\n```\n- [ ] fenced\n```\n\n- [x] second\n";
        let found = context_item_lines(body);
        // The fenced checkbox is skipped, so the count matches `context_items`.
        assert_eq!(found.len(), extract(body).context_items.len());
        // 0-based lines of "- [ ] first" and "- [x] second".
        assert_eq!(found, vec![2, 8]);
    }

    #[test]
    fn extraction_is_idempotent() {
        let body = "# Mixed\n\n- [ ] do [[X]]\n- [x] done\n\nsee [[Y]]\n";
        let first = extract(body);
        let second = extract(body);
        assert_eq!(first, second);
    }

    #[test]
    fn body_without_links_or_items_yields_empty() {
        let derived = extract("just prose, no structure");
        assert_eq!(derived, Extraction::default());
    }
}
|