test: add property tests for parsing/CRDT surfaces; fix two parser panics

Tier 1 fuzzing: proptest properties across extract, wikilink, crdt,
frontmatter, recurrence, hlc, datespec, and quickadd, which run as part of
the normal cargo test suite. Fuzzing surfaced two parser panics, which this
commit fixes:

- parse_offset overflowing chrono date arithmetic on huge offsets (panic)
- parse_month_day slicing a multibyte token on a non-char boundary (panic)

Also hardens crdt::merge_body against malformed sync deltas with catch_unwind
(partial: yrs 0.27 can still SIGABRT/UB on some inputs — tracked separately).
Fixes the extract nested-checkbox alignment so context_item_lines stays 1:1.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
Erich Blume 2026-06-09 12:56:31 -07:00
commit c0e633f7a6
14 changed files with 379 additions and 33 deletions

1
Cargo.lock generated
View file

@ -2324,6 +2324,7 @@ dependencies = [
"heph-core",
"jsonwebtoken",
"keyring-core",
"proptest",
"rand 0.8.6",
"reqwest",
"rsa",

View file

@ -0,0 +1,7 @@
# Seeds for failure cases proptest has generated in the past. It is
# automatically read and these particular cases re-run before any
# novel cases are generated.
#
# It is recommended to check this file in to source control so that
# everyone who runs the test benefits from these saved cases.
cc 279ac0171bbd7d502fc4aa546b3b3fbfc8eb01314a5df24c086f0297460ed01c # shrinks to seed = "", delta = [1, 1, 0, 0, 64, 128, 128, 128, 128, 128, 128, 128, 16, 0]

View file

@ -109,16 +109,39 @@ pub(crate) struct BodyMerge {
/// Merge a peer's `delta` update into the CRDT seeded from `prev_state`. The
/// merging doc never authors, so its `client_id` is irrelevant. Commutative and
/// idempotent — applying the same delta twice is a no-op.
///
/// `delta` arrives from sync peers, so it is untrusted. A delta that fails to
/// decode is ignored (a no-op merge). yrs 0.27 is **not** robust to malformed
/// update bytes: some inputs trip a `debug_assert!` in its block decoder
/// (unwinding panic), and at least one class triggers genuine undefined
/// behaviour (an invalid `char`), which surfaces as a non-unwinding `SIGABRT`
/// under debug UB-checks and as silent UB in release. The `catch_unwind` below
/// contains the unwinding subset so a corrupt payload degrades to a no-op
/// merge rather than crashing a debug daemon; it cannot stop the abort/UB
/// class. The blast radius is limited — `/sync/push` is authenticated — but a
/// buggy or hostile *authenticated* peer can still feed bad bytes here. The
/// real fix is upstream (or a pre-apply validator yrs doesn't yet expose);
/// tracked in the Hephaestus project and exercised by the `crdt_merge` fuzz
/// target. See [[fuzz-testing]].
pub(crate) fn merge_body(prev_state: Option<&[u8]>, delta: &[u8]) -> BodyMerge {
let doc = load(0, prev_state);
if let Ok(update) = Update::decode_v1(delta) {
let mut txn = doc.transact_mut();
let _ = txn.apply_update(update);
}
BodyMerge {
state: encode_state(&doc),
body: materialize(&doc),
}
// Decode and apply inside `catch_unwind`, so an unwinding panic raised while
// handling `delta` is caught here instead of propagating to the caller.
let merged = std::panic::catch_unwind(|| {
let doc = load(0, prev_state);
// A delta that fails to decode is skipped, and an apply error is discarded:
// either way the doc is encoded as it stands afterwards.
if let Ok(update) = Update::decode_v1(delta) {
let mut txn = doc.transact_mut();
let _ = txn.apply_update(update);
}
BodyMerge {
state: encode_state(&doc),
body: materialize(&doc),
}
});
// A panic was caught: rebuild the result from `prev_state` alone, which makes
// the merge a no-op for this delta.
merged.unwrap_or_else(|_| {
let doc = load(0, prev_state);
BodyMerge {
state: encode_state(&doc),
body: materialize(&doc),
}
})
}
/// Materialize a stored CRDT state blob to its body text.
@ -207,4 +230,62 @@ mod tests {
assert_eq!(edit.body, "café au lait");
assert_eq!(body_of(&edit.state), "café au lait");
}
#[test]
fn corrupt_delta_is_a_noop_merge() {
    // Smallest failing delta proptest shrank to. On these bytes yrs 0.27
    // trips a debug assertion in its block decoder rather than returning
    // `Err`; the merge has to absorb that and leave the body untouched
    // instead of taking the daemon down.
    const BAD_DELTA: [u8; 14] = [1, 1, 0, 0, 64, 128, 128, 128, 128, 128, 128, 128, 16, 0];
    let seeded = write_body(A, None, "hello");
    let merged = merge_body(Some(&seeded.state), &BAD_DELTA);
    assert_eq!(merged.body, "hello");
    assert_eq!(body_of(&merged.state), "hello");
}
use proptest::prelude::*;
proptest! {
    /// Overwriting any previous buffer with a new one yields exactly the new
    /// text — both in the returned body and in the body re-materialized from
    /// the returned state. `\PC` draws arbitrary non-control chars (multibyte
    /// included), so the diff's UTF-8 boundary alignment is exercised.
    #[test]
    fn write_materializes_exactly(prev in "\\PC{0,80}", new in "\\PC{0,80}") {
        let seeded = write_body(A, None, &prev);
        let rewritten = write_body(A, Some(&seeded.state), &new);
        prop_assert_eq!(&rewritten.body, &new);
        prop_assert_eq!(body_of(&rewritten.state), new);
    }

    /// Two replicas that edit concurrently end up with the same body no
    /// matter which one merges the other's delta.
    #[test]
    fn concurrent_edits_converge(
        base in "\\PC{0,40}", ea in "\\PC{0,40}", eb in "\\PC{0,40}",
    ) {
        let origin = write_body(A, None, &base);
        let replica = merge_body(None, &origin.delta);
        let edit_a = write_body(A, Some(&origin.state), &ea);
        let edit_b = write_body(B, Some(&replica.state), &eb);
        let merged_on_a = merge_body(Some(&edit_a.state), &edit_b.delta);
        let merged_on_b = merge_body(Some(&edit_b.state), &edit_a.delta);
        prop_assert_eq!(merged_on_a.body, merged_on_b.body, "replicas did not converge");
    }

    /// Merging the same delta a second time leaves the body unchanged, for
    /// arbitrary base and edited text.
    #[test]
    fn merge_idempotent_for_arbitrary_edits(base in "\\PC{0,40}", new in "\\PC{0,40}") {
        let origin = write_body(A, None, &base);
        let edit = write_body(A, Some(&origin.state), &new);
        let first = merge_body(Some(&origin.state), &edit.delta);
        let second = merge_body(Some(&first.state), &edit.delta);
        prop_assert_eq!(first.body, second.body);
    }

    // NB: there is intentionally no property here feeding *arbitrary*
    // (non-yrs) bytes in as a delta. yrs 0.27 can `SIGABRT`/UB on malformed
    // updates (see `merge_body`'s docs and `corrupt_delta_is_a_noop_merge`);
    // that cannot be caught and would take the whole test binary down. The
    // non-blocking Tier 2 `crdt_merge` fuzz target covers that surface.
}
}

View file

@ -47,50 +47,58 @@ pub fn extract(body: &str) -> Extraction {
let mut code_ranges: Vec<Range<usize>> = Vec::new();
// Depth of nested code blocks; their inner text ranges are code.
let mut code_depth: u32 = 0;
// The task item currently being collected, if any: (checked, accumulated text).
let mut current: Option<(bool, String)> = None;
// One frame per open list item: `Some(index into context_items)` once the
// item turns out to carry a task marker. A stack (not a single slot) so a
// checklist nested under a checklist item keeps both items — pushed in
// marker order, which is what keeps `context_item_lines` aligned 1:1.
let mut open_items: Vec<Option<usize>> = Vec::new();
// Append `s` to the innermost open task item's label, if any.
fn append(items: &mut [ContextItem], open: &[Option<usize>], s: &str) {
if let Some(idx) = open.iter().rev().find_map(|f| *f) {
items[idx].text.push_str(s);
}
}
for (event, range) in Parser::new_ext(body, options).into_offset_iter() {
match event {
Event::Start(Tag::CodeBlock(_)) => code_depth += 1,
Event::End(TagEnd::CodeBlock) => code_depth = code_depth.saturating_sub(1),
Event::Start(Tag::Item) => open_items.push(None),
Event::TaskListMarker(checked) => {
current = Some((checked, String::new()));
context_items.push(ContextItem {
checked,
text: String::new(),
});
if let Some(frame) = open_items.last_mut() {
*frame = Some(context_items.len() - 1);
}
}
Event::End(TagEnd::Item) => {
if let Some((checked, text)) = current.take() {
context_items.push(ContextItem {
checked,
text: text.trim().to_string(),
});
}
open_items.pop();
}
Event::Text(text) => {
if code_depth > 0 {
code_ranges.push(range);
}
if let Some((_, label)) = current.as_mut() {
label.push_str(&text);
}
append(&mut context_items, &open_items, &text);
}
// Inline code is part of an item's visible label, but its contents
// are never a wiki-link source.
Event::Code(code) => {
code_ranges.push(range);
if let Some((_, label)) = current.as_mut() {
label.push_str(&code);
}
append(&mut context_items, &open_items, &code);
}
Event::SoftBreak | Event::HardBreak => {
if let Some((_, label)) = current.as_mut() {
label.push(' ');
}
append(&mut context_items, &open_items, " ");
}
_ => {}
}
}
for item in &mut context_items {
item.text = item.text.trim().to_string();
}
// Scan the raw body for wiki-links (CommonMark mangles `[[ ]]` brackets, so
// we can't rely on Text events), excluding any that start inside code.
@ -243,6 +251,28 @@ mod tests {
assert_eq!(lines, vec![2, 8]); // 0-based lines of "- [ ] first" / "- [x] second"
}
#[test]
fn nested_checkbox_items_are_both_extracted_in_order() {
    // One checklist item with another checklist item beneath it. Both must
    // be reported, in document order, and `context_item_lines` has to keep
    // lining up with them one-to-one.
    let body = "- [ ] outer\n - [x] inner\n";
    let expected = vec![
        ContextItem {
            text: "outer".to_string(),
            checked: false,
        },
        ContextItem {
            text: "inner".to_string(),
            checked: true,
        },
    ];
    let extraction = extract(body);
    assert_eq!(extraction.context_items, expected);
    assert_eq!(context_item_lines(body), vec![0, 1]);
}
#[test]
fn extraction_is_idempotent() {
let body = "# Mixed\n\n- [ ] do [[X]]\n- [x] done\n\nsee [[Y]]\n";
@ -253,4 +283,55 @@ mod tests {
fn body_without_links_or_items_yields_empty() {
assert_eq!(extract("just prose, no structure"), Extraction::default());
}
use proptest::prelude::*;
/// Bodies stitched from markdown-ish fragments — checklists (incl. nested),
/// code fences, links (well-formed, empty, unterminated), and arbitrary
/// text — to stress structure the unit tests don't enumerate.
fn markdownish() -> impl Strategy<Value = String> {
let frag = prop_oneof![
Just("- [ ] feed birds\n".to_string()),
Just("- [x] done [[Roof]]\n".to_string()),
Just(" - [X] nested\n".to_string()),
Just("* [ ] star\n".to_string()),
Just("+ [x] plus\n".to_string()),
Just("- plain item\n".to_string()),
Just("```\n".to_string()),
Just("# Heading\n".to_string()),
Just("> quote\n".to_string()),
Just("[[Roof]] ".to_string()),
Just("[[Roof|the roof]] ".to_string()),
Just("[[ ]] ".to_string()),
Just("[[unterminated ".to_string()),
Just("]] stray ".to_string()),
Just("`- [ ] code`\n".to_string()),
Just("\n".to_string()),
"\\PC{0,12}",
];
proptest::collection::vec(frag, 0..12).prop_map(|v| v.concat())
}
proptest! {
    /// Extraction never panics on arbitrary input, and running it twice on
    /// the same body produces equal results.
    #[test]
    fn extract_is_total_and_idempotent(body in "\\PC{0,300}") {
        let first = extract(&body);
        let second = extract(&body);
        prop_assert_eq!(first, second);
    }

    /// Every extracted link is non-empty, already trimmed, and unique; and
    /// `context_item_lines` yields exactly one line per extracted context
    /// item — the 1:1 alignment promotion's line-rewriting relies on (see
    /// [`context_item_lines`]).
    #[test]
    fn invariants_hold_for_markdownish_bodies(body in markdownish()) {
        let extraction = extract(&body);
        let mut seen = HashSet::new();
        for link in &extraction.wiki_links {
            prop_assert!(!link.is_empty());
            prop_assert_eq!(link.trim(), link.as_str());
            prop_assert!(seen.insert(link.clone()), "duplicate link {:?}", link);
        }
        let line_count = context_item_lines(&body).len();
        prop_assert_eq!(line_count, extraction.context_items.len());
    }
}
}

View file

@ -89,4 +89,35 @@ mod tests {
let body = "---\nid: x\n---\nbody\n\n---\n\nmore\n";
assert_eq!(strip(body), "body\n\n---\n\nmore\n");
}
use proptest::prelude::*;
/// Frontmatter-shaped fragments: fences, key lines, prose, and noise.
fn frontmatterish() -> impl Strategy<Value = String> {
let frag = prop_oneof![
Just("---\n".to_string()),
Just("---".to_string()),
Just("id: x\n".to_string()),
Just("title: Roof\n".to_string()),
Just("not a key line\n".to_string()),
Just("\n".to_string()),
Just("# Heading\n".to_string()),
"\\PC{0,12}",
];
proptest::collection::vec(frag, 0..8).prop_map(|v| v.concat())
}
proptest! {
    /// `strip` never panics and can only drop a prefix: whatever it returns
    /// is a suffix of the input, and when the input does not open with a
    /// `---` fence line the input comes back unchanged.
    #[test]
    fn strip_returns_a_suffix(body in frontmatterish()) {
        let stripped = strip(&body);
        prop_assert!(body.ends_with(stripped));
        let has_opening_fence = body.starts_with("---\n");
        if !has_opening_fence {
            prop_assert_eq!(stripped, body.as_str());
        }
    }
}
}

View file

@ -218,5 +218,12 @@ mod tests {
let b = Hlc { physical: p2, counter: c2, origin: o2 };
prop_assert_eq!(a.cmp(&b), a.encode().cmp(&b.encode()));
}
/// Sync cursors arrive over the wire — parsing arbitrary strings must
/// return an error, never panic.
///
/// `\PC{0,60}` draws up to 60 arbitrary non-control chars. The parse result
/// is discarded on purpose: the only thing checked is that the call returns.
#[test]
fn parse_never_panics(s in "\\PC{0,60}") {
let _ = Hlc::parse(&s);
}
}
}

View file

@ -181,5 +181,31 @@ mod tests {
let once = reset_checkboxes(&body);
prop_assert_eq!(reset_checkboxes(&once), once);
}
/// A rule with no end always has a next occurrence, and that occurrence
/// falls strictly after `after` — roll-forward can never schedule into the
/// past. Covers every frequency at intervals 1–4, with `after` up to 399
/// days past the anchor.
#[test]
fn next_is_strictly_after(
    freq in proptest::sample::select(vec!["DAILY", "WEEKLY", "MONTHLY", "YEARLY"]),
    interval in 1u32..5,
    gap_days in 0i64..400,
) {
    let after = JAN1 + gap_days * ONE_DAY;
    let rrule = format!("FREQ={freq};INTERVAL={interval}");
    let upcoming = next_occurrence(&rrule, JAN1, after)
        .unwrap()
        .expect("infinite rule always has a next instance");
    prop_assert!(upcoming > after);
}
/// RRULEs are stored strings that may come from old data or other
/// writers — arbitrary input must error, never panic.
///
/// `anchor` and `after` range over every `i64`, so extreme timestamps are
/// covered alongside the arbitrary rule text. The result is discarded: the
/// only thing checked is that the call returns.
#[test]
fn arbitrary_rrule_never_panics(
s in "\\PC{0,60}",
anchor in proptest::num::i64::ANY,
after in proptest::num::i64::ANY,
) {
let _ = next_occurrence(&s, anchor, after);
}
}
}

View file

@ -148,4 +148,41 @@ mod tests {
assert_eq!(expand("dangling [[01ID", &t), "dangling [[01ID");
assert_eq!(expand("", &t), "");
}
use proptest::prelude::*;
/// Bodies stitched from canonical link forms (bare/labelled/legacy), broken
/// fences, and bracket-free filler. Targets are unpadded — at-rest links
/// are canonical — so the collapse∘expand law below holds exactly.
fn linky() -> impl Strategy<Value = String> {
let frag = prop_oneof![
Just("[[01ID]]".to_string()),
Just("[[02ID]]".to_string()),
Just("[[01ID|Roof]]".to_string()),
Just("[[02ID|Garden]]".to_string()),
Just("[[01ID|custom label]]".to_string()),
Just("[[unknown]]".to_string()),
Just("[[Some Title|text]]".to_string()),
Just("[[".to_string()),
Just("]]".to_string()),
Just(" plain text ".to_string()),
"[^\\[\\]]{0,10}",
];
proptest::collection::vec(frag, 0..10).prop_map(|v| v.concat())
}
proptest! {
    /// `expand` and `collapse` are each idempotent, and the read→write
    /// round trip is lossless: collapsing what a client echoes back after
    /// an expand gives the same at-rest body as collapsing the original.
    #[test]
    fn expand_collapse_idempotent_and_round_trip(body in linky()) {
        let table = titles();

        let expanded = expand(&body, &table);
        prop_assert_eq!(expand(&expanded, &table), expanded.clone(), "expand not idempotent");

        let collapsed = collapse(&body, &table);
        prop_assert_eq!(collapse(&collapsed, &table), collapsed.clone(), "collapse not idempotent");

        prop_assert_eq!(collapse(&expanded, &table), collapsed, "collapse(expand(x)) != collapse(x)");
    }
}
}

View file

@ -44,6 +44,7 @@ dbus-secret-service-keyring-store.workspace = true
[dev-dependencies]
tempfile = "3"
proptest = "1"
# Auth tests generate a throwaway RSA key + JWKS at runtime (no key in the repo).
rsa = "0.9"
rand = "0.8"

View file

@ -0,0 +1,7 @@
# Seeds for failure cases proptest has generated in the past. It is
# automatically read and these particular cases re-run before any
# novel cases are generated.
#
# It is recommended to check this file in to source control so that
# everyone who runs the test benefits from these saved cases.
cc 9e3bbefcd78c4c389f8d6088d0a5462bf5516aed780ff5dd5952f636c3ae7ba2 # shrinks to s = "A0𐻂 a"

View file

@ -98,12 +98,20 @@ fn parse_offset(rest: &str, today: NaiveDate) -> Result<NaiveDate> {
let n: u64 = num
.parse()
.with_context(|| format!("not a relative date offset: +{rest}"))?;
match unit.trim() {
"" | "d" | "day" | "days" => Ok(today + Days::new(n)),
"w" | "wk" | "week" | "weeks" => Ok(today + Days::new(n * 7)),
"m" | "mo" | "month" | "months" => Ok(today + Months::new(n as u32)),
// Checked throughout: a large `n` would otherwise overflow chrono's date
// arithmetic and panic (the `+` operators do), so an out-of-range offset
// must surface as a clean error instead of crashing the parse.
let out = match unit.trim() {
"" | "d" | "day" | "days" => today.checked_add_days(Days::new(n)),
"w" | "wk" | "week" | "weeks" => n
.checked_mul(7)
.and_then(|days| today.checked_add_days(Days::new(days))),
"m" | "mo" | "month" | "months" => u32::try_from(n)
.ok()
.and_then(|m| today.checked_add_months(Months::new(m))),
other => bail!("unknown offset unit {other:?} (use d, w, or m)"),
}
};
out.with_context(|| format!("date offset +{rest} is out of range"))
}
/// Map a weekday name (full or common abbreviation) to a `Weekday`. Matches
@ -258,7 +266,10 @@ fn parse_month_day(s: &str) -> Option<(u32, u32)> {
return None;
}
let month = |t: &str| -> Option<u32> {
match &t[..t.len().min(3)] {
// First three *chars* (not bytes): a multibyte token like "𐻂" would
// make a byte slice land mid-codepoint and panic.
let key: String = t.chars().take(3).collect();
match key.as_str() {
"jan" => Some(1),
"feb" => Some(2),
"mar" => Some(3),
@ -637,4 +648,37 @@ mod tests {
assert_eq!(humanize_rrule(raw), raw, "should pass {raw} through");
}
}
#[test]
fn huge_day_offset_does_not_panic() {
// `+<huge>d` parses as a valid u64 then overflows the date — must be a
// clean Err, not a chrono arithmetic panic. One case per unit (days,
// weeks, months), each of which the test requires to be rejected.
assert!(parse_date("+999999999999999999d", today()).is_err());
assert!(parse_date("+999999999w", today()).is_err());
assert!(parse_date("+999999999m", today()).is_err());
}
use proptest::prelude::*;
proptest! {
    /// Date parsing returns for any input without panicking — surfaces hand
    /// it raw user text.
    #[test]
    fn parse_date_never_panics(s in "\\PC{0,40}") {
        let _ = parse_date(&s, today());
    }

    /// A `+Nd` offset parses to a date whose `%Y-%m-%d` rendering parses
    /// back to that same date.
    #[test]
    fn offset_dates_round_trip_through_iso(n in 0u32..3650) {
        let offset = format!("+{n}d");
        let parsed = parse_date(&offset, today()).unwrap();
        let iso = parsed.format("%Y-%m-%d").to_string();
        let reparsed = parse_date(&iso, today()).unwrap();
        prop_assert_eq!(reparsed, parsed);
    }

    /// Recurrence parsing returns for any input without panicking.
    #[test]
    fn parse_recurrence_never_panics(s in "\\PC{0,40}") {
        let _ = parse_recurrence(&s);
    }
}
}

View file

@ -240,4 +240,25 @@ mod tests {
assert_eq!(r.title, "Review every report");
assert_eq!(r.recurrence, None);
}
use proptest::prelude::*;
proptest! {
    /// Quick-add returns for any input without panicking — it is the
    /// daemon's parse of raw capture text.
    #[test]
    fn parse_never_panics(s in "\\PC{0,60}") {
        let _ = parse(&s, today(), &projects());
    }

    /// The title is built only from words present in the input: the parser
    /// may drop tokens it recognizes but must never invent text.
    #[test]
    fn title_words_are_a_subset_of_input_words(s in "[\\PC ]{0,60}") {
        let parsed = parse(&s, today(), &projects());
        let input_words: std::collections::HashSet<&str> = s.split_whitespace().collect();
        for w in parsed.title.split_whitespace() {
            prop_assert!(input_words.contains(w), "title word {w:?} not in input {s:?}");
        }
    }
}
}

View file

@ -0,0 +1 @@
Fixed two parser panics found by fuzzing: a relative date offset like `+999999999999d` overflowed chrono's date arithmetic, and an `every <Month> <day>` recurrence phrase containing a multibyte character (e.g. `𐻂`) sliced a string on a non-char boundary. Both now return a clean error or fall through instead of crashing the daemon's parse.

View file

@ -0,0 +1 @@
Added property-based tests (proptest) across the parsing and CRDT surfaces (extraction, wiki-link projection, body CRDT, frontmatter, recurrence, HLC, datespec, quick-add), runnable as part of `cargo test`. See [[fuzz-testing]].