test(fuzz): add cargo-fuzz targets for CRDT and extraction surfaces
All checks were successful
Build / validate (pull_request) Successful in 10m29s

Tier 2 fuzzing: a nightly cargo-fuzz crate at crates/heph-core/fuzz/ with
three targets (crdt_merge, crdt_write, extract), reaching crate-private CRDT
internals through heph-core's new 'fuzzing' feature. Driven ad-hoc via
'mise run fuzz'; not in CI (needs nightly + wall clock).

crdt_merge immediately surfaced robustness gaps in yrs 0.27 on malformed sync
deltas (a 4-byte input OOMs; other inputs abort/UB) — uncatchable, limited
blast radius (authenticated /sync/push), documented as a known limitation.
extract and crdt_write ran clean over ~1M cases.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
Erich Blume 2026-06-09 13:03:10 -07:00
commit e7ced4f8f9
12 changed files with 220 additions and 5 deletions

View file

@ -8,6 +8,12 @@ publish.workspace = true
authors.workspace = true
rust-version.workspace = true
[features]
# Exposes thin public wrappers over crate-private internals (the body CRDT) for
# the cargo-fuzz targets in `fuzz/`. Never enabled in normal builds — the
# wrappers are test scaffolding, not part of the public API.
fuzzing = []
[dependencies]
rusqlite.workspace = true
ulid.workspace = true

5
crates/heph-core/fuzz/.gitignore vendored Normal file
View file

@ -0,0 +1,5 @@
target/
corpus/
artifacts/
coverage/
Cargo.lock

View file

@ -0,0 +1,41 @@
# cargo-fuzz harness for heph-core's parsing/CRDT surfaces. Its own workspace
# (the empty `[workspace]` table) so it never pulls into the main build; it is
# nightly-only and run ad-hoc via `mise run fuzz`. See docs/how-to/fuzz-testing.md.
[package]
name = "heph-core-fuzz"
version = "0.0.0"
publish = false
edition = "2021"
[package.metadata]
cargo-fuzz = true
[dependencies]
libfuzzer-sys = "0.4"
[dependencies.heph-core]
path = ".."
features = ["fuzzing"]
[[bin]]
name = "crdt_merge"
path = "fuzz_targets/crdt_merge.rs"
test = false
doc = false
bench = false
[[bin]]
name = "crdt_write"
path = "fuzz_targets/crdt_write.rs"
test = false
doc = false
bench = false
[[bin]]
name = "extract"
path = "fuzz_targets/extract.rs"
test = false
doc = false
bench = false
[workspace]

View file

@ -0,0 +1,15 @@
#![no_main]
//! Fuzz `merge_body` with arbitrary `delta` bytes — the untrusted sync-ingest
//! surface. A peer's update payload is decoded and applied here; a crash is a
//! remote-input daemon crash. yrs 0.27 is known to `SIGABRT`/UB on some
//! malformed inputs (see `crdt::merge_body`'s docs) — surfacing and shrinking
//! such an input is exactly this target's job.
use libfuzzer_sys::fuzz_target;
fuzz_target!(|data: &[u8]| {
    use heph_core::crdt_fuzz::merge_body;

    // First pass: treat the raw fuzz input as a delta and merge it with no
    // previous state.
    let (merged_state, first_body) = merge_body(None, data);

    // Second pass: merge the very same delta on top of the state the first
    // pass produced. Re-applying a delta must leave the body untouched.
    let (_, second_body) = merge_body(Some(&merged_state), data);
    assert_eq!(first_body, second_body, "merge of the same delta was not idempotent");
});

View file

@ -0,0 +1,21 @@
#![no_main]
//! Fuzz `write_body`: diff two arbitrary bodies into the text CRDT and check
//! the round-trip — the materialized body and the re-materialized stored state
//! must both equal the new body exactly. Stresses the UTF-8 boundary alignment
//! in the prefix/suffix diff with arbitrary (incl. multibyte) strings.
use libfuzzer_sys::fuzz_target;
const CLIENT: u64 = 0xAAAA;
fuzz_target!(|data: (String, String)| {
    use heph_core::crdt_fuzz::{body_of, write_body};

    let (before, after) = data;

    // Seed a document with the first string, then diff the second string in
    // on top of the resulting state.
    let (seed_state, _, _) = write_body(CLIENT, None, &before);
    let (final_state, _delta, materialized) = write_body(CLIENT, Some(&seed_state), &after);

    // The body returned by the write must be exactly the requested text ...
    assert_eq!(materialized, after, "write did not materialize the new body");

    // ... and so must the body recovered from the stored state blob.
    let reloaded = body_of(&final_state);
    assert_eq!(
        reloaded, after,
        "stored state did not re-materialize to the new body"
    );
});

View file

@ -0,0 +1,27 @@
#![no_main]
//! Fuzz `extract` over arbitrary markdown. Asserts the invariants promotion and
//! the context-item index depend on: wiki-links are non-empty and de-duplicated,
//! and `context_item_lines` stays 1:1 with `context_items`.
use libfuzzer_sys::fuzz_target;
use std::collections::HashSet;
fuzz_target!(|data: &[u8]| {
    // `extract` is called with a `&str`, so inputs that are not valid UTF-8
    // are skipped rather than fed through.
    let s = match std::str::from_utf8(data) {
        Ok(text) => text,
        Err(_) => return,
    };
    let extracted = heph_core::extract(s);

    // Every wiki-link target must be non-empty, already trimmed, and unique.
    let mut already_seen: HashSet<&str> = HashSet::new();
    for link in &extracted.wiki_links {
        assert!(!link.is_empty(), "empty wiki-link target");
        assert_eq!(link.trim(), link.as_str(), "untrimmed wiki-link target");
        assert!(already_seen.insert(link.as_str()), "duplicate wiki-link {link:?}");
    }

    // The line index must have exactly one entry per extracted context item.
    let line_count = heph_core::extract::context_item_lines(s).len();
    assert_eq!(
        line_count,
        extracted.context_items.len(),
        "context_item_lines diverged from context_items",
    );
});

View file

@ -119,10 +119,12 @@ pub(crate) struct BodyMerge {
/// contains the unwinding subset so a corrupt payload degrades to a no-op
/// merge rather than crashing a debug daemon; it cannot stop the abort/UB
/// class. The blast radius is limited — `/sync/push` is authenticated — but a
/// buggy or hostile *authenticated* peer can still feed bad bytes here. Beyond
/// the unwinding panics, fuzzing also found a tiny delta (`[255,255,255,126]`)
/// that drives yrs into a huge allocation (OOM) — `catch_unwind` can't help
/// that. The real fix is upstream (or a pre-apply validator yrs doesn't yet
/// expose); tracked in the Hephaestus project. Findings and the `crdt_merge`
/// fuzz target are documented in [[fuzz-testing]].
pub(crate) fn merge_body(prev_state: Option<&[u8]>, delta: &[u8]) -> BodyMerge {
let merged = std::panic::catch_unwind(|| {
let doc = load(0, prev_state);
@ -145,11 +147,33 @@ pub(crate) fn merge_body(prev_state: Option<&[u8]>, delta: &[u8]) -> BodyMerge {
}
/// Materialize a stored CRDT state blob to its body text.
///
/// Compiled for unit tests and under the `fuzzing` feature, where
/// `fuzz::body_of` forwards to it. The gate is a single `any(...)`: stacking a
/// separate `#[cfg(test)]` on top would AND the two conditions and remove this
/// function from non-test `fuzzing` builds, breaking that wrapper.
#[cfg(any(test, feature = "fuzzing"))]
pub(crate) fn body_of(state: &[u8]) -> String {
    materialize(&load(0, Some(state)))
}
/// Fuzz-only facade over the crate-private CRDT functions, used by the
/// cargo-fuzz targets under `fuzz/fuzz_targets/`. Exists only when the
/// `fuzzing` feature is on and is re-exported from the crate root as
/// `crdt_fuzz`. Results come back as plain tuples so the fuzz crate never has
/// to name the private `BodyWrite` / `BodyMerge` structs.
#[cfg(feature = "fuzzing")]
pub mod fuzz {
    /// Diff `new` into the CRDT seeded by `prev`; returns `(state, delta, body)`.
    pub fn write_body(client: u64, prev: Option<&[u8]>, new: &str) -> (Vec<u8>, Vec<u8>, String) {
        let written = super::write_body(client, prev, new);
        (written.state, written.delta, written.body)
    }

    /// Merge an untrusted `delta` into `prev`; returns `(state, body)`.
    pub fn merge_body(prev: Option<&[u8]>, delta: &[u8]) -> (Vec<u8>, String) {
        let merged = super::merge_body(prev, delta);
        (merged.state, merged.body)
    }

    /// Body text materialized from a stored state blob.
    pub fn body_of(state: &[u8]) -> String {
        super::body_of(state)
    }
}
/// Common prefix/suffix diff over byte indices, cut points aligned to UTF-8
/// char boundaries. Returns `(start, delete_len, inserted)` such that replacing
/// `cur[start..start+delete_len]` with `inserted` yields `new`.

View file

@ -16,6 +16,9 @@ pub const VERSION: &str = concat!(env!("CARGO_PKG_VERSION"), " (", env!("HEPH_BU
pub mod clock;
mod crdt;
/// Public CRDT wrappers for the cargo-fuzz targets (`fuzzing` feature only).
#[cfg(feature = "fuzzing")]
pub use crdt::fuzz as crdt_fuzz;
pub mod error;
pub mod export;
pub mod extract;

View file

@ -0,0 +1 @@
Added cargo-fuzz targets for the CRDT and extraction surfaces (`crates/heph-core/fuzz/`, behind heph-core's `fuzzing` feature) plus a `mise run fuzz` task. Nightly-only and ad-hoc, not wired into CI. These targets surfaced robustness gaps in `yrs` 0.27 on malformed sync deltas (OOM, abort/UB) — documented as a known limitation in [[fuzz-testing]].

View file

@ -75,6 +75,34 @@ clock to earn its keep. Run it ad-hoc after touching `crdt.rs`, `extract.rs`,
or the sync payload path. If it ever moves to CI, a scheduled (not per-push)
workflow with a persistent corpus is the right shape.
## Findings so far
The first runs paid for themselves. Tier 1 proptests found two reachable
panics on user input, both fixed in the same change:
- **`datespec::parse_offset`** panicked on a large relative offset (e.g.
`+999999999999d`) because chrono's `+` overflows; now uses checked
arithmetic and returns an out-of-range error.
- **`datespec::parse_month_day`** sliced a token on a non-char boundary for
multibyte input (e.g. an `every <Month> <day>` phrase containing `𐻂`); now
takes the first three *chars*.
Tier 2 (`crdt_merge`) surfaced **robustness gaps in `yrs` 0.27 on malformed
update bytes**, reachable through the authenticated `/sync/push` path:
- a tiny delta `[255, 255, 255, 126]` triggers a huge allocation → **OOM**;
- some inputs trip a `debug_assert!` in the yrs block decoder (unwinding
panic — contained by the `catch_unwind` in `merge_body`);
- at least one class hits genuine UB (an invalid `char`) → `SIGABRT` under
debug UB-checks, silent UB in release.
These are not fully fixable in-tree: `yrs` exposes no pre-apply validator, and
the OOM/abort classes are uncatchable. The blast radius is limited (the sync
endpoint is authenticated), but a buggy or hostile authenticated peer can still
crash a daemon. The `catch_unwind` in `merge_body` is partial mitigation;
durable fixes need upstream `yrs` work or a bounded decoder. Until then this is
a known limitation, tracked here and reproduced by the `crdt_merge` target.
## Why these targets
The high-value surfaces, ranked when this was set up:

View file

@ -58,6 +58,7 @@ Technical reference material for the repository tooling that ships with this pro
| `mise run docs-check-links` | Validate wiki-links against existing doc filenames |
| `mise run docs-mikado` | Inspect active Mikado chains and resume C2 work |
| `mise run docs-preview <tarball>` | Extract and serve a released docs tarball locally |
| `mise run fuzz [seconds] [target]` | Run the nightly cargo-fuzz targets briefly — see [[fuzz-testing]] |
| `mise run import-todoist` | Seed a heph store from Todoist (dry-run by default; `-- --commit` to write) — see [[import-todoist]] |
| `mise run mikado-branch-invariant-check` | Validate `mikado/*` branch commit discipline |
| `mise run pr-comments <pr_number>` | List unresolved PR comments |

43
mise-tasks/fuzz Executable file
View file

@ -0,0 +1,43 @@
#!/usr/bin/env bash
#MISE description="Run the cargo-fuzz targets briefly (nightly). Usage: mise run fuzz [seconds] [target]"
# Tier 2 fuzzing (see docs/how-to/fuzz-testing.md). Nightly-only and ad-hoc —
# not part of `cargo test` or CI. Runs each libFuzzer target for a bounded time
# so it terminates; the corpus under fuzz/corpus/ persists between runs.
#
#   mise run fuzz                  # all targets, 60s each
#   mise run fuzz 300              # all targets, 5 min each
#   mise run fuzz 600 crdt_merge   # one target, 10 min
#
# Exit status: 0 if every target ran clean, 1 if any target failed or the
# tooling/targets are missing, 2 on a bad `seconds` argument.
set -euo pipefail

ROOT="$(cd "$(dirname "$0")/.." && pwd)"
FUZZ_DIR="$ROOT/crates/heph-core/fuzz"
SECONDS_PER="${1:-60}"
ONLY="${2:-}"

# Reject a non-numeric duration up front. It is handed straight to libFuzzer's
# -max_total_time, so swapped arguments (`mise run fuzz crdt_merge 600`) would
# otherwise surface later as a misleading "produced a crash" report.
case "$SECONDS_PER" in
    '' | *[!0-9]*)
        echo "seconds must be a non-negative integer, got: '$SECONDS_PER'" >&2
        echo "usage: mise run fuzz [seconds] [target]" >&2
        exit 2
        ;;
esac

if ! command -v cargo-fuzz >/dev/null 2>&1 && ! cargo +nightly fuzz --version >/dev/null 2>&1; then
    echo "cargo-fuzz not found. Install with:" >&2
    echo " rustup toolchain install nightly && cargo install cargo-fuzz" >&2
    exit 1
fi

if [[ -n "$ONLY" ]]; then
    targets=("$ONLY")
else
    # No `mapfile` — macOS ships bash 3.2. Target names are bare words.
    # shellcheck disable=SC2207
    targets=($(cargo +nightly fuzz list --fuzz-dir "$FUZZ_DIR"))
fi

# Bash older than 4.4 treats "${targets[@]}" on an empty array as an unbound
# variable under `set -u`; fail with a clear message instead of that error.
if [[ ${#targets[@]} -eq 0 ]]; then
    echo "no fuzz targets found in $FUZZ_DIR" >&2
    exit 1
fi

# Keep going after a failing target so one crash does not hide the others;
# the overall exit status still reports that something failed.
rc=0
for t in "${targets[@]}"; do
    echo "=== fuzzing $t for ${SECONDS_PER}s ==="
    if ! cargo +nightly fuzz run "$t" --fuzz-dir "$FUZZ_DIR" -- -max_total_time="$SECONDS_PER"; then
        echo "!!! $t produced a crash — artifact in $FUZZ_DIR/artifacts/$t/" >&2
        rc=1
    fi
done
exit "$rc"