From d599a8522aefe882ba348952824437380dfc208c Mon Sep 17 00:00:00 2001 From: Ragnor Comerford Date: Mon, 8 Jun 2026 10:32:50 +0200 Subject: [PATCH] test(writes): tolerate benign drift / defer sidecar-covered drift (red) A table's Lance HEAD can sit ahead of the manifest pin after a benign, content-preserving op that never published to the manifest (compaction, a recovery restore, an old-binary optimize, an external compact_files). That drift carries no recovery sidecar and is safe to write over. A pending recovery sidecar, by contrast, marks a real in-flight partial write the open-time sweep will roll back. Adds boundary tests proving the (currently-missing) consumer-side behavior: - writes.rs: strict update / strict delete proceed on benign drift with no sidecar, and defer (pointing at recovery) when a sidecar pins the table. - schema_apply.rs: additive apply proceeds on benign drift, defers when a sidecar pins the table. Shared forge_benign_drift / write_node_pin_sidecar / node_table_uri helpers in tests/helpers/mod.rs (never-matching delete_where advances Lance HEAD by one without touching the manifest). Red today: the pre-stage precondition rejects any HEAD != pin as a stale view (ExpectedVersionMismatch), so the proceeds tests 409 and the defers tests get the generic stale-view message instead of an actionable recovery hint. --- crates/omnigraph/tests/helpers/mod.rs | 84 ++++++++++++++++++++++++ crates/omnigraph/tests/schema_apply.rs | 70 ++++++++++++++++++++ crates/omnigraph/tests/writes.rs | 88 ++++++++++++++++++++++++++ 3 files changed, 242 insertions(+) diff --git a/crates/omnigraph/tests/helpers/mod.rs b/crates/omnigraph/tests/helpers/mod.rs index c97ff72..3e9f4b6 100644 --- a/crates/omnigraph/tests/helpers/mod.rs +++ b/crates/omnigraph/tests/helpers/mod.rs @@ -261,3 +261,87 @@ pub fn s3_test_graph_uri(suite: &str) -> Option { .as_nanos(); Some(format!("s3://{}/{}/{}/{}", bucket, prefix, suite, unique)) } + +// ─── Drift forging (benign `Lance HEAD > manifest` drift + recovery sidecars) ── +// +// Shared by `writes.rs` and `schema_apply.rs` to exercise the consumer-side +// tolerant write precondition. (`recovery.rs` / `maintenance.rs` / `failpoints.rs` +// each still carry their own local `node_table_uri` from before this helper +// existed; they predate it and are left untouched to keep this change scoped.) + +/// FNV-1a (64-bit) — mirrors the table-path hashing in `db/manifest/layout.rs`. +fn fnv1a64(bytes: &[u8]) -> u64 { + let mut hash: u64 = 0xcbf2_9ce4_8422_2325; + for &b in bytes { + hash ^= b as u64; + hash = hash.wrapping_mul(0x100_0000_01b3); + } + hash +} + +/// Full URI of a node-type Lance dataset under a graph root. Mirrors the +/// `nodes/{fnv1a64-hex(type_name)}` layout in `db/manifest/layout.rs`. +pub fn node_table_uri(root: &str, type_name: &str) -> String { + let h = fnv1a64(type_name.as_bytes()); + format!("{}/nodes/{:016x}", root.trim_end_matches('/'), h) +} + +/// Advance a node table's Lance HEAD by one WITHOUT touching the manifest — the +/// shape of benign content-preserving drift (compaction / a recovery `restore` / +/// an old-binary optimize / external `compact_files`), leaving NO recovery +/// sidecar. Uses a never-matching `delete_where` (an inline Lance commit that +/// removes no rows and is agnostic to the table's column set). Returns +/// `(head_before, head_after)`. +pub async fn forge_benign_drift(root: &str, type_name: &str) -> (u64, u64) { + use lance::Dataset; + use omnigraph::table_store::TableStore; + + let table_uri = node_table_uri(root, type_name); + let store = TableStore::new(root); + let mut ds = Dataset::open(&table_uri).await.unwrap(); + let before = ds.version().version; + store.delete_where(&table_uri, &mut ds, "1 = 2").await.unwrap(); + let after = ds.version().version; + assert_eq!( + after, + before + 1, + "benign drift must advance Lance HEAD by exactly 1", + ); + (before, after) +} + +/// Write a recovery sidecar pinning a single node table at `expected_version`, +/// classified `UnexpectedAtP1` (`post_commit_pin == expected_version`, while the +/// observed Lance HEAD is `expected_version + 1`). That classification is +/// roll-back-eligible, so a `RollForwardOnly` refresh DEFERS rather than reclaims +/// it — the sidecar therefore survives on disk to the consumer-side precondition +/// check under test. The caller is expected to have forged matching drift first. +pub fn write_node_pin_sidecar( + root: &str, + operation_id: &str, + type_name: &str, + expected_version: u64, +) { + let table_uri = node_table_uri(root, type_name); + let dir = std::path::Path::new(root).join("__recovery"); + std::fs::create_dir_all(&dir).unwrap(); + let json = format!( + r#"{{ + "schema_version": 1, + "operation_id": "{operation_id}", + "started_at": "0", + "branch": null, + "actor_id": "act-test", + "writer_kind": "Mutation", + "tables": [ + {{ + "table_key": "node:{type_name}", + "table_path": "{table_uri}", + "expected_version": {expected_version}, + "post_commit_pin": {expected_version} + }} + ] + }}"#, + ); + std::fs::write(dir.join(format!("{operation_id}.json")), json).unwrap(); +} diff --git a/crates/omnigraph/tests/schema_apply.rs b/crates/omnigraph/tests/schema_apply.rs index cc0cae2..45e1da8 100644 --- a/crates/omnigraph/tests/schema_apply.rs +++ b/crates/omnigraph/tests/schema_apply.rs @@ -736,3 +736,73 @@ edge Knows: Person -> Person { // current contract, the data is *unreachable* via omnigraph // (no manifest entry), which is the user-facing guarantee. } + +// ─── Tolerance of benign `Lance HEAD > manifest` drift (additive apply) ─────── +// +// Schema apply reads each touched table's source at the manifest-pinned version +// and rewrites onto its Lance HEAD. When HEAD has drifted benignly ahead of the +// pin (content-preserving, no recovery sidecar — compaction / a recovery +// `restore` / an old-binary optimize / an external `compact_files`), apply must +// PROCEED: the content-preserving invariant makes the read-at-pin / write-at-HEAD +// safe. A sidecar-covered drift (a real in-flight partial write the open-time +// sweep will roll back) must instead DEFER to recovery. + +/// Additive apply (add a nullable Person property) on a benignly-drifted Person +/// table (no sidecar) must SUCCEED. Red before the tolerant precondition: the +/// HEAD==pin precondition 409s ("stale view"). +#[tokio::test] +async fn additive_apply_proceeds_on_benign_drift_without_sidecar() { + let dir = tempfile::tempdir().unwrap(); + let mut db = init_and_load(&dir).await; + let uri = dir.path().to_str().unwrap(); + + forge_benign_drift(uri, "Person").await; + + let desired = TEST_SCHEMA.replace( + " age: I32?\n}", + " age: I32?\n nickname: String?\n}", + ); + let result = db + .apply_schema(&desired) + .await + .expect("additive schema apply must tolerate benign HEAD>manifest drift"); + assert!(result.applied, "additive apply should report applied"); + assert!( + result.steps.iter().any(|step| matches!( + step, + SchemaMigrationStep::AddProperty { + type_kind: SchemaTypeKind::Node, + type_name, + property_name, + .. + } if type_name == "Person" && property_name == "nickname" + )), + "expected the AddProperty step to have applied: {:?}", + result.steps, + ); +} + +/// Same benign drift, but a recovery sidecar pins Person → additive apply must +/// DEFER and point the operator at recovery. +#[tokio::test] +async fn additive_apply_defers_when_sidecar_pins_table() { + let dir = tempfile::tempdir().unwrap(); + let mut db = init_and_load(&dir).await; + let uri = dir.path().to_str().unwrap(); + + let (head_before, _) = forge_benign_drift(uri, "Person").await; + write_node_pin_sidecar(uri, "01H00000000000000000000SD", "Person", head_before); + + let desired = TEST_SCHEMA.replace( + " age: I32?\n}", + " age: I32?\n nickname: String?\n}", + ); + let err = db + .apply_schema(&desired) + .await + .expect_err("a sidecar-covered drift must defer schema apply"); + assert!( + err.to_string().contains("recover"), + "deferred schema apply must point the operator at recovery; got: {err}", + ); +} diff --git a/crates/omnigraph/tests/writes.rs b/crates/omnigraph/tests/writes.rs index 0a309c9..63e2d71 100644 --- a/crates/omnigraph/tests/writes.rs +++ b/crates/omnigraph/tests/writes.rs @@ -1466,3 +1466,91 @@ async fn second_sequential_update_on_same_row_succeeds() { "Alice's age must reflect the second update" ); } + +// ─── Consumer-side tolerance of benign `Lance HEAD > manifest` drift ────────── +// +// A table's Lance HEAD can sit ahead of the manifest pin after a benign, +// content-preserving op that never published (compaction, a recovery `restore`, +// an old-binary optimize, an external `compact_files`). That drift carries NO +// recovery sidecar. The pre-stage write precondition must tolerate it — the +// writer's own commit + publisher CAS reconcile the manifest — rather than +// reject it as a stale view. When a recovery sidecar DOES pin the table (a real +// in-flight partial write the open-time sweep will roll back), the same +// precondition must instead defer and point the operator at recovery. + +/// Strict `update` on a benignly-drifted table (no sidecar) must SUCCEED. +/// Red before the tolerant precondition: 409 `ExpectedVersionMismatch` +/// ("stale view"). This also guards the spurious-409 trap — the post-queue +/// strict check must compare the manifest pin, not the drifted Lance HEAD +/// captured at the open site. +#[tokio::test] +async fn strict_update_proceeds_on_benign_drift_without_sidecar() { + let dir = tempfile::tempdir().unwrap(); + let mut db = init_and_load(&dir).await; + let uri = dir.path().to_str().unwrap(); + + forge_benign_drift(uri, "Person").await; + + let result = mutate_main( + &mut db, + MUTATION_QUERIES, + "set_age", + &mixed_params(&[("$name", "Alice")], &[("$age", 99)]), + ) + .await + .expect("strict update must tolerate benign HEAD>manifest drift with no sidecar"); + assert_eq!( + result.affected_nodes, 1, + "the tolerated update must apply to exactly Alice", + ); +} + +/// Same benign drift, but a recovery sidecar pins the table → the strict op must +/// DEFER (not proceed onto a possibly-rolled-back state, not 409 a generic +/// stale view). The error must point the operator at recovery. +#[tokio::test] +async fn strict_update_defers_when_sidecar_pins_table() { + let dir = tempfile::tempdir().unwrap(); + let mut db = init_and_load(&dir).await; + let uri = dir.path().to_str().unwrap(); + + let (head_before, _) = forge_benign_drift(uri, "Person").await; + write_node_pin_sidecar(uri, "01H00000000000000000000WD", "Person", head_before); + + let err = mutate_main( + &mut db, + MUTATION_QUERIES, + "set_age", + &mixed_params(&[("$name", "Alice")], &[("$age", 99)]), + ) + .await + .expect_err("a sidecar-covered drift must defer, not proceed"); + assert!( + err.to_string().contains("recover"), + "deferred write must point the operator at recovery; got: {err}", + ); +} + +/// Strict `delete` (the inline-commit residual) on a benignly-drifted table must +/// also SUCCEED — guards the delete path's separate HEAD-capture / reopen site. +#[tokio::test] +async fn delete_proceeds_on_benign_drift_without_sidecar() { + let dir = tempfile::tempdir().unwrap(); + let mut db = init_and_load(&dir).await; + let uri = dir.path().to_str().unwrap(); + + forge_benign_drift(uri, "Person").await; + + let result = mutate_main( + &mut db, + MUTATION_QUERIES, + "remove_person", + ¶ms(&[("$name", "Alice")]), + ) + .await + .expect("strict delete must tolerate benign HEAD>manifest drift with no sidecar"); + assert_eq!( + result.affected_nodes, 1, + "the tolerated delete must remove exactly Alice", + ); +}