From 1b0a2c9310cf9f440b59575c9ab4800ed846de11 Mon Sep 17 00:00:00 2001 From: Ragnor Comerford Date: Thu, 7 May 2026 16:53:51 +0200 Subject: [PATCH] staging: skip revalidation single-table; in-memory snapshot multi-table (PR 2 Step D) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes the PR 1b regression (-17% disjoint, -30% same-key) by eliminating the fresh `db.snapshot_for_branch(branch).await` that PR 1b's commit_all issued per mutation. Single-table mutations (`staged.len() + inline_committed.len() == 1`): skip revalidation entirely. The per-(table, branch) Mutex queue holds exclusive while we commit; the publisher's CAS catches any drift that slipped between expected_versions capture and queue acquisition. Conflict cost: 1 orphan Lance HEAD advance, recovered via the existing sidecar protocol on the next ReadWrite open. This is the same trade-off the master plan §"Revalidation perf optimization" prescribes. Multi-table mutations: replace `db.snapshot_for_branch(branch)` (fresh manifest read) with `db.snapshot()` (in-memory). Correct under MR-686's single-process scope because all in-process tenants share one `Arc` -> one coordinator; publishes update the shared coordinator BEFORE releasing queue guards, so a contending tenant reads a fresh in-memory view by the time it acquires its queue keys. The within-mutation race (A captures expected_versions[T2]=V0, B publishes T2->V1 during A's stage I/O, A then acquires T2's queue) is caught via the in-memory check. Multi-coordinator deployments (§VI.27 aspirational) would need force-refresh under the queue — documented in §VI's "Explicit non-commitments". Adds a SAFETY comment naming the two load-bearing premises: (1) per-table queue uses exclusive Mutex (not RwLock), and (2) single-coordinator invariant (one Omnigraph engine per process). Migrating either breaks this skip. Regression sentinel `change_conflict_returns_manifest_conflict_409` passes. 102 lib + 24 runs + 16 staged_writes pass with the new path. Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/omnigraph/src/exec/staging.rs | 55 +++++++++++++++++++--------- 1 file changed, 37 insertions(+), 18 deletions(-) diff --git a/crates/omnigraph/src/exec/staging.rs b/crates/omnigraph/src/exec/staging.rs index fd43ea0..94b2d59 100644 --- a/crates/omnigraph/src/exec/staging.rs +++ b/crates/omnigraph/src/exec/staging.rs @@ -435,26 +435,42 @@ impl StagedMutation { } let guards = db.write_queue().acquire_many(&queue_keys).await; - // Revalidate manifest pins. Read fresh per-branch snapshot — - // in-memory `db.snapshot()` may be stale if another writer - // committed since our stage. If any pin moved past our - // expected_version, fail-fast before commit_staged moves - // Lance HEAD. + // Revalidate manifest pins (PR 2 perf optimization). // - // Both staged and inline-committed tables are revalidated. - // Inline-committed tables (delete-only path) had their Lance - // HEAD advanced before this point, but the *manifest pin* - // shouldn't have moved if no other writer interleaved. If it - // has, return manifest_conflict — the sidecar emitted below - // captures (expected, post) so the next open's recovery sweep - // can resolve the Lance-HEAD-vs-manifest divergence. + // Single-table mutations skip revalidation entirely: once the + // per-(table, branch) queue is held, no concurrent writer can + // move our table's pin (queue exclusivity); if revalidation + // were to fail, the publisher's `expected_table_versions` CAS + // catches the same drift. The cost on conflict is one orphan + // Lance HEAD advance, recovered via the sidecar protocol on + // the next ReadWrite open. // - // Note: under PR 1b's intermediate state (global server RwLock - // in place), this revalidation is a no-op because no concurrent - // writer can run. Becomes load-bearing once PR 2 removes the - // global lock — see `.context/pr-1b-plan.md` Risk 3. - if !staged.is_empty() || !inline_committed.is_empty() { - let snapshot = db.snapshot_for_branch(branch).await?; + // Multi-table mutations use the in-memory `db.snapshot()` + // (zero I/O) instead of `db.snapshot_for_branch(...)` (fresh + // manifest read). This is correct under MR-686's single-process + // scope: all in-process tenants share one `Arc` and + // therefore one coordinator; publishes update the shared + // coordinator BEFORE releasing queue guards (see + // `commit_all` -> caller's publisher -> caller drops guards), + // so any tenant 2 acquiring queue keys after tenant 1 reads a + // fresh in-memory view. The within-mutation race (mutation A + // captures expected_versions[T2]=V0, tenant B publishes T2 to + // V1 during A's stage I/O, A then acquires T2's queue) is + // caught here via the in-memory check (B's publish updated the + // shared coordinator, so snapshot.entry(T2) returns V1 != V0). + // + // Multi-coordinator deployments (§VI.27 aspirational) would + // require force-refresh under the queue here. That trade-off + // is documented in §VI's "Explicit non-commitments" subsection. + // + // SAFETY: relies on (1) the per-(table, branch) WriteQueueManager + // using exclusive `tokio::sync::Mutex<()>` (not `RwLock`), and + // (2) the single-coordinator invariant (one Omnigraph engine + // per process). Migrating either premise breaks this skip; see + // master plan risk #2. + let total_tables = staged.len() + inline_committed.len(); + if total_tables > 1 { + let snapshot = db.snapshot().await; for entry in &staged { let current = snapshot.entry(&entry.table_key).map(|e| e.table_version); match current { @@ -498,6 +514,9 @@ impl StagedMutation { } } } + // Avoid an unused-variable warning when `branch` is not consumed + // above (single-table fast path skips revalidation). + let _ = branch; // Sidecar protocol: build the per-table pin list and write the // sidecar BEFORE any Lance commit_staged runs, so a crash