From 7d1a40102cabf560e6a1768c2cf1ac711141e6ca Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Mon, 11 May 2026 21:35:18 +0000 Subject: [PATCH] Address review feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit merge.rs: best-effort refresh on the Err path so a refresh-time storage error doesn't replace the merge body's structured error (typically the manifest_conflict that the HTTP layer maps to a 409 with a structured payload) with a less informative one. Ok-path behavior is unchanged — there a refresh failure is propagated so the caller knows the coord's cache is unsynced. server.rs: bump MAX_ITERATIONS to 50 and assert at the end that the named clean-409 path actually fired at least once. With ~20% per-iter rate on shared-CPU CI (per the original MR-923 repro), P(no hit in 50) is < 0.002%. Without this assertion the test silently degraded to exercising only the 200-merge path — covered already by the matrix cell. Both changes per Devin Review + cubic comments on PR #80. Co-Authored-By: Ragnor Comerford --- crates/omnigraph-server/tests/server.rs | 28 ++++++++++++++++++------- crates/omnigraph/src/exec/merge.rs | 20 +++++++++++++++++- 2 files changed, 39 insertions(+), 9 deletions(-) diff --git a/crates/omnigraph-server/tests/server.rs b/crates/omnigraph-server/tests/server.rs index f6cfdad..41ccbf1 100644 --- a/crates/omnigraph-server/tests/server.rs +++ b/crates/omnigraph-server/tests/server.rs @@ -3233,7 +3233,16 @@ async fn concurrent_merge_clean_409_does_not_poison_next_change_on_target() { // scenario until the clean-409 branch fires, then sharpens the // post-condition specifically for that branch: with the fix, the // sentinel must succeed. - const MAX_ITERATIONS: usize = 20; + // + // The clean-409 path is timing-dependent (~20% per iteration on + // shared-CPU CI per the original MR-923 repro). With 50 iterations + // the probability of never hitting it under that rate is < 0.002%. + // The test asserts at the end that the named path was actually + // exercised at least once; if a future change makes that path + // un-reachable in this scenario, the assertion surfaces the + // change rather than letting the test silently degrade to + // exercising only the 200-merge path. + const MAX_ITERATIONS: usize = 50; let mut hit_clean_409 = false; for iter in 0..MAX_ITERATIONS { let h = matrix::Harness::new().await; @@ -3281,13 +3290,16 @@ async fn concurrent_merge_clean_409_does_not_poison_next_change_on_target() { break; } } - // If MAX_ITERATIONS runs never hit the clean-409 path, the timing - // window simply didn't open on this hardware. The per-iteration - // sentinel assertion above still covered every 200-merge run, and - // the matrix cell covers the same scenario from a different angle. - // Don't fail the test — that would substitute hardware-timing - // sensitivity for the regression signal. - let _ = hit_clean_409; + assert!( + hit_clean_409, + "clean-409 path never fired in {} iterations — the named regression \ + scenario is no longer reachable through the merge×change race. \ + Either the race window has closed (good — but this test no longer \ + pins the bug) or something is preventing the merge body's \ + post_queue_snapshot drift check from firing. Investigate before \ + dropping this test.", + MAX_ITERATIONS, + ); } #[tokio::test(flavor = "multi_thread", worker_threads = 4)] diff --git a/crates/omnigraph/src/exec/merge.rs b/crates/omnigraph/src/exec/merge.rs index 9581fcb..fdcd68c 100644 --- a/crates/omnigraph/src/exec/merge.rs +++ b/crates/omnigraph/src/exec/merge.rs @@ -1136,8 +1136,26 @@ impl Omnigraph { // sweep emits. Pinned by // `branch_merge_phase_b_failure_recovered_on_next_open` in // `crates/omnigraph/tests/failpoints.rs`. + // + // Err-path refresh is best-effort: the merge body's error + // (typically the structured `manifest_conflict` from the + // post_queue_snapshot drift check) is the value the caller + // needs to see. A refresh-time storage error would replace + // that with a less informative error; the next op or the next + // `Omnigraph::open` will re-sync the coord anyway. if previous_branch == target_branch { - self.refresh_coordinator_only().await?; + match merge_result { + Ok(_) => self.refresh_coordinator_only().await?, + Err(_) => { + if let Err(err) = self.refresh_coordinator_only().await { + tracing::warn!( + error = %err, + "post-merge coordinator refresh failed on the error path; \ + the next op or open will re-sync" + ); + } + } + } } merge_result