mirror of
https://github.com/ModernRelay/omnigraph.git
synced 2026-06-09 01:35:18 +02:00
Address review feedback
merge.rs: best-effort refresh on the Err path so a refresh-time storage error doesn't replace the merge body's structured error (typically the manifest_conflict that the HTTP layer maps to a 409 with a structured payload) with a less informative one. Ok-path behavior is unchanged — there a refresh failure is propagated so the caller knows the coord's cache is unsynced. server.rs: bump MAX_ITERATIONS to 50 and assert at the end that the named clean-409 path actually fired at least once. With ~20% per-iter rate on shared-CPU CI (per the original MR-923 repro), P(no hit in 50) is < 0.002%. Without this assertion the test silently degraded to exercising only the 200-merge path — covered already by the matrix cell. Both changes per Devin Review + cubic comments on PR #80. Co-Authored-By: Ragnor Comerford <ragnor.comerford@gmail.com>
This commit is contained in:
parent
b7353e1dc7
commit
7d1a40102c
2 changed files with 39 additions and 9 deletions
|
|
@ -3233,7 +3233,16 @@ async fn concurrent_merge_clean_409_does_not_poison_next_change_on_target() {
|
|||
// scenario until the clean-409 branch fires, then sharpens the
|
||||
// post-condition specifically for that branch: with the fix, the
|
||||
// sentinel must succeed.
|
||||
const MAX_ITERATIONS: usize = 20;
|
||||
//
|
||||
// The clean-409 path is timing-dependent (~20% per iteration on
|
||||
// shared-CPU CI per the original MR-923 repro). With 50 iterations
|
||||
// the probability of never hitting it under that rate is < 0.002%.
|
||||
// The test asserts at the end that the named path was actually
|
||||
// exercised at least once; if a future change makes that path
|
||||
// un-reachable in this scenario, the assertion surfaces the
|
||||
// change rather than letting the test silently degrade to
|
||||
// exercising only the 200-merge path.
|
||||
const MAX_ITERATIONS: usize = 50;
|
||||
let mut hit_clean_409 = false;
|
||||
for iter in 0..MAX_ITERATIONS {
|
||||
let h = matrix::Harness::new().await;
|
||||
|
|
@ -3281,13 +3290,16 @@ async fn concurrent_merge_clean_409_does_not_poison_next_change_on_target() {
|
|||
break;
|
||||
}
|
||||
}
|
||||
// If MAX_ITERATIONS runs never hit the clean-409 path, the timing
|
||||
// window simply didn't open on this hardware. The per-iteration
|
||||
// sentinel assertion above still covered every 200-merge run, and
|
||||
// the matrix cell covers the same scenario from a different angle.
|
||||
// Don't fail the test — that would substitute hardware-timing
|
||||
// sensitivity for the regression signal.
|
||||
let _ = hit_clean_409;
|
||||
assert!(
|
||||
hit_clean_409,
|
||||
"clean-409 path never fired in {} iterations — the named regression \
|
||||
scenario is no longer reachable through the merge×change race. \
|
||||
Either the race window has closed (good — but this test no longer \
|
||||
pins the bug) or something is preventing the merge body's \
|
||||
post_queue_snapshot drift check from firing. Investigate before \
|
||||
dropping this test.",
|
||||
MAX_ITERATIONS,
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
|
||||
|
|
|
|||
|
|
@ -1136,8 +1136,26 @@ impl Omnigraph {
|
|||
// sweep emits. Pinned by
|
||||
// `branch_merge_phase_b_failure_recovered_on_next_open` in
|
||||
// `crates/omnigraph/tests/failpoints.rs`.
|
||||
//
|
||||
// Err-path refresh is best-effort: the merge body's error
|
||||
// (typically the structured `manifest_conflict` from the
|
||||
// post_queue_snapshot drift check) is the value the caller
|
||||
// needs to see. A refresh-time storage error would replace
|
||||
// that with a less informative error; the next op or the next
|
||||
// `Omnigraph::open` will re-sync the coord anyway.
|
||||
if previous_branch == target_branch {
|
||||
self.refresh_coordinator_only().await?;
|
||||
match merge_result {
|
||||
Ok(_) => self.refresh_coordinator_only().await?,
|
||||
Err(_) => {
|
||||
if let Err(err) = self.refresh_coordinator_only().await {
|
||||
tracing::warn!(
|
||||
error = %err,
|
||||
"post-merge coordinator refresh failed on the error path; \
|
||||
the next op or open will re-sync"
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
merge_result
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue