From 16759b28b9c13b445e49db9363756d24d6a26aaa Mon Sep 17 00:00:00 2001 From: aaltshuler Date: Wed, 10 Jun 2026 02:36:24 +0300 Subject: [PATCH] fix(cluster): RAII-guard the callback failpoint ScopedFailPoint::with_callback gives cfg_callback the same Drop-based cleanup as cfg actions; a panic while the point is active no longer leaks the callback into the process-global registry where it would fire under later tests (greptile review, PR #167). Co-Authored-By: Claude Fable 5 --- crates/omnigraph-cluster/src/failpoints.rs | 14 ++++++++++++++ crates/omnigraph-cluster/tests/failpoints.rs | 19 ++++++++++--------- 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/crates/omnigraph-cluster/src/failpoints.rs b/crates/omnigraph-cluster/src/failpoints.rs index c6d445b..f1799d7 100644 --- a/crates/omnigraph-cluster/src/failpoints.rs +++ b/crates/omnigraph-cluster/src/failpoints.rs @@ -32,6 +32,20 @@ impl ScopedFailPoint { name: name.to_string(), } } + + /// Register a callback failpoint with the same Drop-based cleanup as + /// `new`. Without the guard, a panic while the point is active would + /// leak the callback into the process-global registry and fire it under + /// later tests in the same binary. + pub fn with_callback(name: &str, callback: F) -> Self + where + F: Fn() + Send + Sync + 'static, + { + fail::cfg_callback(name, callback).expect("configure callback failpoint"); + Self { + name: name.to_string(), + } + } } #[cfg(feature = "failpoints")] diff --git a/crates/omnigraph-cluster/tests/failpoints.rs b/crates/omnigraph-cluster/tests/failpoints.rs index 05d2913..db7b82d 100644 --- a/crates/omnigraph-cluster/tests/failpoints.rs +++ b/crates/omnigraph-cluster/tests/failpoints.rs @@ -171,18 +171,19 @@ fn apply_cas_race_surfaces_state_cas_mismatch() { // Simulate the concurrent writer at the exact race window: rewrite // state.json (valid JSON, graph/schema digests preserved, revision 99) - // after apply read it but before apply writes. + // after apply read it but before apply writes. RAII-guarded so a panic + // inside apply cannot leak the callback into the global registry. let race_path = state_path(dir.path()); - fail::cfg_callback("cluster_apply.before_state_write", move || { - let mut state: serde_json::Value = - serde_json::from_str(&fs::read_to_string(&race_path).unwrap()).unwrap(); - state["state_revision"] = serde_json::json!(99); - fs::write(&race_path, serde_json::to_string_pretty(&state).unwrap()).unwrap(); - }) - .expect("configure callback failpoint"); + let failpoint = + ScopedFailPoint::with_callback("cluster_apply.before_state_write", move || { + let mut state: serde_json::Value = + serde_json::from_str(&fs::read_to_string(&race_path).unwrap()).unwrap(); + state["state_revision"] = serde_json::json!(99); + fs::write(&race_path, serde_json::to_string_pretty(&state).unwrap()).unwrap(); + }); let out = apply_config_dir(dir.path()); - fail::remove("cluster_apply.before_state_write"); + drop(failpoint); assert!(!out.ok); assert!(!out.state_written);