diff --git a/src/chain/reverify.rs b/src/chain/reverify.rs
index b774230b..609273ed 100644
--- a/src/chain/reverify.rs
+++ b/src/chain/reverify.rs
@@ -308,6 +308,26 @@ impl CompositeReverifier for DefaultCompositeReverifier {
         let all_built = derived > 0 && built == derived;
         let all_ran = built > 0 && steps_run == built && sandbox_errors == 0;
         if all_built && all_ran && final_sink_hit {
+            // Phase 31 telemetry stability stamping. When the caller
+            // opts in via `NYX_VERIFY_REPLAY_STABLE=1` (mirrored by
+            // [`VerifyOptions::replay_stable_check`]) we re-run the
+            // chain step sequence once more on the same built workdirs
+            // and stamp `replay_stable` from the second pass's sink
+            // sentinel: `Some(true)` means the chain reproduces,
+            // `Some(false)` means it is flaky (rare but a real
+            // eval-corpus signal). The field stays `None` when the
+            // opt-in is off or the replay itself hit a sandbox error.
+            let replay_stable = if opts.replay_stable_check {
+                let (_, replay_sandbox_errors, _, _, replay_final_sink_hit) =
+                    run_chain_steps(&built_steps, &opts.sandbox, &terminal);
+                if replay_sandbox_errors == 0 {
+                    Some(replay_final_sink_hit)
+                } else {
+                    None
+                }
+            } else {
+                None
+            };
             VerifyResult {
                 finding_id,
                 status: VerifyStatus::Confirmed,
@@ -318,7 +338,7 @@ impl CompositeReverifier for DefaultCompositeReverifier {
                 attempts: vec![],
                 toolchain_match: None,
                 differential: None,
-                replay_stable: None,
+                replay_stable,
                 wrong: None,
                 hardening_outcome: None,
             }
diff --git a/tests/chain_emission_e2e.rs b/tests/chain_emission_e2e.rs
index e2cfd630..432e698d 100644
--- a/tests/chain_emission_e2e.rs
+++ b/tests/chain_emission_e2e.rs
@@ -215,6 +215,86 @@ fn flask_eval_chain_reverify_populates_dynamic_verdict() {
     }
 }
 
+/// Locks the Phase 31 telemetry stability stamping contract: when
+/// `NYX_VERIFY_REPLAY_STABLE=1` is set and the chain reverifier resolves
+/// to `Confirmed`, the verdict's `replay_stable` field is populated.
+/// Without the env var, `replay_stable` stays `null`.
+///
+/// Status-agnostic: when the host's Python toolchain is missing the
+/// reverifier never reaches its `Confirmed` branch and `replay_stable`
+/// stays `null` in both arms — the test then asserts only the absence-
+/// path contract under both env-var settings so it stays green on
+/// toolchain-free hosts. When `Confirmed` *does* fire, the env-var-set
+/// arm must carry `Some(true|false)`.
+#[cfg(feature = "dynamic")]
+#[test]
+fn flask_eval_chain_replay_stable_honours_opt_in() {
+    let root = fixture_root("python/flask_eval");
+
+    // Arm 1: env var unset → replay_stable must be null on the top chain
+    // regardless of verdict status.
+    let assert_off = Command::cargo_bin("nyx")
+        .expect("nyx binary")
+        .args(["scan", "--format", "json"])
+        .arg(&root)
+        .env_remove("NYX_VERIFY_REPLAY_STABLE")
+        .assert()
+        .success();
+    let value_off: Value = serde_json::from_slice(&assert_off.get_output().stdout)
+        .expect("nyx scan --format json produced invalid JSON (arm off)");
+    let top_off = value_off
+        .get("chains")
+        .and_then(Value::as_array)
+        .and_then(|c| c.first())
+        .expect("expected at least one composed chain (arm off)");
+    let dv_off = top_off
+        .get("dynamic_verdict")
+        .expect("dynamic_verdict missing (arm off)");
+    let replay_off = dv_off.get("replay_stable");
+    assert!(
+        matches!(replay_off, None | Some(Value::Null)),
+        "replay_stable should be absent or null when opt-in is off; got {replay_off:?}"
+    );
+
+    // Arm 2: env var set → replay_stable must be populated when the
+    // verdict is Confirmed. When the toolchain is missing the verdict
+    // stays Inconclusive and replay_stable must stay null: the replay
+    // pass runs only inside the reverifier's Confirmed branch.
+    let assert_on = Command::cargo_bin("nyx")
+        .expect("nyx binary")
+        .args(["scan", "--format", "json"])
+        .arg(&root)
+        .env("NYX_VERIFY_REPLAY_STABLE", "1")
+        .assert()
+        .success();
+    let value_on: Value = serde_json::from_slice(&assert_on.get_output().stdout)
+        .expect("nyx scan --format json produced invalid JSON (arm on)");
+    let top_on = value_on
+        .get("chains")
+        .and_then(Value::as_array)
+        .and_then(|c| c.first())
+        .expect("expected at least one composed chain (arm on)");
+    let dv_on = top_on
+        .get("dynamic_verdict")
+        .expect("dynamic_verdict missing (arm on)");
+    let status_on = dv_on
+        .get("status")
+        .and_then(Value::as_str)
+        .expect("verdict missing status (arm on)");
+    let replay_on = dv_on.get("replay_stable");
+    if status_on == "Confirmed" {
+        assert!(
+            matches!(replay_on, Some(Value::Bool(_))),
+            "replay_stable must be populated when opt-in is on and verdict is Confirmed; got {replay_on:?}"
+        );
+    } else {
+        assert!(
+            matches!(replay_on, None | Some(Value::Null)),
+            "replay_stable should be absent or null when the verdict is not Confirmed; got {replay_on:?}"
+        );
+    }
+}
+
 /// Mirror of the above: with `--no-verify` the chain-reverify pass is
 /// skipped and `dynamic_verdict` stays `null`. Locks the cost-control
 /// contract: users who opt out of dynamic verification do not pay the