mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-09 19:45:13 +02:00
[pitboss/grind] deferred session-0020 (20260517T044708Z-e058)
This commit is contained in:
parent
b5696c99e2
commit
a2acfac7a2
2 changed files with 101 additions and 1 deletions
|
|
@ -308,6 +308,26 @@ impl CompositeReverifier for DefaultCompositeReverifier {
|
|||
let all_built = derived > 0 && built == derived;
|
||||
let all_ran = built > 0 && steps_run == built && sandbox_errors == 0;
|
||||
if all_built && all_ran && final_sink_hit {
|
||||
// Phase 31 telemetry stability stamping. When the caller
|
||||
// opts in via `NYX_VERIFY_REPLAY_STABLE=1` (mirrored by
|
||||
// [`VerifyOptions::replay_stable_check`]) we re-run the
|
||||
// chain step sequence one more time on the same built
|
||||
// workdirs and stamp `replay_stable` based on whether the
|
||||
// second pass also fires the sink sentinel. `Some(true)`
|
||||
// means the chain reproduces; `Some(false)` means the chain
|
||||
// is flaky (rare but a real eval-corpus signal); the field
|
||||
// stays `None` when the opt-in is off.
|
||||
let replay_stable = if opts.replay_stable_check {
|
||||
let (_, replay_sandbox_errors, _, _, replay_final_sink_hit) =
|
||||
run_chain_steps(&built_steps, &opts.sandbox, &terminal);
|
||||
if replay_sandbox_errors == 0 {
|
||||
Some(replay_final_sink_hit)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
} else {
|
||||
None
|
||||
};
|
||||
VerifyResult {
|
||||
finding_id,
|
||||
status: VerifyStatus::Confirmed,
|
||||
|
|
@ -318,7 +338,7 @@ impl CompositeReverifier for DefaultCompositeReverifier {
|
|||
attempts: vec![],
|
||||
toolchain_match: None,
|
||||
differential: None,
|
||||
replay_stable: None,
|
||||
replay_stable,
|
||||
wrong: None,
|
||||
hardening_outcome: None,
|
||||
}
|
||||
|
|
|
|||
|
|
@ -215,6 +215,86 @@ fn flask_eval_chain_reverify_populates_dynamic_verdict() {
|
|||
}
|
||||
}
|
||||
|
||||
/// Locks the Phase 31 telemetry stability stamping contract: when
|
||||
/// `NYX_VERIFY_REPLAY_STABLE=1` is set and the chain reverifier resolves
|
||||
/// to `Confirmed`, the verdict's `replay_stable` field is populated.
|
||||
/// Without the env var, `replay_stable` stays `null`.
|
||||
///
|
||||
/// Status-agnostic: when the host's Python toolchain is missing the
|
||||
/// reverifier never reaches its `Confirmed` branch and `replay_stable`
|
||||
/// stays `null` in both arms — the test then asserts only the absence-
|
||||
/// path contract under both env-var settings so it stays green on
|
||||
/// toolchain-free hosts. When `Confirmed` *does* fire, the env-var-set
|
||||
/// arm must carry `Some(true|false)`.
|
||||
#[cfg(feature = "dynamic")]
|
||||
#[test]
|
||||
fn flask_eval_chain_replay_stable_honours_opt_in() {
|
||||
let root = fixture_root("python/flask_eval");
|
||||
|
||||
// Arm 1: env var unset → replay_stable must be null on the top chain
|
||||
// regardless of verdict status.
|
||||
let assert_off = Command::cargo_bin("nyx")
|
||||
.expect("nyx binary")
|
||||
.args(["scan", "--format", "json"])
|
||||
.arg(&root)
|
||||
.env_remove("NYX_VERIFY_REPLAY_STABLE")
|
||||
.assert()
|
||||
.success();
|
||||
let value_off: Value = serde_json::from_slice(&assert_off.get_output().stdout)
|
||||
.expect("nyx scan --format json produced invalid JSON (arm off)");
|
||||
let top_off = value_off
|
||||
.get("chains")
|
||||
.and_then(Value::as_array)
|
||||
.and_then(|c| c.first())
|
||||
.expect("expected at least one composed chain (arm off)");
|
||||
let dv_off = top_off
|
||||
.get("dynamic_verdict")
|
||||
.expect("dynamic_verdict missing (arm off)");
|
||||
let replay_off = dv_off.get("replay_stable");
|
||||
assert!(
|
||||
matches!(replay_off, None | Some(Value::Null)),
|
||||
"replay_stable should be absent or null when opt-in is off; got {replay_off:?}"
|
||||
);
|
||||
|
||||
// Arm 2: env var set → replay_stable must be populated when the
|
||||
// verdict is Confirmed. When the toolchain is missing the verdict
|
||||
// stays Inconclusive and replay_stable stays null; both branches
|
||||
// are valid wiring outcomes.
|
||||
let assert_on = Command::cargo_bin("nyx")
|
||||
.expect("nyx binary")
|
||||
.args(["scan", "--format", "json"])
|
||||
.arg(&root)
|
||||
.env("NYX_VERIFY_REPLAY_STABLE", "1")
|
||||
.assert()
|
||||
.success();
|
||||
let value_on: Value = serde_json::from_slice(&assert_on.get_output().stdout)
|
||||
.expect("nyx scan --format json produced invalid JSON (arm on)");
|
||||
let top_on = value_on
|
||||
.get("chains")
|
||||
.and_then(Value::as_array)
|
||||
.and_then(|c| c.first())
|
||||
.expect("expected at least one composed chain (arm on)");
|
||||
let dv_on = top_on
|
||||
.get("dynamic_verdict")
|
||||
.expect("dynamic_verdict missing (arm on)");
|
||||
let status_on = dv_on
|
||||
.get("status")
|
||||
.and_then(Value::as_str)
|
||||
.expect("verdict missing status (arm on)");
|
||||
let replay_on = dv_on.get("replay_stable");
|
||||
if status_on == "Confirmed" {
|
||||
assert!(
|
||||
matches!(replay_on, Some(Value::Bool(_))),
|
||||
"replay_stable must be populated when opt-in is on and verdict is Confirmed; got {replay_on:?}"
|
||||
);
|
||||
} else {
|
||||
assert!(
|
||||
matches!(replay_on, None | Some(Value::Null) | Some(Value::Bool(_))),
|
||||
"replay_stable should be absent, null, or a bool; got {replay_on:?}"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// Mirror of the above: with `--no-verify` the chain-reverify pass is
|
||||
/// skipped and `dynamic_verdict` stays `null`. Locks the cost-control
|
||||
/// contract: users who opt out of dynamic verification do not pay the
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue