//! Phase 30 (Track C — determinism): run the verifier 10× on the same
//! input and assert byte-identical [`VerifyTrace`] output across runs,
//! plus byte-identical telemetry records once wall-clock fields are
//! stripped.
//!
//! The test deliberately drives the policy-deny short-circuit so it
//! does not depend on a working language toolchain, a sandbox backend,
//! or a populated payload corpus. That path emits exactly the same
//! pipeline events ([`SpecStarted`], [`Verdict`]) every run, and
//! emits a single telemetry record whose only non-deterministic field
//! is the wall-clock `ts` timestamp. Stripping `ts` gives a stable
//! envelope the test can compare directly.

#![cfg(feature = "dynamic")]

use nyx_scanner::commands::scan::Diag;
use nyx_scanner::dynamic::telemetry::{self, SamplingPolicy};
use nyx_scanner::dynamic::verify::{VerifyOptions, verify_finding};
use nyx_scanner::evidence::{Confidence, Evidence, VerifyStatus};
use nyx_scanner::patterns::{FindingCategory, Severity};
use serde_json::Value;
use std::collections::BTreeSet;
use std::sync::{Mutex, MutexGuard};

/// Number of verifier invocations compared by the deny-path tests.
const RUN_COUNT: usize = 10;

// `NYX_TELEMETRY_PATH` and the telemetry log are process-wide; cargo test
// runs the tests in this binary in parallel by default, which would race
// the env var and interleave writes from sibling tests into the file the
// telemetry-determinism assertion is reading. Serialise the tests in
// this file with a module-level mutex so each owns the telemetry surface
// exclusively for the duration of its run.
static TEST_LOCK: Mutex<()> = Mutex::new(());

/// Acquire [`TEST_LOCK`], recovering the guard if an earlier test
/// panicked while holding it (a poisoned lock must not cascade into
/// unrelated failures).
fn lock_telemetry() -> MutexGuard<'static, ()> {
    TEST_LOCK.lock().unwrap_or_else(|e| e.into_inner())
}

/// Removes the listed environment variables when dropped.
///
/// Declare it *after* the [`lock_telemetry`] guard and the tempdir the
/// variables point into: locals drop in reverse declaration order, so
/// the variables are cleared while the lock is still held and before
/// the tempdir is deleted. Because this is a `Drop` impl, the cleanup
/// also runs when an assertion panics — leaving a variable pointing at
/// the (about-to-be-deleted) tempdir would poison sibling tests that
/// share this process.
struct EnvCleanup(&'static [&'static str]);

impl Drop for EnvCleanup {
    fn drop(&mut self) {
        for name in self.0 {
            // SAFETY: every test in this file that touches these
            // variables holds `TEST_LOCK` for its whole body, so no
            // sibling test mutates the environment concurrently.
            // NOTE(review): assumes the verifier leaves no background
            // thread reading the environment — confirm.
            unsafe {
                std::env::remove_var(name);
            }
        }
    }
}

/// Build a finding whose evidence trips the policy-deny short-circuit.
///
/// `stable_hash` is stored verbatim on the returned [`Diag`]; all other
/// fields are fixed.
fn deny_diag(stable_hash: u64) -> Diag {
    // Triggers the credentials deny rule via the AWS-key regex from
    // `crate::utils::redact::contains_secret`. The deny rule fires
    // deterministically because the rule lookup table is `const`.
    let ev = Evidence {
        notes: vec!["secret=AKIAFAKEDETERM00000000".to_owned()],
        ..Evidence::default()
    };
    Diag {
        path: "src/handler.py".to_owned(),
        line: 42,
        col: 0,
        severity: Severity::High,
        id: "py.cmdi.os_system".to_owned(),
        category: FindingCategory::Security,
        path_validated: false,
        guard_kind: None,
        message: None,
        labels: vec![],
        confidence: Some(Confidence::High),
        evidence: Some(ev),
        rank_score: None,
        rank_reason: None,
        suppressed: false,
        suppression: None,
        triage_state: "open".to_string(),
        triage_note: String::new(),
        rollup: None,
        finding_id: String::new(),
        alternative_finding_ids: vec![],
        stable_hash,
    }
}

/// Strip every non-deterministic field from a parsed telemetry record
/// and re-serialise. Phase 30 acceptance explicitly excludes wall-clock
/// timestamps; `ts` is the only such field today. Future additions
/// belong in this filter so the canonical "what does deterministic
/// telemetry look like?" surface lives in one place.
///
/// # Panics
///
/// Panics if `line` is not valid JSON.
fn strip_volatile_fields(line: &str) -> String {
    let mut value: Value = serde_json::from_str(line).expect("telemetry line should be JSON");
    if let Some(obj) = value.as_object_mut() {
        obj.remove("ts");
        // `duration_ms` is zero on the no-sandbox deny path, but strip
        // it defensively so the audit stays correct if a future code
        // path stamps a non-zero duration before the verdict short-
        // circuits.
        obj.remove("duration_ms");
    }
    serde_json::to_string(&value).expect("re-serialisation cannot fail")
}

#[test]
fn ten_runs_produce_byte_identical_telemetry_minus_timestamps() {
    let _guard = lock_telemetry();
    let tmp = tempfile::TempDir::new().expect("tempdir");
    let log = tmp.path().join("events.jsonl");

    // Armed before the variable is set so the cleanup runs on every
    // exit path, including a failed assertion below.
    let _env = EnvCleanup(&["NYX_TELEMETRY_PATH"]);

    // Pin the telemetry log to the temp file and ensure the
    // `NYX_NO_TELEMETRY` opt-out is not set in this process.
    // SAFETY: `TEST_LOCK` is held, so no sibling test in this file is
    // touching the environment concurrently.
    unsafe {
        std::env::set_var("NYX_TELEMETRY_PATH", &log);
        std::env::remove_var("NYX_NO_TELEMETRY");
    }

    let diag = deny_diag(0x0123_4567_89ab_cdef);
    let opts = VerifyOptions {
        telemetry_policy: SamplingPolicy::keep_all(),
        trace_verbose: false,
        ..VerifyOptions::default()
    };

    let mut verdict_jsons: BTreeSet<String> = BTreeSet::new();
    for _ in 0..RUN_COUNT {
        let result = verify_finding(&diag, &opts);
        assert_eq!(result.status, VerifyStatus::Inconclusive);
        // Drop `differential` and any future timestamped field by
        // round-tripping through serde; structural equality is the
        // contract.
        verdict_jsons.insert(serde_json::to_string(&result).expect("VerifyResult serialises"));
    }
    assert_eq!(
        verdict_jsons.len(),
        1,
        "VerifyResult must be byte-identical across {RUN_COUNT} runs, got {} distinct",
        verdict_jsons.len()
    );

    // Read the telemetry log; expect RUN_COUNT lines, all identical
    // once `ts` is removed.
    let parsed = telemetry::read_events(&log).expect("events.jsonl should parse");
    assert_eq!(
        parsed.len(),
        RUN_COUNT,
        "expected {RUN_COUNT} telemetry records, got {}",
        parsed.len()
    );
    let stripped: BTreeSet<String> = parsed
        .iter()
        .map(|v| {
            // round-trip through string so the strip path matches
            // what the on-disk reader does.
            let line = serde_json::to_string(v).expect("re-serialise");
            strip_volatile_fields(&line)
        })
        .collect();
    assert_eq!(
        stripped.len(),
        1,
        "telemetry records must be byte-identical (sans ts/duration_ms) across {RUN_COUNT} runs, got {} distinct: {:?}",
        stripped.len(),
        stripped
    );
    // `_env` clears `NYX_TELEMETRY_PATH` on the way out, before `tmp`
    // is deleted and before `_guard` releases the lock.
}

/// Recursively strip volatile fields from a `serde_json::Value` tree.
/// The Confirmed-path `VerifyResult` carries timing fields buried under
/// `differential.vuln_probes[].captured_at_ns` etc., so a flat top-level
/// `obj.remove(...)` is not enough.
///
/// Field denylist:
/// - `captured_at_ns` — wall-clock probe capture timestamp.
/// - `ts` / `duration_ms` — telemetry-side timing fields stripped by
///   [`strip_volatile_fields`] but worth re-stripping here too in case
///   a future code path lands them on `VerifyResult` directly.
/// - `repro_bundle` / `bundle_dir` — `NYX_REPRO_BASE` is fed an
///   in-test-tempdir whose path is stable across the loop, but the
///   hashed sub-directory name folds in any per-run randomness; strip
///   defensively.
#[cfg(target_os = "macos")]
fn strip_volatile_recursive(value: &mut Value) {
    const VOLATILE_KEYS: &[&str] = &[
        "captured_at_ns",
        "ts",
        "duration_ms",
        "repro_bundle",
        "bundle_dir",
    ];
    match value {
        Value::Object(map) => {
            // Remove denylisted keys at this level first, then descend
            // into whatever remains.
            for key in VOLATILE_KEYS {
                map.remove(*key);
            }
            for child in map.values_mut() {
                strip_volatile_recursive(child);
            }
        }
        Value::Array(items) => {
            for item in items {
                strip_volatile_recursive(item);
            }
        }
        // Scalars carry no nested keys.
        _ => {}
    }
}

/// Confirmed-path determinism: drive the verifier through a real
/// payload run (macOS process backend + sandbox-exec wrap + python3
/// harness) `RUN_COUNT_CONFIRMED` times and assert byte-identical
/// `VerifyResult` once volatile timing fields are stripped.
///
/// Mirrors [`ten_runs_produce_byte_identical_telemetry_minus_timestamps`]
/// (the deny-path determinism contract) but exercises the build →
/// sandbox → probe pipeline instead of the policy-deny short-circuit.
/// Closes the determinism audit's "complete coverage needs an end-to-end
/// Confirmed run" gap.
///
/// macOS-only: the Linux process backend needs `cc -static` + libc.a to
/// drive the C fixture through chroot, and `cc -static` is unsupported
/// by the Darwin clang shipped with Xcode. The Linux row's analogue
/// lands when the Phase 17 follow-up's `bind_mount_host_libs` opt-in
/// wiring (see `deferred.md`) lets the python harness survive chroot.
///
/// `RUN_COUNT_CONFIRMED = 3` keeps the test cost bounded (~6s per run
/// on a warm cache → ~20s total) while still gating against single-run
/// hash collisions that would flake at N=2. Bumping to N=10 (matching
/// the deny-path test) is a wall-clock decision, not a coverage one.
#[cfg(all(feature = "dynamic", target_os = "macos"))]
#[test]
fn confirmed_run_is_byte_identical_across_runs() {
    use nyx_scanner::evidence::{FlowStep, FlowStepKind};
    use nyx_scanner::labels::Cap;
    use nyx_scanner::utils::config::Config;
    use std::path::PathBuf;

    let _guard = lock_telemetry();

    const RUN_COUNT_CONFIRMED: usize = 3;

    // Pre-flight skips: the macOS process backend needs the sandbox-exec
    // wrap binary + a working python3 to drive the cmdi_positive fixture.
    if !std::path::Path::new("/usr/bin/sandbox-exec").exists() {
        eprintln!("SKIP: /usr/bin/sandbox-exec missing — cannot exercise process-backend wrap");
        return;
    }
    if !std::process::Command::new("/usr/bin/python3")
        .arg("--version")
        .output()
        .map(|o| o.status.success())
        .unwrap_or(false)
    {
        eprintln!("SKIP: /usr/bin/python3 missing — cannot run python harness");
        return;
    }

    let fixture_src = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
        .join("tests/dynamic_fixtures/python/cmdi_positive.py");
    let tmp = tempfile::TempDir::new().expect("create tempdir");
    let dst = tmp.path().join("cmdi_positive.py");
    std::fs::copy(&fixture_src, &dst).expect("stage fixture into tempdir");

    // Armed before the variables are set so they are cleared on every
    // exit path: the "did not confirm" skip, a failed assertion, or
    // normal completion.
    let _env = EnvCleanup(&["NYX_REPRO_BASE", "NYX_TELEMETRY_PATH"]);

    // Pin the repro bundle + telemetry log to in-test tempdir paths so
    // every run reads + writes the same absolute paths (the per-run path
    // would otherwise leak into VerifyResult and break determinism).
    // The paths are passed as `PathBuf`s rather than via
    // `to_str().unwrap()` so a non-UTF-8 temp directory cannot panic
    // here.
    // SAFETY: `TEST_LOCK` is held, so no sibling test in this file is
    // touching the environment concurrently.
    unsafe {
        std::env::set_var("NYX_REPRO_BASE", tmp.path().join("repro"));
        std::env::set_var("NYX_TELEMETRY_PATH", tmp.path().join("events.jsonl"));
        std::env::remove_var("NYX_NO_TELEMETRY");
    }

    let path_str = dst.to_string_lossy().into_owned();
    let evidence = Evidence {
        flow_steps: vec![
            FlowStep {
                step: 1,
                kind: FlowStepKind::Source,
                file: path_str.clone(),
                line: 1,
                col: 0,
                snippet: None,
                variable: Some("host".into()),
                callee: None,
                function: Some("run_ping".into()),
                is_cross_file: false,
            },
            FlowStep {
                step: 2,
                kind: FlowStepKind::Sink,
                file: path_str.clone(),
                line: 13,
                col: 4,
                snippet: None,
                variable: None,
                callee: None,
                function: None,
                is_cross_file: false,
            },
        ],
        sink_caps: Cap::CODE_EXEC.bits(),
        ..Default::default()
    };
    let diag = Diag {
        path: path_str,
        line: 13,
        col: 0,
        severity: Severity::High,
        id: "taint-unsanitised-flow".into(),
        category: FindingCategory::Security,
        path_validated: false,
        guard_kind: None,
        message: None,
        labels: vec![],
        confidence: Some(Confidence::High),
        evidence: Some(evidence),
        rank_score: None,
        rank_reason: None,
        suppressed: false,
        suppression: None,
        triage_state: "open".to_string(),
        triage_note: String::new(),
        rollup: None,
        finding_id: String::new(),
        alternative_finding_ids: vec![],
        stable_hash: 0xdec0_de00_dec0_de00,
    };

    let mut config = Config::default();
    config.scanner.harden_profile = "strict".to_owned();
    // Force the process backend: Auto would route python to docker on
    // CI hosts where docker is reachable, and docker ignores the
    // hardening profile. Pinning to `process` exercises the sandbox-
    // exec wrap on every run, which is the surface the determinism
    // contract covers.
    config.scanner.verify_backend = "process".to_owned();
    let mut opts = VerifyOptions::from_config(&config);
    opts.telemetry_policy = SamplingPolicy::keep_all();
    opts.trace_verbose = false;

    // The first run doubles as an environment probe: if it does not
    // confirm, skip instead of failing.
    let first = verify_finding(&diag, &opts);
    if first.status != VerifyStatus::Confirmed {
        eprintln!(
            "SKIP: cmdi_positive.py under --harden=strict did not confirm in this environment \
             (status={:?}, detail={:?})",
            first.status, first.detail,
        );
        return;
    }

    let mut stripped: BTreeSet<String> = BTreeSet::new();
    for (i, result) in std::iter::once(first)
        .chain((1..RUN_COUNT_CONFIRMED).map(|_| verify_finding(&diag, &opts)))
        .enumerate()
    {
        assert_eq!(
            result.status,
            VerifyStatus::Confirmed,
            "run {i}: cmdi_positive.py under --harden=strict must Confirm — got {:?} (detail={:?})",
            result.status,
            result.detail,
        );
        let mut json: Value =
            serde_json::from_str(&serde_json::to_string(&result).expect("VerifyResult serialises"))
                .expect("re-parse");
        strip_volatile_recursive(&mut json);
        stripped.insert(json.to_string());
    }
    assert_eq!(
        stripped.len(),
        1,
        "VerifyResult must be byte-identical across {RUN_COUNT_CONFIRMED} runs once volatile \
         timing fields are stripped; got {} distinct values: {:?}",
        stripped.len(),
        stripped,
    );
    // `_env` clears `NYX_REPRO_BASE` / `NYX_TELEMETRY_PATH` on the way out.
}

#[test]
fn policy_deny_excerpt_is_stable_across_runs() {
    use nyx_scanner::evidence::InconclusiveReason;

    let _guard = lock_telemetry();
    // The PolicyDeniedDynamic verdict carries an excerpt scrubbed via
    // the blake3-keyed `Scrubber`. blake3 is deterministic, so the
    // excerpt should be byte-identical across runs. Independent
    // assertion from the telemetry-determinism test because the
    // scrubber-hash path is a separate determinism contract worth
    // pinning on its own.
    let diag = deny_diag(0xfeed_face_0123_4567);
    let opts = VerifyOptions::default();
    // Element type is inferred from the `excerpt` field inserted below.
    let mut excerpts: BTreeSet<_> = BTreeSet::new();
    for _ in 0..RUN_COUNT {
        let result = verify_finding(&diag, &opts);
        match result
            .inconclusive_reason
            .expect("expected PolicyDeniedDynamic on deny path")
        {
            InconclusiveReason::PolicyDeniedDynamic { excerpt, .. } => {
                excerpts.insert(excerpt);
            }
            other => panic!("expected PolicyDeniedDynamic, got {other:?}"),
        }
    }
    assert_eq!(
        excerpts.len(),
        1,
        "scrubbed excerpt must be deterministic across {RUN_COUNT} runs, got {excerpts:?}"
    );
}