From 637b73392870e7f59d399b28c305946309680fdd Mon Sep 17 00:00:00 2001
From: pitboss <pitboss@local>
Date: Sun, 17 May 2026 19:40:29 -0500
Subject: [PATCH] [pitboss] sweep after phase 04: 2 deferred items resolved

---
 src/dynamic/repro.rs                 | 18 ++++++++++
 src/dynamic/sandbox/process_macos.rs | 18 ++++++++++
 src/dynamic/verify.rs                | 17 ++++++++++
 tests/deserialize_corpus.rs          | 50 +++++++++++++++++++---------
 tests/hostile_input_tests.rs         | 23 +++++++++----
 5 files changed, 105 insertions(+), 21 deletions(-)

diff --git a/src/dynamic/repro.rs b/src/dynamic/repro.rs
index 8cda6c5c..0643848c 100644
--- a/src/dynamic/repro.rs
+++ b/src/dynamic/repro.rs
@@ -655,6 +655,18 @@ fn create_symlink(_target: &Path, _link: &Path) -> std::io::Result<()> {
 
 #[cfg(test)]
 mod tests {
+    /// Process-global `NYX_REPRO_BASE` is mutated by several tests in
+    /// this module; without serialisation a parallel `cargo test`
+    /// invocation races on the global state and produces flakes that
+    /// vanish under `--test-threads=1`.  Every env-mutating test
+    /// acquires this guard for the duration of its body.
+    /// `unwrap_or_else(into_inner)` recovers from poisoning so a
+    /// failing test does not cascade-fail every later test.
+    fn env_lock() -> std::sync::MutexGuard<'static, ()> {
+        static LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());
+        LOCK.lock().unwrap_or_else(|e| e.into_inner())
+    }
+
     use super::*;
     use crate::dynamic::sandbox::SandboxBackend;
     use crate::dynamic::spec::{EntryKind, PayloadSlot};
@@ -722,6 +734,7 @@ mod tests {
 
     #[test]
     fn write_creates_expected_layout() {
+        let _env_guard = env_lock();
         let dir = TempDir::new().unwrap();
         unsafe { std::env::set_var("NYX_REPRO_BASE", dir.path().to_str().unwrap()) };
 
@@ -759,6 +772,7 @@ mod tests {
 
     #[test]
     fn toolchain_lock_records_expected_toolchain_and_hashes() {
+        let _env_guard = env_lock();
         let dir = TempDir::new().unwrap();
         unsafe { std::env::set_var("NYX_REPRO_BASE", dir.path().to_str().unwrap()) };
         let spec = make_spec();
@@ -831,6 +845,7 @@ mod tests {
 
     #[test]
     fn reproduce_sh_contains_toolchain_check_and_exit_codes() {
+        let _env_guard = env_lock();
         let dir = TempDir::new().unwrap();
         unsafe { std::env::set_var("NYX_REPRO_BASE", dir.path().to_str().unwrap()) };
         let artifact = write(
@@ -925,6 +940,7 @@ mod tests {
 
     #[test]
     fn bundle_root_for_honours_test_override() {
+        let _env_guard = env_lock();
         let dir = TempDir::new().unwrap();
         unsafe { std::env::set_var("NYX_REPRO_BASE", dir.path().to_str().unwrap()) };
         let root = bundle_root_for("cafe0001").unwrap();
@@ -934,6 +950,7 @@ mod tests {
 
     #[test]
     fn bundle_root_for_matches_write_output_under_override() {
+        let _env_guard = env_lock();
         // The path returned by `bundle_root_for` must equal the bundle path
         // that `write` produces — replay callers locate the bundle without
         // re-creating directories, so a drift between the two helpers would
@@ -955,6 +972,7 @@ mod tests {
 
     #[test]
     fn outcome_json_redacts_secrets() {
+        let _env_guard = env_lock();
         let dir = TempDir::new().unwrap();
         unsafe { std::env::set_var("NYX_REPRO_BASE", dir.path().to_str().unwrap()) };
 
diff --git a/src/dynamic/sandbox/process_macos.rs b/src/dynamic/sandbox/process_macos.rs
index afa98aab..9f544011 100644
--- a/src/dynamic/sandbox/process_macos.rs
+++ b/src/dynamic/sandbox/process_macos.rs
@@ -431,6 +431,19 @@ pub fn wrap_plan(input: &WrapInput<'_>) -> WrapResult {
 mod tests {
     use super::*;
 
+    /// Process-global env vars (`NYX_SANDBOX_EXEC_BIN`,
+    /// `NYX_SB_DENY_DEFAULT`, `NYX_SB_SEED_DIR`) are mutated by several
+    /// tests in this module; without serialisation a parallel
+    /// `cargo test` invocation races on the global state and produces
+    /// flakes that vanish under `--test-threads=1`.  Every env-mutating
+    /// test acquires this guard for the duration of its body.
+    /// `unwrap_or_else(into_inner)` recovers from poisoning so a
+    /// failing test does not cascade-fail every later test.
+    fn env_lock() -> std::sync::MutexGuard<'static, ()> {
+        static LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());
+        LOCK.lock().unwrap_or_else(|e| e.into_inner())
+    }
+
     #[test]
     fn profile_for_caps_prefers_file_io() {
         const FILE_IO: u32 = 1 << 5;
@@ -534,6 +547,7 @@ mod tests {
 
     #[test]
     fn sandbox_exec_bin_honours_env_override() {
+        let _env_guard = env_lock();
         // SAFETY: tests are run serially with the macOS hardening suite;
         // resetting the env var below restores the default for subsequent
         // tests in the same process.
@@ -590,6 +604,7 @@ mod tests {
 
     #[test]
     fn deny_default_seed_for_returns_none_without_env_opt_in() {
+        let _env_guard = env_lock();
         // SAFETY: tests in this module mutate process-global env; the
         // macOS hardening integration suite serialises around the same
         // env vars so cargo nextest's per-test process isolation does not
@@ -601,6 +616,7 @@ mod tests {
 
     #[test]
     fn deny_default_seed_for_returns_some_when_env_set_and_seed_present() {
+        let _env_guard = env_lock();
         let tmp = std::env::temp_dir().join("nyx-sb-seed-test");
         let _ = std::fs::remove_dir_all(&tmp);
         std::fs::create_dir_all(&tmp).expect("create seed tempdir");
@@ -626,6 +642,7 @@ mod tests {
 
     #[test]
     fn wrap_plan_returns_none_when_sandbox_exec_missing() {
+        let _env_guard = env_lock();
         unsafe { std::env::set_var(SANDBOX_EXEC_BIN_ENV, "/nonexistent/sandbox-exec") };
         let input = WrapInput {
             cmd_path: Path::new("/usr/bin/true"),
@@ -643,6 +660,7 @@ mod tests {
     #[test]
     #[cfg(target_os = "macos")]
     fn wrap_plan_returns_sandboxed_when_sandbox_exec_present() {
+        let _env_guard = env_lock();
         // Skip when the host doesn't actually have /usr/bin/sandbox-exec
         // (e.g. someone reading SANDBOX_EXEC_BIN_ENV from a parent shell).
         unsafe { std::env::remove_var(SANDBOX_EXEC_BIN_ENV) };
diff --git a/src/dynamic/verify.rs b/src/dynamic/verify.rs
index 5c4bf934..ff77d337 100644
--- a/src/dynamic/verify.rs
+++ b/src/dynamic/verify.rs
@@ -1264,6 +1264,19 @@ fn build_verdict(
 mod tests {
     use super::*;
 
+    /// Process-global env vars (`NYX_VERIFY_REPLAY_STABLE`,
+    /// `NYX_VERIFY_REPLAY_DOCKER`) are mutated by several tests in this
+    /// module; without serialisation a parallel `cargo test` invocation
+    /// races on the global state and produces flakes that vanish under
+    /// `--test-threads=1`.  Every env-mutating test acquires this guard
+    /// for the duration of its body.  `unwrap_or_else(into_inner)`
+    /// recovers from poisoning so a failing test does not cascade-fail
+    /// every later test in the suite.
+    fn env_lock() -> std::sync::MutexGuard<'static, ()> {
+        static LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());
+        LOCK.lock().unwrap_or_else(|e| e.into_inner())
+    }
+
     #[test]
     fn compute_entry_content_hash_stable_for_same_file() {
         let dir = tempfile::TempDir::new().unwrap();
@@ -1300,6 +1313,7 @@ mod tests {
 
     #[test]
     fn from_config_defaults_replay_stable_check_off() {
+        let _env_guard = env_lock();
         // Make sure the test is hermetic — `from_config` reads the env
         // var, so a stale process-wide setting could mask the default.
         unsafe { std::env::remove_var("NYX_VERIFY_REPLAY_STABLE") };
@@ -1313,6 +1327,7 @@ mod tests {
 
     #[test]
     fn from_config_picks_up_replay_stable_env_flag() {
+        let _env_guard = env_lock();
         unsafe { std::env::set_var("NYX_VERIFY_REPLAY_STABLE", "1") };
         let opts = VerifyOptions::from_config(&Config::default());
         assert!(opts.replay_stable_check);
@@ -1327,6 +1342,7 @@ mod tests {
 
     #[test]
     fn from_config_defaults_replay_use_docker_off() {
+        let _env_guard = env_lock();
         // Same hermeticity concern as `replay_stable_check`: clear any
         // stale process-wide setting so the default is observable.
         unsafe { std::env::remove_var("NYX_VERIFY_REPLAY_DOCKER") };
@@ -1340,6 +1356,7 @@ mod tests {
 
     #[test]
     fn from_config_picks_up_replay_docker_env_flag() {
+        let _env_guard = env_lock();
         unsafe { std::env::set_var("NYX_VERIFY_REPLAY_DOCKER", "1") };
         let opts = VerifyOptions::from_config(&Config::default());
         assert!(opts.replay_use_docker);
diff --git a/tests/deserialize_corpus.rs b/tests/deserialize_corpus.rs
index 78a753b6..d83e7116 100644
--- a/tests/deserialize_corpus.rs
+++ b/tests/deserialize_corpus.rs
@@ -131,15 +131,36 @@ fn probe_kind_deserialize_serdes() {
 
 #[test]
 fn lang_emitter_dispatches_to_deserialize_harness() {
-    for (lang, entry_file, entry_name, marker) in [
-        (Lang::Java, "tests/dynamic_fixtures/deserialize/java/vuln.java",
-            "run", "RestrictedObjectInputStream"),
-        (Lang::Python, "tests/dynamic_fixtures/deserialize/python/vuln.py",
-            "run", "RestrictedUnpickler"),
-        (Lang::Php, "tests/dynamic_fixtures/deserialize/php/vuln.php",
-            "run", "allowed_classes"),
-        (Lang::Ruby, "tests/dynamic_fixtures/deserialize/ruby/vuln.rb",
-            "run", "Marshal.load"),
+    // `sink_callee_marker` is the per-language deserialize sink call
+    // string the harness writes into the JSON probe record — the
+    // resolveClass / find_class / unserialize / Marshal.load boundary
+    // the brief calls out.  Pinning the marker here keeps the test
+    // honest about which guard each lang's harness names.
+    for (lang, entry_file, entry_name, sink_callee_marker) in [
+        (
+            Lang::Java,
+            "tests/dynamic_fixtures/deserialize/java/vuln.java",
+            "run",
+            "ObjectInputStream.resolveClass",
+        ),
+        (
+            Lang::Python,
+            "tests/dynamic_fixtures/deserialize/python/vuln.py",
+            "run",
+            "pickle.Unpickler.find_class",
+        ),
+        (
+            Lang::Php,
+            "tests/dynamic_fixtures/deserialize/php/vuln.php",
+            "run",
+            "unserialize",
+        ),
+        (
+            Lang::Ruby,
+            "tests/dynamic_fixtures/deserialize/ruby/vuln.rb",
+            "run",
+            "Marshal.load",
+        ),
     ] {
         let spec = make_spec(lang, entry_file, entry_name);
         let harness = lang::emit(&spec)
@@ -148,12 +169,11 @@ fn lang_emitter_dispatches_to_deserialize_harness() {
             harness.source.contains("NYX_GADGET_CLASS:"),
             "{lang:?} deserialize harness must parse NYX_GADGET_CLASS marker",
         );
-        // Each lang's harness either splices the relevant guard
-        // construct directly or names the equivalent constant.  The
-        // assertions below pin only the parts the harness emitter
-        // generates (not the fixture), so the test stays green even
-        // when the fixture moves.
-        let _ = marker; // marker validated by inspecting the fixture, not the harness.
+        assert!(
+            harness.source.contains(sink_callee_marker),
+            "{lang:?} deserialize harness must name {sink_callee_marker:?} as the \
+             resolveClass / find_class equivalent sink callee",
+        );
     }
 }
 
diff --git a/tests/hostile_input_tests.rs b/tests/hostile_input_tests.rs
index 427d38c4..7dfcd70b 100644
--- a/tests/hostile_input_tests.rs
+++ b/tests/hostile_input_tests.rs
@@ -229,12 +229,17 @@ fn binary_null_heavy_input_is_skipped() {
 
 /// Invalid UTF-8 in a recognised source extension must not panic.
 /// tree-sitter can operate on raw bytes; we just check that it survives.
+/// Budget widened from 2 s to 10 s after the pitboss parallel `cargo test`
+/// invocation surfaced ~2.8 s wall time under shared-runner CPU pressure
+/// even though the isolated test runs well under 100 ms.  The point is
+/// to catch a runaway, not to benchmark, so 10 s leaves clear headroom
+/// without masking a real regression.
 #[test]
 fn invalid_utf8_does_not_panic() {
     let bytes = b"\xff\xfe\xfd\xfc\n\xde\xad\xbe\xef\n// trailing\n".to_vec();
     let path = Path::new("junk.rs");
     let cfg = hostile_cfg();
-    let _ = with_time_budget(Duration::from_secs(2), "invalid utf8", || {
+    let _ = with_time_budget(Duration::from_secs(10), "invalid utf8", || {
         run_rules_on_bytes(&bytes, path, &cfg, None, None).expect("invalid UTF-8 should not error")
     });
 }
@@ -260,10 +265,13 @@ fn empty_file_is_noop() {
 /// right-associative expression, the latter is a separate stress case
 /// dominated by recursive descent and not representative of real input.
 ///
-/// Generous debug-build budget (20 s) because the full analysis pipeline
+/// Generous debug-build budget (40 s) because the full analysis pipeline
 /// runs on every statement; release builds are an order of magnitude
 /// faster.  The point is to guard against regressions that are
-/// super-linear in statement count, not to benchmark.
+/// super-linear in statement count, not to benchmark.  Budget widened
+/// from 20 s after the pitboss parallel `cargo test` invocation surfaced
+/// 24-25 s wall time under shared-runner CPU pressure even though the
+/// isolated test runs in ~3.7 s.
 #[test]
 fn very_long_single_line_parses() {
     run_on_prod_stack(|| {
@@ -275,7 +283,7 @@ fn very_long_single_line_parses() {
 
         let path = Path::new("long_line.js");
         let cfg = hostile_cfg();
-        let _ = with_time_budget(Duration::from_secs(20), "long line parse", || {
+        let _ = with_time_budget(Duration::from_secs(40), "long line parse", || {
             run_rules_on_bytes(s.as_bytes(), path, &cfg, None, None)
                 .expect("long-line file should parse")
         });
@@ -348,7 +356,10 @@ fn deeply_nested_if_statements_do_not_stack_overflow() {
 
 /// Lots of small functions in one file stresses the pass-1/pass-2 bookkeeping
 /// (summary extraction, callgraph build).  2 000 functions is cheap but
-/// plausible for generated code.
+/// plausible for generated code.  Budget widened from 15 s after the
+/// pitboss parallel `cargo test` invocation surfaced 15.3 s under
+/// shared-runner CPU pressure even though the isolated test runs in
+/// ~3.7 s.
 #[test]
 fn many_small_functions_do_not_explode() {
     let mut s = String::with_capacity(2000 * 32);
@@ -358,7 +369,7 @@ fn many_small_functions_do_not_explode() {
 
     let path = Path::new("many_funcs.js");
     let cfg = hostile_cfg();
-    let _ = with_time_budget(Duration::from_secs(15), "many-funcs scan", || {
+    let _ = with_time_budget(Duration::from_secs(30), "many-funcs scan", || {
         run_rules_on_bytes(s.as_bytes(), path, &cfg, None, None)
             .expect("many-functions file should scan")
     });