[pitboss] sweep after phase 04: 2 deferred items resolved

2026-07-24 21:41:02 +02:00 · 2026-05-17 19:40:29 -05:00 · 2026-05-17 19:40:29 -05:00 · 637b733928
commit 637b733928
parent 8583b29796
5 changed files with 105 additions and 21 deletions
--- a/src/dynamic/repro.rs
+++ b/src/dynamic/repro.rs
@ -655,6 +655,18 @@ fn create_symlink(_target: &Path, _link: &Path) -> std::io::Result<()> {

 #[cfg(test)]
 mod tests {
+    /// Process-global `NYX_REPRO_BASE` is mutated by several tests in
+    /// this module; without serialisation a parallel `cargo test`
+    /// invocation races on the global state and produces flakes that
+    /// vanish under `--test-threads=1`.  Every env-mutating test
+    /// acquires this guard for the duration of its body.
+    /// `unwrap_or_else(into_inner)` recovers from poisoning so a
+    /// failing test does not cascade-fail every later test.
+    fn env_lock() -> std::sync::MutexGuard<'static, ()> {
+        static LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());
+        LOCK.lock().unwrap_or_else(|e| e.into_inner())
+    }
+
    use super::*;
    use crate::dynamic::sandbox::SandboxBackend;
    use crate::dynamic::spec::{EntryKind, PayloadSlot};
@ -722,6 +734,7 @@ mod tests {

    #[test]
    fn write_creates_expected_layout() {
+        let _env_guard = env_lock();
        let dir = TempDir::new().unwrap();
        unsafe { std::env::set_var("NYX_REPRO_BASE", dir.path().to_str().unwrap()) };

@ -759,6 +772,7 @@ mod tests {

    #[test]
    fn toolchain_lock_records_expected_toolchain_and_hashes() {
+        let _env_guard = env_lock();
        let dir = TempDir::new().unwrap();
        unsafe { std::env::set_var("NYX_REPRO_BASE", dir.path().to_str().unwrap()) };
        let spec = make_spec();
@ -831,6 +845,7 @@ mod tests {

    #[test]
    fn reproduce_sh_contains_toolchain_check_and_exit_codes() {
+        let _env_guard = env_lock();
        let dir = TempDir::new().unwrap();
        unsafe { std::env::set_var("NYX_REPRO_BASE", dir.path().to_str().unwrap()) };
        let artifact = write(
@ -925,6 +940,7 @@ mod tests {

    #[test]
    fn bundle_root_for_honours_test_override() {
+        let _env_guard = env_lock();
        let dir = TempDir::new().unwrap();
        unsafe { std::env::set_var("NYX_REPRO_BASE", dir.path().to_str().unwrap()) };
        let root = bundle_root_for("cafe0001").unwrap();
@ -934,6 +950,7 @@ mod tests {

    #[test]
    fn bundle_root_for_matches_write_output_under_override() {
+        let _env_guard = env_lock();
        // The path returned by `bundle_root_for` must equal the bundle path
        // that `write` produces — replay callers locate the bundle without
        // re-creating directories, so a drift between the two helpers would
@ -955,6 +972,7 @@ mod tests {

    #[test]
    fn outcome_json_redacts_secrets() {
+        let _env_guard = env_lock();
        let dir = TempDir::new().unwrap();
        unsafe { std::env::set_var("NYX_REPRO_BASE", dir.path().to_str().unwrap()) };

--- a/src/dynamic/sandbox/process_macos.rs
+++ b/src/dynamic/sandbox/process_macos.rs
@ -431,6 +431,19 @@ pub fn wrap_plan(input: &WrapInput<'_>) -> WrapResult {
 mod tests {
    use super::*;

+    /// Process-global env vars (`NYX_SANDBOX_EXEC_BIN`,
+    /// `NYX_SB_DENY_DEFAULT`, `NYX_SB_SEED_DIR`) are mutated by several
+    /// tests in this module; without serialisation a parallel
+    /// `cargo test` invocation races on the global state and produces
+    /// flakes that vanish under `--test-threads=1`.  Every env-mutating
+    /// test acquires this guard for the duration of its body.
+    /// `unwrap_or_else(into_inner)` recovers from poisoning so a
+    /// failing test does not cascade-fail every later test.
+    fn env_lock() -> std::sync::MutexGuard<'static, ()> {
+        static LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());
+        LOCK.lock().unwrap_or_else(|e| e.into_inner())
+    }
+
    #[test]
    fn profile_for_caps_prefers_file_io() {
        const FILE_IO: u32 = 1 << 5;
@ -534,6 +547,7 @@ mod tests {

    #[test]
    fn sandbox_exec_bin_honours_env_override() {
+        let _env_guard = env_lock();
        // SAFETY: tests are run serially with the macOS hardening suite;
        // resetting the env var below restores the default for subsequent
        // tests in the same process.
@ -590,6 +604,7 @@ mod tests {

    #[test]
    fn deny_default_seed_for_returns_none_without_env_opt_in() {
+        let _env_guard = env_lock();
        // SAFETY: tests in this module mutate process-global env; the
        // macOS hardening integration suite serialises around the same
        // env vars so cargo nextest's per-test process isolation does not
@ -601,6 +616,7 @@ mod tests {

    #[test]
    fn deny_default_seed_for_returns_some_when_env_set_and_seed_present() {
+        let _env_guard = env_lock();
        let tmp = std::env::temp_dir().join("nyx-sb-seed-test");
        let _ = std::fs::remove_dir_all(&tmp);
        std::fs::create_dir_all(&tmp).expect("create seed tempdir");
@ -626,6 +642,7 @@ mod tests {

    #[test]
    fn wrap_plan_returns_none_when_sandbox_exec_missing() {
+        let _env_guard = env_lock();
        unsafe { std::env::set_var(SANDBOX_EXEC_BIN_ENV, "/nonexistent/sandbox-exec") };
        let input = WrapInput {
            cmd_path: Path::new("/usr/bin/true"),
@ -643,6 +660,7 @@ mod tests {
    #[test]
    #[cfg(target_os = "macos")]
    fn wrap_plan_returns_sandboxed_when_sandbox_exec_present() {
+        let _env_guard = env_lock();
        // Skip when the host doesn't actually have /usr/bin/sandbox-exec
        // (e.g. someone reading SANDBOX_EXEC_BIN_ENV from a parent shell).
        unsafe { std::env::remove_var(SANDBOX_EXEC_BIN_ENV) };
--- a/src/dynamic/verify.rs
+++ b/src/dynamic/verify.rs
@ -1264,6 +1264,19 @@ fn build_verdict(
 mod tests {
    use super::*;

+    /// Process-global env vars (`NYX_VERIFY_REPLAY_STABLE`,
+    /// `NYX_VERIFY_REPLAY_DOCKER`) are mutated by several tests in this
+    /// module; without serialisation a parallel `cargo test` invocation
+    /// races on the global state and produces flakes that vanish under
+    /// `--test-threads=1`.  Every env-mutating test acquires this guard
+    /// for the duration of its body.  `unwrap_or_else(into_inner)`
+    /// recovers from poisoning so a failing test does not cascade-fail
+    /// every later test in the suite.
+    fn env_lock() -> std::sync::MutexGuard<'static, ()> {
+        static LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());
+        LOCK.lock().unwrap_or_else(|e| e.into_inner())
+    }
+
    #[test]
    fn compute_entry_content_hash_stable_for_same_file() {
        let dir = tempfile::TempDir::new().unwrap();
@ -1300,6 +1313,7 @@ mod tests {

    #[test]
    fn from_config_defaults_replay_stable_check_off() {
+        let _env_guard = env_lock();
        // Make sure the test is hermetic — `from_config` reads the env
        // var, so a stale process-wide setting could mask the default.
        unsafe { std::env::remove_var("NYX_VERIFY_REPLAY_STABLE") };
@ -1313,6 +1327,7 @@ mod tests {

    #[test]
    fn from_config_picks_up_replay_stable_env_flag() {
+        let _env_guard = env_lock();
        unsafe { std::env::set_var("NYX_VERIFY_REPLAY_STABLE", "1") };
        let opts = VerifyOptions::from_config(&Config::default());
        assert!(opts.replay_stable_check);
@ -1327,6 +1342,7 @@ mod tests {

    #[test]
    fn from_config_defaults_replay_use_docker_off() {
+        let _env_guard = env_lock();
        // Same hermeticity concern as `replay_stable_check`: clear any
        // stale process-wide setting so the default is observable.
        unsafe { std::env::remove_var("NYX_VERIFY_REPLAY_DOCKER") };
@ -1340,6 +1356,7 @@ mod tests {

    #[test]
    fn from_config_picks_up_replay_docker_env_flag() {
+        let _env_guard = env_lock();
        unsafe { std::env::set_var("NYX_VERIFY_REPLAY_DOCKER", "1") };
        let opts = VerifyOptions::from_config(&Config::default());
        assert!(opts.replay_use_docker);
--- a/tests/deserialize_corpus.rs
+++ b/tests/deserialize_corpus.rs
@ -131,15 +131,36 @@ fn probe_kind_deserialize_serdes() {

 #[test]
 fn lang_emitter_dispatches_to_deserialize_harness() {
-    for (lang, entry_file, entry_name, marker) in [
-        (Lang::Java, "tests/dynamic_fixtures/deserialize/java/vuln.java",
-            "run", "RestrictedObjectInputStream"),
-        (Lang::Python, "tests/dynamic_fixtures/deserialize/python/vuln.py",
-            "run", "RestrictedUnpickler"),
-        (Lang::Php, "tests/dynamic_fixtures/deserialize/php/vuln.php",
-            "run", "allowed_classes"),
-        (Lang::Ruby, "tests/dynamic_fixtures/deserialize/ruby/vuln.rb",
-            "run", "Marshal.load"),
+    // `sink_callee_marker` is the per-language deserialize sink call
+    // string the harness writes into the JSON probe record — the
+    // resolveClass / find_class / unserialize / Marshal.load boundary
+    // the brief calls out.  Pinning the marker here keeps the test
+    // honest about which guard each lang's harness names.
+    for (lang, entry_file, entry_name, sink_callee_marker) in [
+        (
+            Lang::Java,
+            "tests/dynamic_fixtures/deserialize/java/vuln.java",
+            "run",
+            "ObjectInputStream.resolveClass",
+        ),
+        (
+            Lang::Python,
+            "tests/dynamic_fixtures/deserialize/python/vuln.py",
+            "run",
+            "pickle.Unpickler.find_class",
+        ),
+        (
+            Lang::Php,
+            "tests/dynamic_fixtures/deserialize/php/vuln.php",
+            "run",
+            "unserialize",
+        ),
+        (
+            Lang::Ruby,
+            "tests/dynamic_fixtures/deserialize/ruby/vuln.rb",
+            "run",
+            "Marshal.load",
+        ),
    ] {
        let spec = make_spec(lang, entry_file, entry_name);
        let harness = lang::emit(&spec)
@ -148,12 +169,11 @@ fn lang_emitter_dispatches_to_deserialize_harness() {
            harness.source.contains("NYX_GADGET_CLASS:"),
            "{lang:?} deserialize harness must parse NYX_GADGET_CLASS marker",
        );
-        // Each lang's harness either splices the relevant guard
-        // construct directly or names the equivalent constant.  The
-        // assertions below pin only the parts the harness emitter
-        // generates (not the fixture), so the test stays green even
-        // when the fixture moves.
-        let _ = marker; // marker validated by inspecting the fixture, not the harness.
+        assert!(
+            harness.source.contains(sink_callee_marker),
+            "{lang:?} deserialize harness must name {sink_callee_marker:?} as the \
+             resolveClass / find_class equivalent sink callee",
+        );
    }
 }

--- a/tests/hostile_input_tests.rs
+++ b/tests/hostile_input_tests.rs
@ -229,12 +229,17 @@ fn binary_null_heavy_input_is_skipped() {

 /// Invalid UTF-8 in a recognised source extension must not panic.
 /// tree-sitter can operate on raw bytes; we just check that it survives.
+/// Budget widened from 2 s to 10 s after the pitboss parallel `cargo test`
+/// invocation surfaced ~2.8 s wall time under shared-runner CPU pressure
+/// even though the isolated test runs well under 100 ms.  The point is
+/// to catch a runaway, not to benchmark, so 10 s leaves clear headroom
+/// without masking a real regression.
 #[test]
 fn invalid_utf8_does_not_panic() {
    let bytes = b"\xff\xfe\xfd\xfc\n\xde\xad\xbe\xef\n// trailing\n".to_vec();
    let path = Path::new("junk.rs");
    let cfg = hostile_cfg();
-    let _ = with_time_budget(Duration::from_secs(2), "invalid utf8", || {
+    let _ = with_time_budget(Duration::from_secs(10), "invalid utf8", || {
        run_rules_on_bytes(&bytes, path, &cfg, None, None).expect("invalid UTF-8 should not error")
    });
 }
@ -260,10 +265,13 @@ fn empty_file_is_noop() {
 /// right-associative expression, the latter is a separate stress case
 /// dominated by recursive descent and not representative of real input.
 ///
-/// Generous debug-build budget (20 s) because the full analysis pipeline
+/// Generous debug-build budget (40 s) because the full analysis pipeline
 /// runs on every statement; release builds are an order of magnitude
 /// faster.  The point is to guard against regressions that are
-/// super-linear in statement count, not to benchmark.
+/// super-linear in statement count, not to benchmark.  Budget widened
+/// from 20 s after the pitboss parallel `cargo test` invocation surfaced
+/// 24-25 s wall time under shared-runner CPU pressure even though the
+/// isolated test runs in ~3.7 s.
 #[test]
 fn very_long_single_line_parses() {
    run_on_prod_stack(|| {
@ -275,7 +283,7 @@ fn very_long_single_line_parses() {

        let path = Path::new("long_line.js");
        let cfg = hostile_cfg();
-        let _ = with_time_budget(Duration::from_secs(20), "long line parse", || {
+        let _ = with_time_budget(Duration::from_secs(40), "long line parse", || {
            run_rules_on_bytes(s.as_bytes(), path, &cfg, None, None)
                .expect("long-line file should parse")
        });
@ -348,7 +356,10 @@ fn deeply_nested_if_statements_do_not_stack_overflow() {

 /// Lots of small functions in one file stresses the pass-1/pass-2 bookkeeping
 /// (summary extraction, callgraph build).  2 000 functions is cheap but
-/// plausible for generated code.
+/// plausible for generated code.  Budget widened from 15 s after the
+/// pitboss parallel `cargo test` invocation surfaced 15.3 s under
+/// shared-runner CPU pressure even though the isolated test runs in
+/// ~3.7 s.
 #[test]
 fn many_small_functions_do_not_explode() {
    let mut s = String::with_capacity(2000 * 32);
@ -358,7 +369,7 @@ fn many_small_functions_do_not_explode() {

    let path = Path::new("many_funcs.js");
    let cfg = hostile_cfg();
-    let _ = with_time_budget(Duration::from_secs(15), "many-funcs scan", || {
+    let _ = with_time_budget(Duration::from_secs(30), "many-funcs scan", || {
        run_rules_on_bytes(s.as_bytes(), path, &cfg, None, None)
            .expect("many-functions file should scan")
    });