From 637b73392870e7f59d399b28c305946309680fdd Mon Sep 17 00:00:00 2001 From: pitboss Date: Sun, 17 May 2026 19:40:29 -0500 Subject: [PATCH] [pitboss] sweep after phase 04: 2 deferred items resolved --- src/dynamic/repro.rs | 18 ++++++++++ src/dynamic/sandbox/process_macos.rs | 18 ++++++++++ src/dynamic/verify.rs | 17 ++++++++++ tests/deserialize_corpus.rs | 50 +++++++++++++++++++--------- tests/hostile_input_tests.rs | 23 +++++++++---- 5 files changed, 105 insertions(+), 21 deletions(-) diff --git a/src/dynamic/repro.rs b/src/dynamic/repro.rs index 8cda6c5c..0643848c 100644 --- a/src/dynamic/repro.rs +++ b/src/dynamic/repro.rs @@ -655,6 +655,18 @@ fn create_symlink(_target: &Path, _link: &Path) -> std::io::Result<()> { #[cfg(test)] mod tests { + /// Process-global `NYX_REPRO_BASE` is mutated by several tests in + /// this module; without serialisation a parallel `cargo test` + /// invocation races on the global state and produces flakes that + /// vanish under `--test-threads=1`. Every env-mutating test + /// acquires this guard for the duration of its body. + /// `unwrap_or_else(into_inner)` recovers from poisoning so a + /// failing test does not cascade-fail every later test. + fn env_lock() -> std::sync::MutexGuard<'static, ()> { + static LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(()); + LOCK.lock().unwrap_or_else(|e| e.into_inner()) + } + use super::*; use crate::dynamic::sandbox::SandboxBackend; use crate::dynamic::spec::{EntryKind, PayloadSlot}; @@ -722,6 +734,7 @@ mod tests { #[test] fn write_creates_expected_layout() { + let _env_guard = env_lock(); let dir = TempDir::new().unwrap(); unsafe { std::env::set_var("NYX_REPRO_BASE", dir.path().to_str().unwrap()) }; @@ -759,6 +772,7 @@ mod tests { #[test] fn toolchain_lock_records_expected_toolchain_and_hashes() { + let _env_guard = env_lock(); let dir = TempDir::new().unwrap(); unsafe { std::env::set_var("NYX_REPRO_BASE", dir.path().to_str().unwrap()) }; let spec = make_spec(); @@ -831,6 +845,7 @@ mod tests { #[test] fn reproduce_sh_contains_toolchain_check_and_exit_codes() { + let _env_guard = env_lock(); let dir = TempDir::new().unwrap(); unsafe { std::env::set_var("NYX_REPRO_BASE", dir.path().to_str().unwrap()) }; let artifact = write( @@ -925,6 +940,7 @@ mod tests { #[test] fn bundle_root_for_honours_test_override() { + let _env_guard = env_lock(); let dir = TempDir::new().unwrap(); unsafe { std::env::set_var("NYX_REPRO_BASE", dir.path().to_str().unwrap()) }; let root = bundle_root_for("cafe0001").unwrap(); @@ -934,6 +950,7 @@ mod tests { #[test] fn bundle_root_for_matches_write_output_under_override() { + let _env_guard = env_lock(); // The path returned by `bundle_root_for` must equal the bundle path // that `write` produces — replay callers locate the bundle without // re-creating directories, so a drift between the two helpers would @@ -955,6 +972,7 @@ mod tests { #[test] fn outcome_json_redacts_secrets() { + let _env_guard = env_lock(); let dir = TempDir::new().unwrap(); unsafe { std::env::set_var("NYX_REPRO_BASE", dir.path().to_str().unwrap()) }; diff --git a/src/dynamic/sandbox/process_macos.rs b/src/dynamic/sandbox/process_macos.rs index afa98aab..9f544011 100644 --- a/src/dynamic/sandbox/process_macos.rs +++ b/src/dynamic/sandbox/process_macos.rs @@ -431,6 +431,19 @@ pub fn wrap_plan(input: &WrapInput<'_>) -> WrapResult { mod tests { use super::*; + /// Process-global env vars (`NYX_SANDBOX_EXEC_BIN`, + /// `NYX_SB_DENY_DEFAULT`, `NYX_SB_SEED_DIR`) are mutated by several + /// tests in this module; without serialisation a parallel + /// `cargo test` invocation races on the global state and produces + /// flakes that vanish under `--test-threads=1`. Every env-mutating + /// test acquires this guard for the duration of its body. + /// `unwrap_or_else(into_inner)` recovers from poisoning so a + /// failing test does not cascade-fail every later test. + fn env_lock() -> std::sync::MutexGuard<'static, ()> { + static LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(()); + LOCK.lock().unwrap_or_else(|e| e.into_inner()) + } + #[test] fn profile_for_caps_prefers_file_io() { const FILE_IO: u32 = 1 << 5; @@ -534,6 +547,7 @@ mod tests { #[test] fn sandbox_exec_bin_honours_env_override() { + let _env_guard = env_lock(); // SAFETY: tests are run serially with the macOS hardening suite; // resetting the env var below restores the default for subsequent // tests in the same process. @@ -590,6 +604,7 @@ mod tests { #[test] fn deny_default_seed_for_returns_none_without_env_opt_in() { + let _env_guard = env_lock(); // SAFETY: tests in this module mutate process-global env; the // macOS hardening integration suite serialises around the same // env vars so cargo nextest's per-test process isolation does not @@ -601,6 +616,7 @@ mod tests { #[test] fn deny_default_seed_for_returns_some_when_env_set_and_seed_present() { + let _env_guard = env_lock(); let tmp = std::env::temp_dir().join("nyx-sb-seed-test"); let _ = std::fs::remove_dir_all(&tmp); std::fs::create_dir_all(&tmp).expect("create seed tempdir"); @@ -626,6 +642,7 @@ mod tests { #[test] fn wrap_plan_returns_none_when_sandbox_exec_missing() { + let _env_guard = env_lock(); unsafe { std::env::set_var(SANDBOX_EXEC_BIN_ENV, "/nonexistent/sandbox-exec") }; let input = WrapInput { cmd_path: Path::new("/usr/bin/true"), @@ -643,6 +660,7 @@ mod tests { #[test] #[cfg(target_os = "macos")] fn wrap_plan_returns_sandboxed_when_sandbox_exec_present() { + let _env_guard = env_lock(); // Skip when the host doesn't actually have /usr/bin/sandbox-exec // (e.g. someone reading SANDBOX_EXEC_BIN_ENV from a parent shell). unsafe { std::env::remove_var(SANDBOX_EXEC_BIN_ENV) }; diff --git a/src/dynamic/verify.rs b/src/dynamic/verify.rs index 5c4bf934..ff77d337 100644 --- a/src/dynamic/verify.rs +++ b/src/dynamic/verify.rs @@ -1264,6 +1264,19 @@ fn build_verdict( mod tests { use super::*; + /// Process-global env vars (`NYX_VERIFY_REPLAY_STABLE`, + /// `NYX_VERIFY_REPLAY_DOCKER`) are mutated by several tests in this + /// module; without serialisation a parallel `cargo test` invocation + /// races on the global state and produces flakes that vanish under + /// `--test-threads=1`. Every env-mutating test acquires this guard + /// for the duration of its body. `unwrap_or_else(into_inner)` + /// recovers from poisoning so a failing test does not cascade-fail + /// every later test in the suite. + fn env_lock() -> std::sync::MutexGuard<'static, ()> { + static LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(()); + LOCK.lock().unwrap_or_else(|e| e.into_inner()) + } + #[test] fn compute_entry_content_hash_stable_for_same_file() { let dir = tempfile::TempDir::new().unwrap(); @@ -1300,6 +1313,7 @@ mod tests { #[test] fn from_config_defaults_replay_stable_check_off() { + let _env_guard = env_lock(); // Make sure the test is hermetic — `from_config` reads the env // var, so a stale process-wide setting could mask the default. unsafe { std::env::remove_var("NYX_VERIFY_REPLAY_STABLE") }; @@ -1313,6 +1327,7 @@ mod tests { #[test] fn from_config_picks_up_replay_stable_env_flag() { + let _env_guard = env_lock(); unsafe { std::env::set_var("NYX_VERIFY_REPLAY_STABLE", "1") }; let opts = VerifyOptions::from_config(&Config::default()); assert!(opts.replay_stable_check); @@ -1327,6 +1342,7 @@ mod tests { #[test] fn from_config_defaults_replay_use_docker_off() { + let _env_guard = env_lock(); // Same hermeticity concern as `replay_stable_check`: clear any // stale process-wide setting so the default is observable. unsafe { std::env::remove_var("NYX_VERIFY_REPLAY_DOCKER") }; @@ -1340,6 +1356,7 @@ mod tests { #[test] fn from_config_picks_up_replay_docker_env_flag() { + let _env_guard = env_lock(); unsafe { std::env::set_var("NYX_VERIFY_REPLAY_DOCKER", "1") }; let opts = VerifyOptions::from_config(&Config::default()); assert!(opts.replay_use_docker); diff --git a/tests/deserialize_corpus.rs b/tests/deserialize_corpus.rs index 78a753b6..d83e7116 100644 --- a/tests/deserialize_corpus.rs +++ b/tests/deserialize_corpus.rs @@ -131,15 +131,36 @@ fn probe_kind_deserialize_serdes() { #[test] fn lang_emitter_dispatches_to_deserialize_harness() { - for (lang, entry_file, entry_name, marker) in [ - (Lang::Java, "tests/dynamic_fixtures/deserialize/java/vuln.java", - "run", "RestrictedObjectInputStream"), - (Lang::Python, "tests/dynamic_fixtures/deserialize/python/vuln.py", - "run", "RestrictedUnpickler"), - (Lang::Php, "tests/dynamic_fixtures/deserialize/php/vuln.php", - "run", "allowed_classes"), - (Lang::Ruby, "tests/dynamic_fixtures/deserialize/ruby/vuln.rb", - "run", "Marshal.load"), + // `sink_callee_marker` is the per-language deserialize sink call + // string the harness writes into the JSON probe record — the + // resolveClass / find_class / unserialize / Marshal.load boundary + // the brief calls out. Pinning the marker here keeps the test + // honest about which guard each lang's harness names. + for (lang, entry_file, entry_name, sink_callee_marker) in [ + ( + Lang::Java, + "tests/dynamic_fixtures/deserialize/java/vuln.java", + "run", + "ObjectInputStream.resolveClass", + ), + ( + Lang::Python, + "tests/dynamic_fixtures/deserialize/python/vuln.py", + "run", + "pickle.Unpickler.find_class", + ), + ( + Lang::Php, + "tests/dynamic_fixtures/deserialize/php/vuln.php", + "run", + "unserialize", + ), + ( + Lang::Ruby, + "tests/dynamic_fixtures/deserialize/ruby/vuln.rb", + "run", + "Marshal.load", + ), ] { let spec = make_spec(lang, entry_file, entry_name); let harness = lang::emit(&spec) @@ -148,12 +169,11 @@ fn lang_emitter_dispatches_to_deserialize_harness() { harness.source.contains("NYX_GADGET_CLASS:"), "{lang:?} deserialize harness must parse NYX_GADGET_CLASS marker", ); - // Each lang's harness either splices the relevant guard - // construct directly or names the equivalent constant. The - // assertions below pin only the parts the harness emitter - // generates (not the fixture), so the test stays green even - // when the fixture moves. - let _ = marker; // marker validated by inspecting the fixture, not the harness. + assert!( + harness.source.contains(sink_callee_marker), + "{lang:?} deserialize harness must name {sink_callee_marker:?} as the \ + resolveClass / find_class equivalent sink callee", + ); } } diff --git a/tests/hostile_input_tests.rs b/tests/hostile_input_tests.rs index 427d38c4..7dfcd70b 100644 --- a/tests/hostile_input_tests.rs +++ b/tests/hostile_input_tests.rs @@ -229,12 +229,17 @@ fn binary_null_heavy_input_is_skipped() { /// Invalid UTF-8 in a recognised source extension must not panic. /// tree-sitter can operate on raw bytes; we just check that it survives. +/// Budget widened from 2 s to 10 s after the pitboss parallel `cargo test` +/// invocation surfaced ~2.8 s wall time under shared-runner CPU pressure +/// even though the isolated test runs well under 100 ms. The point is +/// to catch a runaway, not to benchmark, so 10 s leaves clear headroom +/// without masking a real regression. #[test] fn invalid_utf8_does_not_panic() { let bytes = b"\xff\xfe\xfd\xfc\n\xde\xad\xbe\xef\n// trailing\n".to_vec(); let path = Path::new("junk.rs"); let cfg = hostile_cfg(); - let _ = with_time_budget(Duration::from_secs(2), "invalid utf8", || { + let _ = with_time_budget(Duration::from_secs(10), "invalid utf8", || { run_rules_on_bytes(&bytes, path, &cfg, None, None).expect("invalid UTF-8 should not error") }); } @@ -260,10 +265,13 @@ fn empty_file_is_noop() { /// right-associative expression, the latter is a separate stress case /// dominated by recursive descent and not representative of real input. /// -/// Generous debug-build budget (20 s) because the full analysis pipeline +/// Generous debug-build budget (40 s) because the full analysis pipeline /// runs on every statement; release builds are an order of magnitude /// faster. The point is to guard against regressions that are -/// super-linear in statement count, not to benchmark. +/// super-linear in statement count, not to benchmark. Budget widened +/// from 20 s after the pitboss parallel `cargo test` invocation surfaced +/// 24-25 s wall time under shared-runner CPU pressure even though the +/// isolated test runs in ~3.7 s. #[test] fn very_long_single_line_parses() { run_on_prod_stack(|| { @@ -275,7 +283,7 @@ fn very_long_single_line_parses() { let path = Path::new("long_line.js"); let cfg = hostile_cfg(); - let _ = with_time_budget(Duration::from_secs(20), "long line parse", || { + let _ = with_time_budget(Duration::from_secs(40), "long line parse", || { run_rules_on_bytes(s.as_bytes(), path, &cfg, None, None) .expect("long-line file should parse") }); @@ -348,7 +356,10 @@ fn deeply_nested_if_statements_do_not_stack_overflow() { /// Lots of small functions in one file stresses the pass-1/pass-2 bookkeeping /// (summary extraction, callgraph build). 2 000 functions is cheap but -/// plausible for generated code. +/// plausible for generated code. Budget widened from 15 s after the +/// pitboss parallel `cargo test` invocation surfaced 15.3 s under +/// shared-runner CPU pressure even though the isolated test runs in +/// ~3.7 s. #[test] fn many_small_functions_do_not_explode() { let mut s = String::with_capacity(2000 * 32); @@ -358,7 +369,7 @@ fn many_small_functions_do_not_explode() { let path = Path::new("many_funcs.js"); let cfg = hostile_cfg(); - let _ = with_time_budget(Duration::from_secs(15), "many-funcs scan", || { + let _ = with_time_budget(Duration::from_secs(30), "many-funcs scan", || { run_rules_on_bytes(s.as_bytes(), path, &cfg, None, None) .expect("many-functions file should scan") });