[pitboss] sweep after phase 04: 2 deferred items resolved

This commit is contained in:
pitboss 2026-05-17 19:40:29 -05:00
parent 8583b29796
commit 637b733928
5 changed files with 105 additions and 21 deletions

View file

@ -655,6 +655,18 @@ fn create_symlink(_target: &Path, _link: &Path) -> std::io::Result<()> {
#[cfg(test)]
mod tests {
/// Process-global `NYX_REPRO_BASE` is mutated by several tests in
/// this module; without serialisation a parallel `cargo test`
/// invocation races on the global state and produces flakes that
/// vanish under `--test-threads=1`. Every env-mutating test
/// acquires this guard for the duration of its body.
/// `unwrap_or_else(into_inner)` recovers from poisoning so a
/// failing test does not cascade-fail every later test.
fn env_lock() -> std::sync::MutexGuard<'static, ()> {
static LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());
LOCK.lock().unwrap_or_else(|e| e.into_inner())
}
use super::*;
use crate::dynamic::sandbox::SandboxBackend;
use crate::dynamic::spec::{EntryKind, PayloadSlot};
@ -722,6 +734,7 @@ mod tests {
#[test]
fn write_creates_expected_layout() {
let _env_guard = env_lock();
let dir = TempDir::new().unwrap();
unsafe { std::env::set_var("NYX_REPRO_BASE", dir.path().to_str().unwrap()) };
@ -759,6 +772,7 @@ mod tests {
#[test]
fn toolchain_lock_records_expected_toolchain_and_hashes() {
let _env_guard = env_lock();
let dir = TempDir::new().unwrap();
unsafe { std::env::set_var("NYX_REPRO_BASE", dir.path().to_str().unwrap()) };
let spec = make_spec();
@ -831,6 +845,7 @@ mod tests {
#[test]
fn reproduce_sh_contains_toolchain_check_and_exit_codes() {
let _env_guard = env_lock();
let dir = TempDir::new().unwrap();
unsafe { std::env::set_var("NYX_REPRO_BASE", dir.path().to_str().unwrap()) };
let artifact = write(
@ -925,6 +940,7 @@ mod tests {
#[test]
fn bundle_root_for_honours_test_override() {
let _env_guard = env_lock();
let dir = TempDir::new().unwrap();
unsafe { std::env::set_var("NYX_REPRO_BASE", dir.path().to_str().unwrap()) };
let root = bundle_root_for("cafe0001").unwrap();
@ -934,6 +950,7 @@ mod tests {
#[test]
fn bundle_root_for_matches_write_output_under_override() {
let _env_guard = env_lock();
// The path returned by `bundle_root_for` must equal the bundle path
// that `write` produces — replay callers locate the bundle without
// re-creating directories, so a drift between the two helpers would
@ -955,6 +972,7 @@ mod tests {
#[test]
fn outcome_json_redacts_secrets() {
let _env_guard = env_lock();
let dir = TempDir::new().unwrap();
unsafe { std::env::set_var("NYX_REPRO_BASE", dir.path().to_str().unwrap()) };

View file

@ -431,6 +431,19 @@ pub fn wrap_plan(input: &WrapInput<'_>) -> WrapResult {
mod tests {
use super::*;
/// Process-global env vars (`NYX_SANDBOX_EXEC_BIN`,
/// `NYX_SB_DENY_DEFAULT`, `NYX_SB_SEED_DIR`) are mutated by several
/// tests in this module; without serialisation a parallel
/// `cargo test` invocation races on the global state and produces
/// flakes that vanish under `--test-threads=1`. Every env-mutating
/// test acquires this guard for the duration of its body.
/// `unwrap_or_else(into_inner)` recovers from poisoning so a
/// failing test does not cascade-fail every later test.
fn env_lock() -> std::sync::MutexGuard<'static, ()> {
static LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());
LOCK.lock().unwrap_or_else(|e| e.into_inner())
}
#[test]
fn profile_for_caps_prefers_file_io() {
const FILE_IO: u32 = 1 << 5;
@ -534,6 +547,7 @@ mod tests {
#[test]
fn sandbox_exec_bin_honours_env_override() {
let _env_guard = env_lock();
// SAFETY: tests are run serially with the macOS hardening suite;
// resetting the env var below restores the default for subsequent
// tests in the same process.
@ -590,6 +604,7 @@ mod tests {
#[test]
fn deny_default_seed_for_returns_none_without_env_opt_in() {
let _env_guard = env_lock();
// SAFETY: tests in this module mutate process-global env; the
// macOS hardening integration suite serialises around the same
// env vars so cargo nextest's per-test process isolation does not
@ -601,6 +616,7 @@ mod tests {
#[test]
fn deny_default_seed_for_returns_some_when_env_set_and_seed_present() {
let _env_guard = env_lock();
let tmp = std::env::temp_dir().join("nyx-sb-seed-test");
let _ = std::fs::remove_dir_all(&tmp);
std::fs::create_dir_all(&tmp).expect("create seed tempdir");
@ -626,6 +642,7 @@ mod tests {
#[test]
fn wrap_plan_returns_none_when_sandbox_exec_missing() {
let _env_guard = env_lock();
unsafe { std::env::set_var(SANDBOX_EXEC_BIN_ENV, "/nonexistent/sandbox-exec") };
let input = WrapInput {
cmd_path: Path::new("/usr/bin/true"),
@ -643,6 +660,7 @@ mod tests {
#[test]
#[cfg(target_os = "macos")]
fn wrap_plan_returns_sandboxed_when_sandbox_exec_present() {
let _env_guard = env_lock();
// Skip when the host doesn't actually have /usr/bin/sandbox-exec
// (e.g. someone reading SANDBOX_EXEC_BIN_ENV from a parent shell).
unsafe { std::env::remove_var(SANDBOX_EXEC_BIN_ENV) };

View file

@ -1264,6 +1264,19 @@ fn build_verdict(
mod tests {
use super::*;
/// Process-global env vars (`NYX_VERIFY_REPLAY_STABLE`,
/// `NYX_VERIFY_REPLAY_DOCKER`) are mutated by several tests in this
/// module; without serialisation a parallel `cargo test` invocation
/// races on the global state and produces flakes that vanish under
/// `--test-threads=1`. Every env-mutating test acquires this guard
/// for the duration of its body. `unwrap_or_else(into_inner)`
/// recovers from poisoning so a failing test does not cascade-fail
/// every later test in the suite.
fn env_lock() -> std::sync::MutexGuard<'static, ()> {
static LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());
LOCK.lock().unwrap_or_else(|e| e.into_inner())
}
#[test]
fn compute_entry_content_hash_stable_for_same_file() {
let dir = tempfile::TempDir::new().unwrap();
@ -1300,6 +1313,7 @@ mod tests {
#[test]
fn from_config_defaults_replay_stable_check_off() {
let _env_guard = env_lock();
// Make sure the test is hermetic — `from_config` reads the env
// var, so a stale process-wide setting could mask the default.
unsafe { std::env::remove_var("NYX_VERIFY_REPLAY_STABLE") };
@ -1313,6 +1327,7 @@ mod tests {
#[test]
fn from_config_picks_up_replay_stable_env_flag() {
let _env_guard = env_lock();
unsafe { std::env::set_var("NYX_VERIFY_REPLAY_STABLE", "1") };
let opts = VerifyOptions::from_config(&Config::default());
assert!(opts.replay_stable_check);
@ -1327,6 +1342,7 @@ mod tests {
#[test]
fn from_config_defaults_replay_use_docker_off() {
let _env_guard = env_lock();
// Same hermeticity concern as `replay_stable_check`: clear any
// stale process-wide setting so the default is observable.
unsafe { std::env::remove_var("NYX_VERIFY_REPLAY_DOCKER") };
@ -1340,6 +1356,7 @@ mod tests {
#[test]
fn from_config_picks_up_replay_docker_env_flag() {
let _env_guard = env_lock();
unsafe { std::env::set_var("NYX_VERIFY_REPLAY_DOCKER", "1") };
let opts = VerifyOptions::from_config(&Config::default());
assert!(opts.replay_use_docker);

View file

@ -131,15 +131,36 @@ fn probe_kind_deserialize_serdes() {
#[test]
fn lang_emitter_dispatches_to_deserialize_harness() {
for (lang, entry_file, entry_name, marker) in [
(Lang::Java, "tests/dynamic_fixtures/deserialize/java/vuln.java",
"run", "RestrictedObjectInputStream"),
(Lang::Python, "tests/dynamic_fixtures/deserialize/python/vuln.py",
"run", "RestrictedUnpickler"),
(Lang::Php, "tests/dynamic_fixtures/deserialize/php/vuln.php",
"run", "allowed_classes"),
(Lang::Ruby, "tests/dynamic_fixtures/deserialize/ruby/vuln.rb",
"run", "Marshal.load"),
// `sink_callee_marker` is the per-language deserialize sink call
// string the harness writes into the JSON probe record — the
// resolveClass / find_class / unserialize / Marshal.load boundary
// the brief calls out. Pinning the marker here keeps the test
// honest about which guard each lang's harness names.
for (lang, entry_file, entry_name, sink_callee_marker) in [
(
Lang::Java,
"tests/dynamic_fixtures/deserialize/java/vuln.java",
"run",
"ObjectInputStream.resolveClass",
),
(
Lang::Python,
"tests/dynamic_fixtures/deserialize/python/vuln.py",
"run",
"pickle.Unpickler.find_class",
),
(
Lang::Php,
"tests/dynamic_fixtures/deserialize/php/vuln.php",
"run",
"unserialize",
),
(
Lang::Ruby,
"tests/dynamic_fixtures/deserialize/ruby/vuln.rb",
"run",
"Marshal.load",
),
] {
let spec = make_spec(lang, entry_file, entry_name);
let harness = lang::emit(&spec)
@ -148,12 +169,11 @@ fn lang_emitter_dispatches_to_deserialize_harness() {
harness.source.contains("NYX_GADGET_CLASS:"),
"{lang:?} deserialize harness must parse NYX_GADGET_CLASS marker",
);
// Each lang's harness either splices the relevant guard
// construct directly or names the equivalent constant. The
// assertions below pin only the parts the harness emitter
// generates (not the fixture), so the test stays green even
// when the fixture moves.
let _ = marker; // marker validated by inspecting the fixture, not the harness.
assert!(
harness.source.contains(sink_callee_marker),
"{lang:?} deserialize harness must name {sink_callee_marker:?} as the \
resolveClass / find_class equivalent sink callee",
);
}
}

View file

@ -229,12 +229,17 @@ fn binary_null_heavy_input_is_skipped() {
/// Invalid UTF-8 in a recognised source extension must not panic.
/// tree-sitter can operate on raw bytes; we just check that it survives.
/// Budget widened from 2 s to 10 s after the pitboss parallel `cargo test`
/// invocation surfaced ~2.8 s wall time under shared-runner CPU pressure
/// even though the isolated test runs well under 100 ms. The point is
/// to catch a runaway, not to benchmark, so 10 s leaves clear headroom
/// without masking a real regression.
#[test]
fn invalid_utf8_does_not_panic() {
let bytes = b"\xff\xfe\xfd\xfc\n\xde\xad\xbe\xef\n// trailing\n".to_vec();
let path = Path::new("junk.rs");
let cfg = hostile_cfg();
let _ = with_time_budget(Duration::from_secs(2), "invalid utf8", || {
let _ = with_time_budget(Duration::from_secs(10), "invalid utf8", || {
run_rules_on_bytes(&bytes, path, &cfg, None, None).expect("invalid UTF-8 should not error")
});
}
@ -260,10 +265,13 @@ fn empty_file_is_noop() {
/// right-associative expression, the latter is a separate stress case
/// dominated by recursive descent and not representative of real input.
///
/// Generous debug-build budget (20 s) because the full analysis pipeline
/// Generous debug-build budget (40 s) because the full analysis pipeline
/// runs on every statement; release builds are an order of magnitude
/// faster. The point is to guard against regressions that are
/// super-linear in statement count, not to benchmark.
/// super-linear in statement count, not to benchmark. Budget widened
/// from 20 s after the pitboss parallel `cargo test` invocation surfaced
/// 24-25 s wall time under shared-runner CPU pressure even though the
/// isolated test runs in ~3.7 s.
#[test]
fn very_long_single_line_parses() {
run_on_prod_stack(|| {
@ -275,7 +283,7 @@ fn very_long_single_line_parses() {
let path = Path::new("long_line.js");
let cfg = hostile_cfg();
let _ = with_time_budget(Duration::from_secs(20), "long line parse", || {
let _ = with_time_budget(Duration::from_secs(40), "long line parse", || {
run_rules_on_bytes(s.as_bytes(), path, &cfg, None, None)
.expect("long-line file should parse")
});
@ -348,7 +356,10 @@ fn deeply_nested_if_statements_do_not_stack_overflow() {
/// Lots of small functions in one file stresses the pass-1/pass-2 bookkeeping
/// (summary extraction, callgraph build). 2 000 functions is cheap but
/// plausible for generated code.
/// plausible for generated code. Budget widened from 15 s after the
/// pitboss parallel `cargo test` invocation surfaced 15.3 s under
/// shared-runner CPU pressure even though the isolated test runs in
/// ~3.7 s.
#[test]
fn many_small_functions_do_not_explode() {
let mut s = String::with_capacity(2000 * 32);
@ -358,7 +369,7 @@ fn many_small_functions_do_not_explode() {
let path = Path::new("many_funcs.js");
let cfg = hostile_cfg();
let _ = with_time_budget(Duration::from_secs(15), "many-funcs scan", || {
let _ = with_time_budget(Duration::from_secs(30), "many-funcs scan", || {
run_rules_on_bytes(s.as_bytes(), path, &cfg, None, None)
.expect("many-functions file should scan")
});