From fd50549582630c44c56fb63eb59e7f5e7effa2d5 Mon Sep 17 00:00:00 2001 From: elipeter Date: Fri, 22 May 2026 11:31:17 -0500 Subject: [PATCH] refactor(dynamic): ensure unique workdir names to avoid conflicts, improve Java sibling stub handling, and enhance comments --- src/dynamic/harness.rs | 146 +++++++++++++++++++++++++++++++- src/dynamic/sandbox/mod.rs | 13 +-- src/main.rs | 1 + tests/common/fixture_harness.rs | 21 ++--- 4 files changed, 157 insertions(+), 24 deletions(-) diff --git a/src/dynamic/harness.rs b/src/dynamic/harness.rs index 44306e6d..8b1cdd43 100644 --- a/src/dynamic/harness.rs +++ b/src/dynamic/harness.rs @@ -18,6 +18,10 @@ use crate::dynamic::spec::HarnessSpec; use crate::evidence::UnsupportedReason; use std::fs; use std::path::{Path, PathBuf}; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::time::{SystemTime, UNIX_EPOCH}; + +static WORKDIR_COUNTER: AtomicU64 = AtomicU64::new(0); /// A built harness ready to hand off to the sandbox. #[derive(Debug, Clone)] @@ -56,12 +60,19 @@ pub fn build(spec: &HarnessSpec) -> Result { /// Write the harness source to a temporary working directory. /// -/// On Unix we prefer `/tmp/nyx-harness/{spec_hash}` over `env::temp_dir()` +/// On Unix we prefer `/tmp/nyx-harness/{spec_hash}-p{pid}-r{seq}-t{time}` +/// over `env::temp_dir()` /// because macOS' `$TMPDIR` resolves to `/var/folders/.../T/` — deep enough /// that traversal payloads like `../../../../etc/passwd` cannot escape to /// `/` from the workdir, which masks path-traversal verdicts. `/tmp` is /// shallow (resolves to `/private/tmp` on macOS, `/tmp` on Linux) and keeps /// payload depth assumptions portable. +/// +/// The per-run suffix is intentional: the workdir contains mutable build +/// products, probe channels, and sometimes a long-lived Docker container +/// mount. Reusing `/tmp/nyx-harness/{spec_hash}` across concurrent +/// verifier processes lets one run overwrite or delete another run's Java +/// classes while the JVM is starting. fn stage_harness( spec: &HarnessSpec, harness_src: &lang::HarnessSource, @@ -71,7 +82,7 @@ fn stage_harness( } else { std::env::temp_dir().join("nyx-harness") }; - let workdir = base_dir.join(&spec.spec_hash); + let workdir = unique_workdir(&base_dir, &spec.spec_hash); fs::create_dir_all(&workdir)?; // Write harness source (create parent dir if needed, e.g. "src/main.rs"). @@ -92,10 +103,44 @@ fn stage_harness( // Copy the entry file into the workdir so the harness can import/include it. copy_entry_file(spec, &workdir, harness_src.entry_subpath.as_deref()); + copy_java_sibling_sources(spec, &workdir); Ok(workdir) } +fn unique_workdir(base_dir: &Path, spec_hash: &str) -> PathBuf { + let seq = WORKDIR_COUNTER.fetch_add(1, Ordering::Relaxed); + let pid = std::process::id(); + let nanos = SystemTime::now() + .duration_since(UNIX_EPOCH) + .map(|d| d.as_nanos()) + .unwrap_or(0); + base_dir.join(format!( + "{}-p{pid}-r{seq:016x}-t{nanos:x}", + safe_workdir_component(spec_hash) + )) +} + +fn safe_workdir_component(input: &str) -> String { + let mut out = String::with_capacity(input.len().max(1)); + for b in input.bytes() { + if b.is_ascii_alphanumeric() || matches!(b, b'.' | b'_' | b'-') { + out.push(b as char); + } else { + out.push('_'); + } + } + if out.is_empty() { + out.push_str("unknown"); + } + if out.len() > 80 { + let digest = blake3::hash(input.as_bytes()); + let hex = digest.to_hex(); + out = format!("{}-{}", &out[..80], &hex[..16]); + } + out +} + /// Copy the entry source file to the workdir. /// /// `entry_subpath` controls the destination: @@ -135,6 +180,44 @@ fn copy_entry_file(spec: &HarnessSpec, workdir: &Path, entry_subpath: Option<&st } } +/// Java shape fixtures often keep tiny annotation / framework stubs next to +/// `Vuln.java` or `Benign.java`. Stage those siblings with the entry file so +/// each unique workdir is self-contained, while skipping the opposite fixture +/// variant to avoid duplicate public-class declarations in corpus tests. +fn copy_java_sibling_sources(spec: &HarnessSpec, workdir: &Path) { + if spec.lang != crate::symbol::Lang::Java { + return; + } + let entry = PathBuf::from(&spec.entry_file); + let Some(parent) = entry.parent() else { + return; + }; + let Some(entry_name) = entry.file_name().and_then(|n| n.to_str()) else { + return; + }; + let alt_name = match entry_name { + "Vuln.java" => "Benign.java", + "Benign.java" => "Vuln.java", + _ => return, + }; + let Ok(entries) = fs::read_dir(parent) else { + return; + }; + for item in entries.flatten() { + let p = item.path(); + if !p.extension().map(|e| e == "java").unwrap_or(false) { + continue; + } + let Some(name) = p.file_name().and_then(|n| n.to_str()) else { + continue; + }; + if name == entry_name || name == alt_name { + continue; + } + let _ = fs::copy(&p, workdir.join(name)); + } +} + /// Extract the source of the entry file (for repro bundles). Best-effort. fn extract_entry_source(spec: &HarnessSpec) -> String { let candidates = [ @@ -231,4 +314,63 @@ mod tests { assert!(harness.workdir.join("harness.py").exists()); assert!(!harness.source.is_empty()); } + + #[test] + fn build_uses_unique_flat_workdir_for_same_spec_hash() { + let spec = HarnessSpec { + finding_id: "0000000000000001".into(), + entry_file: "src/app.py".into(), + entry_name: "login".into(), + entry_kind: EntryKind::Function, + lang: Lang::Python, + toolchain_id: "python-3".into(), + payload_slot: PayloadSlot::Param(0), + expected_cap: Cap::SQL_QUERY, + constraint_hints: vec![], + sink_file: "src/app.py".into(), + sink_line: 10, + spec_hash: "test0000abcd1234".into(), + derivation: crate::dynamic::spec::SpecDerivationStrategy::FromFlowSteps, + stubs_required: vec![], + framework: None, + java_toolchain: crate::dynamic::spec::JavaToolchain::default(), + }; + let first = build(&spec).unwrap(); + let second = build(&spec).unwrap(); + assert_ne!(first.workdir, second.workdir); + assert_eq!(first.workdir.parent(), second.workdir.parent()); + } + + #[test] + fn build_java_stages_sibling_stubs_without_alt_fixture() { + let tmp = tempfile::TempDir::new().unwrap(); + let vuln = tmp.path().join("Vuln.java"); + fs::write(&vuln, "public class Vuln {}\n").unwrap(); + fs::write(tmp.path().join("Helper.java"), "class Helper {}\n").unwrap(); + fs::write(tmp.path().join("Benign.java"), "public class Benign {}\n").unwrap(); + + let spec = HarnessSpec { + finding_id: "0000000000000001".into(), + entry_file: vuln.to_string_lossy().into_owned(), + entry_name: "run".into(), + entry_kind: EntryKind::Function, + lang: Lang::Java, + toolchain_id: "java-21".into(), + payload_slot: PayloadSlot::Param(0), + expected_cap: Cap::XXE, + constraint_hints: vec![], + sink_file: vuln.to_string_lossy().into_owned(), + sink_line: 1, + spec_hash: "javatest00000001".into(), + derivation: crate::dynamic::spec::SpecDerivationStrategy::FromFlowSteps, + stubs_required: vec![], + framework: None, + java_toolchain: crate::dynamic::spec::JavaToolchain::default(), + }; + + let harness = build(&spec).unwrap(); + assert!(harness.workdir.join("Vuln.java").exists()); + assert!(harness.workdir.join("Helper.java").exists()); + assert!(!harness.workdir.join("Benign.java").exists()); + } } diff --git a/src/dynamic/sandbox/mod.rs b/src/dynamic/sandbox/mod.rs index ccadc62e..edada166 100644 --- a/src/dynamic/sandbox/mod.rs +++ b/src/dynamic/sandbox/mod.rs @@ -722,15 +722,16 @@ fn register_exit_cleanup() { } fn workdir_to_container_name(workdir: &Path) -> String { - // The workdir is /tmp/nyx-harness/{spec_hash}; the spec_hash is the last - // path component (16-char hex). Use it directly for a readable name. - let spec_hash = workdir + // The harness workdir's final path component is a sanitized, readable run + // id derived from the spec hash plus a per-run suffix. Use it directly so + // two concurrent builds for the same finding do not share a container. + let run_id = workdir .file_name() .and_then(|n| n.to_str()) .unwrap_or("unknown"); - // Container names: [a-zA-Z0-9_.-], must not start with dot or dash. - // spec_hash is lowercase hex (0-9a-f); safe to use directly. - format!("nyx-{spec_hash}") + // Container names: [a-zA-Z0-9_.-], must not start with dot or dash. The + // `nyx-` prefix provides a safe first character. + format!("nyx-{run_id}") } /// Docker image tag for a Python toolchain ID (e.g. `python-3.11`). diff --git a/src/main.rs b/src/main.rs index 100830b0..fbd21ef6 100644 --- a/src/main.rs +++ b/src/main.rs @@ -20,6 +20,7 @@ fn init_tracing() { let fmt_layer = tracing_fmt::layer() .pretty() + .with_writer(std::io::stderr) .with_thread_ids(true) .with_timer(time::UtcTime::rfc_3339()); diff --git a/tests/common/fixture_harness.rs b/tests/common/fixture_harness.rs index 9f19101e..24085f61 100644 --- a/tests/common/fixture_harness.rs +++ b/tests/common/fixture_harness.rs @@ -497,22 +497,11 @@ pub fn run_shape_fixture_lang( // Phase 14: Java shape fixtures bundle annotation / type stubs as // sibling `*.java` files alongside `Vuln.java` / `Benign.java`. - // The harness builder owns `/tmp/nyx-harness//` and only - // copies the entry file + extra_files — it never walks the entry - // file's parent dir. Pre-create the workdir and stage every - // sibling stub there so the build sandbox's `javac *.java` step - // resolves the annotation / type references without pulling in any - // Maven deps. Skip the alternate Vuln/Benign file to keep public - // class declarations from colliding with the running variant. + // Stage those sidecars next to the temp-copied entry file so the + // harness builder can copy them into its per-run workdir. Skip the + // alternate Vuln/Benign file to keep public class declarations from + // colliding with the running variant. if matches!(lang, nyx_scanner::symbol::Lang::Java) { - let workdir = std::path::PathBuf::from("/tmp/nyx-harness").join(&spec.spec_hash); - // Wipe any prior contents so stale `.java` / `.class` files - // from previous emitter revisions cannot bleed into this run. - // `prepare_java` globs every `*.java` in the workdir — leaving - // an obsolete `Entry.java` next to the new `Vuln.java` produces - // a duplicate-class compile error. - let _ = std::fs::remove_dir_all(&workdir); - let _ = std::fs::create_dir_all(&workdir); let alt_file = if file == "Vuln.java" { "Benign.java" } else if file == "Benign.java" { @@ -531,7 +520,7 @@ pub fn run_shape_fixture_lang( continue; } if p.extension().map(|e| e == "java").unwrap_or(false) { - let _ = std::fs::copy(&p, workdir.join(&name)); + let _ = std::fs::copy(&p, tmp.path().join(&name)); } } }