refactor(dynamic): ensure unique workdir names to avoid conflicts, improve Java sibling stub handling, and enhance comments

This commit is contained in:
elipeter 2026-05-22 11:31:17 -05:00
parent 32211079a0
commit fd50549582
4 changed files with 157 additions and 24 deletions

View file

@ -18,6 +18,10 @@ use crate::dynamic::spec::HarnessSpec;
use crate::evidence::UnsupportedReason;
use std::fs;
use std::path::{Path, PathBuf};
use std::sync::atomic::{AtomicU64, Ordering};
use std::time::{SystemTime, UNIX_EPOCH};
static WORKDIR_COUNTER: AtomicU64 = AtomicU64::new(0);
/// A built harness ready to hand off to the sandbox.
#[derive(Debug, Clone)]
@ -56,12 +60,19 @@ pub fn build(spec: &HarnessSpec) -> Result<BuiltHarness, HarnessError> {
/// Write the harness source to a temporary working directory.
///
/// On Unix we prefer `/tmp/nyx-harness/{spec_hash}` over `env::temp_dir()`
/// On Unix we prefer `/tmp/nyx-harness/{spec_hash}-p{pid}-r{seq}-t{time}`
/// over `env::temp_dir()`
/// because macOS' `$TMPDIR` resolves to `/var/folders/.../T/` — deep enough
/// that traversal payloads like `../../../../etc/passwd` cannot escape to
/// `/` from the workdir, which masks path-traversal verdicts. `/tmp` is
/// shallow (resolves to `/private/tmp` on macOS, `/tmp` on Linux) and keeps
/// payload depth assumptions portable.
///
/// The per-run suffix is intentional: the workdir contains mutable build
/// products, probe channels, and sometimes a long-lived Docker container
/// mount. Reusing `/tmp/nyx-harness/{spec_hash}` across concurrent
/// verifier processes lets one run overwrite or delete another run's Java
/// classes while the JVM is starting.
fn stage_harness(
spec: &HarnessSpec,
harness_src: &lang::HarnessSource,
@ -71,7 +82,7 @@ fn stage_harness(
} else {
std::env::temp_dir().join("nyx-harness")
};
let workdir = base_dir.join(&spec.spec_hash);
let workdir = unique_workdir(&base_dir, &spec.spec_hash);
fs::create_dir_all(&workdir)?;
// Write harness source (create parent dir if needed, e.g. "src/main.rs").
@ -92,10 +103,44 @@ fn stage_harness(
// Copy the entry file into the workdir so the harness can import/include it.
copy_entry_file(spec, &workdir, harness_src.entry_subpath.as_deref());
copy_java_sibling_sources(spec, &workdir);
Ok(workdir)
}
fn unique_workdir(base_dir: &Path, spec_hash: &str) -> PathBuf {
let seq = WORKDIR_COUNTER.fetch_add(1, Ordering::Relaxed);
let pid = std::process::id();
let nanos = SystemTime::now()
.duration_since(UNIX_EPOCH)
.map(|d| d.as_nanos())
.unwrap_or(0);
base_dir.join(format!(
"{}-p{pid}-r{seq:016x}-t{nanos:x}",
safe_workdir_component(spec_hash)
))
}
fn safe_workdir_component(input: &str) -> String {
let mut out = String::with_capacity(input.len().max(1));
for b in input.bytes() {
if b.is_ascii_alphanumeric() || matches!(b, b'.' | b'_' | b'-') {
out.push(b as char);
} else {
out.push('_');
}
}
if out.is_empty() {
out.push_str("unknown");
}
if out.len() > 80 {
let digest = blake3::hash(input.as_bytes());
let hex = digest.to_hex();
out = format!("{}-{}", &out[..80], &hex[..16]);
}
out
}
/// Copy the entry source file to the workdir.
///
/// `entry_subpath` controls the destination:
@ -135,6 +180,44 @@ fn copy_entry_file(spec: &HarnessSpec, workdir: &Path, entry_subpath: Option<&st
}
}
/// Java shape fixtures often keep tiny annotation / framework stubs next to
/// `Vuln.java` or `Benign.java`. Stage those siblings with the entry file so
/// each unique workdir is self-contained, while skipping the opposite fixture
/// variant to avoid duplicate public-class declarations in corpus tests.
fn copy_java_sibling_sources(spec: &HarnessSpec, workdir: &Path) {
if spec.lang != crate::symbol::Lang::Java {
return;
}
let entry = PathBuf::from(&spec.entry_file);
let Some(parent) = entry.parent() else {
return;
};
let Some(entry_name) = entry.file_name().and_then(|n| n.to_str()) else {
return;
};
let alt_name = match entry_name {
"Vuln.java" => "Benign.java",
"Benign.java" => "Vuln.java",
_ => return,
};
let Ok(entries) = fs::read_dir(parent) else {
return;
};
for item in entries.flatten() {
let p = item.path();
if !p.extension().map(|e| e == "java").unwrap_or(false) {
continue;
}
let Some(name) = p.file_name().and_then(|n| n.to_str()) else {
continue;
};
if name == entry_name || name == alt_name {
continue;
}
let _ = fs::copy(&p, workdir.join(name));
}
}
/// Extract the source of the entry file (for repro bundles). Best-effort.
fn extract_entry_source(spec: &HarnessSpec) -> String {
let candidates = [
@ -231,4 +314,63 @@ mod tests {
assert!(harness.workdir.join("harness.py").exists());
assert!(!harness.source.is_empty());
}
#[test]
fn build_uses_unique_flat_workdir_for_same_spec_hash() {
let spec = HarnessSpec {
finding_id: "0000000000000001".into(),
entry_file: "src/app.py".into(),
entry_name: "login".into(),
entry_kind: EntryKind::Function,
lang: Lang::Python,
toolchain_id: "python-3".into(),
payload_slot: PayloadSlot::Param(0),
expected_cap: Cap::SQL_QUERY,
constraint_hints: vec![],
sink_file: "src/app.py".into(),
sink_line: 10,
spec_hash: "test0000abcd1234".into(),
derivation: crate::dynamic::spec::SpecDerivationStrategy::FromFlowSteps,
stubs_required: vec![],
framework: None,
java_toolchain: crate::dynamic::spec::JavaToolchain::default(),
};
let first = build(&spec).unwrap();
let second = build(&spec).unwrap();
assert_ne!(first.workdir, second.workdir);
assert_eq!(first.workdir.parent(), second.workdir.parent());
}
#[test]
fn build_java_stages_sibling_stubs_without_alt_fixture() {
let tmp = tempfile::TempDir::new().unwrap();
let vuln = tmp.path().join("Vuln.java");
fs::write(&vuln, "public class Vuln {}\n").unwrap();
fs::write(tmp.path().join("Helper.java"), "class Helper {}\n").unwrap();
fs::write(tmp.path().join("Benign.java"), "public class Benign {}\n").unwrap();
let spec = HarnessSpec {
finding_id: "0000000000000001".into(),
entry_file: vuln.to_string_lossy().into_owned(),
entry_name: "run".into(),
entry_kind: EntryKind::Function,
lang: Lang::Java,
toolchain_id: "java-21".into(),
payload_slot: PayloadSlot::Param(0),
expected_cap: Cap::XXE,
constraint_hints: vec![],
sink_file: vuln.to_string_lossy().into_owned(),
sink_line: 1,
spec_hash: "javatest00000001".into(),
derivation: crate::dynamic::spec::SpecDerivationStrategy::FromFlowSteps,
stubs_required: vec![],
framework: None,
java_toolchain: crate::dynamic::spec::JavaToolchain::default(),
};
let harness = build(&spec).unwrap();
assert!(harness.workdir.join("Vuln.java").exists());
assert!(harness.workdir.join("Helper.java").exists());
assert!(!harness.workdir.join("Benign.java").exists());
}
}

View file

@ -722,15 +722,16 @@ fn register_exit_cleanup() {
}
fn workdir_to_container_name(workdir: &Path) -> String {
// The workdir is /tmp/nyx-harness/{spec_hash}; the spec_hash is the last
// path component (16-char hex). Use it directly for a readable name.
let spec_hash = workdir
// The harness workdir's final path component is a sanitized, readable run
// id derived from the spec hash plus a per-run suffix. Use it directly so
// two concurrent builds for the same finding do not share a container.
let run_id = workdir
.file_name()
.and_then(|n| n.to_str())
.unwrap_or("unknown");
// Container names: [a-zA-Z0-9_.-], must not start with dot or dash.
// spec_hash is lowercase hex (0-9a-f); safe to use directly.
format!("nyx-{spec_hash}")
// Container names: [a-zA-Z0-9_.-], must not start with dot or dash. The
// `nyx-` prefix provides a safe first character.
format!("nyx-{run_id}")
}
/// Docker image tag for a Python toolchain ID (e.g. `python-3.11`).

View file

@ -20,6 +20,7 @@ fn init_tracing() {
let fmt_layer = tracing_fmt::layer()
.pretty()
.with_writer(std::io::stderr)
.with_thread_ids(true)
.with_timer(time::UtcTime::rfc_3339());

View file

@ -497,22 +497,11 @@ pub fn run_shape_fixture_lang(
// Phase 14: Java shape fixtures bundle annotation / type stubs as
// sibling `*.java` files alongside `Vuln.java` / `Benign.java`.
// The harness builder owns `/tmp/nyx-harness/<spec_hash>/` and only
// copies the entry file + extra_files — it never walks the entry
// file's parent dir. Pre-create the workdir and stage every
// sibling stub there so the build sandbox's `javac *.java` step
// resolves the annotation / type references without pulling in any
// Maven deps. Skip the alternate Vuln/Benign file to keep public
// class declarations from colliding with the running variant.
// Stage those sidecars next to the temp-copied entry file so the
// harness builder can copy them into its per-run workdir. Skip the
// alternate Vuln/Benign file to keep public class declarations from
// colliding with the running variant.
if matches!(lang, nyx_scanner::symbol::Lang::Java) {
let workdir = std::path::PathBuf::from("/tmp/nyx-harness").join(&spec.spec_hash);
// Wipe any prior contents so stale `.java` / `.class` files
// from previous emitter revisions cannot bleed into this run.
// `prepare_java` globs every `*.java` in the workdir — leaving
// an obsolete `Entry.java` next to the new `Vuln.java` produces
// a duplicate-class compile error.
let _ = std::fs::remove_dir_all(&workdir);
let _ = std::fs::create_dir_all(&workdir);
let alt_file = if file == "Vuln.java" {
"Benign.java"
} else if file == "Benign.java" {
@ -531,7 +520,7 @@ pub fn run_shape_fixture_lang(
continue;
}
if p.extension().map(|e| e == "java").unwrap_or(false) {
let _ = std::fs::copy(&p, workdir.join(&name));
let _ = std::fs::copy(&p, tmp.path().join(&name));
}
}
}