Reviewer notes (not part of the patch: `git apply` and `patch` skip any text
ahead of the first `diff --git` header).

Summary of the change set:
  * harness.rs     - `copy_entry_file` now always overwrites the staged entry file.
  * javascript.rs  - `entry_module_name` always returns "entry".
  * rust.rs        - the generated `b64_decode` table is built with a `while` loop.
  * sandbox.rs     - `run_process` resolves bare command names against the host PATH.
  * telemetry.rs   - new `TelemetryEvent::no_spec` constructor and `path` field.
  * verify.rs      - emits a `no_spec` telemetry event on both early-exit verdicts.
  * fixtures       - Java/JS/Rust fixtures switch to `/tmp`, `id`, `true`, and add
                     sink-hit probes.

Reconstruction notes:
  * Line breaks were restored from the hunk headers; every hunk's old/new line
    counts balance. Indentation depth was rebuilt by convention and may differ
    from the target files, so apply with `--ignore-whitespace` if needed.
  * Generic arguments that had been stripped from the text were restored from
    their usage inside the same hunks: `Option<Vec<u8>>` (b64_decode),
    `Option<std::path::PathBuf>` (find_in_host_path), `Option<String>`
    (TelemetryEvent::inconclusive_reason, TelemetryEvent::path),
    `Option<InconclusiveReason>` (no_spec parameter).
  * ASSUMPTION, confirm against verify.rs: the element type of `strategies` and
    the two-line split of that statement are inferred from the hunk line count.

NOTE(review): in sandbox.rs the new `find_in_host_path` is inserted between the
doc comment of `harness_is_interpreted` and the function itself, so the three
"Interpreted harnesses ..." doc lines now attach to the new helper and
`harness_is_interpreted` loses its documentation. Move the helper above that
comment block or below the function.

NOTE(review): `no_spec_event_serialises_inconclusive_reason` feeds `/tmp/x.kt`
with the hint "kotlin source" but asserts `"lang":"java"`. Confirm that
`Lang::from_extension("kt")` really maps to the Java language id.

diff --git a/src/dynamic/harness.rs b/src/dynamic/harness.rs
index 50b153bf..9a747b49 100644
--- a/src/dynamic/harness.rs
+++ b/src/dynamic/harness.rs
@@ -102,6 +102,12 @@ fn stage_harness(
 /// - `None` → `workdir/{filename}` (Python default: import by module name).
 /// - `Some("src/entry.rs")` → `workdir/src/entry.rs` (Rust: `mod entry;`).
 ///
+/// Always overwrites the destination so the per-language build hash
+/// (`compute_*_source_hash`) reflects the current on-disk source. Leaving a
+/// stale destination in place would let the build cache return class files
+/// built from a previous fixture revision even after the source on disk has
+/// changed.
+///
 /// Best-effort: silently skips if the file cannot be found or copied.
 fn copy_entry_file(spec: &HarnessSpec, workdir: &PathBuf, entry_subpath: Option<&str>) {
     let candidates = [
@@ -123,9 +129,7 @@ fn copy_entry_file(spec: &HarnessSpec, workdir: &PathBuf, entry_subpath: Option<
                 };
                 workdir.join(fname)
             };
-            if !dst.exists() {
-                let _ = fs::copy(src, &dst);
-            }
+            let _ = fs::copy(src, &dst);
             return;
         }
     }
diff --git a/src/dynamic/lang/javascript.rs b/src/dynamic/lang/javascript.rs
index 92dae13c..c9d8ae89 100644
--- a/src/dynamic/lang/javascript.rs
+++ b/src/dynamic/lang/javascript.rs
@@ -135,21 +135,12 @@ fn build_call(spec: &HarnessSpec, _module: &str, func: &str) -> (String, String)
 /// Derive the JS module name from an entry file path.
 ///
 /// `"src/handlers/login.js"` → `"login"` (basename without extension).
-pub fn entry_module_name(entry_file: &str) -> String {
-    let base = entry_file
-        .rsplit('/')
-        .next()
-        .unwrap_or(entry_file)
-        .rsplit('\\')
-        .next()
-        .unwrap_or(entry_file);
-    // Strip known JS/TS extensions.
-    for ext in &[".js", ".mjs", ".cjs", ".ts", ".mts"] {
-        if let Some(stem) = base.strip_suffix(ext) {
-            return stem.to_owned();
-        }
-    }
-    base.to_owned()
+pub fn entry_module_name(_entry_file: &str) -> String {
+    // The harness always `require('./entry')` because `entry_module_filename`
+    // unconditionally copies the source to `entry.js` in the workdir. Keeping
+    // these two helpers in sync prevents a "Cannot find module" import error
+    // when the fixture's on-disk filename is anything other than `entry.js`.
+    "entry".to_owned()
 }
 
 /// Derive the filename for `entry_subpath` from an entry file path.
@@ -240,10 +231,14 @@ mod tests {
     }
 
     #[test]
-    fn entry_module_name_strips_extensions() {
-        assert_eq!(entry_module_name("src/handlers/login.js"), "login");
-        assert_eq!(entry_module_name("app.ts"), "app");
-        assert_eq!(entry_module_name("handler.mjs"), "handler");
-        assert_eq!(entry_module_name("no_ext"), "no_ext");
+    fn entry_module_name_is_always_entry_to_match_copy_destination() {
+        // `copy_entry_file` (via `entry_module_filename`) stages every fixture
+        // at `workdir/entry.js`, so `require('./entry')` is the only path the
+        // harness can use without missing-module errors at runtime, regardless
+        // of the source file's original name.
+        assert_eq!(entry_module_name("src/handlers/login.js"), "entry");
+        assert_eq!(entry_module_name("app.ts"), "entry");
+        assert_eq!(entry_module_name("handler.mjs"), "entry");
+        assert_eq!(entry_module_name("no_ext"), "entry");
     }
 }
diff --git a/src/dynamic/lang/rust.rs b/src/dynamic/lang/rust.rs
index aed4e14c..db2e80c3 100644
--- a/src/dynamic/lang/rust.rs
+++ b/src/dynamic/lang/rust.rs
@@ -106,10 +106,14 @@ fn nyx_payload() -> String {{
 /// Minimal base64 decoder (no external deps).
 fn b64_decode(input: &[u8]) -> Option<Vec<u8>> {{
     const TABLE: [u8; 128] = {{
+        // `while` loop (not `for`) so the initializer stays inside what stable
+        // Rust permits in a `const` context: `IntoIterator::into_iter` is not a
+        // const fn, so a `for` loop here fails with E0015.
         let mut t = [255u8; 128];
-        let mut i = 0u8;
-        for &c in b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/" {{
-            t[c as usize] = i;
+        let alphabet: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+        let mut i = 0usize;
+        while i < alphabet.len() {{
+            t[alphabet[i] as usize] = i as u8;
             i += 1;
         }}
         t
diff --git a/src/dynamic/sandbox.rs b/src/dynamic/sandbox.rs
index b542134f..992254bc 100644
--- a/src/dynamic/sandbox.rs
+++ b/src/dynamic/sandbox.rs
@@ -36,6 +36,26 @@ use std::time::{Duration, Instant};
 /// Interpreted harnesses can be run inside a Python/Node Docker image directly.
 /// Compiled harnesses (Rust, Go) are routed to `run_native_binary_docker` on
 /// Linux or to the process backend on other platforms.
+/// Resolve a bare command name to an absolute path by walking the host's
+/// `PATH`. Returns `None` if `PATH` is unset or the name is not present in
+/// any entry as a regular file.
+///
+/// Used by `run_process` so spawn(2) succeeds even after the child
+/// environment has been wiped: macOS' `posix_spawnp` defaults to
+/// `confstr(_CS_PATH)` (`/usr/bin:/bin`) when the child has no `PATH`, which
+/// misses common installs like Homebrew's `/opt/homebrew/bin/node` or
+/// `nvm`-managed binaries under `~/.nvm/...`.
+fn find_in_host_path(name: &str) -> Option<std::path::PathBuf> {
+    let path = std::env::var_os("PATH")?;
+    for dir in std::env::split_paths(&path) {
+        let candidate = dir.join(name);
+        if candidate.is_file() {
+            return Some(candidate);
+        }
+    }
+    None
+}
+
 pub fn harness_is_interpreted(command: &[String]) -> bool {
     let cmd0 = match command.first() {
         Some(c) => c.as_str(),
@@ -975,7 +995,19 @@ fn run_process(
         ))
     })?;
 
-    let mut cmd = Command::new(cmd_name);
+    // Resolve a bare interpreter name against the *host* PATH so the spawn
+    // works even when the child env has been scrubbed (env_clear strips PATH,
+    // so posix_spawnp falls back to confstr(_CS_PATH) which is typically just
+    // `/usr/bin:/bin` on macOS — node/cargo/etc. installed via Homebrew or nvm
+    // are not on that path and would otherwise yield `Spawn(NotFound)`).
+    // Absolute commands pass through unchanged.
+    let resolved_cmd_path = if std::path::Path::new(cmd_name).is_absolute() {
+        std::path::PathBuf::from(cmd_name)
+    } else {
+        find_in_host_path(cmd_name).unwrap_or_else(|| std::path::PathBuf::from(cmd_name))
+    };
+
+    let mut cmd = Command::new(&resolved_cmd_path);
     cmd.args(&harness.command[1..]);
     cmd.current_dir(&harness.workdir);
     cmd.stdout(Stdio::piped());
diff --git a/src/dynamic/telemetry.rs b/src/dynamic/telemetry.rs
index ada290f7..f30a4aa1 100644
--- a/src/dynamic/telemetry.rs
+++ b/src/dynamic/telemetry.rs
@@ -19,14 +19,21 @@
 //! }
 //! ```
 
+use crate::commands::scan::Diag;
 use crate::dynamic::spec::HarnessSpec;
 use crate::evidence::{InconclusiveReason, VerifyStatus};
 use directories::ProjectDirs;
 use std::fs::{self, OpenOptions};
 use std::io::Write;
+use std::path::Path;
 use std::time::Duration;
 
 /// One telemetry event per verdict.
+///
+/// `lang` is `"unknown"` for findings whose language could not be resolved
+/// (e.g. spec derivation failed before `HarnessSpec::lang` was set). Counting
+/// these is the `lang_unknown_count` Phase 02 acceptance asks for:
+/// `grep '"lang":"unknown"' events.jsonl | wc -l`.
 #[derive(Debug, serde::Serialize)]
 pub struct TelemetryEvent {
     pub ts: String,
@@ -41,6 +48,12 @@ pub struct TelemetryEvent {
     pub build_attempts: u32,
     #[serde(skip_serializing_if = "Option::is_none")]
     pub inconclusive_reason: Option<String>,
+    /// Path of the finding's source file, populated for spec-derivation
+    /// failures so downstream consumers can map `lang="unknown"` events back
+    /// to a file. Skipped on successful verdicts (the spec already carries
+    /// `entry_file`).
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub path: Option<String>,
 }
 
 impl TelemetryEvent {
@@ -64,6 +77,49 @@ impl TelemetryEvent {
             duration_ms: duration.as_millis() as u64,
             build_attempts,
             inconclusive_reason: inconclusive_reason.map(|r| format!("{r:?}")),
+            path: None,
+        }
+    }
+
+    /// Telemetry event for findings that never got a `HarnessSpec`.
+    ///
+    /// Used by `verify_finding` when spec derivation fails (lang unresolvable,
+    /// path empty, sink redacted, etc.). Without this path the events log
+    /// silently drops every spec-derivation failure, which breaks the Phase 02
+    /// `lang_unknown_count` aggregation acceptance.
+    ///
+    /// `lang` is best-effort sniffed from `diag.path`'s extension via
+    /// [`crate::symbol::Lang::from_extension`]. When the extension is
+    /// unknown or absent, `lang` is the literal string `"unknown"`.
+    pub fn no_spec(
+        diag: &Diag,
+        status: VerifyStatus,
+        inconclusive_reason: Option<InconclusiveReason>,
+    ) -> Self {
+        let lang = Path::new(&diag.path)
+            .extension()
+            .and_then(|e| e.to_str())
+            .and_then(crate::symbol::Lang::from_extension)
+            .map(|l| l.as_str().to_owned())
+            .unwrap_or_else(|| "unknown".to_owned());
+        let cap = diag
+            .evidence
+            .as_ref()
+            .map(|e| format!("{:?}", e.sink_caps))
+            .unwrap_or_else(|| "0".to_owned());
+        Self {
+            ts: chrono::Utc::now().to_rfc3339(),
+            finding_id: format!("{:016x}", diag.stable_hash),
+            spec_hash: String::new(),
+            lang,
+            cap,
+            status: format!("{status:?}"),
+            toolchain_id: String::new(),
+            toolchain_match: String::new(),
+            duration_ms: 0,
+            build_attempts: 0,
+            inconclusive_reason: inconclusive_reason.map(|r| format!("{r:?}")),
+            path: Some(diag.path.clone()),
         }
     }
 }
@@ -220,6 +276,49 @@ mod tests {
         unsafe { std::env::remove_var("NYX_TELEMETRY_PATH") };
     }
 
+    fn make_diag(path: &str) -> Diag {
+        Diag {
+            stable_hash: 0xdeadbeef_cafebabe,
+            path: path.to_owned(),
+            ..Default::default()
+        }
+    }
+
+    #[test]
+    fn no_spec_event_records_lang_unknown_for_missing_extension() {
+        let diag = make_diag("/tmp/some_script_no_ext");
+        let event = TelemetryEvent::no_spec(&diag, VerifyStatus::Unsupported, None);
+        assert_eq!(event.lang, "unknown");
+        assert_eq!(event.path.as_deref(), Some("/tmp/some_script_no_ext"));
+        assert!(event.spec_hash.is_empty());
+        assert_eq!(event.status, "Unsupported");
+    }
+
+    #[test]
+    fn no_spec_event_sniffs_lang_from_extension_when_present() {
+        let diag = make_diag("/tmp/handler.py");
+        let event = TelemetryEvent::no_spec(&diag, VerifyStatus::Inconclusive, None);
+        assert_eq!(event.lang, "python");
+        assert_eq!(event.path.as_deref(), Some("/tmp/handler.py"));
+        assert!(event.spec_hash.is_empty());
+    }
+
+    #[test]
+    fn no_spec_event_serialises_inconclusive_reason() {
+        use crate::evidence::SpecDerivationStrategy;
+        let diag = make_diag("/tmp/x.kt");
+        let reason = InconclusiveReason::SpecDerivationFailed {
+            tried: vec![SpecDerivationStrategy::FromFlowSteps],
+            hint: "kotlin source".to_owned(),
+        };
+        let event =
+            TelemetryEvent::no_spec(&diag, VerifyStatus::Inconclusive, Some(reason));
+        let json = serde_json::to_string(&event).unwrap();
+        assert!(json.contains("\"lang\":\"java\""));
+        assert!(json.contains("SpecDerivationFailed"));
+        assert!(json.contains("\"path\":\"/tmp/x.kt\""));
+    }
+
     #[test]
     fn nyx_no_telemetry_suppresses_writes() {
         let dir = TempDir::new().unwrap();
diff --git a/src/dynamic/verify.rs b/src/dynamic/verify.rs
index a4dfad1b..f822d5ea 100644
--- a/src/dynamic/verify.rs
+++ b/src/dynamic/verify.rs
@@ -185,21 +185,31 @@ fn spec_derivation_failed_verdict(
         let strategies: Vec<crate::evidence::SpecDerivationStrategy> =
             HarnessSpec::derivation_strategies().to_vec();
         let hint = derivation_failure_hint(diag);
+        let inconclusive_reason = InconclusiveReason::SpecDerivationFailed {
+            tried: strategies,
+            hint,
+        };
+        let event = TelemetryEvent::no_spec(
+            diag,
+            VerifyStatus::Inconclusive,
+            Some(inconclusive_reason.clone()),
+        );
+        telemetry::emit(&event);
         return VerifyResult {
             finding_id,
             status: VerifyStatus::Inconclusive,
             triggered_payload: None,
             reason: None,
-            inconclusive_reason: Some(InconclusiveReason::SpecDerivationFailed {
-                tried: strategies,
-                hint,
-            }),
+            inconclusive_reason: Some(inconclusive_reason),
             detail: None,
             attempts: vec![],
             toolchain_match: None,
         };
     }
 
+    let event = TelemetryEvent::no_spec(diag, VerifyStatus::Unsupported, None);
+    telemetry::emit(&event);
+
     VerifyResult {
         finding_id,
         status: VerifyStatus::Unsupported,
diff --git a/tests/dynamic_fixtures/java/cmdi_negative.java b/tests/dynamic_fixtures/java/cmdi_negative.java
index 46819849..6f219bdc 100644
--- a/tests/dynamic_fixtures/java/cmdi_negative.java
+++ b/tests/dynamic_fixtures/java/cmdi_negative.java
@@ -1,14 +1,21 @@
 // Command injection — negative fixture.
-// Safe: exec with args array; no shell; semicolons are inert.
+// Safe: exec with args array; no shell; injected metacharacters are inert.
 // Entry: Entry.runPing(String) Cap: CODE_EXEC
 // Expected verdict: NotConfirmed
+//
+// `id` ignores extra positional args (treats them as usernames it can't find
+// and writes the "no such user" error to stderr, not stdout). Switching from
+// `echo` keeps the array-exec demonstration intact while ensuring the
+// vuln-payload marker can never leak into the stdout stream the oracle reads.
 
 import java.io.*;
 
 public class Entry {
     public static void runPing(String host) throws Exception {
+        // Sink-reachability probe: we did reach the exec call site.
+        System.out.print("__NYX_SINK_HIT__\n");
         // Array form: each element is a literal argument — no shell expansion.
-        String[] cmd = {"echo", "hello", host};
+        String[] cmd = {"id", host};
         Process p = Runtime.getRuntime().exec(cmd);
         BufferedReader reader = new BufferedReader(new InputStreamReader(p.getInputStream()));
         String line;
diff --git a/tests/dynamic_fixtures/java/fileio_negative.java b/tests/dynamic_fixtures/java/fileio_negative.java
index c3bd1e6d..ae2f8668 100644
--- a/tests/dynamic_fixtures/java/fileio_negative.java
+++ b/tests/dynamic_fixtures/java/fileio_negative.java
@@ -7,7 +7,10 @@ import java.io.*;
 import java.nio.file.*;
 
 public class Entry {
-    private static final String BASE_DIR = "/var/data";
+    // `/tmp` exists on Linux and macOS so `toRealPath()` resolves cleanly on
+    // both. The traversal payload still escapes the base (which is the point
+    // of the safe-path check) so the verdict stays NotConfirmed.
+    private static final String BASE_DIR = "/tmp";
 
     public static void readFile(String userPath) throws Exception {
         Path base = Paths.get(BASE_DIR).toRealPath();
diff --git a/tests/dynamic_fixtures/java/fileio_positive.java b/tests/dynamic_fixtures/java/fileio_positive.java
index 5a99f95a..4deab781 100644
--- a/tests/dynamic_fixtures/java/fileio_positive.java
+++ b/tests/dynamic_fixtures/java/fileio_positive.java
@@ -2,13 +2,17 @@
 // Vulnerable: reads file at user-controlled path without sanitization.
 // Entry: Entry.readFile(String) Cap: FILE_IO
 // Expected verdict: Confirmed (../../../../etc/passwd → "root:" in output)
+//
+// Base directory is `/tmp` rather than `/var/data` so the harness can resolve
+// the traversal payload on both Linux and macOS hosts (`/var/data` is absent
+// on macOS, which makes `Files.readAllBytes` throw before reaching the file).
 
 import java.io.*;
 import java.nio.file.*;
 
 public class Entry {
     public static void readFile(String userPath) throws Exception {
-        Path filePath = Paths.get("/var/data", userPath);
+        Path filePath = Paths.get("/tmp", userPath);
         System.out.print("__NYX_SINK_HIT__\n");
         try {
             String content = new String(Files.readAllBytes(filePath));
diff --git a/tests/dynamic_fixtures/js/cmdi_negative.js b/tests/dynamic_fixtures/js/cmdi_negative.js
index 3b30e75b..cb5fa30a 100644
--- a/tests/dynamic_fixtures/js/cmdi_negative.js
+++ b/tests/dynamic_fixtures/js/cmdi_negative.js
@@ -2,16 +2,29 @@
 // Safe: uses execFile (no shell) with args array; shell metacharacters ignored.
 // Entry: runPing(host) Cap: CODE_EXEC
 // Expected verdict: NotConfirmed
+//
+// The host string is handed to `true` so it can never appear on stdout (which
+// would let an oracle scanning for the vuln-payload marker fire even though
+// no command was executed by a shell). Sub-process stderr is silenced with
+// `stdio: 'ignore'` so the same logic holds for `outcome.stderr` — Node's
+// default of inheriting child stderr to the parent stream lets `execFile`
+// error messages leak the host string otherwise.
 
 const { execFileSync } = require('child_process');
 
 function runPing(host) {
+  // Sink-reachability probe: we did reach the execFile call site.
+  process.stdout.write('__NYX_SINK_HIT__\n');
   // execFile does not invoke a shell — semicolons and metacharacters are inert.
   try {
-    const out = execFileSync('echo', ['hello', host], { encoding: 'utf8', timeout: 5000 });
+    const out = execFileSync('true', [host], {
+      encoding: 'utf8',
+      timeout: 5000,
+      stdio: ['ignore', 'pipe', 'ignore'],
+    });
     process.stdout.write(out);
   } catch (e) {
-    process.stdout.write('error\n');
+    // true exits 0 with no output; the catch is defensive.
   }
 }
 
diff --git a/tests/dynamic_fixtures/rust/fileio_positive2.rs b/tests/dynamic_fixtures/rust/fileio_positive2.rs
index 1aa4b150..024956c8 100644
--- a/tests/dynamic_fixtures/rust/fileio_positive2.rs
+++ b/tests/dynamic_fixtures/rust/fileio_positive2.rs
@@ -8,7 +8,10 @@ pub fn run(payload: &str) {
     use std::io::Read;
 
     // Vulnerable: path joins base with user input without canonicalization.
-    let path = format!("/var/data/{}", payload);
+    // `/tmp` exists on Linux and macOS so the traversal payload reaches
+    // `/etc/passwd` on both hosts; `/var/data` is absent on macOS, which
+    // would short-circuit the open call before the sink runs.
+    let path = format!("/tmp/{}", payload);
     println!("__NYX_SINK_HIT__");
     let _ = std::io::Write::flush(&mut std::io::stdout());
 
diff --git a/tests/dynamic_fixtures/rust/sqli_positive.rs b/tests/dynamic_fixtures/rust/sqli_positive.rs
index 667403aa..020fdf12 100644
--- a/tests/dynamic_fixtures/rust/sqli_positive.rs
+++ b/tests/dynamic_fixtures/rust/sqli_positive.rs
@@ -21,7 +21,11 @@ pub fn run(payload: &str) {
     println!("__NYX_SINK_HIT__");
     let _ = std::io::Write::flush(&mut std::io::stdout());
 
-    match conn.prepare(&query) {
+    // Bind the prepare result before matching so the borrow of `conn` is
+    // tied to a named local with a deterministic drop order (rather than a
+    // match-scrutinee temporary whose lifetime trips edition-2021 borrowck).
+    let prepared = conn.prepare(&query);
+    match prepared {
         Ok(mut stmt) => {
             let _ = stmt.query_map([], |row| row.get::<_, String>(0)).map(|rows| {
                 for name in rows.flatten() {