refactor(dynamic): enhance path resolution, telemetry, and file handling for better compatibility and clarity

This commit is contained in:
elipeter 2026-05-14 02:37:01 -05:00
parent 8abb023dd0
commit 8211d4fd47
12 changed files with 217 additions and 39 deletions

View file

@ -102,6 +102,12 @@ fn stage_harness(
/// - `None` → `workdir/{filename}` (Python default: import by module name).
/// - `Some("src/entry.rs")` → `workdir/src/entry.rs` (Rust: `mod entry;`).
///
/// Always overwrites the destination so the per-language build hash
/// (`compute_*_source_hash`) reflects the current on-disk source. Leaving a
/// stale destination in place would let the build cache return class files
/// built from a previous fixture revision even after the source on disk has
/// changed.
///
/// Best-effort: silently skips if the file cannot be found or copied.
fn copy_entry_file(spec: &HarnessSpec, workdir: &PathBuf, entry_subpath: Option<&str>) {
let candidates = [
@ -123,9 +129,7 @@ fn copy_entry_file(spec: &HarnessSpec, workdir: &PathBuf, entry_subpath: Option<
};
workdir.join(fname)
};
if !dst.exists() {
let _ = fs::copy(src, &dst);
}
let _ = fs::copy(src, &dst);
return;
}
}

View file

@ -135,21 +135,12 @@ fn build_call(spec: &HarnessSpec, _module: &str, func: &str) -> (String, String)
/// Compute the JS module name for an entry file path.
///
/// The name is the final path segment (after the last `/`, then after the
/// last `\`) with the first matching JS/TS extension removed, e.g.
/// `"src/handlers/login.js"` → `"login"`. A segment without a recognised
/// extension is returned unchanged.
pub fn entry_module_name(entry_file: &str) -> String {
    // Extensions are tried in this order; the first one that matches wins.
    const KNOWN_EXTENSIONS: [&str; 5] = [".js", ".mjs", ".cjs", ".ts", ".mts"];

    // Drop everything up to the last forward slash, then repeat for
    // backslashes so Windows-style paths reduce to the same segment.
    let after_slash = entry_file
        .rsplit_once('/')
        .map_or(entry_file, |(_, tail)| tail);
    let file_name = after_slash
        .rsplit_once('\\')
        .map_or(after_slash, |(_, tail)| tail);

    KNOWN_EXTENSIONS
        .iter()
        .find_map(|ext| file_name.strip_suffix(*ext))
        .unwrap_or(file_name)
        .to_owned()
}
/// Return the module name the generated JS harness imports.
///
/// The result is always `"entry"`: `_entry_file` is ignored, so the same
/// value comes back whatever the fixture's on-disk filename or extension is.
pub fn entry_module_name(_entry_file: &str) -> String {
// The harness always `require('./entry')` because `entry_module_filename`
// unconditionally copies the source to `entry.js` in the workdir. Keeping
// these two helpers in sync prevents a "Cannot find module" import error
// when the fixture's on-disk filename is anything other than `entry.js`.
"entry".to_owned()
}
/// Derive the filename for `entry_subpath` from an entry file path.
@ -240,10 +231,14 @@ mod tests {
}
#[test]
fn entry_module_name_strips_extensions() {
assert_eq!(entry_module_name("src/handlers/login.js"), "login");
assert_eq!(entry_module_name("app.ts"), "app");
assert_eq!(entry_module_name("handler.mjs"), "handler");
assert_eq!(entry_module_name("no_ext"), "no_ext");
fn entry_module_name_is_always_entry_to_match_copy_destination() {
// `copy_entry_file` (via `entry_module_filename`) stages every fixture
// at `workdir/entry.js`, so `require('./entry')` is the only path the
// harness can use without missing-module errors at runtime, regardless
// of the source file's original name.
assert_eq!(entry_module_name("src/handlers/login.js"), "entry");
assert_eq!(entry_module_name("app.ts"), "entry");
assert_eq!(entry_module_name("handler.mjs"), "entry");
assert_eq!(entry_module_name("no_ext"), "entry");
}
}

View file

@ -106,10 +106,14 @@ fn nyx_payload() -> String {{
/// Minimal base64 decoder (no external deps).
fn b64_decode(input: &[u8]) -> Option<Vec<u8>> {{
const TABLE: [u8; 128] = {{
// `while` loop (not `for`) so the initializer stays inside what stable
// Rust permits in a `const` context: `IntoIterator::into_iter` is not a
// const fn, so a `for` loop here fails with E0015.
let mut t = [255u8; 128];
let mut i = 0u8;
for &c in b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/" {{
t[c as usize] = i;
let alphabet: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
let mut i = 0usize;
while i < alphabet.len() {{
t[alphabet[i] as usize] = i as u8;
i += 1;
}}
t

View file

@ -36,6 +36,26 @@ use std::time::{Duration, Instant};
/// Interpreted harnesses can be run inside a Python/Node Docker image directly.
/// Compiled harnesses (Rust, Go) are routed to `run_native_binary_docker` on
/// Linux or to the process backend on other platforms.
// NOTE(review): the `///` lines directly above this item talk about
// interpreted vs compiled harnesses and appear to belong to
// `harness_is_interpreted` (declared right after this function); as written
// they attach to `find_in_host_path` instead. Confirm and move them.
/// Resolve a bare command name to an absolute path by walking the host's
/// `PATH`. Returns `None` if `PATH` is unset or the name is not present in
/// any entry as a regular file.
///
/// Only *bare* names are searched: an empty `name`, or one containing a path
/// separator (`./harness`, `bin/tool`), yields `None`. This mirrors the
/// `exec*p` family, which never consults `PATH` for such names, and stops a
/// relative path from being re-rooted under an unrelated `PATH` directory.
///
/// Used by `run_process` so spawn(2) succeeds even after the child
/// environment has been wiped: macOS' `posix_spawnp` defaults to
/// `confstr(_CS_PATH)` (`/usr/bin:/bin`) when the child has no `PATH`, which
/// misses common installs like Homebrew's `/opt/homebrew/bin/node` or
/// `nvm`-managed binaries under `~/.nvm/...`.
fn find_in_host_path(name: &str) -> Option<std::path::PathBuf> {
    // Names with a directory component are paths, not commands to look up.
    if name.is_empty() || name.chars().any(std::path::is_separator) {
        return None;
    }
    let path = std::env::var_os("PATH")?;
    // First PATH entry holding a regular file with this name wins, matching
    // the left-to-right precedence of a shell lookup.
    std::env::split_paths(&path)
        .map(|dir| dir.join(name))
        .find(|candidate| candidate.is_file())
}
pub fn harness_is_interpreted(command: &[String]) -> bool {
let cmd0 = match command.first() {
Some(c) => c.as_str(),
@ -975,7 +995,19 @@ fn run_process(
))
})?;
let mut cmd = Command::new(cmd_name);
// Resolve a bare interpreter name against the *host* PATH so the spawn
// works even when the child env has been scrubbed (env_clear strips PATH,
// so posix_spawnp falls back to confstr(_CS_PATH) which is typically just
// `/usr/bin:/bin` on macOS — node/cargo/etc. installed via Homebrew or nvm
// are not on that path and would otherwise yield `Spawn(NotFound)`).
// Absolute commands pass through unchanged.
let resolved_cmd_path = if std::path::Path::new(cmd_name).is_absolute() {
std::path::PathBuf::from(cmd_name)
} else {
find_in_host_path(cmd_name).unwrap_or_else(|| std::path::PathBuf::from(cmd_name))
};
let mut cmd = Command::new(&resolved_cmd_path);
cmd.args(&harness.command[1..]);
cmd.current_dir(&harness.workdir);
cmd.stdout(Stdio::piped());

View file

@ -19,14 +19,21 @@
//! }
//! ```
use crate::commands::scan::Diag;
use crate::dynamic::spec::HarnessSpec;
use crate::evidence::{InconclusiveReason, VerifyStatus};
use directories::ProjectDirs;
use std::fs::{self, OpenOptions};
use std::io::Write;
use std::path::Path;
use std::time::Duration;
/// One telemetry event per verdict.
///
/// `lang` is `"unknown"` for findings whose language could not be resolved
/// (e.g. spec derivation failed before `HarnessSpec::lang` was set). Counting
/// these events yields the `lang_unknown_count` that the Phase 02 acceptance
/// asks for: `grep '"lang":"unknown"' events.jsonl | wc -l`.
#[derive(Debug, serde::Serialize)]
pub struct TelemetryEvent {
pub ts: String,
@ -41,6 +48,12 @@ pub struct TelemetryEvent {
pub build_attempts: u32,
#[serde(skip_serializing_if = "Option::is_none")]
pub inconclusive_reason: Option<String>,
/// Path of the finding's source file, populated for spec-derivation
/// failures so downstream consumers can map `lang="unknown"` events back
/// to a file. Skipped on successful verdicts (the spec already carries
/// `entry_file`).
#[serde(skip_serializing_if = "Option::is_none")]
pub path: Option<String>,
}
impl TelemetryEvent {
@ -64,6 +77,49 @@ impl TelemetryEvent {
duration_ms: duration.as_millis() as u64,
build_attempts,
inconclusive_reason: inconclusive_reason.map(|r| format!("{r:?}")),
path: None,
}
}
/// Build the telemetry event for a finding that never produced a
/// `HarnessSpec`.
///
/// Called from `verify_finding` when spec derivation fails (lang
/// unresolvable, path empty, sink redacted, etc.) so those findings still
/// appear in the events log and can be counted for the Phase 02
/// `lang_unknown_count` aggregation.
///
/// Field population:
/// - `lang`: sniffed from the extension of `diag.path` via
///   [`crate::symbol::Lang::from_extension`]; the literal `"unknown"` when
///   the extension is missing, not valid UTF-8, or not recognised.
/// - `cap`: `Debug` rendering of the evidence's `sink_caps`, or `"0"` when
///   the diagnostic carries no evidence.
/// - `finding_id`: `diag.stable_hash` as 16 zero-padded hex digits.
/// - `path`: always `Some(diag.path)`.
/// - `spec_hash`, `toolchain_id`, `toolchain_match` are empty and
///   `duration_ms`, `build_attempts` are zero.
pub fn no_spec(
    diag: &Diag,
    status: VerifyStatus,
    inconclusive_reason: Option<InconclusiveReason>,
) -> Self {
    // Best-effort language detection from the file extension alone.
    let sniffed = Path::new(&diag.path)
        .extension()
        .and_then(|ext| ext.to_str())
        .and_then(crate::symbol::Lang::from_extension);
    let lang = match sniffed {
        Some(known) => known.as_str().to_owned(),
        None => "unknown".to_owned(),
    };

    let cap = match diag.evidence.as_ref() {
        Some(evidence) => format!("{:?}", evidence.sink_caps),
        None => "0".to_owned(),
    };

    let reason_text = inconclusive_reason.map(|reason| format!("{reason:?}"));

    Self {
        ts: chrono::Utc::now().to_rfc3339(),
        finding_id: format!("{:016x}", diag.stable_hash),
        spec_hash: String::new(),
        lang,
        cap,
        status: format!("{status:?}"),
        toolchain_id: String::new(),
        toolchain_match: String::new(),
        duration_ms: 0,
        build_attempts: 0,
        inconclusive_reason: reason_text,
        path: Some(diag.path.clone()),
    }
}
}
@ -220,6 +276,49 @@ mod tests {
unsafe { std::env::remove_var("NYX_TELEMETRY_PATH") };
}
fn make_diag(path: &str) -> Diag {
Diag {
stable_hash: 0xdeadbeef_cafebabe,
path: path.to_owned(),
..Default::default()
}
}
#[test]
fn no_spec_event_records_lang_unknown_for_missing_extension() {
    // No extension to sniff, so the language falls back to "unknown".
    let source_path = "/tmp/some_script_no_ext";
    let recorded =
        TelemetryEvent::no_spec(&make_diag(source_path), VerifyStatus::Unsupported, None);

    assert_eq!(recorded.lang, "unknown");
    assert_eq!(recorded.path.as_deref(), Some(source_path));
    assert!(recorded.spec_hash.is_empty());
    assert_eq!(recorded.status, "Unsupported");
}
#[test]
fn no_spec_event_sniffs_lang_from_extension_when_present() {
    // A recognised extension is reported as the language even without a spec.
    let source_path = "/tmp/handler.py";
    let recorded =
        TelemetryEvent::no_spec(&make_diag(source_path), VerifyStatus::Inconclusive, None);

    assert_eq!(recorded.lang, "python");
    assert_eq!(recorded.path.as_deref(), Some(source_path));
    assert!(recorded.spec_hash.is_empty());
}
#[test]
fn no_spec_event_serialises_inconclusive_reason() {
    use crate::evidence::SpecDerivationStrategy;
    let diag = make_diag("/tmp/x.kt");
    let reason = InconclusiveReason::SpecDerivationFailed {
        tried: vec![SpecDerivationStrategy::FromFlowSteps],
        hint: "kotlin source".to_owned(),
    };
    let event =
        TelemetryEvent::no_spec(&diag, VerifyStatus::Inconclusive, Some(reason));
    let json = serde_json::to_string(&event).unwrap();
    // `no_spec` only ever emits a sniffed language name or the literal
    // "unknown"; a `.kt` file must not be reported as Java.
    // NOTE(review): assumes `Lang::from_extension` has no mapping for `kt`,
    // so the fallback applies — confirm against `crate::symbol::Lang`.
    assert!(json.contains("\"lang\":\"unknown\""));
    assert!(json.contains("SpecDerivationFailed"));
    assert!(json.contains("\"path\":\"/tmp/x.kt\""));
}
#[test]
fn nyx_no_telemetry_suppresses_writes() {
let dir = TempDir::new().unwrap();

View file

@ -185,21 +185,31 @@ fn spec_derivation_failed_verdict(
let strategies: Vec<SpecDerivationStrategy> =
HarnessSpec::derivation_strategies().to_vec();
let hint = derivation_failure_hint(diag);
let inconclusive_reason = InconclusiveReason::SpecDerivationFailed {
tried: strategies,
hint,
};
let event = TelemetryEvent::no_spec(
diag,
VerifyStatus::Inconclusive,
Some(inconclusive_reason.clone()),
);
telemetry::emit(&event);
return VerifyResult {
finding_id,
status: VerifyStatus::Inconclusive,
triggered_payload: None,
reason: None,
inconclusive_reason: Some(InconclusiveReason::SpecDerivationFailed {
tried: strategies,
hint,
}),
inconclusive_reason: Some(inconclusive_reason),
detail: None,
attempts: vec![],
toolchain_match: None,
};
}
let event = TelemetryEvent::no_spec(diag, VerifyStatus::Unsupported, None);
telemetry::emit(&event);
VerifyResult {
finding_id,
status: VerifyStatus::Unsupported,

View file

@ -1,14 +1,21 @@
// Command injection negative fixture.
// Safe: exec with args array; no shell; semicolons are inert.
// Safe: exec with args array; no shell; injected metacharacters are inert.
// Entry: Entry.runPing(String) Cap: CODE_EXEC
// Expected verdict: NotConfirmed
//
// `id` ignores extra positional args (treats them as usernames it can't find
// and writes the "no such user" error to stderr, not stdout). Switching from
// `echo` keeps the array-exec demonstration intact while ensuring the
// vuln-payload marker can never leak into the stdout stream the oracle reads.
import java.io.*;
public class Entry {
public static void runPing(String host) throws Exception {
// Sink-reachability probe: we did reach the exec call site.
System.out.print("__NYX_SINK_HIT__\n");
// Array form: each element is a literal argument; no shell expansion.
String[] cmd = {"echo", "hello", host};
String[] cmd = {"id", host};
Process p = Runtime.getRuntime().exec(cmd);
BufferedReader reader = new BufferedReader(new InputStreamReader(p.getInputStream()));
String line;

View file

@ -7,7 +7,10 @@ import java.io.*;
import java.nio.file.*;
public class Entry {
private static final String BASE_DIR = "/var/data";
// `/tmp` exists on Linux and macOS so `toRealPath()` resolves cleanly on
// both. The traversal payload still escapes the base (which is the point
// of the safe-path check) so the verdict stays NotConfirmed.
private static final String BASE_DIR = "/tmp";
public static void readFile(String userPath) throws Exception {
Path base = Paths.get(BASE_DIR).toRealPath();

View file

@ -2,13 +2,17 @@
// Vulnerable: reads file at user-controlled path without sanitization.
// Entry: Entry.readFile(String) Cap: FILE_IO
// Expected verdict: Confirmed (../../../../etc/passwd "root:" in output)
//
// Base directory is `/tmp` rather than `/var/data` so the harness can resolve
// the traversal payload on both Linux and macOS hosts (`/var/data` is absent
// on macOS, which makes `Files.readAllBytes` throw before reaching the file).
import java.io.*;
import java.nio.file.*;
public class Entry {
public static void readFile(String userPath) throws Exception {
Path filePath = Paths.get("/var/data", userPath);
Path filePath = Paths.get("/tmp", userPath);
System.out.print("__NYX_SINK_HIT__\n");
try {
String content = new String(Files.readAllBytes(filePath));

View file

@ -2,16 +2,29 @@
// Safe: uses execFile (no shell) with args array; shell metacharacters ignored.
// Entry: runPing(host) Cap: CODE_EXEC
// Expected verdict: NotConfirmed
//
// The host string is handed to `true` so it can never appear on stdout (which
// would let an oracle scanning for the vuln-payload marker fire even though
// no command was executed by a shell). Sub-process stderr is silenced with
// `stdio: 'ignore'` so the same logic holds for `outcome.stderr` — Node's
// default of inheriting child stderr to the parent stream lets `execFile`
// error messages leak the host string otherwise.
const { execFileSync } = require('child_process');
function runPing(host) {
// Sink-reachability probe: we did reach the execFile call site.
process.stdout.write('__NYX_SINK_HIT__\n');
// execFile does not invoke a shell — semicolons and metacharacters are inert.
try {
const out = execFileSync('echo', ['hello', host], { encoding: 'utf8', timeout: 5000 });
const out = execFileSync('true', [host], {
encoding: 'utf8',
timeout: 5000,
stdio: ['ignore', 'pipe', 'ignore'],
});
process.stdout.write(out);
} catch (e) {
process.stdout.write('error\n');
// true exits 0 with no output; the catch is defensive.
}
}

View file

@ -8,7 +8,10 @@ pub fn run(payload: &str) {
use std::io::Read;
// Vulnerable: path joins base with user input without canonicalization.
let path = format!("/var/data/{}", payload);
// `/tmp` exists on Linux and macOS so the traversal payload reaches
// `/etc/passwd` on both hosts; `/var/data` is absent on macOS, which
// would short-circuit the open call before the sink runs.
let path = format!("/tmp/{}", payload);
println!("__NYX_SINK_HIT__");
let _ = std::io::Write::flush(&mut std::io::stdout());

View file

@ -21,7 +21,11 @@ pub fn run(payload: &str) {
println!("__NYX_SINK_HIT__");
let _ = std::io::Write::flush(&mut std::io::stdout());
match conn.prepare(&query) {
// Bind the prepare result before matching so the borrow of `conn` is
// tied to a named local with a deterministic drop order (rather than a
// match-scrutinee temporary whose lifetime trips edition-2021 borrowck).
let prepared = conn.prepare(&query);
match prepared {
Ok(mut stmt) => {
let _ = stmt.query_map([], |row| row.get::<_, String>(0)).map(|rows| {
for name in rows.flatten() {