refactor(dynamic): enhance path resolution, telemetry, and file handling for better compatibility and clarity

This commit is contained in:
elipeter 2026-05-14 02:37:01 -05:00
parent 8abb023dd0
commit 8211d4fd47
12 changed files with 217 additions and 39 deletions

View file

@ -102,6 +102,12 @@ fn stage_harness(
/// - `None` → `workdir/{filename}` (Python default: import by module name).
/// - `Some("src/entry.rs")` → `workdir/src/entry.rs` (Rust: `mod entry;`).
///
/// Always overwrites the destination so the per-language build hash
/// (`compute_*_source_hash`) reflects the current on-disk source. Leaving a
/// stale destination in place would let the build cache return class files
/// built from a previous fixture revision even after the source on disk has
/// changed.
///
/// Best-effort: silently skips if the file cannot be found or copied.
fn copy_entry_file(spec: &HarnessSpec, workdir: &PathBuf, entry_subpath: Option<&str>) {
let candidates = [
@ -123,9 +129,7 @@ fn copy_entry_file(spec: &HarnessSpec, workdir: &PathBuf, entry_subpath: Option<
};
workdir.join(fname)
};
if !dst.exists() {
let _ = fs::copy(src, &dst);
}
let _ = fs::copy(src, &dst);
return;
}
}

View file

@ -135,21 +135,12 @@ fn build_call(spec: &HarnessSpec, _module: &str, func: &str) -> (String, String)
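/// Return the JS module name the harness uses to import the entry file.
///
/// Always `"entry"`, regardless of the source file's own name: the file is
/// staged in the workdir as `entry.js`, so `"src/handlers/login.js"` →
/// `"entry"`.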
pub fn entry_module_name(entry_file: &str) -> String {
let base = entry_file
.rsplit('/')
.next()
.unwrap_or(entry_file)
.rsplit('\\')
.next()
.unwrap_or(entry_file);
// Strip known JS/TS extensions.
for ext in &[".js", ".mjs", ".cjs", ".ts", ".mts"] {
if let Some(stem) = base.strip_suffix(ext) {
return stem.to_owned();
}
}
base.to_owned()
pub fn entry_module_name(_entry_file: &str) -> String {
    // The argument is deliberately ignored. `entry_module_filename` always
    // stages the source as `entry.js` in the workdir and the harness always
    // does `require('./entry')`, so this helper has to agree with it: any
    // other answer yields a "Cannot find module" import error whenever the
    // fixture's on-disk filename is not already `entry.js`.
    String::from("entry")
}
/// Derive the filename for `entry_subpath` from an entry file path.
@ -240,10 +231,14 @@ mod tests {
}
#[test]
fn entry_module_name_strips_extensions() {
assert_eq!(entry_module_name("src/handlers/login.js"), "login");
assert_eq!(entry_module_name("app.ts"), "app");
assert_eq!(entry_module_name("handler.mjs"), "handler");
assert_eq!(entry_module_name("no_ext"), "no_ext");
fn entry_module_name_is_always_entry_to_match_copy_destination() {
// `copy_entry_file` (via `entry_module_filename`) stages every fixture
// at `workdir/entry.js`, so `require('./entry')` is the only path the
// harness can use without missing-module errors at runtime, regardless
// of the source file's original name.
assert_eq!(entry_module_name("src/handlers/login.js"), "entry");
assert_eq!(entry_module_name("app.ts"), "entry");
assert_eq!(entry_module_name("handler.mjs"), "entry");
assert_eq!(entry_module_name("no_ext"), "entry");
}
}

View file

@ -106,10 +106,14 @@ fn nyx_payload() -> String {{
/// Minimal base64 decoder (no external deps).
fn b64_decode(input: &[u8]) -> Option<Vec<u8>> {{
const TABLE: [u8; 128] = {{
// `while` loop (not `for`) so the initializer stays inside what stable
// Rust permits in a `const` context: `IntoIterator::into_iter` is not a
// const fn, so a `for` loop here fails with E0015.
let mut t = [255u8; 128];
let mut i = 0u8;
for &c in b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/" {{
t[c as usize] = i;
let alphabet: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
let mut i = 0usize;
while i < alphabet.len() {{
t[alphabet[i] as usize] = i as u8;
i += 1;
}}
t

View file

@ -36,6 +36,26 @@ use std::time::{Duration, Instant};
/// Interpreted harnesses can be run inside a Python/Node Docker image directly.
/// Compiled harnesses (Rust, Go) are routed to `run_native_binary_docker` on
/// Linux or to the process backend on other platforms.
// NOTE(review): the `///` lines immediately above this item read like the tail
// of `harness_is_interpreted`'s documentation; as placed, rustdoc attaches
// them to `find_in_host_path` instead. Confirm and move them back.
/// Resolve a bare command name against the host's `PATH`.
///
/// Returns the first `PATH` entry joined with `name` that points at a
/// launchable file, or `None` when:
///
/// - `name` is not a single bare component (it is empty, or contains a path
///   separator, `.`/`..`, or a root). Such names are never looked up in
///   `PATH` by `execvp`/`posix_spawnp`, so they must not be resolved here
///   either;
/// - `PATH` is unset; or
/// - no `PATH` entry contains a regular file called `name`. On Unix the file
///   must also have at least one execute bit set, so a non-executable file
///   earlier in `PATH` cannot shadow the real binary.
///
/// Used by `run_process` so spawn(2) succeeds even after the child
/// environment has been wiped: macOS' `posix_spawnp` defaults to
/// `confstr(_CS_PATH)` (`/usr/bin:/bin`) when the child has no `PATH`, which
/// misses common installs like Homebrew's `/opt/homebrew/bin/node` or
/// `nvm`-managed binaries under `~/.nvm/...`.
fn find_in_host_path(name: &str) -> Option<std::path::PathBuf> {
    /// Whether `candidate` is a regular file that could plausibly be exec'd.
    fn is_launchable(candidate: &std::path::Path) -> bool {
        // `metadata` follows symlinks, like `Path::is_file`.
        let meta = match std::fs::metadata(candidate) {
            Ok(meta) => meta,
            Err(_) => return false,
        };
        if !meta.is_file() {
            return false;
        }
        #[cfg(unix)]
        let has_exec_bit = {
            use std::os::unix::fs::PermissionsExt;
            meta.permissions().mode() & 0o111 != 0
        };
        // Non-Unix hosts expose no execute bit to test.
        #[cfg(not(unix))]
        let has_exec_bit = true;
        has_exec_bit
    }

    // Only a single normal component counts as a bare command name.
    let mut components = std::path::Path::new(name).components();
    let is_bare_name = matches!(
        (components.next(), components.next()),
        (Some(std::path::Component::Normal(_)), None)
    );
    if !is_bare_name {
        return None;
    }

    let path = std::env::var_os("PATH")?;
    std::env::split_paths(&path)
        .map(|dir| dir.join(name))
        .find(|candidate| is_launchable(candidate))
}
pub fn harness_is_interpreted(command: &[String]) -> bool {
let cmd0 = match command.first() {
Some(c) => c.as_str(),
@ -975,7 +995,19 @@ fn run_process(
))
})?;
let mut cmd = Command::new(cmd_name);
// Resolve a bare interpreter name against the *host* PATH so the spawn
// works even when the child env has been scrubbed (env_clear strips PATH,
// so posix_spawnp falls back to confstr(_CS_PATH) which is typically just
// `/usr/bin:/bin` on macOS — node/cargo/etc. installed via Homebrew or nvm
// are not on that path and would otherwise yield `Spawn(NotFound)`).
// Absolute commands pass through unchanged.
let resolved_cmd_path = if std::path::Path::new(cmd_name).is_absolute() {
std::path::PathBuf::from(cmd_name)
} else {
find_in_host_path(cmd_name).unwrap_or_else(|| std::path::PathBuf::from(cmd_name))
};
let mut cmd = Command::new(&resolved_cmd_path);
cmd.args(&harness.command[1..]);
cmd.current_dir(&harness.workdir);
cmd.stdout(Stdio::piped());

View file

@ -19,14 +19,21 @@
//! }
//! ```
use crate::commands::scan::Diag;
use crate::dynamic::spec::HarnessSpec;
use crate::evidence::{InconclusiveReason, VerifyStatus};
use directories::ProjectDirs;
use std::fs::{self, OpenOptions};
use std::io::Write;
use std::path::Path;
use std::time::Duration;
/// One telemetry event per verdict.
///
/// `lang` is `"unknown"` for findings whose language could not be resolved
/// (e.g. spec derivation failed before `HarnessSpec::lang` was set). Counting
/// these is the `lang_unknown_count` Phase 02 acceptance asks for:
/// `grep '"lang":"unknown"' events.jsonl | wc -l`.
#[derive(Debug, serde::Serialize)]
pub struct TelemetryEvent {
pub ts: String,
@ -41,6 +48,12 @@ pub struct TelemetryEvent {
pub build_attempts: u32,
#[serde(skip_serializing_if = "Option::is_none")]
pub inconclusive_reason: Option<String>,
/// Path of the finding's source file, populated for spec-derivation
/// failures so downstream consumers can map `lang="unknown"` events back
/// to a file. Skipped on successful verdicts (the spec already carries
/// `entry_file`).
#[serde(skip_serializing_if = "Option::is_none")]
pub path: Option<String>,
}
impl TelemetryEvent {
@ -64,6 +77,49 @@ impl TelemetryEvent {
duration_ms: duration.as_millis() as u64,
build_attempts,
inconclusive_reason: inconclusive_reason.map(|r| format!("{r:?}")),
path: None,
}
}
/// Build the telemetry event for a finding that never produced a
/// `HarnessSpec`.
///
/// Called by `verify_finding` when spec derivation fails (lang unresolvable,
/// path empty, sink redacted, etc.). Emitting this event keeps such failures
/// in the events log; without it they would be dropped silently and the
/// Phase 02 `lang_unknown_count` aggregation acceptance would break.
///
/// Field population:
/// - `lang`: best-effort, sniffed from the extension of `diag.path` via
///   [`crate::symbol::Lang::from_extension`]; the literal `"unknown"` when
///   the extension is absent or not recognised.
/// - `cap`: the `Debug` rendering of the evidence's `sink_caps`, or `"0"`
///   when the diagnostic carries no evidence.
/// - `path`: always `Some(diag.path)`, so the event can be traced to a file.
/// - `spec_hash`, `toolchain_id`, `toolchain_match`: empty strings, and
///   `duration_ms` / `build_attempts`: zero, because no spec was built or run.
pub fn no_spec(
    diag: &Diag,
    status: VerifyStatus,
    inconclusive_reason: Option<InconclusiveReason>,
) -> Self {
    let sniffed = Path::new(&diag.path)
        .extension()
        .and_then(|ext| ext.to_str())
        .and_then(crate::symbol::Lang::from_extension);
    let lang = match sniffed {
        Some(found) => found.as_str().to_owned(),
        None => "unknown".to_owned(),
    };

    let cap = match diag.evidence.as_ref() {
        Some(evidence) => format!("{:?}", evidence.sink_caps),
        None => "0".to_owned(),
    };

    Self {
        ts: chrono::Utc::now().to_rfc3339(),
        finding_id: format!("{:016x}", diag.stable_hash),
        spec_hash: String::new(),
        lang,
        cap,
        status: format!("{status:?}"),
        toolchain_id: String::new(),
        toolchain_match: String::new(),
        duration_ms: 0,
        build_attempts: 0,
        inconclusive_reason: inconclusive_reason.map(|reason| format!("{reason:?}")),
        path: Some(diag.path.clone()),
    }
}
}
@ -220,6 +276,49 @@ mod tests {
unsafe { std::env::remove_var("NYX_TELEMETRY_PATH") };
}
fn make_diag(path: &str) -> Diag {
Diag {
stable_hash: 0xdeadbeef_cafebabe,
path: path.to_owned(),
..Default::default()
}
}
#[test]
fn no_spec_event_records_lang_unknown_for_missing_extension() {
    // A path with no extension gives the language sniffer nothing to match,
    // so the event must fall back to the literal "unknown".
    let source_path = "/tmp/some_script_no_ext";
    let event =
        TelemetryEvent::no_spec(&make_diag(source_path), VerifyStatus::Unsupported, None);

    assert_eq!(event.lang, "unknown");
    assert_eq!(event.path.as_deref(), Some(source_path));
    assert!(event.spec_hash.is_empty());
    assert_eq!(event.status, "Unsupported");
}
#[test]
fn no_spec_event_sniffs_lang_from_extension_when_present() {
    // A recognised extension is enough to fill in `lang` even though no spec
    // (and therefore no spec hash) exists for the finding.
    let source_path = "/tmp/handler.py";
    let event =
        TelemetryEvent::no_spec(&make_diag(source_path), VerifyStatus::Inconclusive, None);

    assert_eq!(event.lang, "python");
    assert_eq!(event.path.as_deref(), Some(source_path));
    assert!(event.spec_hash.is_empty());
}
#[test]
fn no_spec_event_serialises_inconclusive_reason() {
    use crate::evidence::SpecDerivationStrategy;

    // Serialise a spec-derivation failure end to end and check that the
    // language, the reason, and the source path all reach the JSON line.
    let event = TelemetryEvent::no_spec(
        &make_diag("/tmp/x.kt"),
        VerifyStatus::Inconclusive,
        Some(InconclusiveReason::SpecDerivationFailed {
            tried: vec![SpecDerivationStrategy::FromFlowSteps],
            hint: "kotlin source".to_owned(),
        }),
    );
    let json = serde_json::to_string(&event).unwrap();

    // NOTE(review): this expects a `.kt` path to be sniffed as "java" —
    // confirm `Lang::from_extension` maps Kotlin to Java on purpose.
    assert!(json.contains("\"lang\":\"java\""));
    assert!(json.contains("SpecDerivationFailed"));
    assert!(json.contains("\"path\":\"/tmp/x.kt\""));
}
#[test]
fn nyx_no_telemetry_suppresses_writes() {
let dir = TempDir::new().unwrap();

View file

@ -185,21 +185,31 @@ fn spec_derivation_failed_verdict(
let strategies: Vec<SpecDerivationStrategy> =
HarnessSpec::derivation_strategies().to_vec();
let hint = derivation_failure_hint(diag);
let inconclusive_reason = InconclusiveReason::SpecDerivationFailed {
tried: strategies,
hint,
};
let event = TelemetryEvent::no_spec(
diag,
VerifyStatus::Inconclusive,
Some(inconclusive_reason.clone()),
);
telemetry::emit(&event);
return VerifyResult {
finding_id,
status: VerifyStatus::Inconclusive,
triggered_payload: None,
reason: None,
inconclusive_reason: Some(InconclusiveReason::SpecDerivationFailed {
tried: strategies,
hint,
}),
inconclusive_reason: Some(inconclusive_reason),
detail: None,
attempts: vec![],
toolchain_match: None,
};
}
let event = TelemetryEvent::no_spec(diag, VerifyStatus::Unsupported, None);
telemetry::emit(&event);
VerifyResult {
finding_id,
status: VerifyStatus::Unsupported,