mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-09 19:45:13 +02:00
refactor(dynamic): enhance path resolution, telemetry, and file handling for better compatibility and clarity
This commit is contained in:
parent
8abb023dd0
commit
8211d4fd47
12 changed files with 217 additions and 39 deletions
|
|
@ -102,6 +102,12 @@ fn stage_harness(
|
|||
/// - `None` → `workdir/{filename}` (Python default: import by module name).
|
||||
/// - `Some("src/entry.rs")` → `workdir/src/entry.rs` (Rust: `mod entry;`).
|
||||
///
|
||||
/// Always overwrites the destination so the per-language build hash
|
||||
/// (`compute_*_source_hash`) reflects the current on-disk source. Leaving a
|
||||
/// stale destination in place would let the build cache return class files
|
||||
/// built from a previous fixture revision even after the source on disk has
|
||||
/// changed.
|
||||
///
|
||||
/// Best-effort: silently skips if the file cannot be found or copied.
|
||||
fn copy_entry_file(spec: &HarnessSpec, workdir: &PathBuf, entry_subpath: Option<&str>) {
|
||||
let candidates = [
|
||||
|
|
@ -123,9 +129,7 @@ fn copy_entry_file(spec: &HarnessSpec, workdir: &PathBuf, entry_subpath: Option<
|
|||
};
|
||||
workdir.join(fname)
|
||||
};
|
||||
if !dst.exists() {
|
||||
let _ = fs::copy(src, &dst);
|
||||
}
|
||||
let _ = fs::copy(src, &dst);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -135,21 +135,12 @@ fn build_call(spec: &HarnessSpec, _module: &str, func: &str) -> (String, String)
|
|||
/// Derive the JS module name from an entry file path.
|
||||
///
|
||||
/// `"src/handlers/login.js"` → `"login"` (basename without extension).
|
||||
pub fn entry_module_name(entry_file: &str) -> String {
|
||||
let base = entry_file
|
||||
.rsplit('/')
|
||||
.next()
|
||||
.unwrap_or(entry_file)
|
||||
.rsplit('\\')
|
||||
.next()
|
||||
.unwrap_or(entry_file);
|
||||
// Strip known JS/TS extensions.
|
||||
for ext in &[".js", ".mjs", ".cjs", ".ts", ".mts"] {
|
||||
if let Some(stem) = base.strip_suffix(ext) {
|
||||
return stem.to_owned();
|
||||
}
|
||||
}
|
||||
base.to_owned()
|
||||
pub fn entry_module_name(_entry_file: &str) -> String {
|
||||
// The harness always `require('./entry')` because `entry_module_filename`
|
||||
// unconditionally names the staged copy `entry.js` in the workdir. Keeping
|
||||
// these two helpers in sync prevents a "Cannot find module" import error
|
||||
// when the fixture's on-disk filename is anything other than `entry.js`.
|
||||
"entry".to_owned()
|
||||
}
|
||||
|
||||
/// Derive the filename for `entry_subpath` from an entry file path.
|
||||
|
|
@ -240,10 +231,14 @@ mod tests {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn entry_module_name_strips_extensions() {
|
||||
assert_eq!(entry_module_name("src/handlers/login.js"), "login");
|
||||
assert_eq!(entry_module_name("app.ts"), "app");
|
||||
assert_eq!(entry_module_name("handler.mjs"), "handler");
|
||||
assert_eq!(entry_module_name("no_ext"), "no_ext");
|
||||
fn entry_module_name_is_always_entry_to_match_copy_destination() {
|
||||
// `copy_entry_file` (via `entry_module_filename`) stages every fixture
|
||||
// at `workdir/entry.js`, so `require('./entry')` is the only path the
|
||||
// harness can use without missing-module errors at runtime, regardless
|
||||
// of the source file's original name.
|
||||
assert_eq!(entry_module_name("src/handlers/login.js"), "entry");
|
||||
assert_eq!(entry_module_name("app.ts"), "entry");
|
||||
assert_eq!(entry_module_name("handler.mjs"), "entry");
|
||||
assert_eq!(entry_module_name("no_ext"), "entry");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -106,10 +106,14 @@ fn nyx_payload() -> String {{
|
|||
/// Minimal base64 decoder (no external deps).
|
||||
fn b64_decode(input: &[u8]) -> Option<Vec<u8>> {{
|
||||
const TABLE: [u8; 128] = {{
|
||||
// `while` loop (not `for`) so the initializer stays inside what stable
|
||||
// Rust permits in a `const` context: `IntoIterator::into_iter` is not a
|
||||
// const fn, so a `for` loop here fails with E0015.
|
||||
let mut t = [255u8; 128];
|
||||
let mut i = 0u8;
|
||||
for &c in b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/" {{
|
||||
t[c as usize] = i;
|
||||
let alphabet: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
|
||||
let mut i = 0usize;
|
||||
while i < alphabet.len() {{
|
||||
t[alphabet[i] as usize] = i as u8;
|
||||
i += 1;
|
||||
}}
|
||||
t
|
||||
|
|
|
|||
|
|
@ -36,6 +36,26 @@ use std::time::{Duration, Instant};
|
|||
/// Interpreted harnesses can be run inside a Python/Node Docker image directly.
|
||||
/// Compiled harnesses (Rust, Go) are routed to `run_native_binary_docker` on
|
||||
/// Linux or to the process backend on other platforms.
|
||||
/// Resolve a bare command name to a full path by walking the host's `PATH`.
///
/// Each `PATH` entry is joined with `name` in order, and the first candidate
/// that is a regular file wins. The result is only as absolute as the `PATH`
/// entry that produced it.
///
/// Returns `None` when:
/// - `name` is empty or contains a path separator. Such a name is already a
///   path, not a bare command name, and joining it onto every `PATH` entry
///   could resolve to an unrelated file (e.g. `sub/tool` below some `PATH`
///   directory),
/// - `PATH` is unset, or
/// - no `PATH` entry contains `name` as a regular file.
///
/// Used by `run_process` so spawn(2) succeeds even after the child
/// environment has been wiped: macOS' `posix_spawnp` defaults to
/// `confstr(_CS_PATH)` (`/usr/bin:/bin`) when the child has no `PATH`, which
/// misses common installs like Homebrew's `/opt/homebrew/bin/node` or
/// `nvm`-managed binaries under `~/.nvm/...`.
fn find_in_host_path(name: &str) -> Option<std::path::PathBuf> {
    // Only bare names are eligible for a PATH search; anything that already
    // looks like a path is left for the caller to use verbatim.
    if name.is_empty() || name.contains(std::path::is_separator) {
        return None;
    }

    let path = std::env::var_os("PATH")?;
    std::env::split_paths(&path)
        .map(|dir| dir.join(name))
        .find(|candidate| candidate.is_file())
}
|
||||
|
||||
pub fn harness_is_interpreted(command: &[String]) -> bool {
|
||||
let cmd0 = match command.first() {
|
||||
Some(c) => c.as_str(),
|
||||
|
|
@ -975,7 +995,19 @@ fn run_process(
|
|||
))
|
||||
})?;
|
||||
|
||||
let mut cmd = Command::new(cmd_name);
|
||||
// Resolve a bare interpreter name against the *host* PATH so the spawn
|
||||
// works even when the child env has been scrubbed (env_clear strips PATH,
|
||||
// so posix_spawnp falls back to confstr(_CS_PATH) which is typically just
|
||||
// `/usr/bin:/bin` on macOS — node/cargo/etc. installed via Homebrew or nvm
|
||||
// are not on that path and would otherwise yield `Spawn(NotFound)`).
|
||||
// Absolute commands pass through unchanged.
|
||||
let resolved_cmd_path = if std::path::Path::new(cmd_name).is_absolute() {
|
||||
std::path::PathBuf::from(cmd_name)
|
||||
} else {
|
||||
find_in_host_path(cmd_name).unwrap_or_else(|| std::path::PathBuf::from(cmd_name))
|
||||
};
|
||||
|
||||
let mut cmd = Command::new(&resolved_cmd_path);
|
||||
cmd.args(&harness.command[1..]);
|
||||
cmd.current_dir(&harness.workdir);
|
||||
cmd.stdout(Stdio::piped());
|
||||
|
|
|
|||
|
|
@ -19,14 +19,21 @@
|
|||
//! }
|
||||
//! ```
|
||||
|
||||
use crate::commands::scan::Diag;
|
||||
use crate::dynamic::spec::HarnessSpec;
|
||||
use crate::evidence::{InconclusiveReason, VerifyStatus};
|
||||
use directories::ProjectDirs;
|
||||
use std::fs::{self, OpenOptions};
|
||||
use std::io::Write;
|
||||
use std::path::Path;
|
||||
use std::time::Duration;
|
||||
|
||||
/// One telemetry event per verdict.
|
||||
///
|
||||
/// `lang` is `"unknown"` for findings whose language could not be resolved
|
||||
/// (e.g. spec derivation failed before `HarnessSpec::lang` was set). Counting
|
||||
/// these is the `lang_unknown_count` Phase 02 acceptance asks for:
|
||||
/// `grep '"lang":"unknown"' events.jsonl | wc -l`.
|
||||
#[derive(Debug, serde::Serialize)]
|
||||
pub struct TelemetryEvent {
|
||||
pub ts: String,
|
||||
|
|
@ -41,6 +48,12 @@ pub struct TelemetryEvent {
|
|||
pub build_attempts: u32,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub inconclusive_reason: Option<String>,
|
||||
/// Path of the finding's source file, populated for spec-derivation
|
||||
/// failures so downstream consumers can map `lang="unknown"` events back
|
||||
/// to a file. Skipped on successful verdicts (the spec already carries
|
||||
/// `entry_file`).
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub path: Option<String>,
|
||||
}
|
||||
|
||||
impl TelemetryEvent {
|
||||
|
|
@ -64,6 +77,49 @@ impl TelemetryEvent {
|
|||
duration_ms: duration.as_millis() as u64,
|
||||
build_attempts,
|
||||
inconclusive_reason: inconclusive_reason.map(|r| format!("{r:?}")),
|
||||
path: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Telemetry event for findings that never got a `HarnessSpec`.
|
||||
///
|
||||
/// Used by `verify_finding` when spec derivation fails (lang unresolvable,
|
||||
/// path empty, sink redacted, etc.). Without this path the events log
|
||||
/// silently drops every spec-derivation failure, which breaks the Phase 02
|
||||
/// `lang_unknown_count` aggregation acceptance.
|
||||
///
|
||||
/// `lang` is best-effort sniffed from `diag.path`'s extension via
|
||||
/// [`crate::symbol::Lang::from_extension`]. When the extension is
|
||||
/// unknown or absent, `lang` is the literal string `"unknown"`.
|
||||
pub fn no_spec(
|
||||
diag: &Diag,
|
||||
status: VerifyStatus,
|
||||
inconclusive_reason: Option<InconclusiveReason>,
|
||||
) -> Self {
|
||||
let lang = Path::new(&diag.path)
|
||||
.extension()
|
||||
.and_then(|e| e.to_str())
|
||||
.and_then(crate::symbol::Lang::from_extension)
|
||||
.map(|l| l.as_str().to_owned())
|
||||
.unwrap_or_else(|| "unknown".to_owned());
|
||||
let cap = diag
|
||||
.evidence
|
||||
.as_ref()
|
||||
.map(|e| format!("{:?}", e.sink_caps))
|
||||
.unwrap_or_else(|| "0".to_owned());
|
||||
Self {
|
||||
ts: chrono::Utc::now().to_rfc3339(),
|
||||
finding_id: format!("{:016x}", diag.stable_hash),
|
||||
spec_hash: String::new(),
|
||||
lang,
|
||||
cap,
|
||||
status: format!("{status:?}"),
|
||||
toolchain_id: String::new(),
|
||||
toolchain_match: String::new(),
|
||||
duration_ms: 0,
|
||||
build_attempts: 0,
|
||||
inconclusive_reason: inconclusive_reason.map(|r| format!("{r:?}")),
|
||||
path: Some(diag.path.clone()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -220,6 +276,49 @@ mod tests {
|
|||
unsafe { std::env::remove_var("NYX_TELEMETRY_PATH") };
|
||||
}
|
||||
|
||||
fn make_diag(path: &str) -> Diag {
|
||||
Diag {
|
||||
stable_hash: 0xdeadbeef_cafebabe,
|
||||
path: path.to_owned(),
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
fn no_spec_event_records_lang_unknown_for_missing_extension() {
    // A path with no extension leaves the sniffing step nothing to work with,
    // so the event must fall back to the "unknown" language marker.
    let source_path = "/tmp/some_script_no_ext";
    let event =
        TelemetryEvent::no_spec(&make_diag(source_path), VerifyStatus::Unsupported, None);

    assert_eq!(event.lang, "unknown");
    assert_eq!(event.path.as_deref(), Some(source_path));
    assert!(event.spec_hash.is_empty());
    assert_eq!(event.status, "Unsupported");
}
|
||||
|
||||
#[test]
fn no_spec_event_sniffs_lang_from_extension_when_present() {
    // A recognised extension should be reported as its language name even
    // though no spec was ever derived.
    let source_path = "/tmp/handler.py";
    let event =
        TelemetryEvent::no_spec(&make_diag(source_path), VerifyStatus::Inconclusive, None);

    assert_eq!(event.lang, "python");
    assert_eq!(event.path.as_deref(), Some(source_path));
    assert!(event.spec_hash.is_empty());
}
|
||||
|
||||
#[test]
fn no_spec_event_serialises_inconclusive_reason() {
    use crate::evidence::SpecDerivationStrategy;

    let reason = InconclusiveReason::SpecDerivationFailed {
        tried: vec![SpecDerivationStrategy::FromFlowSteps],
        hint: "kotlin source".to_owned(),
    };
    let event = TelemetryEvent::no_spec(
        &make_diag("/tmp/x.kt"),
        VerifyStatus::Inconclusive,
        Some(reason),
    );
    let json = serde_json::to_string(&event).unwrap();

    // The serialised line must carry the sniffed language, the reason and
    // the source path.
    // NOTE(review): expecting "java" for a `.kt` file presumably reflects how
    // `Lang::from_extension` buckets Kotlin — confirm against that mapping.
    let expected_fragments = [
        "\"lang\":\"java\"",
        "SpecDerivationFailed",
        "\"path\":\"/tmp/x.kt\"",
    ];
    for fragment in expected_fragments {
        assert!(json.contains(fragment));
    }
}
|
||||
|
||||
#[test]
|
||||
fn nyx_no_telemetry_suppresses_writes() {
|
||||
let dir = TempDir::new().unwrap();
|
||||
|
|
|
|||
|
|
@ -185,21 +185,31 @@ fn spec_derivation_failed_verdict(
|
|||
let strategies: Vec<SpecDerivationStrategy> =
|
||||
HarnessSpec::derivation_strategies().to_vec();
|
||||
let hint = derivation_failure_hint(diag);
|
||||
let inconclusive_reason = InconclusiveReason::SpecDerivationFailed {
|
||||
tried: strategies,
|
||||
hint,
|
||||
};
|
||||
let event = TelemetryEvent::no_spec(
|
||||
diag,
|
||||
VerifyStatus::Inconclusive,
|
||||
Some(inconclusive_reason.clone()),
|
||||
);
|
||||
telemetry::emit(&event);
|
||||
return VerifyResult {
|
||||
finding_id,
|
||||
status: VerifyStatus::Inconclusive,
|
||||
triggered_payload: None,
|
||||
reason: None,
|
||||
inconclusive_reason: Some(InconclusiveReason::SpecDerivationFailed {
|
||||
tried: strategies,
|
||||
hint,
|
||||
}),
|
||||
inconclusive_reason: Some(inconclusive_reason),
|
||||
detail: None,
|
||||
attempts: vec![],
|
||||
toolchain_match: None,
|
||||
};
|
||||
}
|
||||
|
||||
let event = TelemetryEvent::no_spec(diag, VerifyStatus::Unsupported, None);
|
||||
telemetry::emit(&event);
|
||||
|
||||
VerifyResult {
|
||||
finding_id,
|
||||
status: VerifyStatus::Unsupported,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue