mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-09 19:45:13 +02:00
refactor(dynamic): enhance path resolution, telemetry, and file handling for better compatibility and clarity
This commit is contained in:
parent
8abb023dd0
commit
8211d4fd47
12 changed files with 217 additions and 39 deletions
|
|
@ -102,6 +102,12 @@ fn stage_harness(
|
|||
/// - `None` → `workdir/{filename}` (Python default: import by module name).
|
||||
/// - `Some("src/entry.rs")` → `workdir/src/entry.rs` (Rust: `mod entry;`).
|
||||
///
|
||||
/// Always overwrites the destination so the per-language build hash
|
||||
/// (`compute_*_source_hash`) reflects the current on-disk source. Leaving a
|
||||
/// stale destination in place would let the build cache return class files
|
||||
/// built from a previous fixture revision even after the source on disk has
|
||||
/// changed.
|
||||
///
|
||||
/// Best-effort: silently skips if the file cannot be found or copied.
|
||||
fn copy_entry_file(spec: &HarnessSpec, workdir: &PathBuf, entry_subpath: Option<&str>) {
|
||||
let candidates = [
|
||||
|
|
@ -123,9 +129,7 @@ fn copy_entry_file(spec: &HarnessSpec, workdir: &PathBuf, entry_subpath: Option<
|
|||
};
|
||||
workdir.join(fname)
|
||||
};
|
||||
if !dst.exists() {
|
||||
let _ = fs::copy(src, &dst);
|
||||
}
|
||||
let _ = fs::copy(src, &dst);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -135,21 +135,12 @@ fn build_call(spec: &HarnessSpec, _module: &str, func: &str) -> (String, String)
|
|||
/// Derive the JS module name from an entry file path.
|
||||
///
|
||||
/// `"src/handlers/login.js"` → `"login"` (basename without extension).
|
||||
pub fn entry_module_name(entry_file: &str) -> String {
|
||||
let base = entry_file
|
||||
.rsplit('/')
|
||||
.next()
|
||||
.unwrap_or(entry_file)
|
||||
.rsplit('\\')
|
||||
.next()
|
||||
.unwrap_or(entry_file);
|
||||
// Strip known JS/TS extensions.
|
||||
for ext in &[".js", ".mjs", ".cjs", ".ts", ".mts"] {
|
||||
if let Some(stem) = base.strip_suffix(ext) {
|
||||
return stem.to_owned();
|
||||
}
|
||||
}
|
||||
base.to_owned()
|
||||
pub fn entry_module_name(_entry_file: &str) -> String {
|
||||
// The harness always `require('./entry')` because `entry_module_filename`
|
||||
// unconditionally copies the source to `entry.js` in the workdir. Keeping
|
||||
// these two helpers in sync prevents a "Cannot find module" import error
|
||||
// when the fixture's on-disk filename is anything other than `entry.js`.
|
||||
"entry".to_owned()
|
||||
}
|
||||
|
||||
/// Derive the filename for `entry_subpath` from an entry file path.
|
||||
|
|
@ -240,10 +231,14 @@ mod tests {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn entry_module_name_strips_extensions() {
|
||||
assert_eq!(entry_module_name("src/handlers/login.js"), "login");
|
||||
assert_eq!(entry_module_name("app.ts"), "app");
|
||||
assert_eq!(entry_module_name("handler.mjs"), "handler");
|
||||
assert_eq!(entry_module_name("no_ext"), "no_ext");
|
||||
fn entry_module_name_is_always_entry_to_match_copy_destination() {
|
||||
// `copy_entry_file` (via `entry_module_filename`) stages every fixture
|
||||
// at `workdir/entry.js`, so `require('./entry')` is the only path the
|
||||
// harness can use without missing-module errors at runtime, regardless
|
||||
// of the source file's original name.
|
||||
assert_eq!(entry_module_name("src/handlers/login.js"), "entry");
|
||||
assert_eq!(entry_module_name("app.ts"), "entry");
|
||||
assert_eq!(entry_module_name("handler.mjs"), "entry");
|
||||
assert_eq!(entry_module_name("no_ext"), "entry");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -106,10 +106,14 @@ fn nyx_payload() -> String {{
|
|||
/// Minimal base64 decoder (no external deps).
|
||||
fn b64_decode(input: &[u8]) -> Option<Vec<u8>> {{
|
||||
const TABLE: [u8; 128] = {{
|
||||
// `while` loop (not `for`) so the initializer stays inside what stable
|
||||
// Rust permits in a `const` context: `IntoIterator::into_iter` is not a
|
||||
// const fn, so a `for` loop here fails with E0015.
|
||||
let mut t = [255u8; 128];
|
||||
let mut i = 0u8;
|
||||
for &c in b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/" {{
|
||||
t[c as usize] = i;
|
||||
let alphabet: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
|
||||
let mut i = 0usize;
|
||||
while i < alphabet.len() {{
|
||||
t[alphabet[i] as usize] = i as u8;
|
||||
i += 1;
|
||||
}}
|
||||
t
|
||||
|
|
|
|||
|
|
@ -36,6 +36,26 @@ use std::time::{Duration, Instant};
|
|||
/// Interpreted harnesses can be run inside a Python/Node Docker image directly.
|
||||
/// Compiled harnesses (Rust, Go) are routed to `run_native_binary_docker` on
|
||||
/// Linux or to the process backend on other platforms.
|
||||
/// Resolve a bare command name to a path by walking the host's `PATH`.
///
/// Returns `None` when:
/// - `PATH` is unset;
/// - `name` is not a single plain file name (it is empty, or carries a
///   directory component such as `./run` or `bin/tool`) — such names are
///   never subject to a `PATH` search, so the caller should spawn them
///   unchanged;
/// - no `PATH` entry contains a matching regular file (on Unix the file must
///   also have at least one execute bit set, so a non-executable file that
///   shadows the real binary earlier in `PATH` is skipped instead of being
///   returned and failing at spawn time).
///
/// Used by `run_process` so spawn(2) succeeds even after the child
/// environment has been wiped: macOS' `posix_spawnp` defaults to
/// `confstr(_CS_PATH)` (`/usr/bin:/bin`) when the child has no `PATH`, which
/// misses common installs like Homebrew's `/opt/homebrew/bin/node` or
/// `nvm`-managed binaries under `~/.nvm/...`.
fn find_in_host_path(name: &str) -> Option<std::path::PathBuf> {
    /// True when `candidate` is a regular file with an execute bit set.
    #[cfg(unix)]
    fn is_spawnable(candidate: &std::path::Path) -> bool {
        use std::os::unix::fs::PermissionsExt;
        candidate
            .metadata()
            .map(|meta| meta.is_file() && (meta.permissions().mode() & 0o111) != 0)
            .unwrap_or(false)
    }

    /// True when `candidate` is a regular file (no execute bit off Unix).
    #[cfg(not(unix))]
    fn is_spawnable(candidate: &std::path::Path) -> bool {
        candidate.is_file()
    }

    // Only a lone, normal path component is eligible for a PATH search;
    // anything with a directory part is a relative/explicit path already.
    let mut parts = std::path::Path::new(name).components();
    if !matches!(
        (parts.next(), parts.next()),
        (Some(std::path::Component::Normal(_)), None)
    ) {
        return None;
    }

    let path = std::env::var_os("PATH")?;
    std::env::split_paths(&path)
        .map(|dir| dir.join(name))
        .find(|candidate| is_spawnable(candidate))
}
|
||||
|
||||
pub fn harness_is_interpreted(command: &[String]) -> bool {
|
||||
let cmd0 = match command.first() {
|
||||
Some(c) => c.as_str(),
|
||||
|
|
@ -975,7 +995,19 @@ fn run_process(
|
|||
))
|
||||
})?;
|
||||
|
||||
let mut cmd = Command::new(cmd_name);
|
||||
// Resolve a bare interpreter name against the *host* PATH so the spawn
|
||||
// works even when the child env has been scrubbed (env_clear strips PATH,
|
||||
// so posix_spawnp falls back to confstr(_CS_PATH) which is typically just
|
||||
// `/usr/bin:/bin` on macOS — node/cargo/etc. installed via Homebrew or nvm
|
||||
// are not on that path and would otherwise yield `Spawn(NotFound)`).
|
||||
// Absolute commands pass through unchanged.
|
||||
let resolved_cmd_path = if std::path::Path::new(cmd_name).is_absolute() {
|
||||
std::path::PathBuf::from(cmd_name)
|
||||
} else {
|
||||
find_in_host_path(cmd_name).unwrap_or_else(|| std::path::PathBuf::from(cmd_name))
|
||||
};
|
||||
|
||||
let mut cmd = Command::new(&resolved_cmd_path);
|
||||
cmd.args(&harness.command[1..]);
|
||||
cmd.current_dir(&harness.workdir);
|
||||
cmd.stdout(Stdio::piped());
|
||||
|
|
|
|||
|
|
@ -19,14 +19,21 @@
|
|||
//! }
|
||||
//! ```
|
||||
|
||||
use crate::commands::scan::Diag;
|
||||
use crate::dynamic::spec::HarnessSpec;
|
||||
use crate::evidence::{InconclusiveReason, VerifyStatus};
|
||||
use directories::ProjectDirs;
|
||||
use std::fs::{self, OpenOptions};
|
||||
use std::io::Write;
|
||||
use std::path::Path;
|
||||
use std::time::Duration;
|
||||
|
||||
/// One telemetry event per verdict.
|
||||
///
|
||||
/// `lang` is `"unknown"` for findings whose language could not be resolved
|
||||
/// (e.g. spec derivation failed before `HarnessSpec::lang` was set). Counting
|
||||
/// these is the `lang_unknown_count` Phase 02 acceptance asks for:
|
||||
/// `grep '"lang":"unknown"' events.jsonl | wc -l`.
|
||||
#[derive(Debug, serde::Serialize)]
|
||||
pub struct TelemetryEvent {
|
||||
pub ts: String,
|
||||
|
|
@ -41,6 +48,12 @@ pub struct TelemetryEvent {
|
|||
pub build_attempts: u32,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub inconclusive_reason: Option<String>,
|
||||
/// Path of the finding's source file, populated for spec-derivation
|
||||
/// failures so downstream consumers can map `lang="unknown"` events back
|
||||
/// to a file. Skipped on successful verdicts (the spec already carries
|
||||
/// `entry_file`).
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub path: Option<String>,
|
||||
}
|
||||
|
||||
impl TelemetryEvent {
|
||||
|
|
@ -64,6 +77,49 @@ impl TelemetryEvent {
|
|||
duration_ms: duration.as_millis() as u64,
|
||||
build_attempts,
|
||||
inconclusive_reason: inconclusive_reason.map(|r| format!("{r:?}")),
|
||||
path: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Telemetry event for findings that never got a `HarnessSpec`.
|
||||
///
|
||||
/// Used by `verify_finding` when spec derivation fails (lang unresolvable,
|
||||
/// path empty, sink redacted, etc.). Without this path the events log
|
||||
/// silently drops every spec-derivation failure, which breaks the Phase 02
|
||||
/// `lang_unknown_count` aggregation acceptance.
|
||||
///
|
||||
/// `lang` is best-effort sniffed from `diag.path`'s extension via
|
||||
/// [`crate::symbol::Lang::from_extension`]. When the extension is
|
||||
/// unknown or absent, `lang` is the literal string `"unknown"`.
|
||||
pub fn no_spec(
|
||||
diag: &Diag,
|
||||
status: VerifyStatus,
|
||||
inconclusive_reason: Option<InconclusiveReason>,
|
||||
) -> Self {
|
||||
let lang = Path::new(&diag.path)
|
||||
.extension()
|
||||
.and_then(|e| e.to_str())
|
||||
.and_then(crate::symbol::Lang::from_extension)
|
||||
.map(|l| l.as_str().to_owned())
|
||||
.unwrap_or_else(|| "unknown".to_owned());
|
||||
let cap = diag
|
||||
.evidence
|
||||
.as_ref()
|
||||
.map(|e| format!("{:?}", e.sink_caps))
|
||||
.unwrap_or_else(|| "0".to_owned());
|
||||
Self {
|
||||
ts: chrono::Utc::now().to_rfc3339(),
|
||||
finding_id: format!("{:016x}", diag.stable_hash),
|
||||
spec_hash: String::new(),
|
||||
lang,
|
||||
cap,
|
||||
status: format!("{status:?}"),
|
||||
toolchain_id: String::new(),
|
||||
toolchain_match: String::new(),
|
||||
duration_ms: 0,
|
||||
build_attempts: 0,
|
||||
inconclusive_reason: inconclusive_reason.map(|r| format!("{r:?}")),
|
||||
path: Some(diag.path.clone()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -220,6 +276,49 @@ mod tests {
|
|||
unsafe { std::env::remove_var("NYX_TELEMETRY_PATH") };
|
||||
}
|
||||
|
||||
fn make_diag(path: &str) -> Diag {
|
||||
Diag {
|
||||
stable_hash: 0xdeadbeef_cafebabe,
|
||||
path: path.to_owned(),
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
fn no_spec_event_records_lang_unknown_for_missing_extension() {
    // A path with no extension gives the sniffer nothing to work with, so
    // the event must fall back to the literal "unknown" language label.
    let source_path = "/tmp/some_script_no_ext";
    let event = TelemetryEvent::no_spec(&make_diag(source_path), VerifyStatus::Unsupported, None);

    assert_eq!(event.lang, "unknown");
    assert_eq!(event.status, "Unsupported");
    assert_eq!(event.path.as_deref(), Some(source_path));
    assert!(event.spec_hash.is_empty());
}
|
||||
|
||||
#[test]
fn no_spec_event_sniffs_lang_from_extension_when_present() {
    // A recognised extension is enough to label the event's language even
    // though no spec was ever derived.
    let source_path = "/tmp/handler.py";
    let event = TelemetryEvent::no_spec(&make_diag(source_path), VerifyStatus::Inconclusive, None);

    assert_eq!(event.lang, "python");
    assert_eq!(event.path.as_deref(), Some(source_path));
    assert!(event.spec_hash.is_empty());
}
|
||||
|
||||
#[test]
fn no_spec_event_serialises_inconclusive_reason() {
    use crate::evidence::SpecDerivationStrategy;

    let reason = InconclusiveReason::SpecDerivationFailed {
        tried: vec![SpecDerivationStrategy::FromFlowSteps],
        hint: String::from("kotlin source"),
    };
    let diag = make_diag("/tmp/x.kt");
    let event = TelemetryEvent::no_spec(&diag, VerifyStatus::Inconclusive, Some(reason));
    let json = serde_json::to_string(&event).unwrap();

    // NOTE(review): this pins `.kt` to the "java" label — presumably
    // `Lang::from_extension` groups Kotlin with Java; confirm against it.
    let expected_fragments = [
        "\"lang\":\"java\"",
        "SpecDerivationFailed",
        "\"path\":\"/tmp/x.kt\"",
    ];
    for fragment in expected_fragments {
        assert!(json.contains(fragment));
    }
}
|
||||
|
||||
#[test]
|
||||
fn nyx_no_telemetry_suppresses_writes() {
|
||||
let dir = TempDir::new().unwrap();
|
||||
|
|
|
|||
|
|
@ -185,21 +185,31 @@ fn spec_derivation_failed_verdict(
|
|||
let strategies: Vec<SpecDerivationStrategy> =
|
||||
HarnessSpec::derivation_strategies().to_vec();
|
||||
let hint = derivation_failure_hint(diag);
|
||||
let inconclusive_reason = InconclusiveReason::SpecDerivationFailed {
|
||||
tried: strategies,
|
||||
hint,
|
||||
};
|
||||
let event = TelemetryEvent::no_spec(
|
||||
diag,
|
||||
VerifyStatus::Inconclusive,
|
||||
Some(inconclusive_reason.clone()),
|
||||
);
|
||||
telemetry::emit(&event);
|
||||
return VerifyResult {
|
||||
finding_id,
|
||||
status: VerifyStatus::Inconclusive,
|
||||
triggered_payload: None,
|
||||
reason: None,
|
||||
inconclusive_reason: Some(InconclusiveReason::SpecDerivationFailed {
|
||||
tried: strategies,
|
||||
hint,
|
||||
}),
|
||||
inconclusive_reason: Some(inconclusive_reason),
|
||||
detail: None,
|
||||
attempts: vec![],
|
||||
toolchain_match: None,
|
||||
};
|
||||
}
|
||||
|
||||
let event = TelemetryEvent::no_spec(diag, VerifyStatus::Unsupported, None);
|
||||
telemetry::emit(&event);
|
||||
|
||||
VerifyResult {
|
||||
finding_id,
|
||||
status: VerifyStatus::Unsupported,
|
||||
|
|
|
|||
|
|
@ -1,14 +1,21 @@
|
|||
// Command injection — negative fixture.
|
||||
// Safe: exec with args array; no shell; semicolons are inert.
|
||||
// Safe: exec with args array; no shell; injected metacharacters are inert.
|
||||
// Entry: Entry.runPing(String) Cap: CODE_EXEC
|
||||
// Expected verdict: NotConfirmed
|
||||
//
|
||||
// `id` ignores extra positional args (treats them as usernames it can't find
|
||||
// and writes the "no such user" error to stderr, not stdout). Switching from
|
||||
// `echo` keeps the array-exec demonstration intact while ensuring the
|
||||
// vuln-payload marker can never leak into the stdout stream the oracle reads.
|
||||
|
||||
import java.io.*;
|
||||
|
||||
public class Entry {
|
||||
public static void runPing(String host) throws Exception {
|
||||
// Sink-reachability probe: we did reach the exec call site.
|
||||
System.out.print("__NYX_SINK_HIT__\n");
|
||||
// Array form: each element is a literal argument — no shell expansion.
|
||||
String[] cmd = {"echo", "hello", host};
|
||||
String[] cmd = {"id", host};
|
||||
Process p = Runtime.getRuntime().exec(cmd);
|
||||
BufferedReader reader = new BufferedReader(new InputStreamReader(p.getInputStream()));
|
||||
String line;
|
||||
|
|
|
|||
|
|
@ -7,7 +7,10 @@ import java.io.*;
|
|||
import java.nio.file.*;
|
||||
|
||||
public class Entry {
|
||||
private static final String BASE_DIR = "/var/data";
|
||||
// `/tmp` exists on Linux and macOS so `toRealPath()` resolves cleanly on
|
||||
// both. The traversal payload still escapes the base (which is the point
|
||||
// of the safe-path check) so the verdict stays NotConfirmed.
|
||||
private static final String BASE_DIR = "/tmp";
|
||||
|
||||
public static void readFile(String userPath) throws Exception {
|
||||
Path base = Paths.get(BASE_DIR).toRealPath();
|
||||
|
|
|
|||
|
|
@ -2,13 +2,17 @@
|
|||
// Vulnerable: reads file at user-controlled path without sanitization.
|
||||
// Entry: Entry.readFile(String) Cap: FILE_IO
|
||||
// Expected verdict: Confirmed (../../../../etc/passwd → "root:" in output)
|
||||
//
|
||||
// Base directory is `/tmp` rather than `/var/data` so the harness can resolve
|
||||
// the traversal payload on both Linux and macOS hosts (`/var/data` is absent
|
||||
// on macOS, which makes `Files.readAllBytes` throw before reaching the file).
|
||||
|
||||
import java.io.*;
|
||||
import java.nio.file.*;
|
||||
|
||||
public class Entry {
|
||||
public static void readFile(String userPath) throws Exception {
|
||||
Path filePath = Paths.get("/var/data", userPath);
|
||||
Path filePath = Paths.get("/tmp", userPath);
|
||||
System.out.print("__NYX_SINK_HIT__\n");
|
||||
try {
|
||||
String content = new String(Files.readAllBytes(filePath));
|
||||
|
|
|
|||
|
|
@ -2,16 +2,29 @@
|
|||
// Safe: uses execFile (no shell) with args array; shell metacharacters ignored.
|
||||
// Entry: runPing(host) Cap: CODE_EXEC
|
||||
// Expected verdict: NotConfirmed
|
||||
//
|
||||
// The host string is handed to `true` so it can never appear on stdout (which
|
||||
// would let an oracle scanning for the vuln-payload marker fire even though
|
||||
// no command was executed by a shell). Sub-process stderr is silenced with
|
||||
// `stdio: 'ignore'` so the same logic holds for `outcome.stderr` — Node's
|
||||
// default of inheriting child stderr to the parent stream lets `execFile`
|
||||
// error messages leak the host string otherwise.
|
||||
|
||||
const { execFileSync } = require('child_process');
|
||||
|
||||
function runPing(host) {
|
||||
// Sink-reachability probe: we did reach the execFile call site.
|
||||
process.stdout.write('__NYX_SINK_HIT__\n');
|
||||
// execFile does not invoke a shell — semicolons and metacharacters are inert.
|
||||
try {
|
||||
const out = execFileSync('echo', ['hello', host], { encoding: 'utf8', timeout: 5000 });
|
||||
const out = execFileSync('true', [host], {
|
||||
encoding: 'utf8',
|
||||
timeout: 5000,
|
||||
stdio: ['ignore', 'pipe', 'ignore'],
|
||||
});
|
||||
process.stdout.write(out);
|
||||
} catch (e) {
|
||||
process.stdout.write('error\n');
|
||||
// true exits 0 with no output; the catch is defensive.
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -8,7 +8,10 @@ pub fn run(payload: &str) {
|
|||
use std::io::Read;
|
||||
|
||||
// Vulnerable: path joins base with user input without canonicalization.
|
||||
let path = format!("/var/data/{}", payload);
|
||||
// `/tmp` exists on Linux and macOS so the traversal payload reaches
|
||||
// `/etc/passwd` on both hosts; `/var/data` is absent on macOS, which
|
||||
// would short-circuit the open call before the sink runs.
|
||||
let path = format!("/tmp/{}", payload);
|
||||
|
||||
println!("__NYX_SINK_HIT__");
|
||||
let _ = std::io::Write::flush(&mut std::io::stdout());
|
||||
|
|
|
|||
|
|
@ -21,7 +21,11 @@ pub fn run(payload: &str) {
|
|||
println!("__NYX_SINK_HIT__");
|
||||
let _ = std::io::Write::flush(&mut std::io::stdout());
|
||||
|
||||
match conn.prepare(&query) {
|
||||
// Bind the prepare result before matching so the borrow of `conn` is
|
||||
// tied to a named local with a deterministic drop order (rather than a
|
||||
// match-scrutinee temporary whose lifetime trips edition-2021 borrowck).
|
||||
let prepared = conn.prepare(&query);
|
||||
match prepared {
|
||||
Ok(mut stmt) => {
|
||||
let _ = stmt.query_map([], |row| row.get::<_, String>(0)).map(|rows| {
|
||||
for name in rows.flatten() {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue