//! Rust harness emitter.
//!
//! Generates a binary crate that:
//! 1. Reads the payload from `NYX_PAYLOAD` / `NYX_PAYLOAD_B64` env vars.
//! 2. Calls the entry function from `src/entry.rs` with the payload routed
//!    to the correct parameter slot.
//! 3. The entry function calls `println!("__NYX_SINK_HIT__")` before the
//!    actual sink invocation (sink-reachability probe).
//! 4. Captures outcome via stdout markers and exit code (§4.1).
//!
//! Build step: the runner calls `build_sandbox::prepare_rust()` which runs
//! `cargo build --release` in the workdir. `harness.command` is updated to
//! the compiled binary path before sandbox execution.
//!
//! Payload slot support:
//! - `PayloadSlot::Param(0)` — pass payload as `&str` first argument.
//! - `PayloadSlot::EnvVar(name)` — set env var before calling entry.
//! - All other slots (`Stdin`, `Param(n>0)`, `QueryParam`, `HttpBody`, `Argv`)
//!   produce `UnsupportedReason::PayloadSlotUnsupported`. Stdin piping into the
//!   generated harness is not yet wired (deferred).
//!
//! HTML_ESCAPE is n/a for Rust (§15.4).

use crate::dynamic::environment::{Environment, RuntimeArtifacts};
use crate::dynamic::lang::{HarnessSource, LangEmitter};
use crate::dynamic::spec::{EntryKind, HarnessSpec, PayloadSlot};
use crate::evidence::UnsupportedReason;
use crate::labels::Cap;

/// Zero-sized [`LangEmitter`] handle for Rust. Method bodies delegate to the
/// free functions in this module.
pub struct RustEmitter;

/// Entry kinds the Rust emitter currently understands.
///
/// Only `EntryKind::Function` is supported today. Phase 16 (Track B Rust +
/// C/C++ vertical) is planned to add `HttpRoute` (`actix_web`, `axum`),
/// `CliSubcommand` (clap), and `LibraryApi` (libfuzzer).
const SUPPORTED: &[EntryKind] = &[EntryKind::Function];

impl LangEmitter for RustEmitter {
    /// Emits the harness for `spec`; see the module-level [`emit`] function.
    fn emit(&self, spec: &HarnessSpec) -> Result<HarnessSource, UnsupportedReason> {
        emit(spec)
    }

    /// Returns the static list of entry kinds this emitter can handle.
    fn entry_kinds_supported(&self) -> &'static [EntryKind] {
        SUPPORTED
    }

    /// Builds a human-readable hint naming the supported kinds and the
    /// `attempted` kind that was rejected.
    fn entry_kind_hint(&self, attempted: EntryKind) -> String {
        format!(
            "rust emitter supports {SUPPORTED:?}; this finding's enclosing context is \
             `EntryKind::{attempted}` — Track B will add actix / axum / clap / libfuzzer \
             shapes in phase 16"
        )
    }

    /// Produces the runtime artifacts for `env`; see [`materialize_rust`].
    fn materialize_runtime(&self, env: &Environment) -> RuntimeArtifacts {
        materialize_rust(env)
    }
}

/// Phase 09 — Track D.2: synthesise a `Cargo.toml` that lists every
/// captured crate dep.
///
/// Every entry of `env.direct_deps` that is not a standard-library or
/// path-keyword name (see [`is_rust_stdlib`]) is emitted once, in sorted
/// order, with the wildcard version requirement `"*"`.
///
/// The base cap-driven dep set lives in [`generate_cargo_toml`].
/// NOTE(review): this function does not call [`generate_cargo_toml`], so the
/// manifest produced here contains only the direct deps — confirm that the
/// caller merges the two manifests if both dep sets are needed.
///
/// Returns a [`RuntimeArtifacts`] holding a single `Cargo.toml` entry.
pub fn materialize_rust(env: &Environment) -> RuntimeArtifacts {
    // An ordered set de-duplicates and sorts in one step, which keeps the
    // generated manifest deterministic regardless of input order.
    let mut deps: std::collections::BTreeSet<&str> = std::collections::BTreeSet::new();
    for d in &env.direct_deps {
        let name: &str = d;
        if !is_rust_stdlib(name) {
            deps.insert(name);
        }
    }

    let mut body = String::with_capacity(256);
    body.push_str("[package]\n");
    body.push_str("name = \"nyx-harness\"\n");
    body.push_str("version = \"0.1.0\"\n");
    body.push_str("edition = \"2021\"\n\n");
    body.push_str("[[bin]]\n");
    body.push_str("name = \"nyx_harness\"\n");
    body.push_str("path = \"src/main.rs\"\n\n");
    body.push_str("[dependencies]\n");
    for name in &deps {
        body.push_str(name);
        body.push_str(" = \"*\"\n");
    }

    let mut artifacts = RuntimeArtifacts::new();
    artifacts.push("Cargo.toml", body);
    artifacts
}

/// Returns `true` when `name` is a crate shipped with the toolchain (`std`,
/// `core`, `alloc`, `proc_macro`, `test`) or a path keyword (`self`, `super`,
/// `crate`), i.e. something that must not appear under `[dependencies]`.
fn is_rust_stdlib(name: &str) -> bool {
    matches!(
        name,
        "std" | "core" | "alloc" | "proc_macro" | "test" | "self" | "super" | "crate"
    )
}

/// Source of the `__nyx_probe` shim for the Rust harness (Phase 06 —
/// Track C.1).
/// /// Defined here so future sink-rewrite passes can splice /// `__nyx_probe("os.system", payload)` into the entry source without /// depending on serde at the harness boundary. Hand-rolled JSON keeps /// the shim's only dep on `std`; matches the /// [`crate::dynamic::probe::SinkProbe`] wire format. pub fn probe_shim() -> &'static str { r#" // ── __nyx_probe shim (Phase 06 — Track C.1, Phase 08 — Track C.4 + C.5) ────── #[allow(dead_code)] const __NYX_DENY_SUBSTRINGS: &[&str] = &[ "TOKEN","SECRET","PASSWORD","PASSWD","API_KEY","APIKEY","PRIVATE_KEY", "CREDENTIAL","SESSION","COOKIE","AUTH","BEARER","AWS_ACCESS","AWS_SESSION", "GH_TOKEN","GITHUB_TOKEN","NPM_TOKEN","PYPI_TOKEN","DOCKER_PASS", ]; #[allow(dead_code)] const __NYX_PAYLOAD_LIMIT: usize = 16 * 1024; #[allow(dead_code)] const __NYX_REDACTED: &str = ""; #[allow(dead_code)] fn __nyx_esc(s: &str, out: &mut String) { for ch in s.chars() { match ch { '"' => out.push_str("\\\""), '\\' => out.push_str("\\\\"), '\n' => out.push_str("\\n"), '\r' => out.push_str("\\r"), '\t' => out.push_str("\\t"), c if (c as u32) < 0x20 => out.push_str(&format!("\\u{:04x}", c as u32)), c => out.push(c), } } } #[allow(dead_code)] fn __nyx_witness_json(sink_callee: &str, args: &[&str]) -> String { let mut out = String::with_capacity(256); out.push_str("{\"env_snapshot\":{"); let mut first = true; let mut keys: Vec<(String, String)> = std::env::vars().collect(); keys.sort(); for (k, v) in keys { let ku = k.to_ascii_uppercase(); let denied = __NYX_DENY_SUBSTRINGS.iter().any(|n| ku.contains(n)); let val = if denied { __NYX_REDACTED } else { v.as_str() }; if !first { out.push(','); } first = false; out.push('"'); __nyx_esc(&k, &mut out); out.push_str("\":\""); __nyx_esc(val, &mut out); out.push('"'); } out.push_str("},\"cwd\":\""); let cwd = std::env::current_dir() .map(|p| p.to_string_lossy().into_owned()) .unwrap_or_default(); __nyx_esc(&cwd, &mut out); out.push_str("\",\"payload_bytes\":["); let payload = 
std::env::var("NYX_PAYLOAD").unwrap_or_default(); let bytes = payload.as_bytes(); let cap = bytes.len().min(__NYX_PAYLOAD_LIMIT); for i in 0..cap { if i > 0 { out.push(','); } out.push_str(&format!("{}", bytes[i])); } out.push_str("],\"callee\":\""); __nyx_esc(sink_callee, &mut out); out.push_str("\",\"args_repr\":["); for (i, a) in args.iter().enumerate() { if i > 0 { out.push(','); } out.push('"'); __nyx_esc(a, &mut out); out.push('"'); } out.push_str("]}"); out } #[allow(dead_code)] fn __nyx_emit(line: &str) { use std::io::Write; let p = match std::env::var("NYX_PROBE_PATH") { Ok(v) => v, Err(_) => return, }; if let Ok(mut f) = std::fs::OpenOptions::new() .create(true) .append(true) .open(&p) { let _ = f.write_all(line.as_bytes()); let _ = f.write_all(b"\n"); } } #[allow(dead_code)] fn __nyx_probe(sink_callee: &str, args: &[&str]) { let now = std::time::SystemTime::now() .duration_since(std::time::UNIX_EPOCH) .map(|d| d.as_nanos() as u64) .unwrap_or(0); let payload_id = std::env::var("NYX_PAYLOAD_ID").unwrap_or_default(); let mut line = String::with_capacity(256); line.push_str("{\"sink_callee\":\""); __nyx_esc(sink_callee, &mut line); line.push_str("\",\"args\":["); for (i, a) in args.iter().enumerate() { if i > 0 { line.push(','); } line.push_str("{\"kind\":\"String\",\"value\":\""); __nyx_esc(a, &mut line); line.push_str("\"}"); } line.push_str(&format!( "],\"captured_at_ns\":{},\"payload_id\":\"", now )); __nyx_esc(&payload_id, &mut line); line.push_str("\",\"kind\":{\"kind\":\"Normal\"},\"witness\":"); line.push_str(&__nyx_witness_json(sink_callee, args)); line.push('}'); __nyx_emit(&line); } // Phase 08: install a sink-site signal handler via `libc::sigaction` so a // SIGSEGV / SIGABRT / etc. inside the sink call is captured as a Crash // probe before the kernel re-delivers it via SIG_DFL. The shim is // no-op on non-Unix targets (the dynamic-verification supported set is // Unix-only) so consumers can splice it unconditionally. 
#[cfg(unix)] #[allow(dead_code)] fn __nyx_install_crash_guard(sink_callee: &'static str) { use std::sync::atomic::{AtomicPtr, Ordering}; static SINK_CALLEE: AtomicPtr = AtomicPtr::new(std::ptr::null_mut()); SINK_CALLEE.store(sink_callee.as_ptr() as *mut u8, Ordering::SeqCst); let len = sink_callee.len(); static CALLEE_LEN: std::sync::atomic::AtomicUsize = std::sync::atomic::AtomicUsize::new(0); CALLEE_LEN.store(len, Ordering::SeqCst); extern "C" fn handler(sig: i32) { // async-signal-unsafe code is unavoidable here (file I/O); we // accept the risk because the process is already dying and we // need the forensic record. let name = match sig { libc::SIGSEGV => "SIGSEGV", libc::SIGABRT => "SIGABRT", libc::SIGBUS => "SIGBUS", libc::SIGFPE => "SIGFPE", libc::SIGILL => "SIGILL", _ => "SIGABRT", }; let p = SINK_CALLEE.load(Ordering::SeqCst); let len = CALLEE_LEN.load(Ordering::SeqCst); let sink_callee: &str = unsafe { if p.is_null() { "" } else { let slice = std::slice::from_raw_parts(p as *const u8, len); std::str::from_utf8_unchecked(slice) } }; let now = std::time::SystemTime::now() .duration_since(std::time::UNIX_EPOCH) .map(|d| d.as_nanos() as u64) .unwrap_or(0); let payload_id = std::env::var("NYX_PAYLOAD_ID").unwrap_or_default(); let mut line = String::with_capacity(256); line.push_str("{\"sink_callee\":\""); __nyx_esc(sink_callee, &mut line); line.push_str("\",\"args\":[],\"captured_at_ns\":"); line.push_str(&format!("{now},\"payload_id\":\"")); __nyx_esc(&payload_id, &mut line); line.push_str("\",\"kind\":{\"kind\":\"Crash\",\"signal\":\""); line.push_str(name); line.push_str("\"},\"witness\":"); line.push_str(&__nyx_witness_json(sink_callee, &[])); line.push('}'); __nyx_emit(&line); // Restore default handler and re-raise so process actually dies. 
unsafe { let mut sa: libc::sigaction = std::mem::zeroed(); sa.sa_sigaction = libc::SIG_DFL; libc::sigaction(sig, &sa, std::ptr::null_mut()); libc::raise(sig); } } unsafe { let mut sa: libc::sigaction = std::mem::zeroed(); sa.sa_sigaction = handler as usize; libc::sigemptyset(&mut sa.sa_mask); for sig in [libc::SIGSEGV, libc::SIGABRT, libc::SIGBUS, libc::SIGFPE, libc::SIGILL] { libc::sigaction(sig, &sa, std::ptr::null_mut()); } } } #[cfg(not(unix))] #[allow(dead_code)] fn __nyx_install_crash_guard(_sink_callee: &'static str) {} "# } /// Emit a Rust harness for `spec`. pub fn emit(spec: &HarnessSpec) -> Result { match &spec.payload_slot { PayloadSlot::Param(0) | PayloadSlot::EnvVar(_) => {} _ => return Err(UnsupportedReason::PayloadSlotUnsupported), } let cargo_toml = generate_cargo_toml(spec.expected_cap); let main_rs = generate_main_rs(spec); Ok(HarnessSource { source: main_rs, filename: "src/main.rs".into(), command: vec!["target/release/nyx_harness".into()], extra_files: vec![("Cargo.toml".into(), cargo_toml)], entry_subpath: Some("src/entry.rs".into()), }) } /// Generate `Cargo.toml` for the harness crate. /// /// Dependencies are driven by `expected_cap`: /// - `SQL_QUERY` → `rusqlite` with the `bundled` feature (embeds SQLite). /// - Other caps use only std (no extra deps). pub fn generate_cargo_toml(cap: Cap) -> String { let mut deps = String::new(); if cap.contains(Cap::SQL_QUERY) { deps.push_str("rusqlite = { version = \"0.39\", features = [\"bundled\"] }\n"); } format!( "[package]\n\ name = \"nyx-harness\"\n\ version = \"0.1.0\"\n\ edition = \"2021\"\n\n\ [[bin]]\n\ name = \"nyx_harness\"\n\ path = \"src/main.rs\"\n\n\ [dependencies]\n\ {deps}" ) } /// Generate `src/main.rs` — the harness entry point. /// /// Reads the payload from env, calls `entry::{entry_name}` with the payload /// routed according to `spec.payload_slot`. 
fn generate_main_rs(spec: &HarnessSpec) -> String { let entry_fn = &spec.entry_name; let (pre_call, call_expr) = build_call(spec, entry_fn); format!( r#"//! Nyx dynamic harness — auto-generated, do not edit. mod entry; fn main() {{ let payload = nyx_payload(); {pre_call} {call_expr} }} fn nyx_payload() -> String {{ // Prefer raw NYX_PAYLOAD (set on Unix). if let Ok(v) = std::env::var("NYX_PAYLOAD") {{ if !v.is_empty() {{ return v; }} }} // Fall back to base64-encoded NYX_PAYLOAD_B64. if let Ok(b64) = std::env::var("NYX_PAYLOAD_B64") {{ if let Some(bytes) = b64_decode(b64.as_bytes()) {{ return String::from_utf8_lossy(&bytes).into_owned(); }} }} String::new() }} /// Minimal base64 decoder (no external deps). fn b64_decode(input: &[u8]) -> Option> {{ const TABLE: [u8; 128] = {{ // `while` loop (not `for`) so the initializer stays inside what stable // Rust permits in a `const` context: `IntoIterator::into_iter` is not a // const fn, so a `for` loop here fails with E0015. let mut t = [255u8; 128]; let alphabet: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; let mut i = 0usize; while i < alphabet.len() {{ t[alphabet[i] as usize] = i as u8; i += 1; }} t }}; let input: Vec = input.iter().copied().filter(|&c| c != b'\n' && c != b'\r').collect(); let mut out = Vec::with_capacity(input.len() * 3 / 4); let mut i = 0; while i + 3 < input.len() {{ let a = *TABLE.get(input[i] as usize)? as u32; let b = *TABLE.get(input[i + 1] as usize)? as u32; let c = if input[i + 2] == b'=' {{ 64 }} else {{ *TABLE.get(input[i + 2] as usize)? as u32 }}; let d = if input[i + 3] == b'=' {{ 64 }} else {{ *TABLE.get(input[i + 3] as usize)? 
as u32 }}; if a == 255 || b == 255 || c == 255 || d == 255 {{ return None; }} out.push(((a << 2) | (b >> 4)) as u8); if input[i + 2] != b'=' {{ out.push(((b << 4) | (c >> 2)) as u8); }} if input[i + 3] != b'=' {{ out.push(((c << 6) | d) as u8); }} i += 4; }} Some(out) }} "#, pre_call = pre_call, call_expr = call_expr, ) } /// Build `(pre_call_setup, call_expression)` strings for the chosen payload slot. fn build_call(spec: &HarnessSpec, func: &str) -> (String, String) { match &spec.payload_slot { PayloadSlot::Param(0) => { let pre = String::new(); let call = format!("entry::{func}(&payload);"); (pre, call) } PayloadSlot::EnvVar(name) => { let pre = format!(" std::env::set_var({name:?}, &payload);\n"); let call = format!("entry::{func}();"); (pre, call) } _ => { // Unreachable: `emit()` rejects all other slots up front. let pre = String::new(); let call = format!("entry::{func}(&payload);"); (pre, call) } } } #[cfg(test)] mod tests { use super::*; use crate::dynamic::spec::{EntryKind, HarnessSpec, PayloadSlot}; use crate::labels::Cap; use crate::symbol::Lang; fn make_spec(payload_slot: PayloadSlot) -> HarnessSpec { HarnessSpec { finding_id: "rust000000000001".into(), entry_file: "src/handler.rs".into(), entry_name: "run".into(), entry_kind: EntryKind::Function, lang: Lang::Rust, toolchain_id: "rust-stable".into(), payload_slot, expected_cap: Cap::SQL_QUERY, constraint_hints: vec![], sink_file: "src/handler.rs".into(), sink_line: 10, spec_hash: "rusttest00000001".into(), derivation: crate::dynamic::spec::SpecDerivationStrategy::FromFlowSteps, stubs_required: vec![], } } #[test] fn emit_sql_query_produces_source() { let spec = make_spec(PayloadSlot::Param(0)); let harness = emit(&spec).unwrap(); assert!(harness.source.contains("mod entry;")); assert!(harness.source.contains("nyx_payload()")); assert!(harness.source.contains("entry::run(&payload)")); assert_eq!(harness.filename, "src/main.rs"); assert_eq!(harness.command, vec!["target/release/nyx_harness"]); } #[test] 
fn emit_includes_cargo_toml_in_extra_files() { let spec = make_spec(PayloadSlot::Param(0)); let harness = emit(&spec).unwrap(); let cargo = harness.extra_files.iter().find(|(n, _)| n == "Cargo.toml"); assert!(cargo.is_some(), "Cargo.toml must be in extra_files"); let cargo_content = &cargo.unwrap().1; assert!(cargo_content.contains("rusqlite"), "SQL_QUERY cap needs rusqlite dep"); assert!(cargo_content.contains("bundled"), "rusqlite must use bundled feature"); } #[test] fn emit_code_exec_no_rusqlite_dep() { let mut spec = make_spec(PayloadSlot::Param(0)); spec.expected_cap = Cap::CODE_EXEC; let harness = emit(&spec).unwrap(); let cargo = harness.extra_files.iter().find(|(n, _)| n == "Cargo.toml").unwrap(); assert!(!cargo.1.contains("rusqlite"), "CODE_EXEC must not have rusqlite dep"); } #[test] fn emit_entry_subpath_is_src_entry_rs() { let spec = make_spec(PayloadSlot::Param(0)); let harness = emit(&spec).unwrap(); assert_eq!(harness.entry_subpath, Some("src/entry.rs".to_string())); } #[test] fn emit_env_var_slot() { let spec = make_spec(PayloadSlot::EnvVar("NYX_INPUT".into())); let harness = emit(&spec).unwrap(); assert!(harness.source.contains("set_var")); assert!(harness.source.contains("\"NYX_INPUT\"")); } #[test] fn emit_param_gt_0_is_unsupported() { let spec = make_spec(PayloadSlot::Param(1)); let err = emit(&spec).unwrap_err(); assert_eq!(err, UnsupportedReason::PayloadSlotUnsupported); } #[test] fn cargo_toml_has_correct_bin_target() { let cargo = generate_cargo_toml(Cap::SQL_QUERY); assert!(cargo.contains("name = \"nyx_harness\"")); assert!(cargo.contains("path = \"src/main.rs\"")); } #[test] fn entry_kinds_supported_is_non_empty() { assert!(!RustEmitter.entry_kinds_supported().is_empty()); assert!(RustEmitter .entry_kinds_supported() .contains(&EntryKind::Function)); } #[test] fn entry_kind_hint_names_attempted_and_phase() { let hint = RustEmitter.entry_kind_hint(EntryKind::HttpRoute); assert!(hint.contains("HttpRoute")); assert!(hint.contains("phase 
16")); } #[test] fn b64_decode_roundtrip() { // Test by compiling: actual b64_decode is in generated code. // Just verify the Cargo.toml generation doesn't panic. let _ = generate_cargo_toml(Cap::FILE_IO); let _ = generate_cargo_toml(Cap::CODE_EXEC); let _ = generate_cargo_toml(Cap::SSRF); } }