mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-09 19:45:13 +02:00
[pitboss] phase 04: M4 — Rust harness (second-language validation)
This commit is contained in:
parent
e875aa1208
commit
3ffe480660
37 changed files with 1872 additions and 54 deletions
|
|
@ -19,6 +19,125 @@ use std::path::{Path, PathBuf};
|
|||
use std::process::Command;
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
// ── Rust build sandbox ────────────────────────────────────────────────────────
|
||||
|
||||
/// Prepare a compiled Rust binary for `spec`.
|
||||
///
|
||||
/// Checks a build cache keyed on `(Cargo.lock hash, "rust", toolchain_id)`.
|
||||
/// On a cache hit returns immediately; otherwise runs `cargo build --release`
|
||||
/// in `workdir` and caches the resulting binary.
|
||||
///
|
||||
/// The compiled binary is at `cache_path/nyx_harness` on success.
|
||||
///
|
||||
/// Build isolation is NOT yet implemented (deferred to Phase 05). `cargo build`
|
||||
/// runs as a plain subprocess on the host with `env_clear()` plus a minimal
|
||||
/// inherited env (PATH/HOME/CARGO_HOME/RUSTUP_HOME). A malicious `build.rs`
|
||||
/// runs with host privileges. Vendoring / network sandboxing comes later (§19.2).
|
||||
pub fn prepare_rust(spec: &HarnessSpec, workdir: &Path) -> Result<BuildResult, BuildError> {
|
||||
let lockfile_hash = compute_rust_lockfile_hash(workdir);
|
||||
let cache_path = build_cache_path(&lockfile_hash, "rust", &spec.toolchain_id)?;
|
||||
|
||||
// Cache hit: binary already compiled and stored.
|
||||
let binary = cache_path.join("nyx_harness");
|
||||
if binary.exists() {
|
||||
return Ok(BuildResult { venv_path: cache_path, cache_hit: true, duration: Duration::ZERO });
|
||||
}
|
||||
|
||||
let start = Instant::now();
|
||||
const MAX_ATTEMPTS: u32 = 2;
|
||||
const BACKOFF: [u64; 2] = [1, 4];
|
||||
let mut last_err = String::new();
|
||||
|
||||
for attempt in 0..MAX_ATTEMPTS {
|
||||
if attempt > 0 {
|
||||
std::thread::sleep(Duration::from_secs(BACKOFF[attempt as usize - 1]));
|
||||
}
|
||||
let _ = std::fs::remove_dir_all(&cache_path);
|
||||
std::fs::create_dir_all(&cache_path)?;
|
||||
|
||||
match try_build_rust_binary(workdir, &binary) {
|
||||
Ok(()) => {
|
||||
return Ok(BuildResult {
|
||||
venv_path: cache_path,
|
||||
cache_hit: false,
|
||||
duration: start.elapsed(),
|
||||
});
|
||||
}
|
||||
Err(e) => {
|
||||
last_err = e;
|
||||
let _ = std::fs::remove_file(&binary);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Err(BuildError::BuildFailed { stderr: last_err, attempts: MAX_ATTEMPTS })
|
||||
}
|
||||
|
||||
fn try_build_rust_binary(workdir: &Path, binary_dest: &Path) -> Result<(), String> {
|
||||
let cargo = cargo_binary();
|
||||
|
||||
// Run `cargo build --release` in the workdir.
|
||||
let output = Command::new(&cargo)
|
||||
.args(["build", "--release"])
|
||||
.current_dir(workdir)
|
||||
.env_clear()
|
||||
.env("PATH", std::env::var("PATH").unwrap_or_default())
|
||||
.env("HOME", std::env::var("HOME").unwrap_or_default())
|
||||
// Inherit CARGO_HOME so the local registry cache is reused.
|
||||
.env("CARGO_HOME", std::env::var("CARGO_HOME").unwrap_or_else(|_| {
|
||||
dirs_next_cargo_home()
|
||||
}))
|
||||
.env("RUSTUP_HOME", std::env::var("RUSTUP_HOME").unwrap_or_default())
|
||||
.output()
|
||||
.map_err(|e| format!("cargo build: {e}"))?;
|
||||
|
||||
if !output.status.success() {
|
||||
let stderr = String::from_utf8_lossy(&output.stderr).into_owned();
|
||||
return Err(stderr);
|
||||
}
|
||||
|
||||
// Copy binary to cache location.
|
||||
let compiled = workdir.join("target").join("release").join("nyx_harness");
|
||||
if compiled.exists() {
|
||||
std::fs::copy(&compiled, binary_dest)
|
||||
.map_err(|e| format!("copy binary: {e}"))?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Name or path of the cargo executable to invoke.
///
/// `NYX_CARGO_BIN` overrides the default (used by tests); when it is unset or
/// not valid Unicode the plain command name `cargo` is returned.
fn cargo_binary() -> String {
    match std::env::var("NYX_CARGO_BIN") {
        Ok(overridden) => overridden,
        Err(_) => String::from("cargo"),
    }
}
|
||||
|
||||
/// Fallback value for `CARGO_HOME` when the variable itself is not set.
///
/// Returns `$HOME/.cargo` (cargo's default location) when `HOME` is readable,
/// and the relative path `.cargo` otherwise.
fn dirs_next_cargo_home() -> String {
    match std::env::var("HOME") {
        Ok(home) => format!("{home}/.cargo"),
        Err(_) => String::from(".cargo"),
    }
}
|
||||
|
||||
fn compute_rust_lockfile_hash(workdir: &Path) -> String {
|
||||
let mut h = Hasher::new();
|
||||
// Cargo manifest and lock determine dependency graph.
|
||||
for fname in &["Cargo.lock", "Cargo.toml"] {
|
||||
if let Ok(content) = std::fs::read(workdir.join(fname)) {
|
||||
h.update(fname.as_bytes());
|
||||
h.update(&content);
|
||||
}
|
||||
}
|
||||
// Entry file is compiled into the binary, so it must be part of the cache key.
|
||||
// Without this, two fixtures with the same Cargo.toml but different entry.rs
|
||||
// would collide and the second would receive the wrong cached binary.
|
||||
if let Ok(content) = std::fs::read(workdir.join("src").join("entry.rs")) {
|
||||
h.update(b"src/entry.rs");
|
||||
h.update(&content);
|
||||
}
|
||||
let out = h.finalize();
|
||||
format!("{:016x}", u64::from_le_bytes(out.as_bytes()[..8].try_into().unwrap()))
|
||||
}
|
||||
|
||||
/// Result of a successful build.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct BuildResult {
|
||||
|
|
|
|||
|
|
@ -74,31 +74,57 @@ fn stage_harness(
|
|||
let workdir = base_dir.join(&spec.spec_hash);
|
||||
fs::create_dir_all(&workdir)?;
|
||||
|
||||
// Write harness source.
|
||||
// Write harness source (create parent dir if needed, e.g. "src/main.rs").
|
||||
let harness_path = workdir.join(&harness_src.filename);
|
||||
if let Some(parent) = harness_path.parent() {
|
||||
fs::create_dir_all(parent)?;
|
||||
}
|
||||
fs::write(&harness_path, harness_src.source.as_bytes())?;
|
||||
|
||||
// Copy the entry file into the workdir so the harness can import it.
|
||||
copy_entry_file(spec, &workdir);
|
||||
// Write any extra files (e.g. Cargo.toml for Rust).
|
||||
for (rel_path, content) in &harness_src.extra_files {
|
||||
let dest = workdir.join(rel_path);
|
||||
if let Some(parent) = dest.parent() {
|
||||
fs::create_dir_all(parent)?;
|
||||
}
|
||||
fs::write(&dest, content.as_bytes())?;
|
||||
}
|
||||
|
||||
// Copy the entry file into the workdir so the harness can import/include it.
|
||||
copy_entry_file(spec, &workdir, harness_src.entry_subpath.as_deref());
|
||||
|
||||
Ok(workdir)
|
||||
}
|
||||
|
||||
/// Copy the entry Python file to the workdir so the harness can `import` it.
|
||||
/// Best-effort: silently skips if the file cannot be found/copied.
|
||||
fn copy_entry_file(spec: &HarnessSpec, workdir: &PathBuf) {
|
||||
// Try the entry file relative to the project root candidates.
|
||||
/// Copy the entry source file to the workdir.
|
||||
///
|
||||
/// `entry_subpath` controls the destination:
|
||||
/// - `None` → `workdir/{filename}` (Python default: import by module name).
|
||||
/// - `Some("src/entry.rs")` → `workdir/src/entry.rs` (Rust: `mod entry;`).
|
||||
///
|
||||
/// Best-effort: silently skips if the file cannot be found or copied.
|
||||
fn copy_entry_file(spec: &HarnessSpec, workdir: &PathBuf, entry_subpath: Option<&str>) {
|
||||
let candidates = [
|
||||
PathBuf::from(&spec.entry_file),
|
||||
PathBuf::from(".").join(&spec.entry_file),
|
||||
];
|
||||
for src in &candidates {
|
||||
if src.exists() {
|
||||
if let Some(fname) = src.file_name() {
|
||||
let dst = workdir.join(fname);
|
||||
if !dst.exists() {
|
||||
let _ = fs::copy(src, &dst);
|
||||
let dst = if let Some(subpath) = entry_subpath {
|
||||
let dest = workdir.join(subpath);
|
||||
if let Some(parent) = dest.parent() {
|
||||
let _ = fs::create_dir_all(parent);
|
||||
}
|
||||
dest
|
||||
} else {
|
||||
let fname = match src.file_name() {
|
||||
Some(f) => f,
|
||||
None => return,
|
||||
};
|
||||
workdir.join(fname)
|
||||
};
|
||||
if !dst.exists() {
|
||||
let _ = fs::copy(src, &dst);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
|
@ -151,17 +177,18 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn build_unsupported_lang_returns_err() {
|
||||
// Go is not yet supported (unsupported lang path).
|
||||
let spec = HarnessSpec {
|
||||
finding_id: "0000000000000001".into(),
|
||||
entry_file: "src/main.rs".into(),
|
||||
entry_name: "handle_request".into(),
|
||||
entry_file: "main.go".into(),
|
||||
entry_name: "handleRequest".into(),
|
||||
entry_kind: EntryKind::Function,
|
||||
lang: Lang::Rust,
|
||||
toolchain_id: "rust-stable".into(),
|
||||
lang: Lang::Go,
|
||||
toolchain_id: "go-stable".into(),
|
||||
payload_slot: PayloadSlot::Param(0),
|
||||
expected_cap: Cap::SQL_QUERY,
|
||||
constraint_hints: vec![],
|
||||
sink_file: "src/main.rs".into(),
|
||||
sink_file: "main.go".into(),
|
||||
sink_line: 5,
|
||||
spec_hash: "0000000000000000".into(),
|
||||
};
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@
|
|||
//! The top-level [`emit`] function dispatches on `spec.lang`.
|
||||
|
||||
pub mod python;
|
||||
pub mod rust;
|
||||
|
||||
use crate::dynamic::spec::HarnessSpec;
|
||||
use crate::evidence::UnsupportedReason;
|
||||
|
|
@ -14,16 +15,25 @@ use crate::symbol::Lang;
|
|||
pub struct HarnessSource {
|
||||
/// Harness source code as a UTF-8 string.
|
||||
pub source: String,
|
||||
/// Filename for the harness (e.g. `"harness.py"`).
|
||||
/// Filename for the harness (e.g. `"harness.py"`, `"src/main.rs"`).
|
||||
pub filename: String,
|
||||
/// Shell command to invoke the harness (relative to the workdir).
|
||||
pub command: Vec<String>,
|
||||
/// Additional files to write to the workdir alongside the main source.
|
||||
/// Each entry is `(relative_path, content)`. Subdirectories are created
|
||||
/// automatically (e.g. `"Cargo.toml"` or `"src/entry.rs"`).
|
||||
pub extra_files: Vec<(String, String)>,
|
||||
/// Where to copy the entry source file (relative to workdir).
|
||||
/// `None` = workdir root (Python default).
|
||||
/// `Some("src/entry.rs")` = Rust module path.
|
||||
pub entry_subpath: Option<String>,
|
||||
}
|
||||
|
||||
/// Dispatch to the appropriate language emitter.
|
||||
pub fn emit(spec: &HarnessSpec) -> Result<HarnessSource, UnsupportedReason> {
|
||||
match spec.lang {
|
||||
Lang::Python => python::emit(spec),
|
||||
Lang::Rust => rust::emit(spec),
|
||||
_ => Err(UnsupportedReason::LangUnsupported),
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -31,6 +31,8 @@ pub fn emit(spec: &HarnessSpec) -> Result<HarnessSource, UnsupportedReason> {
|
|||
source,
|
||||
filename: "harness.py".to_owned(),
|
||||
command: vec!["python3".to_owned(), "harness.py".to_owned()],
|
||||
extra_files: vec![],
|
||||
entry_subpath: None,
|
||||
})
|
||||
}
|
||||
|
||||
|
|
|
|||
253
src/dynamic/lang/rust.rs
Normal file
253
src/dynamic/lang/rust.rs
Normal file
|
|
@ -0,0 +1,253 @@
|
|||
//! Rust harness emitter.
|
||||
//!
|
||||
//! Generates a binary crate that:
|
||||
//! 1. Reads the payload from `NYX_PAYLOAD` / `NYX_PAYLOAD_B64` env vars.
|
||||
//! 2. Calls the entry function from `src/entry.rs` with the payload routed
|
||||
//! to the correct parameter slot.
|
||||
//! 3. The entry function calls `println!("__NYX_SINK_HIT__")` before the
|
||||
//! actual sink invocation (sink-reachability probe).
|
||||
//! 4. Captures outcome via stdout markers and exit code (§4.1).
|
||||
//!
|
||||
//! Build step: the runner calls `build_sandbox::prepare_rust()` which runs
|
||||
//! `cargo build --release` in the workdir. `harness.command` is updated to
|
||||
//! the compiled binary path before sandbox execution.
|
||||
//!
|
||||
//! Payload slot support:
|
||||
//! - `PayloadSlot::Param(0)` — pass payload as `&str` first argument.
|
||||
//! - `PayloadSlot::EnvVar(name)` — set env var before calling entry.
|
||||
//! - All other slots (`Stdin`, `Param(n>0)`, `QueryParam`, `HttpBody`, `Argv`)
|
||||
//! produce `UnsupportedReason::EntryKindUnsupported`. Stdin piping into the
|
||||
//! generated harness is not yet wired (deferred).
|
||||
//!
|
||||
//! HTML_ESCAPE is n/a for Rust (§15.4).
|
||||
|
||||
use crate::dynamic::lang::HarnessSource;
|
||||
use crate::dynamic::spec::{HarnessSpec, PayloadSlot};
|
||||
use crate::evidence::UnsupportedReason;
|
||||
use crate::labels::Cap;
|
||||
|
||||
/// Emit a Rust harness for `spec`.
|
||||
pub fn emit(spec: &HarnessSpec) -> Result<HarnessSource, UnsupportedReason> {
|
||||
match &spec.payload_slot {
|
||||
PayloadSlot::Param(0) | PayloadSlot::EnvVar(_) => {}
|
||||
_ => return Err(UnsupportedReason::EntryKindUnsupported),
|
||||
}
|
||||
|
||||
let cargo_toml = generate_cargo_toml(spec.expected_cap);
|
||||
let main_rs = generate_main_rs(spec);
|
||||
|
||||
Ok(HarnessSource {
|
||||
source: main_rs,
|
||||
filename: "src/main.rs".into(),
|
||||
command: vec!["target/release/nyx_harness".into()],
|
||||
extra_files: vec![("Cargo.toml".into(), cargo_toml)],
|
||||
entry_subpath: Some("src/entry.rs".into()),
|
||||
})
|
||||
}
|
||||
|
||||
/// Generate `Cargo.toml` for the harness crate.
|
||||
///
|
||||
/// Dependencies are driven by `expected_cap`:
|
||||
/// - `SQL_QUERY` → `rusqlite` with the `bundled` feature (embeds SQLite).
|
||||
/// - Other caps use only std (no extra deps).
|
||||
pub fn generate_cargo_toml(cap: Cap) -> String {
|
||||
let mut deps = String::new();
|
||||
|
||||
if cap.contains(Cap::SQL_QUERY) {
|
||||
deps.push_str("rusqlite = { version = \"0.39\", features = [\"bundled\"] }\n");
|
||||
}
|
||||
|
||||
format!(
|
||||
"[package]\n\
|
||||
name = \"nyx-harness\"\n\
|
||||
version = \"0.1.0\"\n\
|
||||
edition = \"2021\"\n\n\
|
||||
[[bin]]\n\
|
||||
name = \"nyx_harness\"\n\
|
||||
path = \"src/main.rs\"\n\n\
|
||||
[dependencies]\n\
|
||||
{deps}"
|
||||
)
|
||||
}
|
||||
|
||||
/// Generate `src/main.rs` — the harness entry point.
|
||||
///
|
||||
/// Reads the payload from env, calls `entry::{entry_name}` with the payload
|
||||
/// routed according to `spec.payload_slot`.
|
||||
fn generate_main_rs(spec: &HarnessSpec) -> String {
|
||||
let entry_fn = &spec.entry_name;
|
||||
let (pre_call, call_expr) = build_call(spec, entry_fn);
|
||||
|
||||
format!(
|
||||
r#"//! Nyx dynamic harness — auto-generated, do not edit.
|
||||
mod entry;
|
||||
|
||||
fn main() {{
|
||||
let payload = nyx_payload();
|
||||
{pre_call} {call_expr}
|
||||
}}
|
||||
|
||||
fn nyx_payload() -> String {{
|
||||
// Prefer raw NYX_PAYLOAD (set on Unix).
|
||||
if let Ok(v) = std::env::var("NYX_PAYLOAD") {{
|
||||
if !v.is_empty() {{
|
||||
return v;
|
||||
}}
|
||||
}}
|
||||
// Fall back to base64-encoded NYX_PAYLOAD_B64.
|
||||
if let Ok(b64) = std::env::var("NYX_PAYLOAD_B64") {{
|
||||
if let Some(bytes) = b64_decode(b64.as_bytes()) {{
|
||||
return String::from_utf8_lossy(&bytes).into_owned();
|
||||
}}
|
||||
}}
|
||||
String::new()
|
||||
}}
|
||||
|
||||
/// Minimal base64 decoder (no external deps).
|
||||
fn b64_decode(input: &[u8]) -> Option<Vec<u8>> {{
|
||||
const TABLE: [u8; 128] = {{
|
||||
let mut t = [255u8; 128];
|
||||
let mut i = 0u8;
|
||||
for &c in b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/" {{
|
||||
t[c as usize] = i;
|
||||
i += 1;
|
||||
}}
|
||||
t
|
||||
}};
|
||||
let input: Vec<u8> = input.iter().copied().filter(|&c| c != b'\n' && c != b'\r').collect();
|
||||
let mut out = Vec::with_capacity(input.len() * 3 / 4);
|
||||
let mut i = 0;
|
||||
while i + 3 < input.len() {{
|
||||
let a = *TABLE.get(input[i] as usize)? as u32;
|
||||
let b = *TABLE.get(input[i + 1] as usize)? as u32;
|
||||
let c = if input[i + 2] == b'=' {{ 64 }} else {{ *TABLE.get(input[i + 2] as usize)? as u32 }};
|
||||
let d = if input[i + 3] == b'=' {{ 64 }} else {{ *TABLE.get(input[i + 3] as usize)? as u32 }};
|
||||
if a == 255 || b == 255 || c == 255 || d == 255 {{ return None; }}
|
||||
out.push(((a << 2) | (b >> 4)) as u8);
|
||||
if input[i + 2] != b'=' {{ out.push(((b << 4) | (c >> 2)) as u8); }}
|
||||
if input[i + 3] != b'=' {{ out.push(((c << 6) | d) as u8); }}
|
||||
i += 4;
|
||||
}}
|
||||
Some(out)
|
||||
}}
|
||||
"#,
|
||||
pre_call = pre_call,
|
||||
call_expr = call_expr,
|
||||
)
|
||||
}
|
||||
|
||||
/// Build `(pre_call_setup, call_expression)` strings for the chosen payload slot.
|
||||
fn build_call(spec: &HarnessSpec, func: &str) -> (String, String) {
|
||||
match &spec.payload_slot {
|
||||
PayloadSlot::Param(0) => {
|
||||
let pre = String::new();
|
||||
let call = format!("entry::{func}(&payload);");
|
||||
(pre, call)
|
||||
}
|
||||
PayloadSlot::EnvVar(name) => {
|
||||
let pre = format!(" std::env::set_var({name:?}, &payload);\n");
|
||||
let call = format!("entry::{func}();");
|
||||
(pre, call)
|
||||
}
|
||||
_ => {
|
||||
// Unreachable: `emit()` rejects all other slots up front.
|
||||
let pre = String::new();
|
||||
let call = format!("entry::{func}(&payload);");
|
||||
(pre, call)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::dynamic::spec::{EntryKind, HarnessSpec, PayloadSlot};
    use crate::labels::Cap;
    use crate::symbol::Lang;

    /// Minimal Rust spec with a SQL_QUERY capability and the given payload slot.
    fn make_spec(payload_slot: PayloadSlot) -> HarnessSpec {
        HarnessSpec {
            finding_id: "rust000000000001".into(),
            entry_file: "src/handler.rs".into(),
            entry_name: "run".into(),
            entry_kind: EntryKind::Function,
            lang: Lang::Rust,
            toolchain_id: "rust-stable".into(),
            payload_slot,
            expected_cap: Cap::SQL_QUERY,
            constraint_hints: vec![],
            sink_file: "src/handler.rs".into(),
            sink_line: 10,
            spec_hash: "rusttest00000001".into(),
        }
    }

    #[test]
    fn emit_sql_query_produces_source() {
        let spec = make_spec(PayloadSlot::Param(0));
        let harness = emit(&spec).unwrap();
        assert!(harness.source.contains("mod entry;"));
        assert!(harness.source.contains("nyx_payload()"));
        assert!(harness.source.contains("entry::run(&payload)"));
        assert_eq!(harness.filename, "src/main.rs");
        assert_eq!(harness.command, vec!["target/release/nyx_harness"]);
    }

    #[test]
    fn emit_includes_cargo_toml_in_extra_files() {
        let spec = make_spec(PayloadSlot::Param(0));
        let harness = emit(&spec).unwrap();
        let cargo = harness.extra_files.iter().find(|(n, _)| n == "Cargo.toml");
        assert!(cargo.is_some(), "Cargo.toml must be in extra_files");
        let cargo_content = &cargo.unwrap().1;
        assert!(cargo_content.contains("rusqlite"), "SQL_QUERY cap needs rusqlite dep");
        assert!(cargo_content.contains("bundled"), "rusqlite must use bundled feature");
    }

    #[test]
    fn emit_code_exec_no_rusqlite_dep() {
        let mut spec = make_spec(PayloadSlot::Param(0));
        spec.expected_cap = Cap::CODE_EXEC;
        let harness = emit(&spec).unwrap();
        let cargo = harness.extra_files.iter().find(|(n, _)| n == "Cargo.toml").unwrap();
        assert!(!cargo.1.contains("rusqlite"), "CODE_EXEC must not have rusqlite dep");
    }

    #[test]
    fn emit_entry_subpath_is_src_entry_rs() {
        let spec = make_spec(PayloadSlot::Param(0));
        let harness = emit(&spec).unwrap();
        assert_eq!(harness.entry_subpath, Some("src/entry.rs".to_string()));
    }

    #[test]
    fn emit_env_var_slot() {
        let spec = make_spec(PayloadSlot::EnvVar("NYX_INPUT".into()));
        let harness = emit(&spec).unwrap();
        assert!(harness.source.contains("set_var"));
        assert!(harness.source.contains("\"NYX_INPUT\""));
    }

    #[test]
    fn emit_param_gt_0_is_unsupported() {
        let spec = make_spec(PayloadSlot::Param(1));
        let err = emit(&spec).unwrap_err();
        assert_eq!(err, UnsupportedReason::EntryKindUnsupported);
    }

    #[test]
    fn cargo_toml_has_correct_bin_target() {
        let cargo = generate_cargo_toml(Cap::SQL_QUERY);
        assert!(cargo.contains("name = \"nyx_harness\""));
        assert!(cargo.contains("path = \"src/main.rs\""));
    }

    /// Capabilities other than SQL_QUERY must yield a std-only manifest.
    ///
    /// (The base64 decoder lives inside the generated program and can only be
    /// exercised by compiling that program; it is not covered here.)
    #[test]
    fn cargo_toml_std_only_caps_have_no_rusqlite() {
        for cap in [Cap::FILE_IO, Cap::CODE_EXEC, Cap::SSRF] {
            let manifest = generate_cargo_toml(cap);
            assert!(manifest.contains("[dependencies]"));
            assert!(!manifest.contains("rusqlite"), "std-only cap must not pull rusqlite");
        }
    }
}
|
||||
|
|
@ -111,8 +111,18 @@ pub fn write(
|
|||
let entry_path = root.join("entry").join(format!("extracted_source.{ext}"));
|
||||
fs::write(&entry_path, entry_source.as_bytes())?;
|
||||
|
||||
// harness/harness.py (or other lang ext)
|
||||
let harness_path = root.join("harness").join(format!("harness.{ext}"));
|
||||
// harness/harness.{ext} (or for Rust: harness/src/main.rs)
|
||||
use crate::symbol::Lang;
|
||||
let harness_path = if matches!(spec.lang, Lang::Rust) {
|
||||
let src_dir = root.join("harness").join("src");
|
||||
fs::create_dir_all(&src_dir)?;
|
||||
// Also write Cargo.toml for Rust repro bundles.
|
||||
let cargo_content = crate::dynamic::lang::rust::generate_cargo_toml(spec.expected_cap);
|
||||
fs::write(root.join("harness").join("Cargo.toml"), cargo_content.as_bytes())?;
|
||||
src_dir.join("main.rs")
|
||||
} else {
|
||||
root.join("harness").join(format!("harness.{ext}"))
|
||||
};
|
||||
fs::write(&harness_path, harness_source.as_bytes())?;
|
||||
|
||||
// harness/Dockerfile.harness
|
||||
|
|
@ -232,22 +242,55 @@ fn source_ext_for_lang(lang: &crate::symbol::Lang) -> &'static str {
|
|||
}
|
||||
|
||||
fn dockerfile_for_spec(spec: &HarnessSpec) -> String {
|
||||
let image = format!("python:{}", spec.toolchain_id.strip_prefix("python-").unwrap_or("3"));
|
||||
format!(
|
||||
"FROM {image}\nWORKDIR /harness\nCOPY harness.py .\nCMD [\"python3\", \"harness.py\"]\n"
|
||||
)
|
||||
use crate::symbol::Lang;
|
||||
match spec.lang {
|
||||
Lang::Rust => {
|
||||
let toolchain = spec.toolchain_id.strip_prefix("rust-").unwrap_or("stable");
|
||||
// Multi-stage: build with Rust, run the binary directly.
|
||||
format!(
|
||||
"FROM rust:{toolchain}-slim AS builder\n\
|
||||
WORKDIR /harness\n\
|
||||
COPY Cargo.toml Cargo.lock* ./\n\
|
||||
COPY src/ src/\n\
|
||||
RUN cargo build --release\n\n\
|
||||
FROM debian:bookworm-slim\n\
|
||||
WORKDIR /harness\n\
|
||||
COPY --from=builder /harness/target/release/nyx_harness .\n\
|
||||
CMD [\"/harness/nyx_harness\"]\n"
|
||||
)
|
||||
}
|
||||
Lang::Python => {
|
||||
let image = format!("python:{}", spec.toolchain_id.strip_prefix("python-").unwrap_or("3"));
|
||||
format!(
|
||||
"FROM {image}\nWORKDIR /harness\nCOPY harness.py .\nCMD [\"python3\", \"harness.py\"]\n"
|
||||
)
|
||||
}
|
||||
_ => {
|
||||
format!("# Unsupported language: {:?}\nFROM ubuntu:latest\n", spec.lang)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn reproduce_script(spec: &HarnessSpec, payload_label: &str) -> String {
|
||||
use crate::symbol::Lang;
|
||||
let run_cmd = match spec.lang {
|
||||
Lang::Rust => {
|
||||
"NYX_PAYLOAD=\"$(cat payload/payload.bin)\" ./harness/nyx_harness".to_owned()
|
||||
}
|
||||
_ => {
|
||||
"NYX_PAYLOAD=\"$(cat payload/payload.bin)\" python3 harness/harness.py".to_owned()
|
||||
}
|
||||
};
|
||||
format!(
|
||||
"#!/bin/sh\n\
|
||||
# Repro script for finding {finding_id} ({payload_label})\n\
|
||||
set -e\n\
|
||||
SCRIPT_DIR=\"$(cd \"$(dirname \"$0\")\" && pwd)\"\n\
|
||||
cd \"$SCRIPT_DIR\"\n\
|
||||
NYX_PAYLOAD=\"$(cat payload/payload.bin)\" python3 harness/harness.py\n",
|
||||
{run_cmd}\n",
|
||||
finding_id = spec.finding_id,
|
||||
payload_label = payload_label,
|
||||
run_cmd = run_cmd,
|
||||
)
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ use crate::dynamic::corpus::{benign_payload_for, payloads_for, Oracle, Payload};
|
|||
use crate::dynamic::harness::{self, HarnessError};
|
||||
use crate::dynamic::sandbox::{self, SandboxError, SandboxOptions, SandboxOutcome};
|
||||
use crate::dynamic::spec::HarnessSpec;
|
||||
use crate::symbol::Lang;
|
||||
|
||||
/// Max harness-build attempts before giving up.
|
||||
const MAX_BUILD_ATTEMPTS: u32 = 2;
|
||||
|
|
@ -86,28 +87,55 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result<RunOutcome,
|
|||
}
|
||||
};
|
||||
|
||||
// Prepare Python venv for build-time isolation and dependency caching.
|
||||
// Errors from prepare_python propagate as RunError::BuildFailed (making
|
||||
// that variant reachable) or are swallowed for non-fatal failures (Io /
|
||||
// Unsupported), falling back to the system python3 in the harness command.
|
||||
match build_sandbox::prepare_python(spec, &harness.workdir) {
|
||||
Ok(build_result) => {
|
||||
// Patch harness command to use venv Python when the venv was built
|
||||
// or found in cache.
|
||||
if let Some(cmd0) = harness.command.first_mut() {
|
||||
if cmd0 == "python3" || cmd0 == "python" {
|
||||
let venv_python = build_result.venv_path.join("bin").join("python3");
|
||||
if venv_python.exists() {
|
||||
*cmd0 = venv_python.to_string_lossy().into_owned();
|
||||
// Build-time isolation and dependency setup — dispatched by language.
|
||||
match spec.lang {
|
||||
Lang::Python => {
|
||||
// Prepare Python venv for dependency caching.
|
||||
// Errors propagate as RunError::BuildFailed or are swallowed for
|
||||
// non-fatal failures (Io / Unsupported), falling back to system python3.
|
||||
match build_sandbox::prepare_python(spec, &harness.workdir) {
|
||||
Ok(build_result) => {
|
||||
if let Some(cmd0) = harness.command.first_mut() {
|
||||
if cmd0 == "python3" || cmd0 == "python" {
|
||||
let venv_python = build_result.venv_path.join("bin").join("python3");
|
||||
if venv_python.exists() {
|
||||
*cmd0 = venv_python.to_string_lossy().into_owned();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(build_sandbox::BuildError::BuildFailed { stderr, attempts }) => {
|
||||
return Err(RunError::BuildFailed { stderr, attempts });
|
||||
}
|
||||
Err(_) => {}
|
||||
}
|
||||
}
|
||||
Lang::Rust => {
|
||||
// Compile the harness binary with `cargo build --release`.
|
||||
match build_sandbox::prepare_rust(spec, &harness.workdir) {
|
||||
Ok(build_result) => {
|
||||
// Update command to the compiled binary path.
|
||||
let binary = build_result.venv_path.join("nyx_harness");
|
||||
if binary.exists() {
|
||||
harness.command = vec![binary.to_string_lossy().into_owned()];
|
||||
} else {
|
||||
// Fall back to binary inside the workdir.
|
||||
let fallback = harness.workdir.join("target").join("release").join("nyx_harness");
|
||||
if fallback.exists() {
|
||||
harness.command = vec![fallback.to_string_lossy().into_owned()];
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(build_sandbox::BuildError::BuildFailed { stderr, attempts }) => {
|
||||
return Err(RunError::BuildFailed { stderr, attempts });
|
||||
}
|
||||
Err(_) => {
|
||||
// Io: fall back to whatever command was set (will likely fail at exec).
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(build_sandbox::BuildError::BuildFailed { stderr, attempts }) => {
|
||||
return Err(RunError::BuildFailed { stderr, attempts });
|
||||
}
|
||||
Err(_) => {
|
||||
// Io / Unsupported: fall back to system python3 already in command.
|
||||
_ => {
|
||||
// No build step for other interpreted languages.
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -28,6 +28,29 @@ use std::path::Path;
|
|||
use std::sync::OnceLock;
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
// ── Harness interpretation probe ──────────────────────────────────────────────
|
||||
|
||||
/// Reports whether `command` launches a known script interpreter rather than
/// a compiled native binary.
///
/// Only the first element is inspected: its final path component is compared
/// against a fixed list (`python3`, `python`, `python2`, `node`, `nodejs`,
/// `ruby`, `php`, `perl`). An empty command is not interpreted.
///
/// Interpreted harnesses can be run inside a Python/Node Docker image directly.
/// Compiled harnesses (Rust, C) require a platform-matching binary; the Docker
/// backend falls back to the process backend for them in Phase 04.
pub fn harness_is_interpreted(command: &[String]) -> bool {
    const INTERPRETERS: [&str; 8] = [
        "python3", "python", "python2", "node", "nodejs", "ruby", "php", "perl",
    ];

    command.first().map_or(false, |program| {
        // Strip any directory prefix; fall back to the raw string when the
        // path has no usable final component.
        let name = std::path::Path::new(program)
            .file_name()
            .and_then(|component| component.to_str())
            .unwrap_or(program.as_str());
        INTERPRETERS.contains(&name)
    })
}
|
||||
|
||||
/// Result of a single sandboxed run.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct SandboxOutcome {
|
||||
|
|
@ -201,9 +224,18 @@ pub fn run(
|
|||
opts: &SandboxOptions,
|
||||
) -> Result<SandboxOutcome, SandboxError> {
|
||||
match opts.backend {
|
||||
SandboxBackend::Docker => run_docker(harness, payload, opts),
|
||||
SandboxBackend::Docker => {
|
||||
// Docker backend currently only supports interpreted harnesses.
|
||||
// Compiled binaries (Rust, C) are not yet cross-platform in containers;
|
||||
// fall back to the process backend for them.
|
||||
if harness_is_interpreted(&harness.command) {
|
||||
run_docker(harness, payload, opts)
|
||||
} else {
|
||||
run_process(harness, payload, opts)
|
||||
}
|
||||
}
|
||||
SandboxBackend::Auto => {
|
||||
if docker_available() {
|
||||
if docker_available() && harness_is_interpreted(&harness.command) {
|
||||
run_docker(harness, payload, opts)
|
||||
} else {
|
||||
run_process(harness, payload, opts)
|
||||
|
|
@ -366,15 +398,33 @@ fn exec_in_container(
|
|||
}
|
||||
cmd_args.push(container_name.into());
|
||||
|
||||
// The harness script is at /workdir/{filename} inside the container.
|
||||
let harness_file = harness
|
||||
.command
|
||||
.get(1)
|
||||
.map(|s| s.as_str())
|
||||
.unwrap_or("harness.py");
|
||||
// Build the exec command inside the container.
|
||||
// For interpreters: `python3 /workdir/harness.py`
|
||||
// For compiled binaries: `/workdir/target/release/nyx_harness`
|
||||
let exec_cmd = harness.command.first().map(|s| s.as_str()).unwrap_or("python3");
|
||||
cmd_args.push(exec_cmd.into());
|
||||
cmd_args.push(format!("/workdir/{harness_file}"));
|
||||
if harness_is_interpreted(&harness.command) {
|
||||
let harness_file = harness
|
||||
.command
|
||||
.get(1)
|
||||
.map(|s| s.as_str())
|
||||
.unwrap_or("harness.py");
|
||||
cmd_args.push(exec_cmd.into());
|
||||
cmd_args.push(format!("/workdir/{harness_file}"));
|
||||
} else {
|
||||
// Compiled binary: the command is the relative path within workdir.
|
||||
// e.g. "target/release/nyx_harness" → run "/workdir/target/release/nyx_harness"
|
||||
let rel = std::path::Path::new(exec_cmd)
|
||||
.file_name()
|
||||
.and_then(|n| n.to_str())
|
||||
.unwrap_or(exec_cmd);
|
||||
if exec_cmd.contains('/') || exec_cmd.contains('\\') {
|
||||
// Relative path within workdir (e.g. "target/release/nyx_harness").
|
||||
cmd_args.push(format!("/workdir/{exec_cmd}"));
|
||||
} else {
|
||||
// Just a filename — try /workdir directly.
|
||||
cmd_args.push(format!("/workdir/{rel}"));
|
||||
}
|
||||
}
|
||||
|
||||
let mut cmd = Command::new(docker_bin());
|
||||
cmd.args(&cmd_args);
|
||||
|
|
|
|||
|
|
@ -32,10 +32,146 @@ pub enum PinOrigin {
|
|||
Pipfile,
|
||||
/// `runtime.txt` (Heroku-style).
|
||||
RuntimeTxt,
|
||||
/// `rust-toolchain.toml` `[toolchain] channel`.
|
||||
RustToolchainToml,
|
||||
/// `rust-toolchain` (plain text channel file).
|
||||
RustToolchainFile,
|
||||
/// `Cargo.toml` `rust-version` field.
|
||||
CargoToml,
|
||||
/// No pin found; used the system default.
|
||||
SystemDefault,
|
||||
}
|
||||
|
||||
// ── Rust toolchain resolver ───────────────────────────────────────────────────
|
||||
|
||||
/// Resolve the Rust toolchain for `project_root` (§22.2).
|
||||
///
|
||||
/// Reads project pin files in priority order:
|
||||
/// `rust-toolchain.toml` > `rust-toolchain` > `Cargo.toml` `rust-version` > default.
|
||||
pub fn resolve_rust(project_root: &Path) -> ToolchainResolution {
|
||||
if let Some(r) = try_rust_toolchain_toml(project_root) {
|
||||
return r;
|
||||
}
|
||||
if let Some(r) = try_rust_toolchain_file(project_root) {
|
||||
return r;
|
||||
}
|
||||
if let Some(r) = try_cargo_toml_rust_version(project_root) {
|
||||
return r;
|
||||
}
|
||||
default_rust()
|
||||
}
|
||||
|
||||
fn try_rust_toolchain_toml(root: &Path) -> Option<ToolchainResolution> {
|
||||
let content = std::fs::read_to_string(root.join("rust-toolchain.toml")).ok()?;
|
||||
// Look for `channel = "stable"` or `channel = "1.75"` in [toolchain] section.
|
||||
let mut in_toolchain = false;
|
||||
for line in content.lines() {
|
||||
let line = line.trim();
|
||||
if line == "[toolchain]" {
|
||||
in_toolchain = true;
|
||||
continue;
|
||||
}
|
||||
if line.starts_with('[') {
|
||||
in_toolchain = false;
|
||||
}
|
||||
if in_toolchain && line.starts_with("channel") {
|
||||
if let Some(ver) = extract_version_from_toml_value(line) {
|
||||
return Some(map_rust_version(&ver, RustPinOrigin::RustToolchainToml));
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn try_rust_toolchain_file(root: &Path) -> Option<ToolchainResolution> {
|
||||
let content = std::fs::read_to_string(root.join("rust-toolchain")).ok()?;
|
||||
let version = content.trim().to_owned();
|
||||
if version.is_empty() {
|
||||
return None;
|
||||
}
|
||||
// Simple format: just the channel name (e.g. "stable", "1.75.0", "nightly-2024-01-01")
|
||||
Some(map_rust_version(&version, RustPinOrigin::RustToolchainFile))
|
||||
}
|
||||
|
||||
fn try_cargo_toml_rust_version(root: &Path) -> Option<ToolchainResolution> {
|
||||
let content = std::fs::read_to_string(root.join("Cargo.toml")).ok()?;
|
||||
for line in content.lines() {
|
||||
let line = line.trim();
|
||||
if line.starts_with("rust-version") {
|
||||
if let Some(ver) = extract_version_from_toml_value(line) {
|
||||
return Some(map_rust_version(&ver, RustPinOrigin::CargoToml));
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn default_rust() -> ToolchainResolution {
|
||||
ToolchainResolution {
|
||||
toolchain_id: "rust-stable".to_owned(),
|
||||
pin_origin: PinOrigin::SystemDefault,
|
||||
toolchain_drift: false,
|
||||
version_string: "stable".to_owned(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Internal origin enum for Rust (mapped to PinOrigin for the public API).
///
/// Each variant names the project file a Rust toolchain pin was read from.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum RustPinOrigin {
    /// Pin came from the `[toolchain]` table of `rust-toolchain.toml`.
    RustToolchainToml,
    /// Pin came from a `rust-toolchain` file.
    RustToolchainFile,
    /// Pin came from the `rust-version` field of `Cargo.toml`.
    CargoToml,
}
|
||||
|
||||
fn map_rust_version(version: &str, origin: RustPinOrigin) -> ToolchainResolution {
|
||||
let pin_origin = match origin {
|
||||
RustPinOrigin::RustToolchainToml => PinOrigin::RustToolchainToml,
|
||||
RustPinOrigin::RustToolchainFile => PinOrigin::RustToolchainFile,
|
||||
RustPinOrigin::CargoToml => PinOrigin::CargoToml,
|
||||
};
|
||||
|
||||
// Named channels.
|
||||
if version == "stable" || version.is_empty() {
|
||||
return ToolchainResolution {
|
||||
toolchain_id: "rust-stable".to_owned(),
|
||||
pin_origin,
|
||||
toolchain_drift: false,
|
||||
version_string: "stable".to_owned(),
|
||||
};
|
||||
}
|
||||
if version.starts_with("nightly") {
|
||||
return ToolchainResolution {
|
||||
toolchain_id: "rust-nightly".to_owned(),
|
||||
pin_origin,
|
||||
toolchain_drift: true, // nightly != stable reference image
|
||||
version_string: version.to_owned(),
|
||||
};
|
||||
}
|
||||
if version.starts_with("beta") {
|
||||
return ToolchainResolution {
|
||||
toolchain_id: "rust-beta".to_owned(),
|
||||
pin_origin,
|
||||
toolchain_drift: true,
|
||||
version_string: version.to_owned(),
|
||||
};
|
||||
}
|
||||
|
||||
// Semver pinned version like "1.75.0" or "1.75".
|
||||
let parts: Vec<&str> = version.splitn(3, '.').collect();
|
||||
let major = parts.first().copied().unwrap_or("1");
|
||||
let minor = parts.get(1).copied();
|
||||
|
||||
// Map to stable; drift = true when exact version differs from "stable".
|
||||
let drift = minor.is_some(); // pin to specific version = drift from "stable" label
|
||||
ToolchainResolution {
|
||||
toolchain_id: format!("rust-{major}.{}", minor.unwrap_or("x")),
|
||||
pin_origin,
|
||||
toolchain_drift: drift,
|
||||
version_string: version.to_owned(),
|
||||
}
|
||||
}
|
||||
|
||||
// ── Python toolchain resolver ─────────────────────────────────────────────────
|
||||
|
||||
/// Resolve the Python toolchain for `project_root`.
|
||||
///
|
||||
/// Reads project pin files in priority order:
|
||||
|
|
@ -220,4 +356,49 @@ mod tests {
|
|||
let r = resolve_python(dir.path());
|
||||
assert_eq!(r.pin_origin, PinOrigin::SystemDefault);
|
||||
}
|
||||
|
||||
// ── Rust toolchain tests ─────────────────────────────────────────────────
|
||||
|
||||
#[test]
fn rust_toolchain_toml_stable() {
    // A `[toolchain]` table pinning `stable` resolves to rust-stable, no drift.
    let tmp = TempDir::new().unwrap();
    let pin_file = tmp.path().join("rust-toolchain.toml");
    fs::write(pin_file, "[toolchain]\nchannel = \"stable\"\n").unwrap();

    let resolved = resolve_rust(tmp.path());

    assert_eq!(resolved.toolchain_id, "rust-stable");
    assert!(!resolved.toolchain_drift);
    assert_eq!(resolved.pin_origin, PinOrigin::RustToolchainToml);
}
|
||||
|
||||
#[test]
fn rust_toolchain_file_nightly() {
    // A legacy `rust-toolchain` file naming `nightly` resolves with drift.
    let tmp = TempDir::new().unwrap();
    let pin_file = tmp.path().join("rust-toolchain");
    fs::write(pin_file, "nightly\n").unwrap();

    let resolved = resolve_rust(tmp.path());

    assert_eq!(resolved.toolchain_id, "rust-nightly");
    assert!(resolved.toolchain_drift);
    assert_eq!(resolved.pin_origin, PinOrigin::RustToolchainFile);
}
|
||||
|
||||
#[test]
fn cargo_toml_rust_version() {
    // With no toolchain file present, `rust-version` in Cargo.toml is the pin.
    let tmp = TempDir::new().unwrap();
    let manifest = tmp.path().join("Cargo.toml");
    fs::write(
        manifest,
        "[package]\nname = \"foo\"\nrust-version = \"1.75\"\n",
    )
    .unwrap();

    let resolved = resolve_rust(tmp.path());

    assert_eq!(resolved.pin_origin, PinOrigin::CargoToml);
    assert!(resolved.toolchain_id.starts_with("rust-1"));
}
|
||||
|
||||
#[test]
fn rust_default_is_stable() {
    // An empty project directory has no pin files, so the default applies.
    let tmp = TempDir::new().unwrap();

    let resolved = resolve_rust(tmp.path());

    assert_eq!(resolved.toolchain_id, "rust-stable");
    assert_eq!(resolved.pin_origin, PinOrigin::SystemDefault);
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -94,8 +94,12 @@ pub fn verify_finding(diag: &Diag, opts: &VerifyOptions) -> VerifyResult {
|
|||
}
|
||||
}
|
||||
|
||||
// Resolve toolchain information.
|
||||
let toolchain_res = toolchain::resolve_python(Path::new("."));
|
||||
// Resolve toolchain information (lang-aware: §22.2).
|
||||
use crate::symbol::Lang;
|
||||
let toolchain_res = match spec.lang {
|
||||
Lang::Rust => toolchain::resolve_rust(Path::new(".")),
|
||||
_ => toolchain::resolve_python(Path::new(".")),
|
||||
};
|
||||
let toolchain_match = if toolchain_res.toolchain_drift { "drift" } else { "exact" };
|
||||
|
||||
let start = Instant::now();
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue