Add native binary Docker backend for Rust/Go and enhance sandboxing

This commit is contained in:
elipeter 2026-05-12 02:56:51 -04:00
parent 62bd480db3
commit 09553f5b4c
8 changed files with 424 additions and 17 deletions

View file

@ -233,8 +233,19 @@ jobs:
- uses: taiki-e/install-action@nextest
- name: Pull python image for sandbox tests
run: docker pull python:3-slim
- name: Pull language images for sandbox tests
run: |
docker pull python:3-slim
docker pull node:20-slim
docker pull eclipse-temurin:21-jre-jammy
docker pull php:8-cli
- name: Smoke-test interpreter availability
run: |
docker run --rm python:3-slim python3 --version
docker run --rm node:20-slim node --version
docker run --rm eclipse-temurin:21-jre-jammy java -version
docker run --rm php:8-cli php --version
- name: Rust tests with docker (sandbox escape gate)
run: cargo nextest run --all-features --test dynamic_sandbox_escape --test dynamic_parity

View file

@ -273,24 +273,68 @@ fn dockerfile_for_spec(spec: &HarnessSpec) -> String {
fn reproduce_script(spec: &HarnessSpec, payload_label: &str) -> String {
use crate::symbol::Lang;
let run_cmd = match spec.lang {
Lang::Rust => {
"NYX_PAYLOAD=\"$(cat payload/payload.bin)\" ./harness/nyx_harness".to_owned()
}
_ => {
"NYX_PAYLOAD=\"$(cat payload/payload.bin)\" python3 harness/harness.py".to_owned()
}
// Shell command for the process backend (relative to SCRIPT_DIR).
let process_run_cmd = match spec.lang {
Lang::Rust | Lang::Go => "./harness/nyx_harness".to_owned(),
Lang::Python => "python3 ./harness/harness.py".to_owned(),
Lang::JavaScript | Lang::TypeScript => "node ./harness/harness.js".to_owned(),
Lang::Java => "java -cp ./harness NyxHarness".to_owned(),
Lang::Php => "php ./harness/harness.php".to_owned(),
_ => "echo 'unsupported language' >&2; exit 2".to_owned(),
};
// Docker image tag is derived from spec_hash so each finding gets its own image.
let image_tag = format!("nyx-repro-{}", spec.spec_hash);
// Double braces escape literal { } in Rust format strings.
format!(
"#!/bin/sh\n\
# Repro script for finding {finding_id} ({payload_label})\n\
# Nyx dynamic repro finding {finding_id} / payload {payload_label}\n\
#\n\
# Usage:\n\
# ./reproduce.sh run via process backend (direct)\n\
# ./reproduce.sh --docker run via Docker backend (isolated)\n\
#\n\
# Exits 0 when sink_hit matches expected/outcome.json, 1 on mismatch.\n\
set -e\n\
SCRIPT_DIR=\"$(cd \"$(dirname \"$0\")\" && pwd)\"\n\
cd \"$SCRIPT_DIR\"\n\
{run_cmd}\n",
PAYLOAD=\"$(cat payload/payload.bin)\"\n\
EXPECTED_SINK=$(grep -o '\"sink_hit\"[[:space:]]*:[[:space:]]*[a-z]*' \\\n\
expected/outcome.json | grep -o '[a-z]*$')\n\
\n\
if [ \"${{1:-}}\" = \"--docker\" ]; then\n\
if ! command -v docker >/dev/null 2>&1 || ! docker info >/dev/null 2>&1; then\n\
echo 'error: docker not available' >&2; exit 2\n\
fi\n\
IMAGE=\"{image_tag}\"\n\
docker build -t \"$IMAGE\" -f harness/Dockerfile.harness harness/ >/dev/null\n\
ACTUAL=$(docker run --rm --cap-drop=ALL \
--security-opt no-new-privileges:true --network none \
-e NYX_PAYLOAD=\"$PAYLOAD\" \"$IMAGE\" 2>&1) || ACTUAL=''\n\
docker rmi \"$IMAGE\" >/dev/null 2>&1 || true\n\
else\n\
ACTUAL=$(NYX_PAYLOAD=\"$PAYLOAD\" {process_run_cmd} 2>&1) || ACTUAL=''\n\
fi\n\
\n\
if echo \"$ACTUAL\" | grep -q '__NYX_SINK_HIT__'; then\n\
ACTUAL_SINK=true\n\
else\n\
ACTUAL_SINK=false\n\
fi\n\
\n\
if [ \"$ACTUAL_SINK\" = \"$EXPECTED_SINK\" ]; then\n\
echo \"PASS: sink_hit=$ACTUAL_SINK (matches expected)\"\n\
exit 0\n\
else\n\
echo \"FAIL: sink_hit=$ACTUAL_SINK expected=$EXPECTED_SINK\"\n\
exit 1\n\
fi\n",
finding_id = spec.finding_id,
payload_label = payload_label,
run_cmd = run_cmd,
process_run_cmd = process_run_cmd,
image_tag = image_tag,
)
}

View file

@ -34,8 +34,8 @@ use std::time::{Duration, Instant};
/// rather than a compiled native binary.
///
/// Interpreted harnesses can be run inside a Python/Node Docker image directly.
/// Compiled harnesses (Rust, C) require a platform-matching binary; the Docker
/// backend falls back to the process backend for them in Phase 04.
/// Compiled harnesses (Rust, Go) are routed to `run_native_binary_docker` on
/// Linux or to the process backend on other platforms.
pub fn harness_is_interpreted(command: &[String]) -> bool {
let cmd0 = match command.first() {
Some(c) => c.as_str(),
@ -51,6 +51,34 @@ pub fn harness_is_interpreted(command: &[String]) -> bool {
)
}
/// Reports whether `command` launches a compiled native binary that the
/// Docker backend can execute inside a Linux container.
///
/// The check is purely structural: the first element of `command` must be an
/// absolute path and must not be classified as an interpreter by
/// [`harness_is_interpreted`]. Bare program names such as `python3` therefore
/// never qualify, and neither does an empty command.
///
/// On every non-Linux build target the answer is always `false`, because a
/// binary produced there is not a Linux ELF executable and cannot run in a
/// Linux container.
pub fn harness_is_native_binary(command: &[String]) -> bool {
    let host_is_linux = cfg!(target_os = "linux");
    command.first().map_or(false, |program| {
        host_is_linux
            && std::path::Path::new(program).is_absolute()
            && !harness_is_interpreted(command)
    })
}
/// Docker image used to run compiled native binaries (Rust, Go).
///
/// `debian:bookworm-slim` provides glibc and a minimal runtime compatible with
/// dynamically-linked Rust/Go binaries produced by the standard toolchains.
///
/// NOTE(review): the binary is built on the host and only copied into this
/// image, so it presumably must not require a newer glibc than the image
/// ships — confirm against the CI build hosts.
const NATIVE_BINARY_IMAGE: &str = "debian:bookworm-slim";
/// Result of a single sandboxed run.
#[derive(Debug, Clone)]
pub struct SandboxOutcome {
@ -239,11 +267,10 @@ pub fn run(
) -> Result<SandboxOutcome, SandboxError> {
match opts.backend {
SandboxBackend::Docker => {
// Docker backend currently only supports interpreted harnesses.
// Compiled binaries (Rust, C) are not yet cross-platform in containers;
// fall back to the process backend for them.
if harness_is_interpreted(&harness.command) {
run_docker(harness, payload, opts)
} else if harness_is_native_binary(&harness.command) {
run_native_binary_docker(harness, payload, opts)
} else {
run_process(harness, payload, opts)
}
@ -251,6 +278,8 @@ pub fn run(
SandboxBackend::Auto => {
if docker_available() && harness_is_interpreted(&harness.command) {
run_docker(harness, payload, opts)
} else if docker_available() && harness_is_native_binary(&harness.command) {
run_native_binary_docker(harness, payload, opts)
} else {
run_process(harness, payload, opts)
}
@ -578,6 +607,175 @@ fn detect_image_for_harness(harness: &BuiltHarness) -> String {
}
}
// ── Native binary Docker backend ──────────────────────────────────────────────
/// Docker backend for compiled native binaries (Rust, Go).
///
/// Starts a `debian:bookworm-slim` container (glibc-compatible runtime), copies
/// the compiled binary into it, then executes it via `docker exec`. This gives
/// the same `--cap-drop=ALL` / `--network none` isolation as the interpreted
/// harness path.
///
/// Only reachable on Linux (see [`harness_is_native_binary`]). On other platforms
/// the dispatch in [`run`] routes compiled harnesses to [`run_process`].
fn run_native_binary_docker(
harness: &BuiltHarness,
payload: &Payload,
opts: &SandboxOptions,
) -> Result<SandboxOutcome, SandboxError> {
if !is_docker_reachable() {
return Err(SandboxError::BackendUnavailable(SandboxBackend::Docker));
}
let binary_path = match harness.command.first() {
Some(p) => p.clone(),
None => return Err(SandboxError::Spawn(std::io::Error::new(
std::io::ErrorKind::InvalidInput,
"empty command for native binary",
))),
};
let container_name = workdir_to_container_name(&harness.workdir);
let registry = container_registry();
let reused = if registry.contains_key(&container_name) {
is_container_running(&container_name)
} else {
false
};
if !reused {
start_container(&container_name, &harness.workdir, NATIVE_BINARY_IMAGE)?;
// Copy the compiled binary into the container as /workdir/nyx_harness.
let cp_dst = format!("{container_name}:/workdir/nyx_harness");
let cp_status = std::process::Command::new(docker_bin())
.args(["cp", &binary_path, &cp_dst])
.stdout(std::process::Stdio::null())
.stderr(std::process::Stdio::null())
.status()
.map_err(SandboxError::Io)?;
if !cp_status.success() {
return Err(SandboxError::BackendUnavailable(SandboxBackend::Docker));
}
// Ensure execute bit is set (docker cp preserves it on Linux, but be explicit).
let chmod_status = std::process::Command::new(docker_bin())
.args(["exec", &container_name, "chmod", "+x", "/workdir/nyx_harness"])
.stdout(std::process::Stdio::null())
.stderr(std::process::Stdio::null())
.status()
.map_err(SandboxError::Io)?;
if !chmod_status.success() {
return Err(SandboxError::BackendUnavailable(SandboxBackend::Docker));
}
registry.insert(container_name.clone(), container_name.clone());
}
exec_native_binary_in_container(&container_name, harness, payload, opts)
}
/// Execute a native binary already in the container at `/workdir/nyx_harness`.
///
/// The binary is started through `docker exec -i` as UID/GID `65534:65534`.
/// The payload is passed base64-encoded in `NYX_PAYLOAD_B64`, followed by every
/// entry of `harness.env`. Stdout and stderr are captured up to
/// `opts.output_limit` bytes each; anything beyond the cap is read and
/// discarded so the harness never blocks on a full pipe.
///
/// A watchdog enforces `opts.timeout`. When it fires it kills the local
/// `docker exec` client and sends `kill -9 -1` inside the container. The
/// watchdog is cancelled and joined as soon as the client exits, so it cannot
/// fire later against a recycled PID or a subsequent run in the same container.
///
/// # Returns
///
/// A [`SandboxOutcome`] where `exit_code` is `None` on timeout, `sink_hit` is
/// true when `__NYX_SINK_HIT__` appears in either captured stream, and
/// `oob_callback_seen` is always `false`.
///
/// # Errors
///
/// * [`SandboxError::Spawn`] when the `docker` client cannot be started.
/// * [`SandboxError::Io`] when waiting on the client fails.
fn exec_native_binary_in_container(
    container_name: &str,
    harness: &BuiltHarness,
    payload: &Payload,
    opts: &SandboxOptions,
) -> Result<SandboxOutcome, SandboxError> {
    use std::process::{Command, Stdio};
    use std::sync::atomic::{AtomicBool, Ordering};
    use std::sync::mpsc::{self, RecvTimeoutError};
    use std::sync::Arc;

    const SINK_HIT_SENTINEL: &[u8] = b"__NYX_SINK_HIT__";

    /// Captures at most `limit` bytes from `pipe` on a background thread, then
    /// drains whatever follows so the writer is never stalled by a full pipe.
    fn capture_capped<R>(
        mut pipe: R,
        limit: u64,
    ) -> std::thread::JoinHandle<std::io::Result<Vec<u8>>>
    where
        R: std::io::Read + Send + 'static,
    {
        use std::io::Read;
        std::thread::spawn(move || {
            let mut captured = Vec::new();
            pipe.by_ref().take(limit).read_to_end(&mut captured)?;
            // Best effort: a failure while discarding must not lose what was captured.
            let _ = std::io::copy(&mut pipe, &mut std::io::sink());
            Ok(captured)
        })
    }

    let payload_b64 = base64_encode(payload.bytes);
    let mut cmd_args: Vec<String> = vec![
        "exec".into(),
        "-i".into(),
        "--user".into(),
        "65534:65534".into(),
        "-e".into(),
        format!("NYX_PAYLOAD_B64={payload_b64}"),
    ];
    for (k, v) in &harness.env {
        cmd_args.push("-e".into());
        cmd_args.push(format!("{k}={v}"));
    }
    cmd_args.push(container_name.into());
    cmd_args.push("/workdir/nyx_harness".into());

    let mut cmd = Command::new(docker_bin());
    cmd.args(&cmd_args);
    cmd.stdout(Stdio::piped());
    cmd.stderr(Stdio::piped());

    let start = std::time::Instant::now();
    let mut child = cmd.spawn().map_err(SandboxError::Spawn)?;

    // Watchdog: waits for either the timeout or the "finished" signal, which is
    // delivered by dropping `done_tx` once the child has been reaped.
    let timeout = opts.timeout;
    let timed_out = Arc::new(AtomicBool::new(false));
    let timed_out_flag = Arc::clone(&timed_out);
    let child_id = child.id();
    let container_name_for_kill = container_name.to_owned();
    let (done_tx, done_rx) = mpsc::channel::<()>();
    let watchdog = std::thread::spawn(move || {
        if let Err(RecvTimeoutError::Timeout) = done_rx.recv_timeout(timeout) {
            timed_out_flag.store(true, Ordering::SeqCst);
            #[cfg(unix)]
            libc_kill(child_id as i32, 9);
            #[cfg(not(unix))]
            let _ = child_id;
            // Killing the client does not stop the process inside the container,
            // so sweep the container as well.
            // NOTE(review): relies on a `kill` executable existing in the image —
            // confirm for `debian:bookworm-slim`.
            let _ = Command::new(docker_bin())
                .args(["exec", &container_name_for_kill, "kill", "-9", "-1"])
                .stdout(Stdio::null())
                .stderr(Stdio::null())
                .status();
        }
    });

    // Same widening cast as before; the concrete type of `output_limit` is
    // declared on `SandboxOptions`.
    let limit = opts.output_limit as u64;
    let stdout_handle = child.stdout.take().map(|pipe| capture_capped(pipe, limit));
    let stderr_handle = child.stderr.take().map(|pipe| capture_capped(pipe, limit));

    let wait_result = child.wait();
    // Cancel the watchdog on success and failure alike, and wait for it so that
    // any in-flight container sweep has finished before we return.
    drop(done_tx);
    let _ = watchdog.join();
    let status = wait_result.map_err(SandboxError::Io)?;

    let collect = |handle: Option<std::thread::JoinHandle<std::io::Result<Vec<u8>>>>| {
        handle
            .and_then(|h| h.join().ok())
            .and_then(|r| r.ok())
            .unwrap_or_default()
    };
    let stdout_buf = collect(stdout_handle);
    let stderr_buf = collect(stderr_handle);

    let duration = start.elapsed();
    let did_time_out = timed_out.load(Ordering::SeqCst);
    let exit_code = if did_time_out { None } else { status.code() };

    let sink_hit = contains_subslice(&stdout_buf, SINK_HIT_SENTINEL)
        || contains_subslice(&stderr_buf, SINK_HIT_SENTINEL);

    Ok(SandboxOutcome {
        exit_code,
        stdout: stdout_buf,
        stderr: stderr_buf,
        timed_out: did_time_out,
        oob_callback_seen: false,
        sink_hit,
        duration,
    })
}
// ── Process backend ───────────────────────────────────────────────────────────
/// Process backend: spawns the harness command in a subprocess with timeout,
@ -961,4 +1159,42 @@ mod tests {
reg.insert(name.clone(), name.clone());
assert!(reg.contains_key(&name));
}
#[test]
fn harness_is_native_binary_absolute_path() {
    // An absolute, non-interpreter path counts as a native binary on Linux
    // only; every other target reports false because the binary is not ELF.
    let command = vec![String::from(
        "/home/ci/.cache/nyx/dynamic/build-cache/abc123-rust-stable/nyx_harness",
    )];
    let expected = cfg!(target_os = "linux");
    assert_eq!(harness_is_native_binary(&command), expected);
}
#[test]
fn harness_is_native_binary_relative_path_false() {
    // A `./`-prefixed path is relative and must never be treated as native.
    let command = [String::from("./nyx_harness")];
    let detected = harness_is_native_binary(&command);
    assert!(!detected);
}
#[test]
fn harness_is_native_binary_interpreter_false() {
    // A bare interpreter name followed by a script is not a native binary.
    let command = ["python3", "harness.py"].map(String::from);
    let detected = harness_is_native_binary(&command);
    assert!(!detected);
}
#[test]
fn harness_is_native_binary_empty_false() {
    // With no program at all there is nothing to classify.
    let no_command: Vec<String> = Vec::new();
    assert!(!harness_is_native_binary(&no_command));
}
#[test]
fn harness_is_native_binary_node_absolute_path_false() {
    // Being absolute is not sufficient: `node` is on the interpreter list, so
    // an absolute path to it still is not a native harness binary.
    let command = ["/usr/bin/node", "harness.js"].map(String::from);
    let detected = harness_is_native_binary(&command);
    assert!(!detected);
}
}

View file

@ -0,0 +1,15 @@
/// XSS — adversarial collision fixture.
///
/// Prints the XSS oracle marker unconditionally without going through an HTML
/// sink and without printing the sink-reachability sentinel.
///
/// Expected verdict: Inconclusive(OracleCollisionSuspected)
/// oracle_fired = true (OutputContains("<script>NYX_XSS_CONFIRMED</script>"))
/// sink_hit = false (__NYX_SINK_HIT__ never printed)
/// Cap: HTML_ESCAPE Entry: `run(payload: &str)`
pub fn run(payload: &str) {
// Coincidental oracle match — a fixed string, independent of `payload`, not an HTML sink.
println!("<script>NYX_XSS_CONFIRMED</script>");
// Read `payload` and discard the result: the input never influences the output here.
let _ = payload.len();
}

View file

@ -0,0 +1,16 @@
/// XSS — negative fixture.
///
/// Safe function: HTML-escapes user input before embedding in output.
/// Expected verdict: NotConfirmed (XSS payload is escaped; no raw script tag in output).
/// Cap: HTML_ESCAPE Entry: `run(payload: &str)`
pub fn run(payload: &str) {
// Safe: escape HTML special characters; `&` goes first so later entities are not re-escaped.
let escaped = payload
.replace('&', "&amp;")
.replace('<', "&lt;")
.replace('>', "&gt;")
.replace('"', "&quot;")
.replace('\'', "&#x27;");
let html = format!("<div class='comment'>{}</div>", escaped); // only the escaped text is embedded
println!("{}", html);
}

View file

@ -0,0 +1,12 @@
/// XSS — positive fixture.
///
/// Vulnerable function: echoes user input directly into HTML without escaping.
/// Expected verdict: Confirmed (XSS payload echoed verbatim to output).
/// Cap: HTML_ESCAPE Entry: `run(payload: &str)`
pub fn run(payload: &str) {
// Emit and flush the sink-reachability sentinel before the payload is echoed.
println!("__NYX_SINK_HIT__");
let _ = std::io::Write::flush(&mut std::io::stdout());
let html = format!("<div class='comment'>{}</div>", payload); // Vulnerable: raw payload interpolated into HTML.
println!("{}", html);
}

View file

@ -0,0 +1,16 @@
/// XSS — unsupported entry-kind fixture.
///
/// The vulnerable logic lives inside a struct method. The test creates a Diag
/// with Confidence::Low, so `HarnessSpec::from_finding` returns
/// `Err(UnsupportedReason::ConfidenceTooLow)`.
///
/// Expected verdict: Unsupported(ConfidenceTooLow)
/// Cap: HTML_ESCAPE
pub struct PageRenderer;
impl PageRenderer {
pub fn render(&self, user_input: &str) -> String {
// Vulnerable: `user_input` is placed inside the `<div>` without any HTML escaping.
format!("<div>{}</div>", user_input)
}
}

View file

@ -283,6 +283,63 @@ mod rust_fixture_tests {
);
}
// ── XSS fixtures ─────────────────────────────────────────────────────────
#[test]
fn xss_positive_is_confirmed() {
    // The vulnerable fixture must be confirmed, and a confirmed verdict must
    // carry the payload that triggered it.
    let outcome = run_fixture("xss_positive.rs", "run", Cap::HTML_ESCAPE, 11);

    assert_eq!(
        outcome.status,
        VerifyStatus::Confirmed,
        "xss_positive must be Confirmed; got {:?} (detail: {:?})",
        outcome.status,
        outcome.detail
    );

    let has_payload = outcome.triggered_payload.is_some();
    assert!(has_payload, "Confirmed result must have triggered_payload");
}
#[test]
fn xss_negative_is_not_confirmed() {
    // The escaping fixture must not be confirmed.
    let outcome = run_fixture("xss_negative.rs", "run", Cap::HTML_ESCAPE, 15);

    assert_eq!(
        outcome.status,
        VerifyStatus::NotConfirmed,
        "xss_negative must be NotConfirmed; got {:?} (detail: {:?})",
        outcome.status,
        outcome.detail
    );
}
#[test]
fn xss_unsupported_is_unsupported() {
    // A low-confidence diagnostic must be rejected as Unsupported with the
    // ConfidenceTooLow reason.
    let fixture = fixture_path("xss_unsupported.rs");
    let mut diag = make_diag(&fixture, "render", Cap::HTML_ESCAPE, 14);
    diag.confidence = Some(Confidence::Low);

    let options = VerifyOptions::default();
    let verdict = verify_finding(&diag, &options);

    assert_eq!(verdict.status, VerifyStatus::Unsupported);
    assert_eq!(verdict.reason, Some(UnsupportedReason::ConfidenceTooLow));
}
#[test]
fn xss_adversarial_is_inconclusive_collision() {
    // The oracle marker is printed but the sink sentinel is not, so the verdict
    // must be Inconclusive with an oracle-collision reason.
    let outcome = run_fixture("xss_adversarial.rs", "run", Cap::HTML_ESCAPE, 999);

    assert_eq!(
        outcome.status,
        VerifyStatus::Inconclusive,
        "xss_adversarial must be Inconclusive; got {:?}",
        outcome.status
    );

    let expected_reason = Some(InconclusiveReason::OracleCollisionSuspected);
    assert_eq!(
        outcome.inconclusive_reason, expected_reason,
        "adversarial must be OracleCollisionSuspected"
    );
}
// ── Variant fixtures (smoke-test second positive paths) ──────────────────
#[test]