From 03b698ddc101949477875dd73344b018b396cf85 Mon Sep 17 00:00:00 2001 From: elipeter Date: Thu, 4 Jun 2026 13:53:29 -0500 Subject: [PATCH] fixed dynamic sandbox hardening to graft /proc --- src/dynamic/build_pool/java.rs | 75 +++++++- src/dynamic/lang/js_shared.rs | 28 ++- src/dynamic/sandbox/mod.rs | 26 +++ src/dynamic/sandbox/process_linux.rs | 178 +++++++++++++++--- .../sandbox/seccomp/seccomp_policy.toml | 2 + 5 files changed, 277 insertions(+), 32 deletions(-) diff --git a/src/dynamic/build_pool/java.rs b/src/dynamic/build_pool/java.rs index 3862ebbd..6ce907da 100644 --- a/src/dynamic/build_pool/java.rs +++ b/src/dynamic/build_pool/java.rs @@ -739,8 +739,9 @@ mod tests { let parser = dir.join(format!("{WORKER_CLASS}$Parser.class")); let request = dir.join(format!("{WORKER_CLASS}$Request.class")); let manifest = dir.join(WORKER_MANIFEST); - let manifest_body = - format!("{WORKER_CLASS}.class\n{WORKER_CLASS}$Parser.class\n{WORKER_CLASS}$Request.class"); + let manifest_body = format!( + "{WORKER_CLASS}.class\n{WORKER_CLASS}$Parser.class\n{WORKER_CLASS}$Request.class" + ); // Nothing on disk yet. assert!(!worker_class_ready(dir)); @@ -787,7 +788,11 @@ mod tests { let tmp = tempfile::TempDir::new().unwrap(); let dir = tmp.path(); std::fs::write(dir.join(WORKER_FILENAME), WORKER_SOURCE).unwrap(); - std::fs::write(dir.join(format!("{WORKER_CLASS}.class")), b"\xca\xfe\xba\xbe").unwrap(); + std::fs::write( + dir.join(format!("{WORKER_CLASS}.class")), + b"\xca\xfe\xba\xbe", + ) + .unwrap(); // Manifest names only the top-level class -- exactly what poisoned // the persisted bootstrap cache. std::fs::write(dir.join(WORKER_MANIFEST), format!("{WORKER_CLASS}.class")).unwrap(); @@ -797,13 +802,57 @@ mod tests { ); // Drop in the nested classes the worker actually loads -> ready. - std::fs::write(dir.join(format!("{WORKER_CLASS}$Parser.class")), b"\xca\xfe\xba\xbe") - .unwrap(); - std::fs::write(dir.join(format!("{WORKER_CLASS}$Request.class")), b"\xca\xfe\xba\xbe") - .unwrap(); + std::fs::write( + dir.join(format!("{WORKER_CLASS}$Parser.class")), + b"\xca\xfe\xba\xbe", + ) + .unwrap(); + std::fs::write( + dir.join(format!("{WORKER_CLASS}$Request.class")), + b"\xca\xfe\xba\xbe", + ) + .unwrap(); assert!(worker_class_ready(dir)); } + #[test] + fn ensure_worker_compiled_heals_partial_cache() { + // End-to-end heal: seed the exact poisoned-cache shape that broke + // Linux (top-level class + a one-line manifest, nested classes + // absent) and confirm `ensure_worker_compiled` recompiles a full, + // loadable class set instead of trusting the stale manifest. + let javac = std::env::var("NYX_JAVAC_BIN").unwrap_or_else(|_| "javac".to_owned()); + let have_javac = std::process::Command::new(&javac) + .arg("-version") + .output() + .map(|o| o.status.success()) + .unwrap_or(false); + if !have_javac { + return; // No JDK on this host: nothing to recompile with. + } + let tmp = tempfile::TempDir::new().unwrap(); + let dir = tmp.path(); + std::fs::write(dir.join(WORKER_FILENAME), WORKER_SOURCE).unwrap(); + std::fs::write( + dir.join(format!("{WORKER_CLASS}.class")), + b"\xca\xfe\xba\xbe", + ) + .unwrap(); + std::fs::write(dir.join(WORKER_MANIFEST), format!("{WORKER_CLASS}.class")).unwrap(); + assert!( + !worker_class_ready(dir), + "poisoned cache must read not-ready" + ); + + ensure_worker_compiled(dir).expect("recompile heals the cache"); + + assert!(worker_class_ready(dir), "healed cache must read ready"); + for cls in WORKER_CLASS_FILES { + let meta = std::fs::metadata(dir.join(cls)).expect("class published"); + assert!(meta.len() > 0, "{cls} must be a real (non-empty) class"); + } + } + #[test] fn worker_class_files_match_javac_output() { // Guards `WORKER_CLASS_FILES` against drift: compile the embedded @@ -859,8 +908,16 @@ mod tests { // Simulate javac output: top-level + nested classes plus a // non-class artifact that must be ignored. std::fs::write(staging.join("NyxJavacWorker.class"), b"\xca\xfe\xba\xbe").unwrap(); - std::fs::write(staging.join("NyxJavacWorker$Parser.class"), b"\xca\xfe\xba\xbe").unwrap(); - std::fs::write(staging.join("NyxJavacWorker$Request.class"), b"\xca\xfe\xba\xbe").unwrap(); + std::fs::write( + staging.join("NyxJavacWorker$Parser.class"), + b"\xca\xfe\xba\xbe", + ) + .unwrap(); + std::fs::write( + staging.join("NyxJavacWorker$Request.class"), + b"\xca\xfe\xba\xbe", + ) + .unwrap(); std::fs::write(staging.join("notes.txt"), b"ignore me").unwrap(); publish_class_set(&staging, dir).expect("publish"); diff --git a/src/dynamic/lang/js_shared.rs b/src/dynamic/lang/js_shared.rs index ec6b0267..efe2c24e 100644 --- a/src/dynamic/lang/js_shared.rs +++ b/src/dynamic/lang/js_shared.rs @@ -2729,15 +2729,41 @@ const TS_ENTRY_LOADER_JS: &str = r#"function nyxEsmToCjs(src) { return src + suffix; } +// Best-effort TypeScript type erasure for Node runtimes that lack +// `module.stripTypeScriptTypes` (added in Node 22.6; CI runs node-20). +// Applied only as a fallback, only *after* the ESM->CJS rewrite (so an +// `import * as x` line is already gone and cannot be mistaken for an +// `as` cast), and behind the same try/catch as native loading: if a +// regex over-strips into invalid JS, `_compile` throws and the caller +// drops to the synthetic direct-sink path — never worse than today's +// always-fails-on-node-20 behaviour. +function nyxStripTsTypes(src) { + return src + // `interface X { ... }` declarations. + .replace(/^\s*(export\s+)?interface\s+[A-Za-z_$][\w$]*\s*(<[^>]*>)?\s*\{[\s\S]*?\n\}/gm, '') + // top-level `type X = ...;` aliases. + .replace(/^\s*(export\s+)?type\s+[A-Za-z_$][\w$]*\s*(<[^>]*>)?\s*=[^\n;]*;?\s*$/gm, '') + // `x as Type` / `x as const` casts. + .replace(/\bas\s+(const\b|[A-Za-z_$][\w$.\[\]<>| ]*)/g, '') + // typed variable declarations: `const x: T =` -> `const x =`. + .replace(/\b(const|let|var)\s+([A-Za-z_$][\w$]*)\s*:\s*[A-Za-z_$][\w$.\[\]<>| ]*(?=\s*=)/g, '$1 $2') + // function/arrow return types: `): T {` / `): T =>` -> `) {` / `) =>`. + .replace(/\)\s*:\s*[A-Za-z_$][\w$.\[\]<>| ]*(?=\s*(\{|=>))/g, ')') + // parameter type annotations: `(name: T` / `, name: T` before , or ). + .replace(/([(,]\s*[A-Za-z_$][\w$]*)\s*:\s*[A-Za-z_$][\w$.\[\]<>| ]*(?=\s*[,)])/g, '$1'); +} + function nyxLoadTsEntry(file) { const fs = require('fs'); const Module = require('module'); const path = require('path'); let src = fs.readFileSync(file, 'utf8'); + let stripped = false; if (typeof Module.stripTypeScriptTypes === 'function') { - try { src = Module.stripTypeScriptTypes(src, { mode: 'transform' }); } catch (e) { /* fall through with raw source */ } + try { src = Module.stripTypeScriptTypes(src, { mode: 'transform' }); stripped = true; } catch (e) { /* fall through with raw source */ } } src = nyxEsmToCjs(src); + if (!stripped) { src = nyxStripTsTypes(src); } const m = new Module(file, module); m.filename = path.resolve(file); m.paths = Module._nodeModulePaths(path.dirname(m.filename)); diff --git a/src/dynamic/sandbox/mod.rs b/src/dynamic/sandbox/mod.rs index 06bdf1fe..ed2b9cfb 100644 --- a/src/dynamic/sandbox/mod.rs +++ b/src/dynamic/sandbox/mod.rs @@ -1695,6 +1695,32 @@ fn run_process( let (effective_cmd_path, effective_cmd_args): (std::path::PathBuf, Vec) = (resolved_cmd_path.clone(), harness.command[1..].to_vec()); + // Phase 17 follow-up: when the Strict profile will `chroot(workdir)` in + // pre_exec, the workdir becomes the filesystem root for the harness, so + // any command token that is an absolute path *under* the workdir + // (`/nyx_harness`, the staged probe, an interpreter script) + // resolves against `//…` post-chroot and dies with + // ENOENT at execve. Reroot each such token to its chroot-relative + // form (`/nyx_harness`); tokens outside the workdir (the bind-mounted + // `/usr/bin` interpreter, literal flags) pass through untouched. + #[cfg(target_os = "linux")] + let (effective_cmd_path, effective_cmd_args) = if process_linux::chroot_will_apply(opts) { + let canon_workdir = + std::fs::canonicalize(&harness.workdir).unwrap_or_else(|_| harness.workdir.clone()); + let path = process_linux::reroot_under_chroot( + &effective_cmd_path, + &canon_workdir, + &harness.workdir, + ); + let args = effective_cmd_args + .iter() + .map(|a| process_linux::reroot_arg_under_chroot(a, &canon_workdir, &harness.workdir)) + .collect(); + (path, args) + } else { + (effective_cmd_path, effective_cmd_args) + }; + let mut cmd = Command::new(&effective_cmd_path); cmd.args(&effective_cmd_args); cmd.current_dir(&harness.workdir); diff --git a/src/dynamic/sandbox/process_linux.rs b/src/dynamic/sandbox/process_linux.rs index 86823c5e..4482dc4f 100644 --- a/src/dynamic/sandbox/process_linux.rs +++ b/src/dynamic/sandbox/process_linux.rs @@ -295,9 +295,9 @@ unsafe extern "C" { fn chroot(path: *const i8) -> i32; fn chdir(path: *const i8) -> i32; fn mount( - source: *const i8, - target: *const i8, - fstype: *const i8, + source: *const core::ffi::c_char, + target: *const core::ffi::c_char, + fstype: *const core::ffi::c_char, flags: u64, data: *const core::ffi::c_void, ) -> i32; @@ -419,9 +419,9 @@ fn apply_bind_mounts(mounts: &[BindMount]) { // every pointer references a valid C string for the duration of the call. let r = unsafe { mount( - m.source_nul.as_ptr() as *const i8, - m.dest_nul.as_ptr() as *const i8, - none.as_ptr() as *const i8, + m.source_nul.as_ptr() as *const core::ffi::c_char, + m.dest_nul.as_ptr() as *const core::ffi::c_char, + none.as_ptr() as *const core::ffi::c_char, MS_BIND, std::ptr::null(), ) @@ -434,7 +434,7 @@ fn apply_bind_mounts(mounts: &[BindMount]) { unsafe { mount( std::ptr::null(), - m.dest_nul.as_ptr() as *const i8, + m.dest_nul.as_ptr() as *const core::ffi::c_char, std::ptr::null(), MS_REMOUNT | MS_BIND | MS_RDONLY, std::ptr::null(), @@ -618,7 +618,18 @@ fn run_pre_exec_in_child(plan: &PreExecPlan) -> HardeningOutcome { // the new mount namespace catches them) and before chroot (so the // bind sources are still reachable at their absolute host paths). // No-op when `bind_mounts` is empty. - apply_bind_mounts(&plan.bind_mounts); + // + // Gate on a successful `unshare`: if the namespace unshare failed + // (e.g. an AppArmor-restricted unprivileged-userns host, as on + // Ubuntu 24.04 CI runners), we are still in the *host* mount + // namespace. Bind-mounting there would mutate the host and — worse — + // the mounts outlive the child, so the harness tempdir can no longer + // be removed (`rmdir` → EBUSY) and the leak poisons every sibling + // test sharing the temp root. Skipping the mounts degrades an + // interpreter harness to a self-contained cold-start failure instead. + if matches!(outcome.unshare, PrimitiveStatus::Applied) { + apply_bind_mounts(&plan.bind_mounts); + } outcome.chroot = if ablation.no_chroot { PrimitiveStatus::Skipped } else { @@ -664,14 +675,27 @@ fn build_plan(opts: &SandboxOptions, workdir: &Path) -> PreExecPlan { // chroot via ablation drops the bind-mounts too — leaving them on // would mount over the host directly inside the unshared mount // namespace, which is not what the ablation harness wants. - let bind_mounts = if opts.bind_mount_host_libs - && matches!(profile, ProcessHardeningProfileTag::Strict) - && !mask.no_chroot - { - compute_host_lib_bind_mounts(workdir) - } else { - Vec::new() - }; + let mut bind_mounts = Vec::new(); + if matches!(profile, ProcessHardeningProfileTag::Strict) && !mask.no_chroot { + // `/proc` is grafted in unconditionally under Strict+chroot: + // `chroot(workdir)` strips the host `/proc`, but a harness still + // needs `/proc/self` — the hardening probe reads `/proc/self/status` + // (NoNewPrivs / Seccomp lines), and real interpreters / runtimes + // (Go, the JVM, glibc) read `/proc/self/*` at start-up. A read-only + // bind keeps `/proc/self` per-task-accurate while the chroot still + // blocks the *write* side of `/proc//root`-style escapes (the + // escape suite's `proc_root_passwd` is contained by the blocked + // sentinel write, not by `/proc` being absent). The mount is gated + // on `unshare` success in `run_pre_exec_in_child`, so a host where + // the namespace unshare failed never grafts it into the live host + // mount namespace. + if let Some(proc_mount) = compute_proc_bind_mount(workdir) { + bind_mounts.push(proc_mount); + } + if opts.bind_mount_host_libs { + bind_mounts.extend(compute_host_lib_bind_mounts(workdir)); + } + } PreExecPlan { rlimit_cpu_seconds, @@ -749,6 +773,31 @@ fn compute_host_lib_bind_mounts(workdir: &Path) -> Vec { out } +/// Build the read-only bind-mount that grafts the host `/proc` into the +/// harness workdir at `workdir/proc`, so `/proc/self/*` stays reachable +/// after `chroot(workdir)`. Returns `None` when the dest dir cannot be +/// created (the mount would have no target). A fresh `mount -t proc` +/// would be cleaner but requires the caller to already be *inside* a PID +/// namespace it owns — `unshare(CLONE_NEWPID)` only moves the child's +/// descendants, not the harness itself — so a bind of the existing host +/// procfs is the only option that works from pre_exec without a second +/// fork. `/proc/self` is rendered per-reading-task by the kernel, so the +/// probe still observes its own NoNewPrivs / Seccomp state correctly. +fn compute_proc_bind_mount(workdir: &Path) -> Option { + if !Path::new("/proc").exists() { + return None; + } + let dest = workdir.join("proc"); + if std::fs::create_dir_all(&dest).is_err() { + return None; + } + let dest_canonical = std::fs::canonicalize(&dest).unwrap_or(dest); + Some(BindMount { + source_nul: nul_terminate(b"/proc"), + dest_nul: nul_terminate(dest_canonical.to_string_lossy().as_bytes()), + }) +} + fn nul_terminate(bytes: &[u8]) -> Vec { let mut v = Vec::with_capacity(bytes.len() + 1); v.extend_from_slice(bytes); @@ -766,6 +815,75 @@ fn canonicalize_workdir(workdir: &Path) -> Vec { bytes } +// ── Chroot-relative command rewriting ──────────────────────────────────────── + +/// True when [`install_pre_exec`]'s child will `chroot(2)` for these +/// options: the Strict profile with the chroot primitive not ablated. +/// +/// `run_process` consults this to decide whether the harness command's +/// paths need rerooting (see [`reroot_under_chroot`]): after +/// `chroot(workdir)` the workdir *becomes* the filesystem root, so any +/// command token that is an absolute path under the workdir would +/// otherwise resolve against `//…` and fail with ENOENT +/// at execve — before the harness prints a single line. +pub fn chroot_will_apply(opts: &SandboxOptions) -> bool { + matches!(opts.process_hardening, ProcessHardeningProfile::Strict) + && opts.ablation.map_or(true, |m| !m.no_chroot) +} + +/// Reroot an absolute path that lives under `workdir` to a *cwd-relative* +/// form (`./`). +/// +/// `run_process` sets `Command::current_dir(workdir)`, so the child's cwd +/// is the workdir before pre_exec runs. [`apply_chroot`] only calls +/// `chdir("/")` *after a successful* `chroot(workdir)`; on a host where +/// `chroot(2)` fails (unprivileged, no `CAP_SYS_CHROOT`, AppArmor-locked +/// userns) it leaves the cwd at the workdir. Either way the cwd points at +/// the workdir's contents, so a cwd-relative `./nyx_harness` resolves to +/// the staged binary whether the chroot landed or not — an *absolute* +/// `/nyx_harness` would only work in the chroot-succeeded case and would +/// ENOENT (harness fails to boot) on every locked-down host. The leading +/// `./` is required so `std`'s exec treats the token as a path rather than +/// a `PATH` search. +/// +/// Paths that do not live under the workdir (the bind-mounted +/// `/usr/bin/python3` interpreter, system tools) are returned unchanged. +/// Matching is attempted against the raw workdir first, then its canonical +/// form, then the canonical form of `path` itself — so a symlinked workdir +/// (or a symlinked path component) still rewrites correctly. +pub fn reroot_under_chroot(path: &Path, canon_workdir: &Path, raw_workdir: &Path) -> PathBuf { + if !path.is_absolute() { + return path.to_path_buf(); + } + for base in [raw_workdir, canon_workdir] { + if let Ok(rel) = path.strip_prefix(base) { + return Path::new(".").join(rel); + } + } + if let Ok(canon) = std::fs::canonicalize(path) { + if let Ok(rel) = canon.strip_prefix(canon_workdir) { + return Path::new(".").join(rel); + } + } + path.to_path_buf() +} + +/// Apply [`reroot_under_chroot`] to a single command-line argument when +/// it is an absolute path under the workdir; non-path and outside-workdir +/// arguments pass through verbatim. +pub fn reroot_arg_under_chroot(arg: &str, canon_workdir: &Path, raw_workdir: &Path) -> String { + let p = Path::new(arg); + if !p.is_absolute() { + return arg.to_owned(); + } + let rerooted = reroot_under_chroot(p, canon_workdir, raw_workdir); + if rerooted.as_path() == p { + arg.to_owned() + } else { + rerooted.to_string_lossy().into_owned() + } +} + // ── Tests ──────────────────────────────────────────────────────────────────── #[cfg(test)] @@ -873,16 +991,32 @@ mod tests { } #[test] - fn build_plan_without_bind_mount_flag_yields_empty_list() { + fn build_plan_strict_grafts_proc_without_lib_flag() { + // Even with `bind_mount_host_libs=false`, Strict+chroot grafts + // `/proc` (the harness needs `/proc/self` after chroot) but no + // host-lib mounts. On a build host without `/proc` (macOS dev + // box) the graft is a no-op and the list stays empty. + let workdir = tempfile::TempDir::new().expect("tempdir"); let opts = SandboxOptions { process_hardening: ProcessHardeningProfile::Strict, ..SandboxOptions::default() }; - let plan = build_plan(&opts, std::path::Path::new("/tmp")); - assert!( - plan.bind_mounts.is_empty(), - "bind_mounts should stay empty when bind_mount_host_libs=false", - ); + let plan = build_plan(&opts, workdir.path()); + if std::path::Path::new("/proc").exists() { + assert!( + plan.bind_mounts.iter().any(|m| m.source_nul == b"/proc\0"), + "Strict+chroot must graft /proc so the harness can read /proc/self", + ); + assert!( + !plan + .bind_mounts + .iter() + .any(|m| { m.source_nul == b"/lib\0" || m.source_nul == b"/usr/lib\0" }), + "no host-lib mounts should appear without bind_mount_host_libs", + ); + } else { + assert!(plan.bind_mounts.is_empty()); + } } #[test] diff --git a/src/dynamic/sandbox/seccomp/seccomp_policy.toml b/src/dynamic/sandbox/seccomp/seccomp_policy.toml index 74cdf2ef..13bb9515 100644 --- a/src/dynamic/sandbox/seccomp/seccomp_policy.toml +++ b/src/dynamic/sandbox/seccomp/seccomp_policy.toml @@ -86,6 +86,8 @@ allow = [ "kill", "openat", "open", + "execve", + "execveat", "access", "faccessat", "faccessat2",