fixed dynamic sandbox hardening to graft /proc

This commit is contained in:
elipeter 2026-06-04 13:53:29 -05:00
parent 18baf71bcc
commit 03b698ddc1
5 changed files with 277 additions and 32 deletions

View file

@ -739,8 +739,9 @@ mod tests {
let parser = dir.join(format!("{WORKER_CLASS}$Parser.class"));
let request = dir.join(format!("{WORKER_CLASS}$Request.class"));
let manifest = dir.join(WORKER_MANIFEST);
let manifest_body =
format!("{WORKER_CLASS}.class\n{WORKER_CLASS}$Parser.class\n{WORKER_CLASS}$Request.class");
let manifest_body = format!(
"{WORKER_CLASS}.class\n{WORKER_CLASS}$Parser.class\n{WORKER_CLASS}$Request.class"
);
// Nothing on disk yet.
assert!(!worker_class_ready(dir));
@ -787,7 +788,11 @@ mod tests {
let tmp = tempfile::TempDir::new().unwrap();
let dir = tmp.path();
std::fs::write(dir.join(WORKER_FILENAME), WORKER_SOURCE).unwrap();
std::fs::write(dir.join(format!("{WORKER_CLASS}.class")), b"\xca\xfe\xba\xbe").unwrap();
std::fs::write(
dir.join(format!("{WORKER_CLASS}.class")),
b"\xca\xfe\xba\xbe",
)
.unwrap();
// Manifest names only the top-level class -- exactly what poisoned
// the persisted bootstrap cache.
std::fs::write(dir.join(WORKER_MANIFEST), format!("{WORKER_CLASS}.class")).unwrap();
@ -797,13 +802,57 @@ mod tests {
);
// Drop in the nested classes the worker actually loads -> ready.
std::fs::write(dir.join(format!("{WORKER_CLASS}$Parser.class")), b"\xca\xfe\xba\xbe")
.unwrap();
std::fs::write(dir.join(format!("{WORKER_CLASS}$Request.class")), b"\xca\xfe\xba\xbe")
.unwrap();
std::fs::write(
dir.join(format!("{WORKER_CLASS}$Parser.class")),
b"\xca\xfe\xba\xbe",
)
.unwrap();
std::fs::write(
dir.join(format!("{WORKER_CLASS}$Request.class")),
b"\xca\xfe\xba\xbe",
)
.unwrap();
assert!(worker_class_ready(dir));
}
#[test]
fn ensure_worker_compiled_heals_partial_cache() {
// End-to-end heal: seed the exact poisoned-cache shape that broke
// Linux (top-level class + a one-line manifest, nested classes
// absent) and confirm `ensure_worker_compiled` recompiles a full,
// loadable class set instead of trusting the stale manifest.
let javac = std::env::var("NYX_JAVAC_BIN").unwrap_or_else(|_| "javac".to_owned());
let have_javac = std::process::Command::new(&javac)
.arg("-version")
.output()
.map(|o| o.status.success())
.unwrap_or(false);
if !have_javac {
return; // No JDK on this host: nothing to recompile with.
}
let tmp = tempfile::TempDir::new().unwrap();
let dir = tmp.path();
std::fs::write(dir.join(WORKER_FILENAME), WORKER_SOURCE).unwrap();
std::fs::write(
dir.join(format!("{WORKER_CLASS}.class")),
b"\xca\xfe\xba\xbe",
)
.unwrap();
std::fs::write(dir.join(WORKER_MANIFEST), format!("{WORKER_CLASS}.class")).unwrap();
assert!(
!worker_class_ready(dir),
"poisoned cache must read not-ready"
);
ensure_worker_compiled(dir).expect("recompile heals the cache");
assert!(worker_class_ready(dir), "healed cache must read ready");
for cls in WORKER_CLASS_FILES {
let meta = std::fs::metadata(dir.join(cls)).expect("class published");
assert!(meta.len() > 0, "{cls} must be a real (non-empty) class");
}
}
#[test]
fn worker_class_files_match_javac_output() {
// Guards `WORKER_CLASS_FILES` against drift: compile the embedded
@ -859,8 +908,16 @@ mod tests {
// Simulate javac output: top-level + nested classes plus a
// non-class artifact that must be ignored.
std::fs::write(staging.join("NyxJavacWorker.class"), b"\xca\xfe\xba\xbe").unwrap();
std::fs::write(staging.join("NyxJavacWorker$Parser.class"), b"\xca\xfe\xba\xbe").unwrap();
std::fs::write(staging.join("NyxJavacWorker$Request.class"), b"\xca\xfe\xba\xbe").unwrap();
std::fs::write(
staging.join("NyxJavacWorker$Parser.class"),
b"\xca\xfe\xba\xbe",
)
.unwrap();
std::fs::write(
staging.join("NyxJavacWorker$Request.class"),
b"\xca\xfe\xba\xbe",
)
.unwrap();
std::fs::write(staging.join("notes.txt"), b"ignore me").unwrap();
publish_class_set(&staging, dir).expect("publish");

View file

@ -2729,15 +2729,41 @@ const TS_ENTRY_LOADER_JS: &str = r#"function nyxEsmToCjs(src) {
return src + suffix;
}
// Best-effort TypeScript type erasure for Node runtimes that lack
// `module.stripTypeScriptTypes` (added in Node 22.6; CI runs node-20).
// Applied only as a fallback, only *after* the ESM->CJS rewrite (so an
// `import * as x` line is already gone and cannot be mistaken for an
// `as` cast), and behind the same try/catch as native loading: if a
// regex over-strips into invalid JS, `_compile` throws and the caller
// drops to the synthetic direct-sink path — never worse than today's
// always-fails-on-node-20 behaviour.
function nyxStripTsTypes(src) {
return src
// `interface X { ... }` declarations.
.replace(/^\s*(export\s+)?interface\s+[A-Za-z_$][\w$]*\s*(<[^>]*>)?\s*\{[\s\S]*?\n\}/gm, '')
// top-level `type X = ...;` aliases.
.replace(/^\s*(export\s+)?type\s+[A-Za-z_$][\w$]*\s*(<[^>]*>)?\s*=[^\n;]*;?\s*$/gm, '')
// `x as Type` / `x as const` casts.
.replace(/\bas\s+(const\b|[A-Za-z_$][\w$.\[\]<>| ]*)/g, '')
// typed variable declarations: `const x: T =` -> `const x =`.
.replace(/\b(const|let|var)\s+([A-Za-z_$][\w$]*)\s*:\s*[A-Za-z_$][\w$.\[\]<>| ]*(?=\s*=)/g, '$1 $2')
// function/arrow return types: `): T {` / `): T =>` -> `) {` / `) =>`.
.replace(/\)\s*:\s*[A-Za-z_$][\w$.\[\]<>| ]*(?=\s*(\{|=>))/g, ')')
// parameter type annotations: `(name: T` / `, name: T` before , or ).
.replace(/([(,]\s*[A-Za-z_$][\w$]*)\s*:\s*[A-Za-z_$][\w$.\[\]<>| ]*(?=\s*[,)])/g, '$1');
}
function nyxLoadTsEntry(file) {
const fs = require('fs');
const Module = require('module');
const path = require('path');
let src = fs.readFileSync(file, 'utf8');
let stripped = false;
if (typeof Module.stripTypeScriptTypes === 'function') {
try { src = Module.stripTypeScriptTypes(src, { mode: 'transform' }); } catch (e) { /* fall through with raw source */ }
try { src = Module.stripTypeScriptTypes(src, { mode: 'transform' }); stripped = true; } catch (e) { /* fall through with raw source */ }
}
src = nyxEsmToCjs(src);
if (!stripped) { src = nyxStripTsTypes(src); }
const m = new Module(file, module);
m.filename = path.resolve(file);
m.paths = Module._nodeModulePaths(path.dirname(m.filename));

View file

@ -1695,6 +1695,32 @@ fn run_process(
let (effective_cmd_path, effective_cmd_args): (std::path::PathBuf, Vec<String>) =
(resolved_cmd_path.clone(), harness.command[1..].to_vec());
// Phase 17 follow-up: when the Strict profile will `chroot(workdir)` in
// pre_exec, the workdir becomes the filesystem root for the harness, so
// any command token that is an absolute path *under* the workdir
// (`<workdir>/nyx_harness`, the staged probe, an interpreter script)
// resolves against `<workdir>/<workdir>/…` post-chroot and dies with
// ENOENT at execve. Reroot each such token to its chroot-relative
// form (`/nyx_harness`); tokens outside the workdir (the bind-mounted
// `/usr/bin` interpreter, literal flags) pass through untouched.
#[cfg(target_os = "linux")]
let (effective_cmd_path, effective_cmd_args) = if process_linux::chroot_will_apply(opts) {
let canon_workdir =
std::fs::canonicalize(&harness.workdir).unwrap_or_else(|_| harness.workdir.clone());
let path = process_linux::reroot_under_chroot(
&effective_cmd_path,
&canon_workdir,
&harness.workdir,
);
let args = effective_cmd_args
.iter()
.map(|a| process_linux::reroot_arg_under_chroot(a, &canon_workdir, &harness.workdir))
.collect();
(path, args)
} else {
(effective_cmd_path, effective_cmd_args)
};
let mut cmd = Command::new(&effective_cmd_path);
cmd.args(&effective_cmd_args);
cmd.current_dir(&harness.workdir);

View file

@ -295,9 +295,9 @@ unsafe extern "C" {
fn chroot(path: *const i8) -> i32;
fn chdir(path: *const i8) -> i32;
fn mount(
source: *const i8,
target: *const i8,
fstype: *const i8,
source: *const core::ffi::c_char,
target: *const core::ffi::c_char,
fstype: *const core::ffi::c_char,
flags: u64,
data: *const core::ffi::c_void,
) -> i32;
@ -419,9 +419,9 @@ fn apply_bind_mounts(mounts: &[BindMount]) {
// every pointer references a valid C string for the duration of the call.
let r = unsafe {
mount(
m.source_nul.as_ptr() as *const i8,
m.dest_nul.as_ptr() as *const i8,
none.as_ptr() as *const i8,
m.source_nul.as_ptr() as *const core::ffi::c_char,
m.dest_nul.as_ptr() as *const core::ffi::c_char,
none.as_ptr() as *const core::ffi::c_char,
MS_BIND,
std::ptr::null(),
)
@ -434,7 +434,7 @@ fn apply_bind_mounts(mounts: &[BindMount]) {
unsafe {
mount(
std::ptr::null(),
m.dest_nul.as_ptr() as *const i8,
m.dest_nul.as_ptr() as *const core::ffi::c_char,
std::ptr::null(),
MS_REMOUNT | MS_BIND | MS_RDONLY,
std::ptr::null(),
@ -618,7 +618,18 @@ fn run_pre_exec_in_child(plan: &PreExecPlan) -> HardeningOutcome {
// the new mount namespace catches them) and before chroot (so the
// bind sources are still reachable at their absolute host paths).
// No-op when `bind_mounts` is empty.
apply_bind_mounts(&plan.bind_mounts);
//
// Gate on a successful `unshare`: if the namespace unshare failed
// (e.g. an AppArmor-restricted unprivileged-userns host, as on
// Ubuntu 24.04 CI runners), we are still in the *host* mount
// namespace. Bind-mounting there would mutate the host and — worse —
// the mounts outlive the child, so the harness tempdir can no longer
// be removed (`rmdir` → EBUSY) and the leak poisons every sibling
// test sharing the temp root. Skipping the mounts degrades an
// interpreter harness to a self-contained cold-start failure instead.
if matches!(outcome.unshare, PrimitiveStatus::Applied) {
apply_bind_mounts(&plan.bind_mounts);
}
outcome.chroot = if ablation.no_chroot {
PrimitiveStatus::Skipped
} else {
@ -664,14 +675,27 @@ fn build_plan(opts: &SandboxOptions, workdir: &Path) -> PreExecPlan {
// chroot via ablation drops the bind-mounts too — leaving them on
// would mount over the host directly inside the unshared mount
// namespace, which is not what the ablation harness wants.
let bind_mounts = if opts.bind_mount_host_libs
&& matches!(profile, ProcessHardeningProfileTag::Strict)
&& !mask.no_chroot
{
compute_host_lib_bind_mounts(workdir)
} else {
Vec::new()
};
let mut bind_mounts = Vec::new();
if matches!(profile, ProcessHardeningProfileTag::Strict) && !mask.no_chroot {
// `/proc` is grafted in unconditionally under Strict+chroot:
// `chroot(workdir)` strips the host `/proc`, but a harness still
// needs `/proc/self` — the hardening probe reads `/proc/self/status`
// (NoNewPrivs / Seccomp lines), and real interpreters / runtimes
// (Go, the JVM, glibc) read `/proc/self/*` at start-up. A read-only
// bind keeps `/proc/self` per-task-accurate while the chroot still
// blocks the *write* side of `/proc/<pid>/root`-style escapes (the
// escape suite's `proc_root_passwd` is contained by the blocked
// sentinel write, not by `/proc` being absent). The mount is gated
// on `unshare` success in `run_pre_exec_in_child`, so a host where
// the namespace unshare failed never grafts it into the live host
// mount namespace.
if let Some(proc_mount) = compute_proc_bind_mount(workdir) {
bind_mounts.push(proc_mount);
}
if opts.bind_mount_host_libs {
bind_mounts.extend(compute_host_lib_bind_mounts(workdir));
}
}
PreExecPlan {
rlimit_cpu_seconds,
@ -749,6 +773,31 @@ fn compute_host_lib_bind_mounts(workdir: &Path) -> Vec<BindMount> {
out
}
/// Build the read-only bind-mount that grafts the host `/proc` into the
/// harness workdir at `workdir/proc`, so `/proc/self/*` stays reachable
/// after `chroot(workdir)`. Returns `None` when the dest dir cannot be
/// created (the mount would have no target). A fresh `mount -t proc`
/// would be cleaner but requires the caller to already be *inside* a PID
/// namespace it owns — `unshare(CLONE_NEWPID)` only moves the child's
/// descendants, not the harness itself — so a bind of the existing host
/// procfs is the only option that works from pre_exec without a second
/// fork. `/proc/self` is rendered per-reading-task by the kernel, so the
/// probe still observes its own NoNewPrivs / Seccomp state correctly.
fn compute_proc_bind_mount(workdir: &Path) -> Option<BindMount> {
if !Path::new("/proc").exists() {
return None;
}
let dest = workdir.join("proc");
if std::fs::create_dir_all(&dest).is_err() {
return None;
}
let dest_canonical = std::fs::canonicalize(&dest).unwrap_or(dest);
Some(BindMount {
source_nul: nul_terminate(b"/proc"),
dest_nul: nul_terminate(dest_canonical.to_string_lossy().as_bytes()),
})
}
fn nul_terminate(bytes: &[u8]) -> Vec<u8> {
let mut v = Vec::with_capacity(bytes.len() + 1);
v.extend_from_slice(bytes);
@ -766,6 +815,75 @@ fn canonicalize_workdir(workdir: &Path) -> Vec<u8> {
bytes
}
// ── Chroot-relative command rewriting ────────────────────────────────────────
/// True when [`install_pre_exec`]'s child will `chroot(2)` for these
/// options: the Strict profile with the chroot primitive not ablated.
///
/// `run_process` consults this to decide whether the harness command's
/// paths need rerooting (see [`reroot_under_chroot`]): after
/// `chroot(workdir)` the workdir *becomes* the filesystem root, so any
/// command token that is an absolute path under the workdir would
/// otherwise resolve against `<workdir>/<workdir>/…` and fail with ENOENT
/// at execve — before the harness prints a single line.
pub fn chroot_will_apply(opts: &SandboxOptions) -> bool {
matches!(opts.process_hardening, ProcessHardeningProfile::Strict)
&& opts.ablation.map_or(true, |m| !m.no_chroot)
}
/// Reroot an absolute path that lives under `workdir` to a *cwd-relative*
/// form (`./<rel>`).
///
/// `run_process` sets `Command::current_dir(workdir)`, so the child's cwd
/// is the workdir before pre_exec runs. [`apply_chroot`] only calls
/// `chdir("/")` *after a successful* `chroot(workdir)`; on a host where
/// `chroot(2)` fails (unprivileged, no `CAP_SYS_CHROOT`, AppArmor-locked
/// userns) it leaves the cwd at the workdir. Either way the cwd points at
/// the workdir's contents, so a cwd-relative `./nyx_harness` resolves to
/// the staged binary whether the chroot landed or not — an *absolute*
/// `/nyx_harness` would only work in the chroot-succeeded case and would
/// ENOENT (harness fails to boot) on every locked-down host. The leading
/// `./` is required so `std`'s exec treats the token as a path rather than
/// a `PATH` search.
///
/// Paths that do not live under the workdir (the bind-mounted
/// `/usr/bin/python3` interpreter, system tools) are returned unchanged.
/// Matching is attempted against the raw workdir first, then its canonical
/// form, then the canonical form of `path` itself — so a symlinked workdir
/// (or a symlinked path component) still rewrites correctly.
pub fn reroot_under_chroot(path: &Path, canon_workdir: &Path, raw_workdir: &Path) -> PathBuf {
if !path.is_absolute() {
return path.to_path_buf();
}
for base in [raw_workdir, canon_workdir] {
if let Ok(rel) = path.strip_prefix(base) {
return Path::new(".").join(rel);
}
}
if let Ok(canon) = std::fs::canonicalize(path) {
if let Ok(rel) = canon.strip_prefix(canon_workdir) {
return Path::new(".").join(rel);
}
}
path.to_path_buf()
}
/// Apply [`reroot_under_chroot`] to a single command-line argument when
/// it is an absolute path under the workdir; non-path and outside-workdir
/// arguments pass through verbatim.
pub fn reroot_arg_under_chroot(arg: &str, canon_workdir: &Path, raw_workdir: &Path) -> String {
let p = Path::new(arg);
if !p.is_absolute() {
return arg.to_owned();
}
let rerooted = reroot_under_chroot(p, canon_workdir, raw_workdir);
if rerooted.as_path() == p {
arg.to_owned()
} else {
rerooted.to_string_lossy().into_owned()
}
}
// ── Tests ────────────────────────────────────────────────────────────────────
#[cfg(test)]
@ -873,16 +991,32 @@ mod tests {
}
#[test]
fn build_plan_without_bind_mount_flag_yields_empty_list() {
fn build_plan_strict_grafts_proc_without_lib_flag() {
// Even with `bind_mount_host_libs=false`, Strict+chroot grafts
// `/proc` (the harness needs `/proc/self` after chroot) but no
// host-lib mounts. On a build host without `/proc` (macOS dev
// box) the graft is a no-op and the list stays empty.
let workdir = tempfile::TempDir::new().expect("tempdir");
let opts = SandboxOptions {
process_hardening: ProcessHardeningProfile::Strict,
..SandboxOptions::default()
};
let plan = build_plan(&opts, std::path::Path::new("/tmp"));
assert!(
plan.bind_mounts.is_empty(),
"bind_mounts should stay empty when bind_mount_host_libs=false",
);
let plan = build_plan(&opts, workdir.path());
if std::path::Path::new("/proc").exists() {
assert!(
plan.bind_mounts.iter().any(|m| m.source_nul == b"/proc\0"),
"Strict+chroot must graft /proc so the harness can read /proc/self",
);
assert!(
!plan
.bind_mounts
.iter()
.any(|m| { m.source_nul == b"/lib\0" || m.source_nul == b"/usr/lib\0" }),
"no host-lib mounts should appear without bind_mount_host_libs",
);
} else {
assert!(plan.bind_mounts.is_empty());
}
}
#[test]

View file

@ -86,6 +86,8 @@ allow = [
"kill",
"openat",
"open",
"execve",
"execveat",
"access",
"faccessat",
"faccessat2",