From cadb3e4449098c3428974074648df3ca5e6d90f5 Mon Sep 17 00:00:00 2001 From: pitboss Date: Sun, 17 May 2026 02:52:16 -0500 Subject: [PATCH] [pitboss/grind] deferred session-0008 (20260517T044708Z-e058) --- src/dynamic/sandbox/mod.rs | 12 ++ src/dynamic/sandbox/process_linux.rs | 219 ++++++++++++++++++++++++++- tests/determinism_audit.rs | 208 +++++++++++++++++++++++++ 3 files changed, 434 insertions(+), 5 deletions(-) diff --git a/src/dynamic/sandbox/mod.rs b/src/dynamic/sandbox/mod.rs index 0af58e90..e0f07f80 100644 --- a/src/dynamic/sandbox/mod.rs +++ b/src/dynamic/sandbox/mod.rs @@ -232,6 +232,17 @@ pub struct SandboxOptions { /// process backend. See [`ProcessHardeningProfile`] for the per- /// variant primitive matrix. pub process_hardening: ProcessHardeningProfile, + /// Phase 17 follow-up: when true and the active profile is + /// [`ProcessHardeningProfile::Strict`], the Linux process backend + /// bind-mounts the host's `/lib`, `/lib64`, `/usr/lib`, and `/usr/bin` + /// read-only into the harness workdir before `chroot(2)` so dynamic + /// loaders (python3, node, java) can resolve shared libraries from + /// inside the chroot. No-op on macOS — the `sandbox-exec` wrap + /// handles this via its allow-list grammar. Default `false` so + /// statically-linked C/Go harnesses (Phase 17 fixture path) keep + /// today's behaviour; opt-in callers (interpreted-language harness + /// builders) set the field when an interpreter is on the run path. + pub bind_mount_host_libs: bool, /// Phase 30 (Track C observability): optional [`VerifyTrace`] handle /// the runner appends pipeline stages to (`build_started`, /// `build_done`, `sandbox_started`, `oracle_wait`, `oracle_observed`). @@ -292,6 +303,7 @@ impl Default for SandboxOptions { stub_harness: None, seccomp_caps: 0, process_hardening: ProcessHardeningProfile::Standard, + bind_mount_host_libs: false, trace: None, } } diff --git a/src/dynamic/sandbox/process_linux.rs b/src/dynamic/sandbox/process_linux.rs index 75eadb43..509fd4c9 100644 --- a/src/dynamic/sandbox/process_linux.rs +++ b/src/dynamic/sandbox/process_linux.rs @@ -254,6 +254,13 @@ const CLONE_NEWNS: i32 = 0x0002_0000; const CLONE_NEWUSER: i32 = 0x1000_0000; const CLONE_NEWPID: i32 = 0x2000_0000; +// `mount(2)` flag bits used by the bind-mount path. Constants match +// `` on glibc / musl; kept inline so pre_exec does not need +// a libc-bindings crate. +const MS_RDONLY: u64 = 0x0000_0001; +const MS_REMOUNT: u64 = 0x0000_0020; +const MS_BIND: u64 = 0x0000_1000; + #[repr(C)] struct Rlimit { cur: u64, @@ -266,6 +273,13 @@ unsafe extern "C" { fn unshare(flags: i32) -> i32; fn chroot(path: *const i8) -> i32; fn chdir(path: *const i8) -> i32; + fn mount( + source: *const i8, + target: *const i8, + fstype: *const i8, + flags: u64, + data: *const i8, + ) -> i32; fn write(fd: i32, buf: *const u8, count: usize) -> isize; fn __errno_location() -> *mut i32; } @@ -322,6 +336,54 @@ fn apply_chroot(workdir: &[u8]) -> PrimitiveStatus { PrimitiveStatus::Applied } +/// One read-only bind-mount the child applies after `unshare(CLONE_NEWNS)` +/// and before `chroot(2)`. Both fields are NUL-terminated by +/// [`canonicalize_bind_mount`] so the pre_exec callback can hand the +/// bytes straight to `mount(2)` without allocating. +#[derive(Clone, Debug)] +struct BindMount { + source_nul: Vec, + dest_nul: Vec, +} + +/// Apply each bind-mount in `mounts`: first `mount(... MS_BIND ...)` to +/// graft the host path into the workdir, then a second `mount(... MS_REMOUNT +/// | MS_BIND | MS_RDONLY ...)` to flip the new mount read-only. Both +/// calls are best-effort — a failure surfaces only via the post-chroot +/// behaviour (the interpreter cannot resolve its `ld.so`) rather than +/// the [`HardeningOutcome`] wire record, so callers that care about the +/// bind-mount succeeding gate on whether the harness produced output. +/// +/// Called in pre_exec between [`apply_unshare`] and [`apply_chroot`] so +/// the new mount namespace is private to the child + grandchildren and +/// the workdir is still reachable at its host-side absolute path. +fn apply_bind_mounts(mounts: &[BindMount]) { + let none = b"none\0"; + for m in mounts { + let r = unsafe { + mount( + m.source_nul.as_ptr() as *const i8, + m.dest_nul.as_ptr() as *const i8, + none.as_ptr() as *const i8, + MS_BIND, + std::ptr::null(), + ) + }; + if r != 0 { + continue; + } + unsafe { + mount( + std::ptr::null(), + m.dest_nul.as_ptr() as *const i8, + std::ptr::null(), + MS_REMOUNT | MS_BIND | MS_RDONLY, + std::ptr::null(), + ) + }; + } +} + /// Install a pre-compiled seccomp BPF filter on the calling thread. /// /// `program` is a heap-allocated BPF instruction array compiled in the @@ -347,6 +409,11 @@ struct PreExecPlan { /// allocator. seccomp_program: Arc>, profile: ProcessHardeningProfileTag, + /// Read-only bind-mounts the child applies after `unshare(CLONE_NEWNS)` + /// and before `chroot(2)`. Empty when + /// [`SandboxOptions::bind_mount_host_libs`] is false or the active + /// profile is `Standard` (no namespace to bind into). + bind_mounts: Vec, } /// Returned by [`install_pre_exec`]. The caller MUST invoke either @@ -465,9 +532,14 @@ fn run_pre_exec_in_child(plan: &PreExecPlan) -> HardeningOutcome { outcome.rlimit_cpu = apply_rlimit(RLIMIT_CPU, plan.rlimit_cpu_seconds); outcome.rlimit_nofile = apply_rlimit(RLIMIT_NOFILE, plan.rlimit_nofile); outcome.unshare = apply_unshare(); + // Bind-mount host library paths into the workdir after unshare (so + // the new mount namespace catches them) and before chroot (so the + // bind sources are still reachable at their absolute host paths). + // No-op when `bind_mounts` is empty. + apply_bind_mounts(&plan.bind_mounts); outcome.chroot = apply_chroot(&plan.workdir_nul); // seccomp is applied last so the filter does not block any of the - // earlier syscalls (setrlimit, prctl, unshare, chroot, chdir). + // earlier syscalls (setrlimit, prctl, unshare, chroot, chdir, mount). outcome.seccomp = apply_seccomp(plan.seccomp_program.as_slice()); outcome @@ -489,19 +561,84 @@ fn build_plan(opts: &SandboxOptions, workdir: &Path) -> PreExecPlan { let nrs = seccomp::allowed_syscall_numbers(opts.seccomp_caps); let program = seccomp::bpf::compile(&nrs, seccomp::syscalls::AUDIT_ARCH); + let profile = match opts.process_hardening { + ProcessHardeningProfile::Standard => ProcessHardeningProfileTag::Standard, + ProcessHardeningProfile::Strict => ProcessHardeningProfileTag::Strict, + }; + + // Bind-mounts are only useful when the child will chroot, i.e. under + // the Strict profile. Computing them under Standard would create + // empty dest dirs in the workdir for no reason. + let bind_mounts = if opts.bind_mount_host_libs + && matches!(profile, ProcessHardeningProfileTag::Strict) + { + compute_host_lib_bind_mounts(workdir) + } else { + Vec::new() + }; + PreExecPlan { rlimit_cpu_seconds, rlimit_nofile: 256, rlimit_as_bytes, workdir_nul, seccomp_program: Arc::new(program), - profile: match opts.process_hardening { - ProcessHardeningProfile::Standard => ProcessHardeningProfileTag::Standard, - ProcessHardeningProfile::Strict => ProcessHardeningProfileTag::Strict, - }, + profile, + bind_mounts, } } +/// Build the bind-mount list for the dynamic-loader paths an interpreted +/// harness needs to find shared libraries from inside the chroot. Each +/// entry is `(host_source, workdir_dest)` where `host_source` is a real +/// host path that exists and `workdir_dest` is a freshly-created mount +/// point inside the harness workdir. +/// +/// Skips any candidate whose host source does not exist (e.g. `/lib64` +/// on a multi-arch Debian box that puts everything under `/lib/x86_64-linux-gnu`). +/// Also skips any candidate whose dest directory creation fails — the +/// mount would not have a target to attach to anyway. +fn compute_host_lib_bind_mounts(workdir: &Path) -> Vec { + // The candidate set covers the dynamic-loader resolution path on + // every mainstream glibc distro: + // * /lib — ld-linux.so on multilib-i386 systems, and the + // traditional location on musl-based distros. + // * /lib64 — ld-linux-x86-64.so.2 on glibc x86_64 systems. + // * /usr/lib — the bulk of shared libraries on modern distros + // after the `/usr` merge. + // * /usr/bin — interpreter binaries (python3, node, java) + // resolved via PATH=/usr/bin after chroot. + const CANDIDATES: &[(&str, &str)] = &[ + ("/lib", "lib"), + ("/lib64", "lib64"), + ("/usr/lib", "usr/lib"), + ("/usr/bin", "usr/bin"), + ]; + let mut out = Vec::with_capacity(CANDIDATES.len()); + for (host, rel) in CANDIDATES { + if !Path::new(host).exists() { + continue; + } + let dest = workdir.join(rel); + if std::fs::create_dir_all(&dest).is_err() { + continue; + } + let dest_canonical = std::fs::canonicalize(&dest).unwrap_or(dest); + out.push(BindMount { + source_nul: nul_terminate(host.as_bytes()), + dest_nul: nul_terminate(dest_canonical.to_string_lossy().as_bytes()), + }); + } + out +} + +fn nul_terminate(bytes: &[u8]) -> Vec { + let mut v = Vec::with_capacity(bytes.len() + 1); + v.extend_from_slice(bytes); + v.push(0); + v +} + fn canonicalize_workdir(workdir: &Path) -> Vec { let canonical: PathBuf = std::fs::canonicalize(workdir).unwrap_or_else(|_| workdir.to_path_buf()); let mut bytes = canonical.into_os_string().into_encoded_bytes(); @@ -607,4 +744,76 @@ mod tests { assert!(decode_outcome(&[0_u8; OUTCOME_LEN - 1]).is_none()); } + #[test] + fn build_plan_without_bind_mount_flag_yields_empty_list() { + let opts = SandboxOptions { + process_hardening: ProcessHardeningProfile::Strict, + ..SandboxOptions::default() + }; + let plan = build_plan(&opts, std::path::Path::new("/tmp")); + assert!( + plan.bind_mounts.is_empty(), + "bind_mounts should stay empty when bind_mount_host_libs=false", + ); + } + + #[test] + fn build_plan_standard_profile_skips_bind_mounts_even_when_flag_set() { + // Standard profile does not chroot, so bind-mounting host libs + // would just create dead dirs in the workdir for no reason. + let opts = SandboxOptions { + bind_mount_host_libs: true, + process_hardening: ProcessHardeningProfile::Standard, + ..SandboxOptions::default() + }; + let plan = build_plan(&opts, std::path::Path::new("/tmp")); + assert!(plan.bind_mounts.is_empty()); + } + + #[test] + fn build_plan_strict_with_bind_mount_flag_pre_creates_dest_dirs() { + // /usr/lib exists on every mainstream Linux distro, so at least + // one bind-mount entry should land. The dest must be a real + // directory by the time build_plan returns — pre_exec cannot + // mkdir during the no-allocate window. + let workdir = tempfile::TempDir::new().expect("tempdir"); + let opts = SandboxOptions { + bind_mount_host_libs: true, + process_hardening: ProcessHardeningProfile::Strict, + ..SandboxOptions::default() + }; + let plan = build_plan(&opts, workdir.path()); + + // Every entry's source must be NUL-terminated for the `mount(2)` + // call, and every dest must exist on disk. + for m in &plan.bind_mounts { + assert!(m.source_nul.ends_with(&[0]), "source path must be NUL-terminated"); + assert!(m.dest_nul.ends_with(&[0]), "dest path must be NUL-terminated"); + let dest_str = std::str::from_utf8(&m.dest_nul[..m.dest_nul.len() - 1]) + .expect("dest path must be valid UTF-8"); + assert!( + std::path::Path::new(dest_str).is_dir(), + "dest dir must be pre-created by build_plan: {dest_str}", + ); + } + // The candidate set has four entries; on a working Linux host at + // least `/usr/lib` and `/usr/bin` exist, so we expect ≥ 2 entries. + // We do not assert the exact count to stay portable across multi- + // arch (`/lib64`-less) and musl distros. + assert!( + plan.bind_mounts.len() >= 2, + "expected ≥ 2 bind-mount entries on a Linux host; got {}", + plan.bind_mounts.len(), + ); + } + + #[test] + fn nul_terminate_appends_zero_byte_once() { + assert_eq!(nul_terminate(b""), b"\0"); + assert_eq!(nul_terminate(b"/lib"), b"/lib\0"); + // Idempotency property does NOT hold — caller must not double-terminate. + let twice = nul_terminate(b"/lib\0"); + assert_eq!(twice, b"/lib\0\0"); + } + } diff --git a/tests/determinism_audit.rs b/tests/determinism_audit.rs index c86c8666..f0740ae6 100644 --- a/tests/determinism_audit.rs +++ b/tests/determinism_audit.rs @@ -140,6 +140,214 @@ fn ten_runs_produce_byte_identical_telemetry_minus_timestamps() { } } +/// Recursively strip volatile fields from a `serde_json::Value` tree. +/// The Confirmed-path `VerifyResult` carries timing fields buried under +/// `differential.vuln_probes[].captured_at_ns` etc., so a flat top-level +/// `obj.remove(...)` is not enough. +/// +/// Field denylist: +/// - `captured_at_ns` — wall-clock probe capture timestamp. +/// - `ts` / `duration_ms` — telemetry-side timing fields stripped by +/// [`strip_volatile_fields`] but worth re-stripping here too in case +/// a future code path lands them on `VerifyResult` directly. +/// - `repro_bundle` / `bundle_dir` — `NYX_REPRO_BASE` is fed an +/// in-test-tempdir whose path is stable across the loop, but the +/// hashed sub-directory name folds in any per-run randomness; strip +/// defensively. +#[cfg(target_os = "macos")] +fn strip_volatile_recursive(value: &mut Value) { + const VOLATILE_KEYS: &[&str] = &[ + "captured_at_ns", + "ts", + "duration_ms", + "repro_bundle", + "bundle_dir", + ]; + match value { + Value::Object(map) => { + for key in VOLATILE_KEYS { + map.remove(*key); + } + for (_, v) in map.iter_mut() { + strip_volatile_recursive(v); + } + } + Value::Array(arr) => { + for v in arr.iter_mut() { + strip_volatile_recursive(v); + } + } + _ => {} + } +} + +/// Confirmed-path determinism: drive the verifier through a real +/// payload run (macOS process backend + sandbox-exec wrap + python3 +/// harness) `RUN_COUNT_CONFIRMED` times and assert byte-identical +/// `VerifyResult` once volatile timing fields are stripped. +/// +/// Mirrors [`ten_runs_produce_byte_identical_telemetry_minus_timestamps`] +/// (the deny-path determinism contract) but exercises the build → +/// sandbox → probe pipeline instead of the policy-deny short-circuit. +/// Closes the determinism audit's "complete coverage needs an end-to-end +/// Confirmed run" gap. +/// +/// macOS-only: the Linux process backend needs `cc -static` + libc.a to +/// drive the C fixture through chroot, and `cc -static` is unsupported +/// by the Darwin clang shipped with Xcode. The Linux row's analogue +/// lands when the Phase 17 follow-up's `bind_mount_host_libs` opt-in +/// wiring (see `deferred.md`) lets the python harness survive chroot. +/// +/// `RUN_COUNT_CONFIRMED = 3` keeps the test cost bounded (~6s per run +/// on a warm cache → ~20s total) while still gating against single-run +/// hash collisions that would flake at N=2. Bumping to N=10 (matching +/// the deny-path test) is a wall-clock decision, not a coverage one. +#[cfg(all(feature = "dynamic", target_os = "macos"))] +#[test] +fn confirmed_run_is_byte_identical_across_runs() { + use nyx_scanner::evidence::{FlowStep, FlowStepKind}; + use nyx_scanner::labels::Cap; + use nyx_scanner::utils::config::Config; + use std::path::PathBuf; + + const RUN_COUNT_CONFIRMED: usize = 3; + + // Pre-flight skips: the macOS process backend needs the sandbox-exec + // wrap binary + a working python3 to drive the cmdi_positive fixture. + if !std::path::Path::new("/usr/bin/sandbox-exec").exists() { + eprintln!("SKIP: /usr/bin/sandbox-exec missing — cannot exercise process-backend wrap"); + return; + } + if !std::process::Command::new("/usr/bin/python3") + .arg("--version") + .output() + .map(|o| o.status.success()) + .unwrap_or(false) + { + eprintln!("SKIP: /usr/bin/python3 missing — cannot run python harness"); + return; + } + + let fixture_src = PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("tests/dynamic_fixtures/python/cmdi_positive.py"); + + let tmp = tempfile::TempDir::new().expect("create tempdir"); + let dst = tmp.path().join("cmdi_positive.py"); + std::fs::copy(&fixture_src, &dst).expect("stage fixture into tempdir"); + + // Pin the repro bundle + telemetry log to in-test tempdir paths so + // every run reads + writes the same absolute paths (the per-run path + // would otherwise leak into VerifyResult and break determinism). + unsafe { + std::env::set_var( + "NYX_REPRO_BASE", + tmp.path().join("repro").to_str().unwrap(), + ); + std::env::set_var( + "NYX_TELEMETRY_PATH", + tmp.path().join("events.jsonl").to_str().unwrap(), + ); + std::env::remove_var("NYX_NO_TELEMETRY"); + } + + let path_str = dst.to_string_lossy().into_owned(); + let evidence = Evidence { + flow_steps: vec![ + FlowStep { + step: 1, + kind: FlowStepKind::Source, + file: path_str.clone(), + line: 1, + col: 0, + snippet: None, + variable: Some("host".into()), + callee: None, + function: Some("run_ping".into()), + is_cross_file: false, + }, + FlowStep { + step: 2, + kind: FlowStepKind::Sink, + file: path_str.clone(), + line: 13, + col: 4, + snippet: None, + variable: None, + callee: None, + function: None, + is_cross_file: false, + }, + ], + sink_caps: Cap::CODE_EXEC.bits(), + ..Default::default() + }; + let diag = Diag { + path: path_str, + line: 13, + col: 0, + severity: Severity::High, + id: "taint-unsanitised-flow".into(), + category: FindingCategory::Security, + path_validated: false, + guard_kind: None, + message: None, + labels: vec![], + confidence: Some(Confidence::High), + evidence: Some(evidence), + rank_score: None, + rank_reason: None, + suppressed: false, + suppression: None, + rollup: None, + finding_id: String::new(), + alternative_finding_ids: vec![], + stable_hash: 0xdec0_de00_dec0_de00, + }; + + let mut config = Config::default(); + config.scanner.harden_profile = "strict".to_owned(); + // Force the process backend: Auto would route python to docker on + // CI hosts where docker is reachable, and docker ignores the + // hardening profile. Pinning to `process` exercises the sandbox- + // exec wrap on every run, which is the surface the determinism + // contract covers. + config.scanner.verify_backend = "process".to_owned(); + let mut opts = VerifyOptions::from_config(&config); + opts.telemetry_policy = SamplingPolicy::keep_all(); + opts.trace_verbose = false; + + let mut stripped: BTreeSet = BTreeSet::new(); + for i in 0..RUN_COUNT_CONFIRMED { + let result = verify_finding(&diag, &opts); + assert_eq!( + result.status, + VerifyStatus::Confirmed, + "run {i}: cmdi_positive.py under --harden=strict must Confirm — got {:?} (detail={:?})", + result.status, + result.detail, + ); + let mut json: Value = + serde_json::from_str(&serde_json::to_string(&result).expect("VerifyResult serialises")) + .expect("re-parse"); + strip_volatile_recursive(&mut json); + stripped.insert(json.to_string()); + } + + assert_eq!( + stripped.len(), + 1, + "VerifyResult must be byte-identical across {RUN_COUNT_CONFIRMED} runs once volatile \ + timing fields are stripped; got {} distinct values: {:?}", + stripped.len(), + stripped, + ); + + unsafe { + std::env::remove_var("NYX_REPRO_BASE"); + std::env::remove_var("NYX_TELEMETRY_PATH"); + } +} + #[test] fn policy_deny_excerpt_is_stable_across_runs() { // The PolicyDeniedDynamic verdict carries an excerpt scrubbed via