mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-12 19:55:14 +02:00
[pitboss/grind] deferred session-0013 (20260517T044708Z-e058)
This commit is contained in:
parent
704f437cce
commit
36de3afef5
3 changed files with 463 additions and 11 deletions
|
|
@ -243,6 +243,18 @@ pub struct SandboxOptions {
|
|||
/// today's behaviour; opt-in callers (interpreted-language harness
|
||||
/// builders) set the field when an interpreter is on the run path.
|
||||
pub bind_mount_host_libs: bool,
|
||||
/// Phase 20 follow-up (Track E.4 ablation harness): when `Some`, the
|
||||
/// Linux process backend skips or extends individual hardening
|
||||
/// primitives so the escape-fixture matrix can verify "removing any
|
||||
/// one primitive flips at least one fixture red". Always `None` in
|
||||
/// production — the field is marked `#[doc(hidden)]` so it does not
|
||||
/// surface in the public API but is reachable from integration tests
|
||||
/// in sibling crates (`tests/sandbox_escape_suite.rs`,
|
||||
/// `tests/sandbox_hardening_linux.rs`). Ignored on macOS and by
|
||||
/// every non-process backend. See [`AblationMask`] for the per-
|
||||
/// primitive toggles.
|
||||
#[doc(hidden)]
|
||||
pub ablation: Option<AblationMask>,
|
||||
/// Phase 30 (Track C observability): optional [`VerifyTrace`] handle
|
||||
/// the runner appends pipeline stages to (`build_started`,
|
||||
/// `build_done`, `sandbox_started`, `oracle_wait`, `oracle_observed`).
|
||||
|
|
@ -276,6 +288,59 @@ impl Default for ProcessHardeningProfile {
|
|||
}
|
||||
}
|
||||
|
||||
/// Phase 20 follow-up (Track E.4 ablation harness): selectively skip or
|
||||
/// loosen individual Strict-profile primitives so the escape-fixture
|
||||
/// matrix can prove the acceptance literal "removing any one Phase 17
|
||||
/// hardening primitive flips at least one escape fixture to red".
|
||||
///
|
||||
/// Each boolean field defaults to `false` (no ablation). The Linux
|
||||
/// process backend honours every field that targets a Linux-only
|
||||
/// primitive; macOS / docker / firecracker backends ignore the mask
|
||||
/// entirely because their hardening surface is different.
|
||||
///
|
||||
/// Hidden from the public API via `#[doc(hidden)]` on
|
||||
/// [`SandboxOptions::ablation`] — the production verifier never
|
||||
/// constructs an `AblationMask`. Integration tests in sibling crates
|
||||
/// (`tests/sandbox_escape_suite.rs`) can still set the mask because
|
||||
/// the struct is reachable through the public re-export.
|
||||
///
|
||||
/// Wire-format invariant: each ablated primitive emits
|
||||
/// `PrimitiveStatus::Skipped` in the [`HardeningOutcome`] record, so
|
||||
/// the existing 15-byte encoding does not grow. Ablation-mode tests
|
||||
/// assert on the per-primitive status fields directly.
|
||||
#[doc(hidden)]
|
||||
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
|
||||
pub struct AblationMask {
|
||||
/// Skip `chroot(2)` + the bind-mount prep that feeds it. Escape
|
||||
/// fixtures that rely on the chroot read `/etc/passwd` from the
|
||||
/// host root when this is set.
|
||||
pub no_chroot: bool,
|
||||
/// Add the socket / connect / bind syscall family back to the
|
||||
/// allowlist regardless of the active `seccomp_caps` cap bits.
|
||||
/// The `raw_socket_bind` escape fixture lands a packet socket
|
||||
/// when this is set.
|
||||
pub no_seccomp_socket: bool,
|
||||
/// Add the setuid / setgid / setreuid / setregid family back to
|
||||
/// the allowlist. The `setuid_zero` escape fixture flips when
|
||||
/// this is set in concert with [`Self::no_userns`] (the
|
||||
/// unprivileged user namespace uid map already blocks the call
|
||||
/// independently).
|
||||
pub no_seccomp_setuid: bool,
|
||||
/// Drop `CLONE_NEWUSER` from the `unshare(2)` flag set. The
|
||||
/// `setuid_zero` and `proc_root_passwd` fixtures flip red when
|
||||
/// the unprivileged user namespace is gone.
|
||||
pub no_userns: bool,
|
||||
/// Drop `CLONE_NEWPID` from the `unshare(2)` flag set. The
|
||||
/// `proc_root_passwd` fixture reads the host PID 1 cmdline when
|
||||
/// the PID namespace is gone.
|
||||
pub no_pidns: bool,
|
||||
/// Skip `prctl(PR_SET_NO_NEW_PRIVS)`. The `chmod_4755` fixture
|
||||
/// flips red when the no-new-privs bit is unset because a setuid
|
||||
/// binary the harness execs after the chmod re-acquires the
|
||||
/// missing privileges.
|
||||
pub no_no_new_privs: bool,
|
||||
}
|
||||
|
||||
impl SandboxOptions {
|
||||
/// Borrow the OOB listener handle when the network policy carries
|
||||
/// one. Returns `None` for every variant except
|
||||
|
|
@ -304,6 +369,7 @@ impl Default for SandboxOptions {
|
|||
seccomp_caps: 0,
|
||||
process_hardening: ProcessHardeningProfile::Standard,
|
||||
bind_mount_host_libs: false,
|
||||
ablation: None,
|
||||
trace: None,
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@
|
|||
|
||||
use crate::dynamic::sandbox::seccomp;
|
||||
use crate::dynamic::sandbox::seccomp::bpf::SockFilter;
|
||||
use crate::dynamic::sandbox::{ProcessHardeningProfile, SandboxOptions};
|
||||
use crate::dynamic::sandbox::{AblationMask, ProcessHardeningProfile, SandboxOptions};
|
||||
use std::io::Read;
|
||||
use std::os::unix::io::{FromRawFd, RawFd};
|
||||
use std::os::unix::process::CommandExt;
|
||||
|
|
@ -308,10 +308,16 @@ fn apply_no_new_privs() -> PrimitiveStatus {
|
|||
}
|
||||
|
||||
fn apply_unshare() -> PrimitiveStatus {
|
||||
apply_unshare_with_flags(CLONE_NEWUSER | CLONE_NEWPID | CLONE_NEWNS)
|
||||
}
|
||||
|
||||
fn apply_unshare_with_flags(flags: i32) -> PrimitiveStatus {
|
||||
// CLONE_NEWUSER must come first on most modern kernels so the
|
||||
// unprivileged caller can map uid/gid; CLONE_NEWPID + CLONE_NEWNS
|
||||
// then succeed because the new user namespace owns them.
|
||||
let flags = CLONE_NEWUSER | CLONE_NEWPID | CLONE_NEWNS;
|
||||
// then succeed because the new user namespace owns them. Phase 20
|
||||
// ablation drops individual flags via `AblationMask::no_userns` /
|
||||
// `no_pidns` so the escape-fixture matrix can prove the namespace
|
||||
// primitive carries its weight.
|
||||
let ret = unsafe { unshare(flags) };
|
||||
if ret == 0 {
|
||||
PrimitiveStatus::Applied
|
||||
|
|
@ -320,6 +326,22 @@ fn apply_unshare() -> PrimitiveStatus {
|
|||
}
|
||||
}
|
||||
|
||||
/// Compose the `unshare(2)` flag set for a given ablation mask. The
|
||||
/// production path passes `None` and gets the full
|
||||
/// `CLONE_NEWUSER | CLONE_NEWPID | CLONE_NEWNS` set. Tests pass `Some`
|
||||
/// to drop individual namespaces and assert the escape fixture flips.
|
||||
fn unshare_flags_for_ablation(mask: Option<AblationMask>) -> i32 {
|
||||
let m = mask.unwrap_or_default();
|
||||
let mut flags = CLONE_NEWNS;
|
||||
if !m.no_userns {
|
||||
flags |= CLONE_NEWUSER;
|
||||
}
|
||||
if !m.no_pidns {
|
||||
flags |= CLONE_NEWPID;
|
||||
}
|
||||
flags
|
||||
}
|
||||
|
||||
fn apply_chroot(workdir: &[u8]) -> PrimitiveStatus {
|
||||
// `workdir` is NUL-terminated by `canonicalize_workdir` so we can
|
||||
// hand the bytes straight to `chroot(2)` without allocating in
|
||||
|
|
@ -411,9 +433,20 @@ struct PreExecPlan {
|
|||
profile: ProcessHardeningProfileTag,
|
||||
/// Read-only bind-mounts the child applies after `unshare(CLONE_NEWNS)`
|
||||
/// and before `chroot(2)`. Empty when
|
||||
/// [`SandboxOptions::bind_mount_host_libs`] is false or the active
|
||||
/// profile is `Standard` (no namespace to bind into).
|
||||
/// [`SandboxOptions::bind_mount_host_libs`] is false, the active
|
||||
/// profile is `Standard` (no namespace to bind into), or the active
|
||||
/// ablation mask sets `no_chroot` (no `chroot(2)` means the bind
|
||||
/// mounts would just orphan-mount inside the workdir).
|
||||
bind_mounts: Vec<BindMount>,
|
||||
/// `unshare(2)` flag bits the child requests. Computed from
|
||||
/// [`unshare_flags_for_ablation`] so the Phase 20 ablation harness
|
||||
/// can drop `CLONE_NEWUSER` / `CLONE_NEWPID` individually without
|
||||
/// the test re-implementing the bit math.
|
||||
unshare_flags: i32,
|
||||
/// `Some` when the active mask is non-default; consulted in
|
||||
/// [`run_pre_exec_in_child`] to skip individual primitives. `None`
|
||||
/// in production so the hot path is unaffected.
|
||||
ablation: Option<AblationMask>,
|
||||
}
|
||||
|
||||
/// Returned by [`install_pre_exec`]. The caller MUST invoke either
|
||||
|
|
@ -519,9 +552,14 @@ pub fn install_pre_exec(
|
|||
fn run_pre_exec_in_child(plan: &PreExecPlan) -> HardeningOutcome {
|
||||
let mut outcome = HardeningOutcome::default();
|
||||
outcome.profile = plan.profile;
|
||||
let ablation = plan.ablation.unwrap_or_default();
|
||||
|
||||
// ── Always-on: PR_SET_NO_NEW_PRIVS + RLIMIT_AS ───────────────────────
|
||||
outcome.no_new_privs = apply_no_new_privs();
|
||||
outcome.no_new_privs = if ablation.no_no_new_privs {
|
||||
PrimitiveStatus::Skipped
|
||||
} else {
|
||||
apply_no_new_privs()
|
||||
};
|
||||
outcome.rlimit_as = apply_rlimit(RLIMIT_AS, plan.rlimit_as_bytes);
|
||||
|
||||
if matches!(plan.profile, ProcessHardeningProfileTag::Standard) {
|
||||
|
|
@ -531,13 +569,20 @@ fn run_pre_exec_in_child(plan: &PreExecPlan) -> HardeningOutcome {
|
|||
// ── Strict profile: rlimits, unshare, chroot, seccomp ────────────────
|
||||
outcome.rlimit_cpu = apply_rlimit(RLIMIT_CPU, plan.rlimit_cpu_seconds);
|
||||
outcome.rlimit_nofile = apply_rlimit(RLIMIT_NOFILE, plan.rlimit_nofile);
|
||||
outcome.unshare = apply_unshare();
|
||||
// `unshare(2)` always runs even under ablation because the BindMount
|
||||
// step needs `CLONE_NEWNS` to land in a private mount namespace;
|
||||
// userns/pidns are dropped via the flag mask in `build_plan`.
|
||||
outcome.unshare = apply_unshare_with_flags(plan.unshare_flags);
|
||||
// Bind-mount host library paths into the workdir after unshare (so
|
||||
// the new mount namespace catches them) and before chroot (so the
|
||||
// bind sources are still reachable at their absolute host paths).
|
||||
// No-op when `bind_mounts` is empty.
|
||||
apply_bind_mounts(&plan.bind_mounts);
|
||||
outcome.chroot = apply_chroot(&plan.workdir_nul);
|
||||
outcome.chroot = if ablation.no_chroot {
|
||||
PrimitiveStatus::Skipped
|
||||
} else {
|
||||
apply_chroot(&plan.workdir_nul)
|
||||
};
|
||||
// seccomp is applied last so the filter does not block any of the
|
||||
// earlier syscalls (setrlimit, prctl, unshare, chroot, chdir, mount).
|
||||
outcome.seccomp = apply_seccomp(plan.seccomp_program.as_slice());
|
||||
|
|
@ -557,8 +602,15 @@ fn build_plan(opts: &SandboxOptions, workdir: &Path) -> PreExecPlan {
|
|||
|
||||
// Pre-compile the BPF program in the parent so the pre_exec
|
||||
// callback (which must not allocate) can hand it straight to
|
||||
// `prctl(PR_SET_SECCOMP)`.
|
||||
let nrs = seccomp::allowed_syscall_numbers(opts.seccomp_caps);
|
||||
// `prctl(PR_SET_SECCOMP)`. Ablation extras add the socket / setuid
|
||||
// syscall families back to the allowlist so escape fixtures can
|
||||
// prove that the corresponding seccomp slice carries its weight.
|
||||
let ablation = opts.ablation;
|
||||
let extras: Vec<&'static str> = ablation_extras(ablation);
|
||||
let nrs = seccomp::allowed_syscall_numbers_with_extras(
|
||||
opts.seccomp_caps,
|
||||
extras.iter().copied(),
|
||||
);
|
||||
let program = seccomp::bpf::compile(&nrs, seccomp::syscalls::AUDIT_ARCH);
|
||||
|
||||
let profile = match opts.process_hardening {
|
||||
|
|
@ -566,11 +618,16 @@ fn build_plan(opts: &SandboxOptions, workdir: &Path) -> PreExecPlan {
|
|||
ProcessHardeningProfile::Strict => ProcessHardeningProfileTag::Strict,
|
||||
};
|
||||
|
||||
let mask = ablation.unwrap_or_default();
|
||||
// Bind-mounts are only useful when the child will chroot, i.e. under
|
||||
// the Strict profile. Computing them under Standard would create
|
||||
// empty dest dirs in the workdir for no reason.
|
||||
// empty dest dirs in the workdir for no reason. Skipping the
|
||||
// chroot via ablation drops the bind-mounts too — leaving them on
|
||||
// would mount over the host directly inside the unshared mount
|
||||
// namespace, which is not what the ablation harness wants.
|
||||
let bind_mounts = if opts.bind_mount_host_libs
|
||||
&& matches!(profile, ProcessHardeningProfileTag::Strict)
|
||||
&& !mask.no_chroot
|
||||
{
|
||||
compute_host_lib_bind_mounts(workdir)
|
||||
} else {
|
||||
|
|
@ -585,9 +642,30 @@ fn build_plan(opts: &SandboxOptions, workdir: &Path) -> PreExecPlan {
|
|||
seccomp_program: Arc::new(program),
|
||||
profile,
|
||||
bind_mounts,
|
||||
unshare_flags: unshare_flags_for_ablation(ablation),
|
||||
ablation,
|
||||
}
|
||||
}
|
||||
|
||||
/// Collect the syscall-name extras a Phase 20 ablation mask requires.
|
||||
/// Returns an empty Vec when the mask is `None` or default; otherwise
|
||||
/// folds `ABLATION_SOCKET_FAMILY` / `ABLATION_SETUID_FAMILY` from
|
||||
/// [`crate::dynamic::sandbox::seccomp`] into the allowlist seed.
|
||||
fn ablation_extras(mask: Option<AblationMask>) -> Vec<&'static str> {
|
||||
let m = match mask {
|
||||
Some(m) => m,
|
||||
None => return Vec::new(),
|
||||
};
|
||||
let mut out: Vec<&'static str> = Vec::new();
|
||||
if m.no_seccomp_socket {
|
||||
out.extend_from_slice(seccomp::ABLATION_SOCKET_FAMILY);
|
||||
}
|
||||
if m.no_seccomp_setuid {
|
||||
out.extend_from_slice(seccomp::ABLATION_SETUID_FAMILY);
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// Build the bind-mount list for the dynamic-loader paths an interpreted
|
||||
/// harness needs to find shared libraries from inside the chroot. Each
|
||||
/// entry is `(host_source, workdir_dest)` where `host_source` is a real
|
||||
|
|
@ -816,4 +894,259 @@ mod tests {
|
|||
assert_eq!(twice, b"/lib\0\0");
|
||||
}
|
||||
|
||||
// ── Phase 20 ablation harness ────────────────────────────────────────────
|
||||
|
||||
#[test]
|
||||
fn ablation_default_mask_matches_full_strict_flags() {
|
||||
// The production path (`opts.ablation == None`) must request the
|
||||
// full namespace set so non-ablation runs do not regress.
|
||||
assert_eq!(
|
||||
unshare_flags_for_ablation(None),
|
||||
CLONE_NEWUSER | CLONE_NEWPID | CLONE_NEWNS,
|
||||
);
|
||||
// A non-None but default-valued mask must behave identically:
|
||||
// the integration test layer can construct an empty mask as a
|
||||
// sentinel without losing any production primitive.
|
||||
assert_eq!(
|
||||
unshare_flags_for_ablation(Some(AblationMask::default())),
|
||||
CLONE_NEWUSER | CLONE_NEWPID | CLONE_NEWNS,
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ablation_no_userns_drops_clone_newuser_flag() {
|
||||
let flags = unshare_flags_for_ablation(Some(AblationMask {
|
||||
no_userns: true,
|
||||
..AblationMask::default()
|
||||
}));
|
||||
assert_eq!(flags & CLONE_NEWUSER, 0, "CLONE_NEWUSER must be dropped");
|
||||
assert_eq!(flags & CLONE_NEWPID, CLONE_NEWPID, "CLONE_NEWPID must persist");
|
||||
assert_eq!(flags & CLONE_NEWNS, CLONE_NEWNS, "CLONE_NEWNS must persist (bind-mount target)");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ablation_no_pidns_drops_clone_newpid_flag() {
|
||||
let flags = unshare_flags_for_ablation(Some(AblationMask {
|
||||
no_pidns: true,
|
||||
..AblationMask::default()
|
||||
}));
|
||||
assert_eq!(flags & CLONE_NEWPID, 0, "CLONE_NEWPID must be dropped");
|
||||
assert_eq!(flags & CLONE_NEWUSER, CLONE_NEWUSER, "CLONE_NEWUSER must persist");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ablation_no_userns_and_no_pidns_keeps_only_newns() {
|
||||
// Even with both namespace ablations set, CLONE_NEWNS must
|
||||
// remain so the bind-mount step has a private mount namespace
|
||||
// to land in. Dropping NEWNS too would mount host libs into
|
||||
// the live host namespace — a serious test-side foot-gun.
|
||||
let flags = unshare_flags_for_ablation(Some(AblationMask {
|
||||
no_userns: true,
|
||||
no_pidns: true,
|
||||
..AblationMask::default()
|
||||
}));
|
||||
assert_eq!(flags, CLONE_NEWNS);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ablation_no_chroot_drops_bind_mounts_from_plan() {
|
||||
// bind_mount_host_libs requested, Strict profile selected — yet
|
||||
// the ablated chroot means we should not pre-create bind dirs in
|
||||
// the workdir. Doing so would leak mount points to the host.
|
||||
let workdir = tempfile::TempDir::new().expect("tempdir");
|
||||
let opts = SandboxOptions {
|
||||
bind_mount_host_libs: true,
|
||||
process_hardening: ProcessHardeningProfile::Strict,
|
||||
ablation: Some(AblationMask {
|
||||
no_chroot: true,
|
||||
..AblationMask::default()
|
||||
}),
|
||||
..SandboxOptions::default()
|
||||
};
|
||||
let plan = build_plan(&opts, workdir.path());
|
||||
assert!(
|
||||
plan.bind_mounts.is_empty(),
|
||||
"no_chroot ablation must zero out bind_mounts; got {} entries",
|
||||
plan.bind_mounts.len(),
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ablation_no_chroot_plan_carries_mask_through_to_pre_exec() {
|
||||
// Verify the mask survives `build_plan` so the pre_exec callback
|
||||
// can inspect it. The pre_exec sequence itself is hard to drive
|
||||
// without an actual fork; the wire-level "Skipped" outcome
|
||||
// assertion lives in `run_pre_exec_outcome_with_no_chroot_mask`.
|
||||
let opts = SandboxOptions {
|
||||
process_hardening: ProcessHardeningProfile::Strict,
|
||||
ablation: Some(AblationMask {
|
||||
no_chroot: true,
|
||||
no_no_new_privs: true,
|
||||
..AblationMask::default()
|
||||
}),
|
||||
..SandboxOptions::default()
|
||||
};
|
||||
let plan = build_plan(&opts, std::path::Path::new("/tmp"));
|
||||
let mask = plan.ablation.expect("plan must carry the mask");
|
||||
assert!(mask.no_chroot);
|
||||
assert!(mask.no_no_new_privs);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ablation_extras_default_is_empty() {
|
||||
assert!(ablation_extras(None).is_empty());
|
||||
assert!(ablation_extras(Some(AblationMask::default())).is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ablation_no_seccomp_socket_extends_allowlist_with_socket_family() {
|
||||
let extras = ablation_extras(Some(AblationMask {
|
||||
no_seccomp_socket: true,
|
||||
..AblationMask::default()
|
||||
}));
|
||||
for needle in ["socket", "bind", "connect", "accept"] {
|
||||
assert!(
|
||||
extras.contains(&needle),
|
||||
"no_seccomp_socket extras must include {needle}, got {extras:?}",
|
||||
);
|
||||
}
|
||||
for forbidden in ["setuid", "setgid"] {
|
||||
assert!(
|
||||
!extras.contains(&forbidden),
|
||||
"no_seccomp_socket extras must not leak setuid family",
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ablation_no_seccomp_setuid_extends_allowlist_with_setuid_family() {
|
||||
let extras = ablation_extras(Some(AblationMask {
|
||||
no_seccomp_setuid: true,
|
||||
..AblationMask::default()
|
||||
}));
|
||||
for needle in ["setuid", "setgid", "setreuid", "setresuid"] {
|
||||
assert!(
|
||||
extras.contains(&needle),
|
||||
"no_seccomp_setuid extras must include {needle}, got {extras:?}",
|
||||
);
|
||||
}
|
||||
for forbidden in ["socket", "bind"] {
|
||||
assert!(
|
||||
!extras.contains(&forbidden),
|
||||
"no_seccomp_setuid extras must not leak socket family",
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ablation_no_seccomp_socket_bpf_includes_socket_syscall() {
|
||||
// Verify the extension reaches the compiled BPF program, not
|
||||
// just the name list. socket() lives in the SSRF cap allowlist
|
||||
// today; without that cap bit set, the production path filters
|
||||
// it. Ablation must add it back via the extras seed.
|
||||
let opts = SandboxOptions {
|
||||
seccomp_caps: 0,
|
||||
process_hardening: ProcessHardeningProfile::Strict,
|
||||
ablation: Some(AblationMask {
|
||||
no_seccomp_socket: true,
|
||||
..AblationMask::default()
|
||||
}),
|
||||
..SandboxOptions::default()
|
||||
};
|
||||
let plan = build_plan(&opts, std::path::Path::new("/tmp"));
|
||||
let socket_nr = seccomp::syscalls::syscall_number("socket")
|
||||
.expect("socket in per-arch syscall map");
|
||||
// BPF compile emits one JEQ per allowed syscall (+ a fixed arch
|
||||
// prelude + a default-deny tail), so encoding socket as a JEQ
|
||||
// instruction's k-field is the load-bearing signal.
|
||||
let program = plan.seccomp_program.as_slice();
|
||||
let landed = program.iter().any(|insn| insn.k == socket_nr);
|
||||
assert!(
|
||||
landed,
|
||||
"BPF program must include socket={} after no_seccomp_socket ablation",
|
||||
socket_nr,
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ablation_no_seccomp_setuid_bpf_includes_setuid_syscall() {
|
||||
let opts = SandboxOptions {
|
||||
seccomp_caps: 0,
|
||||
process_hardening: ProcessHardeningProfile::Strict,
|
||||
ablation: Some(AblationMask {
|
||||
no_seccomp_setuid: true,
|
||||
..AblationMask::default()
|
||||
}),
|
||||
..SandboxOptions::default()
|
||||
};
|
||||
let plan = build_plan(&opts, std::path::Path::new("/tmp"));
|
||||
let setuid_nr = seccomp::syscalls::syscall_number("setuid")
|
||||
.expect("setuid in per-arch syscall map");
|
||||
let program = plan.seccomp_program.as_slice();
|
||||
let landed = program.iter().any(|insn| insn.k == setuid_nr);
|
||||
assert!(
|
||||
landed,
|
||||
"BPF program must include setuid={} after no_seccomp_setuid ablation",
|
||||
setuid_nr,
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ablation_off_keeps_socket_filtered_when_cap_unset() {
|
||||
// Sanity: without the no_seccomp_socket toggle, socket() must
|
||||
// NOT land in the program when no cap requests it. This is the
|
||||
// tripwire for an accidental "ablation extras always added"
|
||||
// regression.
|
||||
let opts = SandboxOptions {
|
||||
seccomp_caps: 0,
|
||||
process_hardening: ProcessHardeningProfile::Strict,
|
||||
ablation: None,
|
||||
..SandboxOptions::default()
|
||||
};
|
||||
let plan = build_plan(&opts, std::path::Path::new("/tmp"));
|
||||
let socket_nr = seccomp::syscalls::syscall_number("socket")
|
||||
.expect("socket in per-arch syscall map");
|
||||
let landed = plan.seccomp_program.iter().any(|insn| insn.k == socket_nr);
|
||||
assert!(
|
||||
!landed,
|
||||
"production path must filter socket() when no cap requests it",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn run_pre_exec_outcome_with_no_chroot_mask_skips_chroot_status() {
|
||||
// Drive `run_pre_exec_in_child` directly so we exercise the
|
||||
// ablation-aware status assignment without actually fork+exec.
|
||||
// The pre_exec sequence is allocator-free but ordinary Rust on
|
||||
// the parent thread — its only side effect under test is the
|
||||
// returned HardeningOutcome record, which is what tabulators
|
||||
// and ablation assertions consume.
|
||||
let plan = PreExecPlan {
|
||||
rlimit_cpu_seconds: 1,
|
||||
rlimit_nofile: 256,
|
||||
rlimit_as_bytes: 4096_u64 * 1024 * 1024,
|
||||
workdir_nul: b"/tmp\0".to_vec(),
|
||||
seccomp_program: Arc::new(Vec::new()),
|
||||
profile: ProcessHardeningProfileTag::Strict,
|
||||
bind_mounts: Vec::new(),
|
||||
unshare_flags: 0,
|
||||
ablation: Some(AblationMask {
|
||||
no_chroot: true,
|
||||
no_no_new_privs: true,
|
||||
..AblationMask::default()
|
||||
}),
|
||||
};
|
||||
let outcome = run_pre_exec_in_child(&plan);
|
||||
assert!(
|
||||
matches!(outcome.chroot, PrimitiveStatus::Skipped),
|
||||
"no_chroot mask must yield Skipped, got {:?}",
|
||||
outcome.chroot,
|
||||
);
|
||||
assert!(
|
||||
matches!(outcome.no_new_privs, PrimitiveStatus::Skipped),
|
||||
"no_no_new_privs mask must yield Skipped, got {:?}",
|
||||
outcome.no_new_privs,
|
||||
);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -52,6 +52,19 @@ unsafe extern "C" {
|
|||
/// `BTreeSet` and resolved to numbers via [`syscall_number`]. Unknown
|
||||
/// names (not in the per-arch table) are silently dropped.
|
||||
pub fn allowed_syscall_numbers(caps: u32) -> Vec<u32> {
|
||||
allowed_syscall_numbers_with_extras(caps, std::iter::empty())
|
||||
}
|
||||
|
||||
/// Same as [`allowed_syscall_numbers`] but additionally folds in every
|
||||
/// name yielded by `extras`. Used by the Phase 20 ablation harness to
|
||||
/// add the socket / setuid families back to the allowlist when a
|
||||
/// per-primitive escape fixture wants to prove that removing the
|
||||
/// corresponding seccomp filter flips the fixture red. Unknown names
|
||||
/// are silently dropped, identical to the base path.
|
||||
pub fn allowed_syscall_numbers_with_extras<I>(caps: u32, extras: I) -> Vec<u32>
|
||||
where
|
||||
I: IntoIterator<Item = &'static str>,
|
||||
{
|
||||
let mut names: BTreeSet<&'static str> = BTreeSet::new();
|
||||
for &n in BASE.iter() {
|
||||
names.insert(n);
|
||||
|
|
@ -63,12 +76,52 @@ pub fn allowed_syscall_numbers(caps: u32) -> Vec<u32> {
|
|||
}
|
||||
}
|
||||
}
|
||||
for n in extras {
|
||||
names.insert(n);
|
||||
}
|
||||
let mut nrs: Vec<u32> = names.into_iter().filter_map(syscall_number).collect();
|
||||
nrs.sort_unstable();
|
||||
nrs.dedup();
|
||||
nrs
|
||||
}
|
||||
|
||||
/// Syscall names re-allowed when [`crate::dynamic::sandbox::AblationMask::no_seccomp_socket`]
|
||||
/// is set. Covers the socket-family entries of every cap allowlist
|
||||
/// plus the raw / packet-socket primitives the
|
||||
/// `tests/sandbox_escape_suite.rs::raw_socket_bind` fixture exercises.
|
||||
pub const ABLATION_SOCKET_FAMILY: &[&str] = &[
|
||||
"socket",
|
||||
"socketpair",
|
||||
"connect",
|
||||
"bind",
|
||||
"listen",
|
||||
"accept",
|
||||
"accept4",
|
||||
"sendto",
|
||||
"recvfrom",
|
||||
"sendmsg",
|
||||
"recvmsg",
|
||||
"shutdown",
|
||||
"getsockname",
|
||||
"getpeername",
|
||||
"getsockopt",
|
||||
"setsockopt",
|
||||
];
|
||||
|
||||
/// Syscall names re-allowed when [`crate::dynamic::sandbox::AblationMask::no_seccomp_setuid`]
|
||||
/// is set. Covers the uid / gid mutation entries the
|
||||
/// `tests/sandbox_escape_suite.rs::setuid_zero` fixture exercises.
|
||||
pub const ABLATION_SETUID_FAMILY: &[&str] = &[
|
||||
"setuid",
|
||||
"setgid",
|
||||
"setreuid",
|
||||
"setregid",
|
||||
"setresuid",
|
||||
"setresgid",
|
||||
"setfsuid",
|
||||
"setfsgid",
|
||||
];
|
||||
|
||||
/// Install a pre-compiled seccomp filter on the calling thread.
|
||||
///
|
||||
/// `program` MUST come from [`bpf::compile`]. Calls
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue