From bcf2b489943aee757eea6b245f212bb233ccc498 Mon Sep 17 00:00:00 2001 From: pitboss Date: Sun, 17 May 2026 08:54:00 -0500 Subject: [PATCH] [pitboss/grind] deferred session-0025 (20260517T044708Z-e058) --- tests/sb_trace_script.rs | 65 +++++ tools/sb-trace.sh | 536 +++++++++++++++++++++++++++++---------- tools/sb-trace/README.md | 36 ++- 3 files changed, 487 insertions(+), 150 deletions(-) create mode 100644 tests/sb_trace_script.rs diff --git a/tests/sb_trace_script.rs b/tests/sb_trace_script.rs new file mode 100644 index 00000000..0d719090 --- /dev/null +++ b/tests/sb_trace_script.rs @@ -0,0 +1,65 @@ +//! `tools/sb-trace.sh` is the corpus walker that generates per-cap +//! seed files for the macOS sandbox-exec deny-default rollout. Its +//! deny-record → allow-rule parser is implemented in bash; this test +//! drives the script's `--selftest` flag so the parser stays exercised +//! in CI on every host, including Linux runners that never run the +//! macOS-specific portion of the script. +//! +//! The selftest is a no-op when `bash` is not on PATH; CI rows that +//! lack a POSIX shell skip rather than fail. + +use std::path::PathBuf; +use std::process::Command; + +fn repo_root() -> PathBuf { + PathBuf::from(env!("CARGO_MANIFEST_DIR")) +} + +fn find_in_path(name: &str) -> Option { + let path = std::env::var_os("PATH")?; + for dir in std::env::split_paths(&path) { + let candidate = dir.join(name); + if candidate.is_file() { + return Some(candidate); + } + } + None +} + +#[test] +fn sb_trace_selftest_passes() { + let script = repo_root().join("tools").join("sb-trace.sh"); + assert!( + script.exists(), + "tools/sb-trace.sh missing at {}", + script.display() + ); + + let bash = match find_in_path("bash") { + Some(p) => p, + None => { + eprintln!("SKIP: bash not on PATH; sb-trace.sh selftest cannot run"); + return; + } + }; + + let output = Command::new(&bash) + .arg(&script) + .arg("--selftest") + .output() + .expect("invoke bash tools/sb-trace.sh --selftest"); + + assert!( + output.status.success(), + "tools/sb-trace.sh --selftest failed: status={:?}\nstdout={}\nstderr={}", + output.status, + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr), + ); + + let stdout = String::from_utf8_lossy(&output.stdout); + assert!( + stdout.contains("sb-trace selftest: all OK"), + "expected selftest success banner; stdout was: {stdout}", + ); +} diff --git a/tools/sb-trace.sh b/tools/sb-trace.sh index 35b28558..0784b5ba 100755 --- a/tools/sb-trace.sh +++ b/tools/sb-trace.sh @@ -1,223 +1,481 @@ #!/usr/bin/env bash -# tools/sb-trace.sh — corpus-walking seed generator for the macOS +# tools/sb-trace.sh — iterative-permit seed generator for the macOS # sandbox-exec deny-default rollout (Phase 18 follow-up path (a)). # -# What it does +# How it works # ------------ -# For each `.sb` profile shipped under `src/dynamic/sandbox_profiles/`, -# this script re-runs the profile in deny-default mode against the -# per-language harness corpus under `tests/dynamic_fixtures/`, -# captures the kernel's deny trace, and writes one -# `tools/sb-trace/{cap}.allow` seed file with the minimum allow rules -# the interpreter cold-start needs. +# Apple removed the `(trace "")` directive's file-emission in a +# recent macOS release while keeping the directive syntactically valid, +# so the older "set a trace path, run probe, parse trace file" workflow +# captures nothing on macOS 26+. This script substitutes an iterative +# loop driven by `log show`: # -# The seed files are consumed by `src/dynamic/sandbox/process_macos.rs` -# at runtime when `NYX_SB_DENY_DEFAULT=1` is set; the splice path -# replaces the baked `(allow default)` with `(deny default)` and -# appends the seed body verbatim. +# 1. Materialise the named `.sb` profile with `(allow default)` +# rewritten to `(deny default)` plus all `(allow ...)` rules the +# loop has accumulated so far. +# 2. Run the per-language probe under `sandbox-exec -f` against that +# profile. Capture the resulting PID. +# 3. Query `log show --predicate 'eventMessage CONTAINS "() deny"'` +# for the deny records the kernel logged against our process. +# 4. Convert each deny record into a corresponding `(allow ...)` rule +# and append it to the accumulated rule set. +# 5. Repeat until no new deny records appear (either the probe ran +# cleanly under the accumulated allows or the kernel deduplicated +# everything new). Emit the rule set as the seed. +# +# The PID-targeted log query sidesteps the kernel's per-tuple dedup +# window: every iteration's probe runs as a new process with a fresh +# PID, so the kernel emits fresh records each time even if the +# operation tuples repeat. # # Usage # ----- # tools/sb-trace.sh # walk every profile + every lang fixture -# tools/sb-trace.sh cmdi # just the cmdi profile +# tools/sb-trace.sh cmdi # just the cmdi profile, every lang # tools/sb-trace.sh cmdi python # cmdi + python only +# tools/sb-trace.sh --selftest # rule-parser unit tests # # Requirements # ------------ -# * macOS host with `/usr/bin/sandbox-exec` available +# * macOS host with `/usr/bin/sandbox-exec` + `/usr/bin/log` available. # * `python3`, `node`, `ruby`, `php`, `java` resolvable via $PATH for -# every language whose fixtures you want to walk +# every language whose fixtures you want to walk. Missing +# interpreters are skipped with a warning. # # Output # ------ -# tools/sb-trace/.allow — generated seed, hand-review -# tools/sb-trace/.trace.raw — full raw deny trace, for audit +# tools/sb-trace/.allow — generated seed, hand-review. # -# The seed files are intended to be committed; the .trace.raw files -# are .gitignore'd because they capture host-specific paths. +# The seeds are intended to be committed. Hand-review each one to: +# * regex-anonymise host-specific user paths (`/Users//...` → +# `^/Users/[^/]+/...`) +# * collapse related rules onto one `(allow op a b c ...)` directive +# when several rules share an operation. set -euo pipefail ROOT="$(cd "$(dirname "$0")/.." && pwd)" SEED_DIR="$ROOT/tools/sb-trace" PROFILE_DIR="$ROOT/src/dynamic/sandbox_profiles" -FIXTURE_ROOT="$ROOT/tests/dynamic_fixtures" + +MAX_ITERATIONS="${SB_TRACE_MAX_ITERATIONS:-200}" +LOG_WAIT="${SB_TRACE_LOG_WAIT_SECONDS:-1.5}" + +# Self-test mode short-circuits the macOS-host plumbing so the parser +# can be exercised in CI on any platform. +if [[ "${1:-}" == "--selftest" ]]; then + selftest_mode=1 +else + selftest_mode=0 +fi + +# ── deny → allow rule parser ───────────────────────────────────────────────── +# +# Format of a kernel sandbox deny record (as it appears in `log show`'s +# `eventMessage` field): +# +# Sandbox: () deny() +# +# `` is positional — everything after the operation token, up to +# the end of the message. It may contain spaces (file paths with +# embedded whitespace). Operation classes map to different +# sandbox-exec rule filters: +# +# file-read*, file-write*, file-ioctl, file-* (most) → (literal "") +# mach-lookup → (global-name "") +# sysctl-read, sysctl-write → (sysctl-name "") +# ipc-posix-shm-read*, ipc-posix-shm-write* → (ipc-posix-name "") +# iokit-open → (iokit-user-client-class "") +# network-outbound, network-inbound, network-bind → (literal "") if path-like +# process-fork, process-exec*, signal, pseudo-tty, +# sysctl-*, system-* → bare (allow ) +# +# Unknown operations fall through to bare allow with a `;; TODO review` +# comment so the operator notices on hand-review. + +deny_to_allow_rule() { + local line="$1" + # Strip everything up to and including "deny(N) ". + local rest="${line#*Sandbox: }" + rest="${rest#*deny(}" + rest="${rest#*) }" + + # First whitespace-delimited token is the operation, the rest is the target. + local op="${rest%% *}" + local target="" + if [[ "$rest" == *" "* ]]; then + target="${rest#* }" + fi + + # Strip a trailing CR that some log timestamps emit. + target="${target%$'\r'}" + + case "$op" in + file-read*|file-write*|file-ioctl|file-issue-extension|file-map-executable|file-mount*|file-revoke|file-test-existence|file-chroot|file-clone) + printf '(allow %s (literal "%s"))\n' "$op" "$(escape_quotes "$target")" + ;; + mach-lookup|mach-register|mach-priv-task-port|mach-task-name) + printf '(allow %s (global-name "%s"))\n' "$op" "$(escape_quotes "$target")" + ;; + sysctl-read|sysctl-write) + printf '(allow %s (sysctl-name "%s"))\n' "$op" "$(escape_quotes "$target")" + ;; + ipc-posix-shm-read*|ipc-posix-shm-write*|ipc-posix-shm) + printf '(allow %s (ipc-posix-name "%s"))\n' "$op" "$(escape_quotes "$target")" + ;; + iokit-open|iokit-set-properties|iokit-get-properties) + printf '(allow %s (iokit-user-client-class "%s"))\n' "$op" "$(escape_quotes "$target")" + ;; + network-outbound|network-inbound|network-bind) + if [[ "$target" == /* ]]; then + printf '(allow %s (literal "%s"))\n' "$op" "$(escape_quotes "$target")" + else + printf '(allow %s)\n' "$op" + fi + ;; + process-fork|process-exec*|process-info*|signal|pseudo-tty|system-*|sysctl-*) + printf '(allow %s)\n' "$op" + ;; + "") + # Unrecognised structure — emit nothing. + ;; + *) + printf ';; TODO review unfamiliar op: %s %s\n(allow %s)\n' \ + "$op" "$target" "$op" + ;; + esac +} + +# Escape `"` and `\` for safe embedding inside a sandbox-exec string literal. +escape_quotes() { + local s="$1" + s="${s//\\/\\\\}" + s="${s//\"/\\\"}" + printf '%s' "$s" +} + +# ── Self-test ──────────────────────────────────────────────────────────────── + +assert_rule() { + local label="$1" + local input="$2" + local expected="$3" + local got + got="$(deny_to_allow_rule "$input")" + # Trim trailing newline from `got` for comparison. + got="${got%$'\n'}" + if [[ "$got" != "$expected" ]]; then + printf '[FAIL] %s\n input: %s\n expected: %s\n got: %s\n' \ + "$label" "$input" "$expected" "$got" >&2 + return 1 + fi + printf '[PASS] %s\n' "$label" +} + +run_selftest() { + local fails=0 + assert_rule "file-read-data" \ + "kernel: (Sandbox) Sandbox: python3(54920) deny(1) file-read-data /etc/hosts" \ + '(allow file-read-data (literal "/etc/hosts"))' || ((fails++)) + + assert_rule "file-read-data-root" \ + "Sandbox: python3(54920) deny(1) file-read-data /" \ + '(allow file-read-data (literal "/"))' || ((fails++)) + + assert_rule "sysctl-read" \ + "Sandbox: python3(54920) deny(1) sysctl-read security.mac.lockdown_mode_state" \ + '(allow sysctl-read (sysctl-name "security.mac.lockdown_mode_state"))' || ((fails++)) + + assert_rule "mach-lookup" \ + "Sandbox: contactsd(54920) deny(1) mach-lookup com.apple.tccd.system" \ + '(allow mach-lookup (global-name "com.apple.tccd.system"))' || ((fails++)) + + assert_rule "ipc-posix-shm-read" \ + "Sandbox: python3(54920) deny(1) ipc-posix-shm-read-data apple.shm.notification_center" \ + '(allow ipc-posix-shm-read-data (ipc-posix-name "apple.shm.notification_center"))' || ((fails++)) + + assert_rule "network-outbound-path" \ + "Sandbox: python3(54920) deny(1) network-outbound /private/var/run/syslog" \ + '(allow network-outbound (literal "/private/var/run/syslog"))' || ((fails++)) + + assert_rule "network-outbound-host" \ + "Sandbox: python3(54920) deny(1) network-outbound 1.2.3.4:80" \ + '(allow network-outbound)' || ((fails++)) + + assert_rule "process-fork" \ + "Sandbox: python3(54920) deny(1) process-fork" \ + '(allow process-fork)' || ((fails++)) + + assert_rule "process-exec-star" \ + "Sandbox: python3(54920) deny(1) process-exec* /bin/ls" \ + '(allow process-exec*)' || ((fails++)) + + assert_rule "iokit-open" \ + "Sandbox: python3(54920) deny(1) iokit-open IOUserClientCrossEndpoint" \ + '(allow iokit-open (iokit-user-client-class "IOUserClientCrossEndpoint"))' || ((fails++)) + + assert_rule "path-with-space" \ + 'Sandbox: python3(54920) deny(1) file-read-data /Users/me/has spaces/file' \ + '(allow file-read-data (literal "/Users/me/has spaces/file"))' || ((fails++)) + + assert_rule "path-with-quote" \ + 'Sandbox: python3(54920) deny(1) file-read-data /a"b' \ + '(allow file-read-data (literal "/a\"b"))' || ((fails++)) + + if (( fails > 0 )); then + printf '\nsb-trace selftest: %d failure(s)\n' "$fails" >&2 + return 1 + fi + printf '\nsb-trace selftest: all OK\n' +} + +if (( selftest_mode )); then + run_selftest + exit $? +fi + +# ── macOS-host guards ──────────────────────────────────────────────────────── if [[ "$(uname -s)" != "Darwin" ]]; then echo "sb-trace: must run on macOS (uname=$(uname -s))" >&2 exit 2 fi -if ! command -v /usr/bin/sandbox-exec >/dev/null 2>&1; then +if [[ ! -x /usr/bin/sandbox-exec ]]; then echo "sb-trace: /usr/bin/sandbox-exec missing" >&2 exit 2 fi +if [[ ! -x /usr/bin/log ]]; then + echo "sb-trace: /usr/bin/log missing" >&2 + exit 2 +fi + mkdir -p "$SEED_DIR" -# ── Profile + language coverage ────────────────────────────────────────────── +# ── Probe selection ────────────────────────────────────────────────────────── ALL_PROFILES=(base cmdi path_traversal ssrf deserialize xxe) ALL_LANGS=(python javascript ruby php java) -selected_profiles=() -selected_langs=() - +declare -a selected_profiles selected_langs if [[ $# -ge 1 ]]; then - selected_profiles+=("$1") + selected_profiles=("$1") else selected_profiles=("${ALL_PROFILES[@]}") fi - if [[ $# -ge 2 ]]; then - selected_langs+=("$2") + selected_langs=("$2") else selected_langs=("${ALL_LANGS[@]}") fi -# ── Per-language probe ─────────────────────────────────────────────────────── -# -# Each probe runs the language's interpreter cold-start path (import -# the standard libraries the harness needs). The probes are -# intentionally minimal: they exercise filesystem reads of stdlib / -# package manager locations + a `mach-lookup` for the system -# notification center, which is what the trace needs to enumerate. - +# Per-language probe command. Each probe exercises the interpreter's +# cold-start path with the minimum import set the dynamic harness +# needs. Probe argv is written into the global `PROBE_ARGV` array (one +# token per element) on success; on missing interpreter the function +# returns 1 and leaves `PROBE_ARGV` cleared. +PROBE_ARGV=() probe_command_for() { - local lang="$1" - case "$lang" in + PROBE_ARGV=() + case "$1" in python) - echo "/usr/bin/python3" "-c" "import socket,subprocess,os,sys,json" + command -v python3 >/dev/null 2>&1 || return 1 + PROBE_ARGV=(python3 -c 'import os, sys, json, socket, subprocess') ;; javascript) - command -v node >/dev/null 2>&1 || { echo ""; return; } - echo "node" "-e" "require('fs');require('os');require('child_process');require('http');" + command -v node >/dev/null 2>&1 || return 1 + PROBE_ARGV=(node -e "require('fs');require('os');require('http');require('child_process')") ;; ruby) - command -v ruby >/dev/null 2>&1 || { echo ""; return; } - echo "ruby" "-e" "require 'json';require 'socket';require 'net/http';require 'open3'" + command -v ruby >/dev/null 2>&1 || return 1 + PROBE_ARGV=(ruby -e "require 'json'; require 'socket'; require 'net/http'; require 'open3'") ;; php) - command -v php >/dev/null 2>&1 || { echo ""; return; } - echo "php" "-r" "echo phpversion();" + command -v php >/dev/null 2>&1 || return 1 + PROBE_ARGV=(php -r 'echo phpversion();') ;; java) - command -v java >/dev/null 2>&1 || { echo ""; return; } - echo "java" "--version" + command -v java >/dev/null 2>&1 || return 1 + PROBE_ARGV=(java --version) ;; *) - echo "" + return 1 ;; esac } -# ── Trace helper ───────────────────────────────────────────────────────────── -# -# Builds a deny-default variant of the named profile, runs the probe -# under it, captures the sandbox trace via the `(with trace)` directive, -# and prints any deny lines for further processing. +# ── Iterative loop ─────────────────────────────────────────────────────────── -trace_one() { +# Run one probe under the given (already materialised) profile and return +# the kernel deny lines logged against the probe's PID, one per line. +run_probe_capture_denies() { + local profile_path="$1" + shift + local -a probe_argv=("$@") + + # Spawn the probe in the background so we can capture its PID. + /usr/bin/sandbox-exec -f "$profile_path" -D WORKDIR=/tmp "${probe_argv[@]}" \ + >/dev/null 2>/dev/null & + local probe_pid=$! + + # Wait for the probe to finish. Don't propagate its exit code — many + # operations under deny-default are silently degraded by the + # interpreter (a denied sysctl-read just returns ENOENT, the + # interpreter handles it gracefully). + wait "$probe_pid" 2>/dev/null || true + + # Wait for the kernel's log queue to drain. Empirically a few hundred + # milliseconds suffice on macOS 26. + sleep "$LOG_WAIT" + + # Query log for deny lines targeting our PID. Use both the procname + # token "() deny" (more selective than just the pid) and the + # `--style ndjson` flag for parseable output. We re-extract + # `eventMessage` via a simple field grep because jq isn't required on + # every macOS host. + /usr/bin/log show \ + --predicate "eventMessage CONTAINS \"(${probe_pid}) deny\"" \ + --info --debug --last 30s 2>/dev/null \ + | awk ' + /Sandbox: .*\([0-9]+\) deny\(/ { + sub(/^.*Sandbox:/, "Sandbox:") + print + } + ' +} + +iterate_one_profile() { local profile_name="$1" - local lang="$2" - local probe_cmd - probe_cmd="$(probe_command_for "$lang")" - if [[ -z "$probe_cmd" ]]; then - echo "sb-trace: skipping $lang (interpreter missing)" >&2 - return 0 - fi + shift + local -a langs=("$@") - local source="$PROFILE_DIR/$profile_name.sb" - if [[ ! -f "$source" ]]; then - echo "sb-trace: profile $profile_name missing at $source" >&2 + local source_path="$PROFILE_DIR/$profile_name.sb" + if [[ ! -f "$source_path" ]]; then + echo "sb-trace: profile $profile_name missing at $source_path" >&2 return 1 fi - local tmp_profile - tmp_profile="$(mktemp -t "sb-trace-$profile_name.XXXXXX.sb")" - local trace_file - trace_file="$(mktemp -t "sb-trace-$profile_name.XXXXXX.trace")" + local base + base="$(sed 's/(allow default)/(deny default)/' "$source_path")" - # Rewrite (allow default) -> (deny default), append a trace directive. - # `(trace "...")` emits one s-expression record per sandbox decision. - sed 's/(allow default)/(deny default)/' "$source" >"$tmp_profile" - printf '\n(trace "%s")\n' "$trace_file" >>"$tmp_profile" - - # Run the probe under the new profile. Exit code is ignored — the - # interpreter is expected to fail under deny-default; what we want is - # the captured trace. - /usr/bin/sandbox-exec -f "$tmp_profile" -D WORKDIR=/tmp -- $probe_cmd >/dev/null 2>&1 || true - - if [[ -s "$trace_file" ]]; then - cat "$trace_file" - fi - - rm -f "$tmp_profile" "$trace_file" -} - -# ── Trace summariser ───────────────────────────────────────────────────────── -# -# The sandbox-exec trace format records one s-expression per decision. -# We extract the deny records, normalise the per-host paths into -# parameterised allow rules, and dedupe. - -summarise_traces() { - awk ' - /\(deny / { - sub(/.*\(deny /, "") - sub(/\).*/, "") - print - } - ' | sort -u -} - -# ── Emit seed for one profile ──────────────────────────────────────────────── - -emit_seed() { - local profile_name="$1" - shift - local langs=("$@") - - local raw="$SEED_DIR/$profile_name.trace.raw" - : >"$raw" + # Per-cap accumulators. + local -a accumulated_rules=() + local -a accumulated_keys=() + local total_iters=0 for lang in "${langs[@]}"; do - echo ";; ── trace from $lang probe ───────────────────────────" >>"$raw" - trace_one "$profile_name" "$lang" >>"$raw" || true + if ! probe_command_for "$lang"; then + echo "sb-trace: skipping $lang (interpreter missing or unsupported)" >&2 + continue + fi + local -a argv=("${PROBE_ARGV[@]}") + if (( ${#argv[@]} == 0 )); then + echo "sb-trace: skipping $lang (empty argv)" >&2 + continue + fi + + local iteration=0 + while (( iteration < MAX_ITERATIONS )); do + iteration=$((iteration + 1)) + total_iters=$((total_iters + 1)) + + # Materialise tmp profile = base + accumulated rules. + local tmp_profile + tmp_profile="$(mktemp -t "sb-trace-$profile_name.XXXXXX.sb")" + { + printf '%s\n' "$base" + printf ';; sb-trace iterative seeds (lang=%s iter=%d)\n' \ + "$lang" "$iteration" + local r + for r in "${accumulated_rules[@]+"${accumulated_rules[@]}"}"; do + printf '%s\n' "$r" + done + } >"$tmp_profile" + + # Run probe, collect deny lines. + local denies + denies="$(run_probe_capture_denies "$tmp_profile" "${argv[@]}" || true)" + rm -f "$tmp_profile" + + if [[ -z "$denies" ]]; then + # No new denies for this lang — done. + break + fi + + # Convert denies to allow rules, dedup against accumulated. + local new_in_iter=0 + local line + while IFS= read -r line; do + [[ -z "$line" ]] && continue + local rule + rule="$(deny_to_allow_rule "$line")" + rule="${rule%$'\n'}" + [[ -z "$rule" ]] && continue + # Dedup by exact-rule-text match. + local seen=0 + local k + for k in "${accumulated_keys[@]+"${accumulated_keys[@]}"}"; do + if [[ "$k" == "$rule" ]]; then + seen=1; break + fi + done + if (( ! seen )); then + accumulated_rules+=("$rule") + accumulated_keys+=("$rule") + new_in_iter=$((new_in_iter + 1)) + fi + done <<<"$denies" + + if (( new_in_iter == 0 )); then + # Denies present but all already-known — kernel dedup, or + # repeats of rules we've already issued. Bail to avoid + # infinite loops. + break + fi + done done - if [[ ! -s "$raw" ]]; then - echo "sb-trace: no deny traces captured for $profile_name" >&2 - return 0 - fi - - local seed="$SEED_DIR/$profile_name.allow" + local seed_path="$SEED_DIR/$profile_name.allow" { - echo ";; tools/sb-trace/$profile_name.allow" - echo ";; Generated by tools/sb-trace.sh against per-language harness corpus." - echo ";; Hand-review before commit: paths under \$HOME need to be regex'd" - echo ";; rather than literalised so the seed survives a different host's" - echo ";; \$HOME layout." - echo ";;" - echo ";; Languages walked: ${langs[*]}" - echo ";; Generated: $(date -u +%Y-%m-%dT%H:%M:%SZ)" - echo - summarise_traces <"$raw" | sed 's/^/(allow /;s/$/)/' - } >"$seed" + printf ';; tools/sb-trace/%s.allow\n' "$profile_name" + printf ';; Generated %s by tools/sb-trace.sh (iterative-permit loop)\n' \ + "$(date -u +%Y-%m-%dT%H:%M:%SZ)" + printf ';; Languages walked: %s\n' "${langs[*]}" + printf ';; Total probe iterations: %d\n' "$total_iters" + printf ';;\n' + printf ';; Hand-review before commit:\n' + printf ';; * regex-anonymise host-specific paths under /Users//...\n' + printf ';; into ^/Users/[^/]+/... so the seed survives a different\n' + printf ';; operator host\n' + printf ';; * collapse same-op rules onto one (allow op a b c ...)\n' + printf ';; directive when the targets share semantics\n' + printf '\n' + if (( ${#accumulated_rules[@]} == 0 )); then + printf ';; (no deny records captured; profile already runs cleanly\n' + printf ';; for the probed languages under (deny default))\n' + else + local r + for r in "${accumulated_rules[@]}"; do + printf '%s\n' "$r" + done + fi + } >"$seed_path" - echo "sb-trace: wrote $seed ($(wc -l <"$seed" | tr -d ' ') lines)" + printf 'sb-trace: wrote %s (%d rule(s) across %d iteration(s))\n' \ + "$seed_path" "${#accumulated_rules[@]}" "$total_iters" } -# ── Main ───────────────────────────────────────────────────────────────────── +# ── Main loop ──────────────────────────────────────────────────────────────── for profile in "${selected_profiles[@]}"; do - emit_seed "$profile" "${selected_langs[@]}" + iterate_one_profile "$profile" "${selected_langs[@]}" done -echo "sb-trace: done." -echo "Next steps:" -echo " 1. Hand-review each tools/sb-trace/*.allow seed" -echo " 2. Replace host-specific literal paths with regex matches" -echo " (e.g. /Users//.pyenv/... -> ^/Users/[^/]+/\\.pyenv/)" -echo " 3. Commit the .allow files; the .trace.raw files are .gitignore'd" -echo " 4. Run nyx with NYX_SB_DENY_DEFAULT=1 to exercise the splice" +printf '\nsb-trace: done.\n' +printf 'Next steps:\n' +printf ' 1. Hand-review each tools/sb-trace/*.allow seed.\n' +printf ' 2. Replace host-specific literal paths with regex matches.\n' +printf ' 3. Commit the .allow files.\n' +printf ' 4. Run nyx with NYX_SB_DENY_DEFAULT=1 + NYX_SB_SEED_DIR pointing at\n' +printf ' tools/sb-trace/ to exercise the splice.\n' diff --git a/tools/sb-trace/README.md b/tools/sb-trace/README.md index cec3fddd..4183399b 100644 --- a/tools/sb-trace/README.md +++ b/tools/sb-trace/README.md @@ -21,25 +21,39 @@ missing. Misconfiguration cannot brick the sandbox-exec backend. ## How the seeds get generated Run `tools/sb-trace.sh` from a macOS host that has the interpreters -on `$PATH`. The script materialises each `.sb` profile in -deny-default form, runs the per-language harness cold-start -(`python3 -c 'import socket,subprocess,...'`, `node -e require(...)`, -etc.) under it, captures the sandbox-exec trace, and emits a -candidate seed. +on `$PATH`. The script materialises each `.sb` profile with +`(allow default)` rewritten to `(deny default)`, runs each +per-language probe under `sandbox-exec`, queries +`log show --predicate 'eventMessage CONTAINS "() deny"'` for the +kernel deny records the probe triggered, converts each deny line +into the matching `(allow ...)` rule, appends it to the profile, and +re-runs the probe. The loop stops when an iteration produces no new +denies (the probe ran cleanly under the accumulated allows) or when +the kernel's per-tuple dedup window swallows every remaining record. -Output goes to this directory: +The PID-targeted log query sidesteps the dedup window: each iteration's +probe runs as a new process with a fresh PID, so the kernel emits a +fresh deny record even when the operation tuple repeats. The older +`(trace "")` mechanism is silently ignored on macOS 26+ and is +no longer used. - tools/sb-trace/.allow (committed) - tools/sb-trace/.trace.raw (audit artifact, gitignored) +Output: + + tools/sb-trace/.allow (committed after hand-review) After a run, hand-review each `.allow` seed before committing. The -script's emitted seeds usually need two passes: +emitted seeds usually need two passes: 1. Replace host-specific literal paths with regex matches. For instance `/Users/eli/.pyenv/versions/3.11/lib/python3.11/...` should become a regex anchored on `^/Users/[^/]+/\\.pyenv/`. -2. Group related `mach-lookup` rules into one allow directive when - they share a service prefix. +2. Group related rules onto one `(allow a b c ...)` directive + when the targets share semantics. + +The parser logic that turns one deny line into one allow rule is +exercised in CI via `tests/sb_trace_script.rs`, which invokes +`tools/sb-trace.sh --selftest` — a mode that runs the parser against +canned input and exits non-zero on any mismatch. ## Activating a seed at runtime