[pitboss] sweep after phase 02: 3 deferred items resolved

This commit is contained in:
pitboss 2026-05-11 23:24:37 -04:00
parent 0bf39047b9
commit 3a4f1b177b
7 changed files with 244 additions and 27 deletions

View file

@ -1,6 +1,6 @@
{
"schema": 1,
"note": "Baseline captured on Apple M1 Pro (darwin/aarch64), nyx v0.7.0, phase-02.",
"note": "ASPIRATIONAL placeholder — values were hand-typed, not captured from a real bench run. Regenerate with: benches/regen_baseline.sh (requires --features dynamic and python3 on PATH). Commit the updated file to establish a real regression reference for M3+.",
"benchmarks": {
"harness_build_cold": {
"mean_ns": 800000,

84
benches/regen_baseline.sh Executable file
View file

@ -0,0 +1,84 @@
#!/usr/bin/env bash
# Regenerate benches/dynamic_bench_baseline.json from a real cargo bench run.
#
# Usage:
#   bash benches/regen_baseline.sh
#
# Requirements:
#   - python3 on PATH
#   - cargo (nightly or stable with edition 2024)
#   - Criterion's JSON output (criterion feature already in dev-deps)
#
# The script runs the dynamic bench group, parses Criterion's estimates JSON,
# and overwrites dynamic_bench_baseline.json with real numbers. If any tracked
# benchmark has no estimates file the script aborts BEFORE touching the
# baseline, so a failed or filtered-out bench run can never record zeros.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
BASELINE_FILE="${SCRIPT_DIR}/dynamic_bench_baseline.json"

# Honour CARGO_TARGET_DIR so the lookup matches where cargo actually wrote.
TARGET="${CARGO_TARGET_DIR:-${REPO_ROOT}/target}/criterion"

# Keep the raw bench output in a private temp file rather than a fixed,
# predictable name in a world-writable directory.
RAW_LOG="$(mktemp "${TMPDIR:-/tmp}/nyx_bench_raw.XXXXXX")"

echo "Running cargo bench --features dynamic -- dynamic ..."
cargo bench --manifest-path "${REPO_ROOT}/Cargo.toml" \
    --features dynamic \
    -- dynamic \
    2>&1 | tee "${RAW_LOG}"

# Print "<mean_ns> <stddev_ns>" (truncated to integers) for one benchmark id.
#
# Criterion stores the latest run under <id>/new/estimates.json; the grouped
# layout (<group>/default/new/) and the location this script historically
# probed (<id>/default/) are also tried, in that order. Returns non-zero with a
# message on stderr when none of them exists.
extract_ns() {
    local bench="$1"
    local candidate
    local path=""
    for candidate in \
        "${TARGET}/${bench}/new/estimates.json" \
        "${TARGET}/${bench}/default/new/estimates.json" \
        "${TARGET}/${bench}/default/estimates.json"; do
        if [[ -f "${candidate}" ]]; then
            path="${candidate}"
            break
        fi
    done
    if [[ -z "${path}" ]]; then
        echo "error: no Criterion estimates found for '${bench}' under ${TARGET}" >&2
        return 1
    fi
    # The path is passed as an argument, never spliced into the Python source,
    # so quotes or backslashes in it cannot break (or alter) the program.
    python3 - "${path}" <<'PY'
import json
import sys

with open(sys.argv[1]) as fh:
    d = json.load(fh)
mean = d['mean']['point_estimate']
stddev = d['std_dev']['point_estimate'] if 'std_dev' in d else 0
print(int(mean), int(stddev))
PY
}

# Command substitution (unlike process substitution) propagates failure under
# `set -e`, so a missing benchmark stops the script here.
COLD="$(extract_ns harness_build_cold)"
WARM="$(extract_ns harness_build_warm)"
RUN="$(extract_ns sandbox_run_payload)"
read -r COLD_MEAN COLD_STDDEV <<< "${COLD}"
read -r WARM_MEAN WARM_STDDEV <<< "${WARM}"
read -r RUN_MEAN RUN_STDDEV <<< "${RUN}"

MACHINE="$(uname -m) / $(uname -s)"
NYX_VER="$(cargo metadata --manifest-path "${REPO_ROOT}/Cargo.toml" --no-deps --format-version 1 \
    | python3 -c "import json,sys; d=json.load(sys.stdin); print(next(p['version'] for p in d['packages'] if p['name']=='nyx-scanner'))")"
DATE="$(date +%Y-%m-%d)"

cat > "${BASELINE_FILE}" <<EOF
{
  "schema": 1,
  "note": "Baseline captured on ${MACHINE}, nyx v${NYX_VER}, ${DATE}. Regenerate with: benches/regen_baseline.sh",
  "benchmarks": {
    "harness_build_cold": {
      "mean_ns": ${COLD_MEAN},
      "stddev_ns": ${COLD_STDDEV},
      "description": "Fresh workdir; spec → BuiltHarness including source gen + disk write."
    },
    "harness_build_warm": {
      "mean_ns": ${WARM_MEAN},
      "stddev_ns": ${WARM_STDDEV},
      "description": "Workdir already staged; file write skipped by dst.exists() guard."
    },
    "sandbox_run_payload": {
      "mean_ns": ${RUN_MEAN},
      "stddev_ns": ${RUN_STDDEV},
      "description": "Single process-backend run with sqli payload; includes python3 startup + settrace."
    }
  },
  "regression_thresholds": {
    "harness_build_cold": 2.0,
    "harness_build_warm": 2.0,
    "sandbox_run_payload": 1.5
  }
}
EOF
echo "Updated ${BASELINE_FILE}"
echo "Raw bench output kept at ${RAW_LOG}"

View file

@ -143,6 +143,9 @@ pub fn write(
// expected/outcome.json — redacted
let redacted_stdout = redact::redact(&outcome.stdout);
let redacted_stderr = redact::redact(&outcome.stderr);
// duration_ms is omitted from the persisted outcome so that outcome.json is
// byte-identical when regenerated from the repro bundle (§18.2 determinism).
// Wall-clock timing goes to telemetry only.
let outcome_json = serde_json::json!({
"exit_code": outcome.exit_code,
"stdout": String::from_utf8_lossy(&redacted_stdout),
@ -150,7 +153,6 @@ pub fn write(
"timed_out": outcome.timed_out,
"oob_callback_seen": outcome.oob_callback_seen,
"sink_hit": outcome.sink_hit,
"duration_ms": outcome.duration.as_millis(),
});
write_json(&root.join("expected").join("outcome.json"), &outcome_json)?;

View file

@ -5,6 +5,7 @@
//! above it ([`crate::dynamic::verify`]) just calls [`run_spec`] and turns
//! the result into a [`crate::dynamic::report::VerifyResult`].
use crate::dynamic::build_sandbox;
use crate::dynamic::corpus::{benign_payload_for, payloads_for, Oracle, Payload};
use crate::dynamic::harness::{self, HarnessError};
use crate::dynamic::sandbox::{self, SandboxError, SandboxOptions, SandboxOutcome};
@ -65,7 +66,7 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result<RunOutcome,
// Build harness with retry.
const BACKOFF: [u64; 1] = [1];
let mut build_attempts = 0u32;
let harness = loop {
let mut harness = loop {
build_attempts += 1;
match harness::build(spec) {
Ok(h) => break h,
@ -85,6 +86,31 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result<RunOutcome,
}
};
// Prepare Python venv for build-time isolation and dependency caching.
// Errors from prepare_python propagate as RunError::BuildFailed (making
// that variant reachable) or are swallowed for non-fatal failures (Io /
// Unsupported), falling back to the system python3 in the harness command.
match build_sandbox::prepare_python(spec, &harness.workdir) {
Ok(build_result) => {
// Patch harness command to use venv Python when the venv was built
// or found in cache.
if let Some(cmd0) = harness.command.first_mut() {
if cmd0 == "python3" || cmd0 == "python" {
let venv_python = build_result.venv_path.join("bin").join("python3");
if venv_python.exists() {
*cmd0 = venv_python.to_string_lossy().into_owned();
}
}
}
}
Err(build_sandbox::BuildError::BuildFailed { stderr, attempts }) => {
return Err(RunError::BuildFailed { stderr, attempts });
}
Err(_) => {
// Io / Unsupported: fall back to system python3 already in command.
}
}
let harness_source = harness.source.clone();
let entry_source = harness.entry_source.clone();

View file

@ -89,9 +89,8 @@ impl From<std::io::Error> for SandboxError {
/// Run a built harness once with a chosen payload.
///
/// Dispatches to the process backend (subprocess with timeout).
/// On Linux the process backend uses unshare namespaces + seccomp.
/// On other platforms it falls back to plain subprocess with timeout.
/// Dispatches to the process backend (subprocess with timeout, env stripping,
/// and memory cap via `setrlimit(RLIMIT_AS)` on Linux).
pub fn run(
harness: &BuiltHarness,
payload: &Payload,
@ -106,10 +105,7 @@ pub fn run(
}
/// Process backend: spawns the harness command in a subprocess with timeout,
/// stdout/stderr capture, and env stripping.
///
/// On Linux, wraps the command with `unshare` for namespace isolation when
/// available. On other platforms, runs the command directly.
/// stdout/stderr capture, env stripping, and memory cap (Linux: RLIMIT_AS).
fn run_process(
harness: &BuiltHarness,
payload: &Payload,
@ -152,6 +148,21 @@ fn run_process(
cmd.env("NYX_PAYLOAD", std::ffi::OsStr::from_bytes(payload.bytes));
}
// Enforce memory cap before exec on Linux via RLIMIT_AS.
// RLIMIT_AS limits total virtual address space. Python uses significantly
// more virtual AS than RSS (shared libs, mmap arenas), so the enforced
// limit is memory_mib * 8 with a floor of 4 GiB. This prevents multi-GiB
// memory bombs while leaving normal Python workloads headroom.
#[cfg(target_os = "linux")]
{
use std::os::unix::process::CommandExt;
let memory_mib = opts.memory_mib;
// SAFETY: the closure runs in the forked child before exec and only calls
// rlimit_as_linux, which issues a single setrlimit call and does not allocate.
unsafe {
cmd.pre_exec(move || rlimit_as_linux(memory_mib));
}
}
let start = Instant::now();
let mut child = cmd.spawn().map_err(SandboxError::Spawn)?;
@ -261,6 +272,36 @@ fn base64_encode(data: &[u8]) -> String {
out
}
/// Compute the RLIMIT_AS value, in bytes, for a configured cap of `memory_mib`.
///
/// The result is `max(memory_mib * 8, 4096)` MiB converted to bytes. All
/// arithmetic saturates, so an oversized `memory_mib` clamps to `u64::MAX`
/// instead of wrapping around to a tiny limit.
#[cfg(target_os = "linux")]
fn rlimit_as_bytes(memory_mib: u64) -> u64 {
    /// Virtual address space is allowed to be this many times the configured cap.
    const HEADROOM_FACTOR: u64 = 8;
    /// Smallest virtual-AS limit ever enforced, in MiB (4 GiB).
    const FLOOR_MIB: u64 = 4096;
    const BYTES_PER_MIB: u64 = 1024 * 1024;

    memory_mib
        .saturating_mul(HEADROOM_FACTOR)
        .max(FLOOR_MIB)
        .saturating_mul(BYTES_PER_MIB)
}

/// Set RLIMIT_AS (virtual address space) in a `pre_exec` context on Linux.
///
/// `memory_mib` is the configured cap; we enforce `max(memory_mib * 8, 4096)`
/// MiB of virtual AS to give Python's mmap-heavy runtime adequate headroom
/// while still capping runaway memory bombs. Both the soft and the hard limit
/// are set to that value.
///
/// On targets whose pointers are narrower than 64 bits the call is skipped and
/// `Ok(())` is returned: the 4 GiB floor already covers the whole address
/// space there, so the limit could never bind, and the C library's `rlim_t`
/// may be 32 bits wide, which would not match the 64-bit struct declared here.
///
/// The resource number is 9 in the kernel's generic ABI
/// (`include/uapi/asm-generic/resource.h`), but MIPS numbers it 6 (9 is
/// `RLIMIT_MEMLOCK` there), so the constant is selected per architecture.
///
/// # Errors
///
/// Returns the OS error reported by `setrlimit` (for example when the request
/// exceeds the current hard limit and the process is not privileged).
#[cfg(target_os = "linux")]
fn rlimit_as_linux(memory_mib: u64) -> std::io::Result<()> {
    /// Mirror of C `struct rlimit` for targets where `rlim_t` is 64 bits wide.
    #[repr(C)]
    struct Rlimit {
        cur: u64,
        max: u64,
    }
    unsafe extern "C" {
        fn setrlimit(resource: i32, rlim: *const Rlimit) -> i32;
    }

    const RLIMIT_AS: i32 = if cfg!(any(
        target_arch = "mips",
        target_arch = "mips32r6",
        target_arch = "mips64",
        target_arch = "mips64r6",
    )) {
        6
    } else {
        9
    };

    // A cap of at least 4 GiB cannot constrain a sub-64-bit address space, and
    // calling through the 64-bit struct layout could hand the C library
    // garbage limits; skip the call entirely on such targets.
    if cfg!(not(target_pointer_width = "64")) {
        return Ok(());
    }

    let bytes = rlimit_as_bytes(memory_mib);
    let limit = Rlimit {
        cur: bytes,
        max: bytes,
    };
    // SAFETY: `limit` is a live, properly initialised `#[repr(C)]` value whose
    // layout matches `struct rlimit` on 64-bit Linux; `setrlimit` only reads
    // through the pointer for the duration of the call.
    let ret = unsafe { setrlimit(RLIMIT_AS, &limit) };
    if ret == 0 {
        Ok(())
    } else {
        // Reads errno only; no allocation, so this stays pre_exec-safe.
        Err(std::io::Error::last_os_error())
    }
}
#[cfg(unix)]
fn libc_kill(pid: i32, sig: i32) -> i32 {
unsafe extern "C" {

View file

@ -56,6 +56,35 @@ pub fn verify_finding(diag: &Diag, opts: &VerifyOptions) -> VerifyResult {
}
};
// Scan the entry file's directory for sensitive files (§17.3 mount filter).
// If the entry file itself matches a sensitive pattern, refuse to run it:
// the harness would copy it into the workdir and expose secrets.
{
let entry_path = Path::new(&spec.entry_file);
let scan_dir = entry_path
.parent()
.filter(|p| !p.as_os_str().is_empty())
.unwrap_or(Path::new("."));
let notes = crate::dynamic::mount_filter::scan_sensitive_files(scan_dir);
for note in &notes {
let note_abs = scan_dir.join(&note.path);
if entry_path == note_abs {
return VerifyResult {
finding_id,
status: VerifyStatus::Unsupported,
triggered_payload: None,
reason: Some(UnsupportedReason::RequiredFileRedactedForSecrets(
note.path.clone(),
)),
inconclusive_reason: None,
detail: None,
attempts: vec![],
toolchain_match: None,
};
}
}
}
// Resolve toolchain information.
let toolchain_res = toolchain::resolve_python(Path::new("."));
let toolchain_match = if toolchain_res.toolchain_drift { "drift" } else { "exact" };
@ -64,6 +93,13 @@ pub fn verify_finding(diag: &Diag, opts: &VerifyOptions) -> VerifyResult {
let result = run_spec(&spec, &opts.sandbox);
let elapsed = start.elapsed();
// Extract build_attempts before result is consumed by build_verdict.
let build_attempts = match &result {
Ok(run) => run.build_attempts,
Err(RunError::BuildFailed { attempts, .. }) => *attempts,
_ => 1,
};
let verdict = build_verdict(
&finding_id,
&spec,
@ -80,7 +116,7 @@ pub fn verify_finding(diag: &Diag, opts: &VerifyOptions) -> VerifyResult {
verdict.inconclusive_reason,
toolchain_match,
elapsed,
1, // build_attempts tracked in RunOutcome but not exposed here for simplicity
build_attempts,
);
telemetry::emit(&event);

View file

@ -23,8 +23,13 @@ mod python_fixture_tests {
use nyx_scanner::labels::Cap;
use nyx_scanner::patterns::{FindingCategory, Severity};
use std::path::{Path, PathBuf};
use std::sync::Mutex;
use tempfile::TempDir;
// Serialize all fixture tests to prevent races on process-global state
// (NYX_REPRO_BASE and NYX_TELEMETRY_PATH env vars).
static FIXTURE_LOCK: Mutex<()> = Mutex::new(());
/// Returns `true` if `python3` is available.
fn python3_available() -> bool {
std::process::Command::new("python3")
@ -41,7 +46,14 @@ mod python_fixture_tests {
}
/// Run a fixture and return the verdict.
///
/// Acquires `FIXTURE_LOCK` for the full duration to prevent races on the
/// process-global NYX_REPRO_BASE / NYX_TELEMETRY_PATH env vars.
/// `set_current_dir` is NOT used here: `harness::copy_entry_file` resolves
/// the entry file via its absolute path, so CWD is irrelevant.
fn run_fixture(fixture: &str, func: &str, cap: Cap, sink_line: u32) -> nyx_scanner::evidence::VerifyResult {
let _guard = FIXTURE_LOCK.lock().unwrap_or_else(|e| e.into_inner());
let path = fixture_path(fixture);
// Copy fixture to a temp dir so the harness can import it.
let tmp = TempDir::new().unwrap();
@ -54,20 +66,12 @@ mod python_fixture_tests {
std::env::set_var("NYX_TELEMETRY_PATH", tmp.path().join("events.jsonl").to_str().unwrap());
}
// Use the temp dir copy as the fixture path.
// Use the temp dir copy as the fixture path (absolute — no CWD change needed).
let diag = make_diag(&dst, func, cap, sink_line);
// Change CWD to the temp dir so the harness can find the module.
let original_dir = std::env::current_dir().ok();
let _ = std::env::set_current_dir(tmp.path());
let opts = VerifyOptions::default();
let result = verify_finding(&diag, &opts);
if let Some(dir) = original_dir {
let _ = std::env::set_current_dir(dir);
}
unsafe {
std::env::remove_var("NYX_REPRO_BASE");
std::env::remove_var("NYX_TELEMETRY_PATH");
@ -373,6 +377,8 @@ mod python_fixture_tests {
return;
}
let _guard = FIXTURE_LOCK.lock().unwrap_or_else(|e| e.into_inner());
let tmp = TempDir::new().unwrap();
let telemetry_path = tmp.path().join("events.jsonl");
unsafe {
@ -385,17 +391,11 @@ mod python_fixture_tests {
let tmp_fix = tmp.path().join("sqli_positive.py");
let _ = std::fs::copy(&fixture, &tmp_fix);
let original_dir = std::env::current_dir().ok();
let _ = std::env::set_current_dir(tmp.path());
// No set_current_dir: entry file is absolute, copy_entry_file resolves it directly.
let diag = make_diag(&tmp_fix, "login", Cap::SQL_QUERY, 17);
let opts = VerifyOptions::default();
let _ = verify_finding(&diag, &opts);
if let Some(dir) = original_dir {
let _ = std::env::set_current_dir(dir);
}
// Check telemetry doesn't contain any secret patterns.
if telemetry_path.exists() {
let content = std::fs::read_to_string(&telemetry_path).unwrap_or_default();
@ -412,6 +412,34 @@ mod python_fixture_tests {
}
}
// ── Mount-filter gate ─────────────────────────────────────────────────────
/// Mount-filter gate: an entry file whose own name matches a sensitive-file
/// pattern (here `id_rsa*`) must make `verify_finding` report
/// `Unsupported(RequiredFileRedactedForSecrets)`.
///
/// The test does not probe for python3; it expects the verdict to be produced
/// without needing a harness run.
#[test]
fn sensitive_entry_file_is_unsupported() {
    // Stage a harmless Python file under a name that matches the id_rsa*
    // sensitive pattern in mount_filter.
    let workdir = TempDir::new().unwrap();
    let entry = workdir.path().join("id_rsa.py");
    std::fs::write(&entry, "def run(x): pass\n").unwrap();

    let result = verify_finding(
        &make_diag(&entry, "run", Cap::SQL_QUERY, 2),
        &VerifyOptions::default(),
    );

    assert_eq!(
        result.status,
        VerifyStatus::Unsupported,
        "sensitive entry file must be Unsupported; got {:?}",
        result.status
    );

    // The reason has to name the redaction explicitly; anything else is a failure.
    let other = &result.reason;
    if !matches!(
        other,
        Some(UnsupportedReason::RequiredFileRedactedForSecrets(_))
    ) {
        panic!("expected RequiredFileRedactedForSecrets, got {other:?}");
    }
}
fn make_diag(path: &Path, func: &str, cap: Cap, sink_line: u32) -> Diag {
let path_str = path.to_string_lossy().into_owned();
let evidence = Evidence {