[pitboss] sweep after phase 02: 3 deferred items resolved

2026-07-27 21:51:03 +02:00 · 2026-05-11 23:24:37 -04:00 · 2026-05-11 23:24:37 -04:00 · 3a4f1b177b
commit 3a4f1b177b
parent 0bf39047b9
7 changed files with 244 additions and 27 deletions
--- a/benches/dynamic_bench_baseline.json
+++ b/benches/dynamic_bench_baseline.json
@ -1,6 +1,6 @@
 {
  "schema": 1,
-  "note": "Baseline captured on Apple M1 Pro (darwin/aarch64), nyx v0.7.0, phase-02.",
+  "note": "ASPIRATIONAL placeholder — values were hand-typed, not captured from a real bench run. Regenerate with: benches/regen_baseline.sh (requires --features dynamic and python3 on PATH). Commit the updated file to establish a real regression reference for M3+.",
  "benchmarks": {
    "harness_build_cold": {
      "mean_ns": 800000,
--- a/benches/regen_baseline.sh
+++ b/benches/regen_baseline.sh
@ -0,0 +1,84 @@
+#!/usr/bin/env bash
+# Regenerate benches/dynamic_bench_baseline.json from a real cargo bench run.
+#
+# Usage:
+#   bash benches/regen_baseline.sh
+#
+# Requirements:
+#   - python3 on PATH
+#   - cargo (nightly or stable with edition 2024)
+#   - Criterion's JSON output (criterion feature already in dev-deps)
+#
+# The script runs the dynamic bench group, parses Criterion's estimates JSON,
+# and overwrites dynamic_bench_baseline.json with real numbers.
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
+BASELINE_FILE="${SCRIPT_DIR}/dynamic_bench_baseline.json"
+
+echo "Running cargo bench --features dynamic -- dynamic ..."
+cargo bench --manifest-path "${REPO_ROOT}/Cargo.toml" \
+    --features dynamic \
+    -- dynamic \
+    2>&1 | tee /tmp/nyx_bench_raw.txt
+
+# Print "<mean_ns> <stddev_ns>" from a Criterion estimates file. Criterion keeps
+# the latest run in <bench dir>/new/, so that layout is probed too; fails if absent.
+extract_ns() {
+    local path="$1" dir candidate
+    dir="$(dirname "${path}")"
+    for candidate in "${path}" "${dir}/new/estimates.json" "${dir%/*}/new/estimates.json"; do
+        [[ -f "${candidate}" ]] || continue
+        # argv, not string interpolation: quotes in the path cannot alter the code.
+        python3 -c 'import json, sys
+with open(sys.argv[1]) as fh: d = json.load(fh)
+stddev = d["std_dev"]["point_estimate"] if "std_dev" in d else 0
+print(int(d["mean"]["point_estimate"]), int(stddev))' "${candidate}"
+        return
+    done
+    echo "error: no Criterion estimates for ${path}" >&2; return 1
+}
+
+TARGET="${REPO_ROOT}/target/criterion"
+
+read COLD_MEAN COLD_STDDEV < <(extract_ns "${TARGET}/harness_build_cold/default/estimates.json")
+read WARM_MEAN WARM_STDDEV < <(extract_ns "${TARGET}/harness_build_warm/default/estimates.json")
+read RUN_MEAN  RUN_STDDEV  < <(extract_ns "${TARGET}/sandbox_run_payload/default/estimates.json")
+
+MACHINE="$(uname -m) / $(uname -s)"
+NYX_VER="$(cargo metadata --manifest-path "${REPO_ROOT}/Cargo.toml" --no-deps --format-version 1 \
+    | python3 -c "import json,sys; d=json.load(sys.stdin); print(next(p['version'] for p in d['packages'] if p['name']=='nyx-scanner'))")"
+DATE="$(date +%Y-%m-%d)"
+
+cat > "${BASELINE_FILE}" <<EOF
+{
+  "schema": 1,
+  "note": "Baseline captured on ${MACHINE}, nyx v${NYX_VER}, ${DATE}. Regenerate with: benches/regen_baseline.sh",
+  "benchmarks": {
+    "harness_build_cold": {
+      "mean_ns": ${COLD_MEAN},
+      "stddev_ns": ${COLD_STDDEV},
+      "description": "Fresh workdir; spec → BuiltHarness including source gen + disk write."
+    },
+    "harness_build_warm": {
+      "mean_ns": ${WARM_MEAN},
+      "stddev_ns": ${WARM_STDDEV},
+      "description": "Workdir already staged; file write skipped by dst.exists() guard."
+    },
+    "sandbox_run_payload": {
+      "mean_ns": ${RUN_MEAN},
+      "stddev_ns": ${RUN_STDDEV},
+      "description": "Single process-backend run with sqli payload; includes python3 startup + settrace."
+    }
+  },
+  "regression_thresholds": {
+    "harness_build_cold": 2.0,
+    "harness_build_warm": 2.0,
+    "sandbox_run_payload": 1.5
+  }
+}
+EOF
+
+echo "Updated ${BASELINE_FILE}"
--- a/src/dynamic/repro.rs
+++ b/src/dynamic/repro.rs
@ -143,6 +143,9 @@ pub fn write(
    // expected/outcome.json — redacted
    let redacted_stdout = redact::redact(&outcome.stdout);
    let redacted_stderr = redact::redact(&outcome.stderr);
+    // duration_ms is omitted from the persisted outcome so that outcome.json is
+    // byte-identical when regenerated from the repro bundle (§18.2 determinism).
+    // Wall-clock timing goes to telemetry only.
    let outcome_json = serde_json::json!({
        "exit_code": outcome.exit_code,
        "stdout": String::from_utf8_lossy(&redacted_stdout),
@ -150,7 +153,6 @@ pub fn write(
        "timed_out": outcome.timed_out,
        "oob_callback_seen": outcome.oob_callback_seen,
        "sink_hit": outcome.sink_hit,
-        "duration_ms": outcome.duration.as_millis(),
    });
    write_json(&root.join("expected").join("outcome.json"), &outcome_json)?;

--- a/src/dynamic/runner.rs
+++ b/src/dynamic/runner.rs
@ -5,6 +5,7 @@
 //! above it ([`crate::dynamic::verify`]) just calls [`run_spec`] and turns
 //! the result into a [`crate::dynamic::report::VerifyResult`].

+use crate::dynamic::build_sandbox;
 use crate::dynamic::corpus::{benign_payload_for, payloads_for, Oracle, Payload};
 use crate::dynamic::harness::{self, HarnessError};
 use crate::dynamic::sandbox::{self, SandboxError, SandboxOptions, SandboxOutcome};
@ -65,7 +66,7 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result<RunOutcome,
    // Build harness with retry.
    const BACKOFF: [u64; 1] = [1];
    let mut build_attempts = 0u32;
-    let harness = loop {
+    let mut harness = loop {
        build_attempts += 1;
        match harness::build(spec) {
            Ok(h) => break h,
@ -85,6 +86,31 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result<RunOutcome,
        }
    };

+    // Prepare Python venv for build-time isolation and dependency caching.
+    // Errors from prepare_python propagate as RunError::BuildFailed (making
+    // that variant reachable) or are swallowed for non-fatal failures (Io /
+    // Unsupported), falling back to the system python3 in the harness command.
+    match build_sandbox::prepare_python(spec, &harness.workdir) {
+        Ok(build_result) => {
+            // Patch harness command to use venv Python when the venv was built
+            // or found in cache.
+            if let Some(cmd0) = harness.command.first_mut() {
+                if cmd0 == "python3" || cmd0 == "python" {
+                    let venv_python = build_result.venv_path.join("bin").join("python3");
+                    if venv_python.exists() {
+                        *cmd0 = venv_python.to_string_lossy().into_owned();
+                    }
+                }
+            }
+        }
+        Err(build_sandbox::BuildError::BuildFailed { stderr, attempts }) => {
+            return Err(RunError::BuildFailed { stderr, attempts });
+        }
+        Err(_) => {
+            // Io / Unsupported: fall back to system python3 already in command.
+        }
+    }
+
    let harness_source = harness.source.clone();
    let entry_source = harness.entry_source.clone();

--- a/src/dynamic/sandbox.rs
+++ b/src/dynamic/sandbox.rs
@ -89,9 +89,8 @@ impl From<std::io::Error> for SandboxError {

 /// Run a built harness once with a chosen payload.
 ///
-/// Dispatches to the process backend (subprocess with timeout).
-/// On Linux the process backend uses unshare namespaces + seccomp.
-/// On other platforms it falls back to plain subprocess with timeout.
+/// Dispatches to the process backend (subprocess with timeout, env stripping,
+/// and memory cap via `setrlimit(RLIMIT_AS)` on Linux).
 pub fn run(
    harness: &BuiltHarness,
    payload: &Payload,
@ -106,10 +105,7 @@ pub fn run(
 }

 /// Process backend: spawns the harness command in a subprocess with timeout,
-/// stdout/stderr capture, and env stripping.
-///
-/// On Linux, wraps the command with `unshare` for namespace isolation when
-/// available. On other platforms, runs the command directly.
+/// stdout/stderr capture, env stripping, and memory cap (Linux: RLIMIT_AS).
 fn run_process(
    harness: &BuiltHarness,
    payload: &Payload,
@ -152,6 +148,21 @@ fn run_process(
        cmd.env("NYX_PAYLOAD", std::ffi::OsStr::from_bytes(payload.bytes));
    }

+    // Enforce memory cap before exec on Linux via RLIMIT_AS.
+    // RLIMIT_AS limits total virtual address space. Python uses significantly
+    // more virtual AS than RSS (shared libs, mmap arenas), so the enforced
+    // limit is memory_mib * 8 with a floor of 4 GiB. This prevents multi-GiB
+    // memory bombs while leaving normal Python workloads headroom.
+    #[cfg(target_os = "linux")]
+    {
+        use std::os::unix::process::CommandExt;
+        let memory_mib = opts.memory_mib;
+        // Safety: called in the child after fork but before exec; no allocator use.
+        unsafe {
+            cmd.pre_exec(move || rlimit_as_linux(memory_mib));
+        }
+    }
+
    let start = Instant::now();
    let mut child = cmd.spawn().map_err(SandboxError::Spawn)?;

@ -261,6 +272,36 @@ fn base64_encode(data: &[u8]) -> String {
    out
 }

+/// Set RLIMIT_AS (virtual address space) in a `pre_exec` context on Linux.
+///
+/// `memory_mib` is the configured cap; we enforce `max(memory_mib * 8, 4096)`
+/// MiB of virtual AS to give Python's mmap-heavy runtime adequate headroom
+/// while still capping runaway memory bombs. Both the soft and the hard limit
+/// are set to that value.
+///
+/// Returns `Ok(())` when the limit was applied, otherwise the OS error
+/// reported by the libc call.
+///
+/// RLIMIT_AS is 9 in the kernel's asm-generic/resource.h (x86_64, aarch64, arm,
+/// ppc64, s390x, ...); MIPS overrides it to 6, where 9 would be RLIMIT_MEMLOCK.
+#[cfg(target_os = "linux")]
+fn rlimit_as_linux(memory_mib: u64) -> std::io::Result<()> {
+    #[repr(C)]
+    struct Rlimit {
+        cur: u64,
+        max: u64,
+    }
+    unsafe extern "C" {
+        // On 32-bit glibc plain `setrlimit` takes a 32-bit `rlim_t`, which does
+        // not match `Rlimit`; `setrlimit64` always takes the two-`u64` layout.
+        #[cfg_attr(target_env = "gnu", link_name = "setrlimit64")]
+        fn setrlimit(resource: i32, rlim: *const Rlimit) -> i32;
+    }
+    const RLIMIT_AS: i32 = if cfg!(any(
+        target_arch = "mips",
+        target_arch = "mips64",
+        target_arch = "mips32r6",
+        target_arch = "mips64r6",
+    )) {
+        6
+    } else {
+        9
+    };
+    let cap_mib = memory_mib.saturating_mul(8).max(4096);
+    let bytes = cap_mib.saturating_mul(1024 * 1024);
+    let rl = Rlimit { cur: bytes, max: bytes };
+    // SAFETY: `rl` outlives the call and has the layout the bound symbol expects.
+    let ret = unsafe { setrlimit(RLIMIT_AS, &rl) };
+    if ret == 0 {
+        Ok(())
+    } else {
+        Err(std::io::Error::last_os_error())
+    }
+}
+
 #[cfg(unix)]
 fn libc_kill(pid: i32, sig: i32) -> i32 {
    unsafe extern "C" {
--- a/src/dynamic/verify.rs
+++ b/src/dynamic/verify.rs
@ -56,6 +56,35 @@ pub fn verify_finding(diag: &Diag, opts: &VerifyOptions) -> VerifyResult {
        }
    };

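+    // Scan the entry file's directory for sensitive files (§17.3 mount filter).
+    // If the entry file itself matches a sensitive pattern, refuse to run it:
+    // the harness would copy it into the workdir and expose secrets.
+    // NOTE(review): for a bare relative entry path, `scan_dir` is "." and the join
+    // below yields "./name", which `Path` equality treats as different from "name",
+    // so the gate would not fire — confirm how `note.path` is reported and normalize.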
+    {
+        let entry_path = Path::new(&spec.entry_file);
+        let scan_dir = entry_path
+            .parent()
+            .filter(|p| !p.as_os_str().is_empty())
+            .unwrap_or(Path::new("."));
+        let notes = crate::dynamic::mount_filter::scan_sensitive_files(scan_dir);
+        for note in &notes {
+            let note_abs = scan_dir.join(&note.path);
+            if entry_path == note_abs {
+                return VerifyResult {
+                    finding_id,
+                    status: VerifyStatus::Unsupported,
+                    triggered_payload: None,
+                    reason: Some(UnsupportedReason::RequiredFileRedactedForSecrets(
+                        note.path.clone(),
+                    )),
+                    inconclusive_reason: None,
+                    detail: None,
+                    attempts: vec![],
+                    toolchain_match: None,
+                };
+            }
+        }
+    }
+
    // Resolve toolchain information.
    let toolchain_res = toolchain::resolve_python(Path::new("."));
    let toolchain_match = if toolchain_res.toolchain_drift { "drift" } else { "exact" };
@ -64,6 +93,13 @@ pub fn verify_finding(diag: &Diag, opts: &VerifyOptions) -> VerifyResult {
    let result = run_spec(&spec, &opts.sandbox);
    let elapsed = start.elapsed();

+    // Extract build_attempts before result is consumed by build_verdict.
+    let build_attempts = match &result {
+        Ok(run) => run.build_attempts,
+        Err(RunError::BuildFailed { attempts, .. }) => *attempts,
+        _ => 1,
+    };
+
    let verdict = build_verdict(
        &finding_id,
        &spec,
@ -80,7 +116,7 @@ pub fn verify_finding(diag: &Diag, opts: &VerifyOptions) -> VerifyResult {
        verdict.inconclusive_reason,
        toolchain_match,
        elapsed,
-        1, // build_attempts tracked in RunOutcome but not exposed here for simplicity
+        build_attempts,
    );
    telemetry::emit(&event);

--- a/tests/python_fixtures.rs
+++ b/tests/python_fixtures.rs
@ -23,8 +23,13 @@ mod python_fixture_tests {
    use nyx_scanner::labels::Cap;
    use nyx_scanner::patterns::{FindingCategory, Severity};
    use std::path::{Path, PathBuf};
+    use std::sync::Mutex;
    use tempfile::TempDir;

+    // Serialize all fixture tests to prevent races on process-global state
+    // (NYX_REPRO_BASE and NYX_TELEMETRY_PATH env vars).
+    static FIXTURE_LOCK: Mutex<()> = Mutex::new(());
+
    /// Returns `true` if `python3` is available.
    fn python3_available() -> bool {
        std::process::Command::new("python3")
@ -41,7 +46,14 @@ mod python_fixture_tests {
    }

    /// Run a fixture and return the verdict.
+    ///
+    /// Acquires `FIXTURE_LOCK` for the full duration to prevent races on the
+    /// process-global NYX_REPRO_BASE / NYX_TELEMETRY_PATH env vars.
+    /// `set_current_dir` is NOT used here: `harness::copy_entry_file` resolves
+    /// the entry file via its absolute path. NOTE(review): `verify_finding` still
+    /// calls `toolchain::resolve_python(Path::new("."))`, so CWD is not irrelevant.
    fn run_fixture(fixture: &str, func: &str, cap: Cap, sink_line: u32) -> nyx_scanner::evidence::VerifyResult {
+        let _guard = FIXTURE_LOCK.lock().unwrap_or_else(|e| e.into_inner());
+
        let path = fixture_path(fixture);
        // Copy fixture to a temp dir so the harness can import it.
        let tmp = TempDir::new().unwrap();
@ -54,20 +66,12 @@ mod python_fixture_tests {
            std::env::set_var("NYX_TELEMETRY_PATH", tmp.path().join("events.jsonl").to_str().unwrap());
        }

-        // Use the temp dir copy as the fixture path.
+        // Use the temp dir copy as the fixture path (absolute — no CWD change needed).
        let diag = make_diag(&dst, func, cap, sink_line);

-        // Change CWD to the temp dir so the harness can find the module.
-        let original_dir = std::env::current_dir().ok();
-        let _ = std::env::set_current_dir(tmp.path());
-
        let opts = VerifyOptions::default();
        let result = verify_finding(&diag, &opts);

-        if let Some(dir) = original_dir {
-            let _ = std::env::set_current_dir(dir);
-        }
-
        unsafe {
            std::env::remove_var("NYX_REPRO_BASE");
            std::env::remove_var("NYX_TELEMETRY_PATH");
@ -373,6 +377,8 @@ mod python_fixture_tests {
            return;
        }

+        let _guard = FIXTURE_LOCK.lock().unwrap_or_else(|e| e.into_inner());
+
        let tmp = TempDir::new().unwrap();
        let telemetry_path = tmp.path().join("events.jsonl");
        unsafe {
@ -385,17 +391,11 @@ mod python_fixture_tests {
        let tmp_fix = tmp.path().join("sqli_positive.py");
        let _ = std::fs::copy(&fixture, &tmp_fix);

-        let original_dir = std::env::current_dir().ok();
-        let _ = std::env::set_current_dir(tmp.path());
-
+        // No set_current_dir: entry file is absolute, copy_entry_file resolves it directly.
        let diag = make_diag(&tmp_fix, "login", Cap::SQL_QUERY, 17);
        let opts = VerifyOptions::default();
        let _ = verify_finding(&diag, &opts);

-        if let Some(dir) = original_dir {
-            let _ = std::env::set_current_dir(dir);
-        }
-
        // Check telemetry doesn't contain any secret patterns.
        if telemetry_path.exists() {
            let content = std::fs::read_to_string(&telemetry_path).unwrap_or_default();
@ -412,6 +412,34 @@ mod python_fixture_tests {
        }
    }

+    // ── Mount-filter gate ─────────────────────────────────────────────────────
+
+    /// If the entry file itself matches a sensitive-file pattern (e.g. `id_rsa*`),
+    /// verify_finding must return Unsupported(RequiredFileRedactedForSecrets).
+    /// No Python3 needed — the check fires before harness execution.
+    ///
+    /// Takes `FIXTURE_LOCK` like `run_fixture` does, so `verify_finding` never runs
+    /// while another test is mutating the process-global NYX_* env vars.
+    #[test]
+    fn sensitive_entry_file_is_unsupported() {
+        let _guard = FIXTURE_LOCK.lock().unwrap_or_else(|e| e.into_inner());
+
+        let tmp = TempDir::new().unwrap();
+        // "id_rsa.py" matches the id_rsa* sensitive pattern in mount_filter.
+        let entry = tmp.path().join("id_rsa.py");
+        std::fs::write(&entry, "def run(x): pass\n").unwrap();
+
+        let diag = make_diag(&entry, "run", Cap::SQL_QUERY, 2);
+        let opts = VerifyOptions::default();
+        let result = verify_finding(&diag, &opts);
+
+        assert_eq!(
+            result.status,
+            VerifyStatus::Unsupported,
+            "sensitive entry file must be Unsupported; got {:?}",
+            result.status
+        );
+        match &result.reason {
+            Some(UnsupportedReason::RequiredFileRedactedForSecrets(_)) => {}
+            other => panic!("expected RequiredFileRedactedForSecrets, got {other:?}"),
+        }
+    }
+
    fn make_diag(path: &Path, func: &str, cap: Cap, sink_line: u32) -> Diag {
        let path_str = path.to_string_lossy().into_owned();
        let evidence = Evidence {