From 523bd0c53a5c4f4ee6c1b8e24e48ee450aa32213 Mon Sep 17 00:00:00 2001 From: pitboss Date: Thu, 14 May 2026 14:39:29 -0500 Subject: [PATCH] =?UTF-8?q?[pitboss]=20phase=2011:=20Track=20D.4=20+=20D.5?= =?UTF-8?q?=20=E2=80=94=20Deterministic=20secrets=20+=20`NetworkPolicy`?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/dynamic/environment.rs | 231 +++++++++++++++- src/dynamic/runner.rs | 4 +- src/dynamic/sandbox.rs | 174 ++++++++++-- src/dynamic/verify.rs | 13 +- .../secret_injection/flask_secret/app.py | 21 ++ tests/dynamic_sandbox_escape.rs | 6 +- tests/network_policy.rs | 118 ++++++++ tests/secret_derivation.rs | 254 ++++++++++++++++++ 8 files changed, 789 insertions(+), 32 deletions(-) create mode 100644 tests/dynamic_fixtures/secret_injection/flask_secret/app.py create mode 100644 tests/network_policy.rs create mode 100644 tests/secret_derivation.rs diff --git a/src/dynamic/environment.rs b/src/dynamic/environment.rs index ac8f625a..03e1539c 100644 --- a/src/dynamic/environment.rs +++ b/src/dynamic/environment.rs @@ -43,6 +43,218 @@ use std::collections::HashSet; use std::io; use std::path::{Path, PathBuf}; +// ── Phase 11 — Track D.4: deterministic secret derivation ──────────────────── + +/// Prefix prepended to every derived secret so a leaked harness value is +/// immediately recognisable as a Nyx stub rather than a real credential. +pub const SECRET_VALUE_PREFIX: &str = "nyx-stub-"; + +/// Deterministic placeholder for a secret env var. +/// +/// Constructed by [`derive_secret`] from `BLAKE3(spec_hash || env_var_name)` +/// and prefixed with [`SECRET_VALUE_PREFIX`]. The value is stable for the +/// lifetime of a spec, so two harness invocations under the same +/// [`HarnessSpec`] see identical credentials — but never the user's real +/// secret. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct SecretValue(String); + +impl SecretValue { + /// Raw value, ready to drop into `env`. 
+ pub fn as_str(&self) -> &str { + &self.0 + } + + /// Consume into the owned string. + pub fn into_string(self) -> String { + self.0 + } +} + +impl std::fmt::Display for SecretValue { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(&self.0) + } +} + +/// Derive a deterministic placeholder for `env_var_name` keyed by +/// `spec_hash`. +/// +/// `BLAKE3(spec_hash || '|' || env_var_name)` → first 32 hex chars → +/// `"nyx-stub-{hex}"`. The separator (`|`) prevents accidental collisions +/// between `("abc", "DEF")` and `("abcDEF", "")`. +/// +/// Length is bounded at 32 hex characters (128 bits) so the value remains +/// short enough to fit comfortably in URLs, JSON config blobs, and POSIX +/// argv without inflating the env footprint. +pub fn derive_secret(spec_hash: &str, env_var_name: &str) -> SecretValue { + let mut hasher = blake3::Hasher::new(); + hasher.update(spec_hash.as_bytes()); + hasher.update(b"|"); + hasher.update(env_var_name.as_bytes()); + let hex = hasher.finalize().to_hex(); + let mut out = String::with_capacity(SECRET_VALUE_PREFIX.len() + 32); + out.push_str(SECRET_VALUE_PREFIX); + out.push_str(&hex.as_str()[..32]); + SecretValue(out) +} + +/// Scan `entry_file` for env-var references in `lang`. +/// +/// Returns the de-duplicated env-var names referenced via the language's +/// standard env access API (first occurrence wins): +/// +/// | Lang | Patterns | +/// |---|---| +/// | Python | `os.environ.get("X")`, `os.environ["X"]`, `os.getenv("X")`, plus bare `environ.get("X")`, `environ["X"]`, `getenv("X")` | +/// | JS/TS | `process.env.X`, `process.env["X"]` | +/// | Java | `System.getenv("X")`, bare `getenv("X")` | +/// | Rust | `std::env::var("X")`, `env::var("X")`, and the matching `var_os("X")` forms | +/// | Go | `os.Getenv("X")`, `os.LookupEnv("X")` | +/// | PHP | `getenv("X")`, `$_ENV["X"]`, `$_SERVER["X"]` | +/// | Ruby | `ENV["X"]`, `ENV.fetch("X")`, `ENV.fetch "X"` | +/// | C/C++ | `getenv("X")` | +/// +/// Static substring scan — bounded by [`IMPORT_SCAN_LIMIT`] like the import +/// extractor. 
No AST: an entry-file with `os.environ.get(some_var)` (a +/// non-literal arg) is intentionally skipped; the secret bag is populated +/// from literal references only so a typo cannot produce noisy injection. +pub fn extract_env_var_references(entry_file: &Path, lang: Lang) -> Vec { + let bytes = match read_bounded(entry_file) { + Some(s) => s, + None => return Vec::new(), + }; + let source = match std::str::from_utf8(&bytes) { + Ok(s) => s, + Err(_) => return Vec::new(), + }; + let patterns: &[&str] = match lang { + Lang::Python => &[ + "os.environ.get(", + "os.environ[", + "os.getenv(", + "environ.get(", + "environ[", + "getenv(", + ], + Lang::JavaScript | Lang::TypeScript => &["process.env.", "process.env["], + Lang::Java => &["System.getenv(", "getenv("], + Lang::Rust => &["std::env::var(", "env::var(", "env::var_os(", "std::env::var_os("], + Lang::Go => &["os.Getenv(", "os.LookupEnv("], + Lang::Php => &["getenv(", "$_ENV[", "$_SERVER["], + Lang::Ruby => &["ENV[", "ENV.fetch(", "ENV.fetch "], + Lang::C | Lang::Cpp => &["getenv("], + }; + + let mut out: Vec = Vec::new(); + let mut seen: HashSet = HashSet::new(); + for pat in patterns { + let mut start = 0; + while let Some(rel) = source[start..].find(pat) { + let abs = start + rel + pat.len(); + start = abs; + let tail = &source[abs..]; + let name = match lang { + Lang::JavaScript | Lang::TypeScript if *pat == "process.env." => { + extract_identifier_name(tail) + } + _ => extract_quoted_arg(tail), + }; + if let Some(name) = name { + if !name.is_empty() && is_env_var_name(&name) && seen.insert(name.clone()) { + out.push(name); + } + } + } + } + out +} + +/// Extract a quoted (single, double, or backtick) literal argument starting +/// at `s`. Skips leading spaces/tabs; stops at the matching close-quote. +/// Returns `None` when the first non-blank char is not a quote (the arg is +/// dynamic), or when the literal is unterminated or spans a newline. 
+fn extract_quoted_arg(s: &str) -> Option { + let bytes = s.as_bytes(); + let mut i = 0; + while i < bytes.len() && (bytes[i] == b' ' || bytes[i] == b'\t') { + i += 1; + } + if i >= bytes.len() { + return None; + } + let quote = match bytes[i] { + b'"' => b'"', + b'\'' => b'\'', + b'`' => b'`', + _ => return None, + }; + i += 1; + let start = i; + while i < bytes.len() && bytes[i] != quote { + if bytes[i] == b'\n' { + return None; + } + i += 1; + } + if i >= bytes.len() { + return None; + } + std::str::from_utf8(&bytes[start..i]).ok().map(|s| s.to_owned()) +} + +/// Extract a bare identifier (e.g. `FOO` in `process.env.FOO`). Stops at +/// the first non-identifier byte. +fn extract_identifier_name(s: &str) -> Option { + let bytes = s.as_bytes(); + let mut i = 0; + while i < bytes.len() { + let c = bytes[i]; + let is_ident = c.is_ascii_alphanumeric() || c == b'_'; + if !is_ident { + break; + } + i += 1; + } + if i == 0 { + return None; + } + std::str::from_utf8(&bytes[..i]).ok().map(|s| s.to_owned()) +} + +/// Permissive env-var-name shape: starts with a letter or underscore, then +/// any of `[A-Za-z0-9_]`. Filters out blatantly bogus parses (e.g. when +/// the quoted scanner picks up `{`). +fn is_env_var_name(s: &str) -> bool { + if s.is_empty() { + return false; + } + let mut chars = s.chars(); + let first = chars.next().unwrap(); + if !(first.is_ascii_alphabetic() || first == '_') { + return false; + } + chars.all(|c| c.is_ascii_alphanumeric() || c == '_') +} + +/// Build the per-spec secret bag: each env var the entry file references +/// gets a deterministic `(name, derive_secret(spec_hash, name))` entry. +/// +/// Returned in deterministic scan order (per access pattern, then by position +/// in the file) so identical inputs produce byte-identical env layouts. 
+pub fn build_secret_bag( + entry_file: &Path, + lang: Lang, + spec_hash: &str, +) -> Vec<(String, String)> { + let mut out: Vec<(String, String)> = Vec::new(); + for name in extract_env_var_references(entry_file, lang) { + let val = derive_secret(spec_hash, &name); + out.push((name, val.into_string())); + } + out +} + /// Hard upper bound on the bytes a staged workdir may consume after /// `stage_workdir` returns. Phase 09 acceptance pins this to 10 MiB so a /// pathological full-tree copy regression is caught at the test boundary @@ -165,8 +377,12 @@ pub struct Environment { /// to the workdir root (e.g. `"src/handler.py"`). pub staged_sources: Vec, /// Environment variables the harness should set before invoking the - /// entry point. Phase 09 stops at the empty set; Phase 10+ - /// extensions (stub injection) will populate these. + /// entry point. Populated by [`build_secret_bag`] during + /// [`stage_workdir_full`] (Phase 11 — Track D.4) with deterministic + /// stub values for every env var the entry file literally + /// references. Phase 10 stub endpoints (SQL DB path, HTTP origin + /// URL, etc.) are layered on top by the verifier via + /// [`crate::dynamic::sandbox::SandboxOptions::extra_env`]. pub env_vars: Vec<(String, String)>, /// Stub registry handles. Reserved for the Phase 10 stub-injection /// layer; Phase 09 stages no stubs so this is always empty. @@ -385,12 +601,21 @@ pub fn stage_workdir_full( copy_into_workdir(cfg, workdir, &rel, running_bytes, &mut staged_sources)?; } + // Phase 11 — Track D.4: populate the per-spec secret bag for every + // env var the entry file literally references. `spec_hash` is empty + // for the legacy [`stage_workdir`] entry point; in that case the + // derived values still hash deterministically (collisions are avoided + // by the env-var name component) but two distinct specs would alias. + // Callers with a real spec hash should use + // [`stage_workdir_full`] / [`stage_workdir_with_spec_hash`]. 
+ let env_vars = build_secret_bag(&captured.entry_file, lang, spec_hash); + Ok(Environment { spec_hash: spec_hash.to_owned(), workdir: workdir.to_path_buf(), lockfile: lockfile_in_workdir, staged_sources, - env_vars: Vec::new(), + env_vars, stub_handles: Vec::new(), toolchain: captured.toolchain.clone(), direct_deps: captured.direct_deps.clone(), diff --git a/src/dynamic/runner.rs b/src/dynamic/runner.rs index c16fe726..2f11efc9 100644 --- a/src/dynamic/runner.rs +++ b/src/dynamic/runner.rs @@ -254,7 +254,7 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result Result, /// Maximum stdout/stderr bytes captured. Default: 65536 (64 KiB). pub output_limit: usize, - /// Per-scan OOB listener. When set, the Docker backend uses bridge - /// networking so the harness can reach the listener on the host, and the - /// runner checks [`OobListener::was_nonce_hit`] after each sandbox run. - pub oob_listener: Option>, + /// Phase 11 (Track D.5): network reachability the harness is allowed + /// to exercise. Default [`NetworkPolicy::None`] — the previous + /// behaviour was equivalent to a binary `oob_listener: Option<...>`; + /// callers wanting OOB callbacks now set + /// [`NetworkPolicy::OobOutbound`]. See [`NetworkPolicy`] for the + /// per-variant backend wiring. + pub network_policy: NetworkPolicy, /// Per-run structured-oracle [`ProbeChannel`] (Phase 06 — Track C.1). /// When set, the sandbox forwards the channel's path to the harness via /// the `NYX_PROBE_PATH` env var so the per-language `__nyx_probe` shim @@ -158,6 +161,19 @@ pub struct SandboxOptions { pub stub_harness: Option>, } +impl SandboxOptions { + /// Borrow the OOB listener handle when the network policy carries + /// one. Returns `None` for every variant except + /// [`NetworkPolicy::OobOutbound`]. + /// + /// Kept stable across the Phase 11 cut-over so the runner can keep + /// poking at `effective_opts.oob_listener()` without caring whether + /// the policy machinery moves underneath it. 
+ pub fn oob_listener(&self) -> Option<&Arc> { + self.network_policy.oob_listener() + } +} + impl Default for SandboxOptions { fn default() -> Self { Self { @@ -166,7 +182,7 @@ impl Default for SandboxOptions { backend: SandboxBackend::Auto, env_passthrough: vec![], output_limit: 65536, - oob_listener: None, + network_policy: NetworkPolicy::None, probe_channel: None, extra_env: Vec::new(), stub_harness: None, @@ -174,6 +190,98 @@ impl Default for SandboxOptions { } } +// ── Phase 11 — Track D.5: NetworkPolicy ────────────────────────────────────── + +/// Host + port allowlist entry referenced by [`NetworkPolicy::StubsOnly`]. +/// +/// The Docker backend emits one `--add-host={host}:host-gateway` line per +/// entry so the harness resolves stub hostnames to the host gateway; `port` +/// is not enforced there yet and no egress filter is installed for this policy. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct HostPort { + pub host: String, + pub port: u16, +} + +impl HostPort { + pub fn new(host: impl Into, port: u16) -> Self { + Self { host: host.into(), port } + } +} + +/// Phase 11 (Track D.5): network reachability the harness is allowed to +/// exercise. Replaces the legacy `oob_listener: Option>` +/// binary flag with an enum that distinguishes the four operationally +/// meaningful stances: +/// +/// - [`NetworkPolicy::None`] — no outbound network at all (default). +/// Docker: `--network none`. Process backend: caller-imposed; the +/// process backend has no network namespace facility so the policy is +/// structural here (the harness has whatever connectivity the host's +/// `lo`/routes provide; production runs should use the Docker backend +/// for real isolation). +/// - [`NetworkPolicy::StubsOnly`] — only the listed host/port pairs are +/// reachable. Docker: `bridge` network + `--add-host` per allow-entry. 
+/// Linux production hardening (netns + nftables) is staged for a +/// follow-up phase; today the variant carries the allowlist for the +/// harness emitter and is mechanically distinguished by the backend +/// selector. +/// - [`NetworkPolicy::OobOutbound`] — the legacy "OOB only" path: the +/// harness can reach the per-scan OOB listener (and only it via the +/// Linux iptables filter in [`apply_oob_egress_filter`]). Docker: +/// `bridge` + host-gateway + iptables OOB-port filter. +/// - [`NetworkPolicy::Open`] — unrestricted outbound. Docker: `bridge` +/// with no egress filter. Reserved for diagnostic / dev-only runs; +/// the verifier never sets this in production. +#[derive(Debug, Clone)] +pub enum NetworkPolicy { + None, + StubsOnly { allow: Vec }, + OobOutbound { listener: Arc }, + Open, +} + +impl NetworkPolicy { + /// `true` when the docker backend should run the container with a + /// bridge network (i.e. with outbound reachability available, even + /// if filtered). `false` selects `--network none`. + pub fn allows_network(&self) -> bool { + !matches!(self, NetworkPolicy::None) + } + + /// OOB listener handle when this policy carries one. + pub fn oob_listener(&self) -> Option<&Arc> { + match self { + NetworkPolicy::OobOutbound { listener } => Some(listener), + _ => None, + } + } + + /// Stub allow-list entries when this policy carries one. + pub fn stub_allow_list(&self) -> Option<&[HostPort]> { + match self { + NetworkPolicy::StubsOnly { allow } => Some(allow.as_slice()), + _ => None, + } + } + + /// Short tag used by the docker `--add-host` shaper / telemetry. + pub fn variant_tag(&self) -> &'static str { + match self { + NetworkPolicy::None => "none", + NetworkPolicy::StubsOnly { .. } => "stubs-only", + NetworkPolicy::OobOutbound { .. 
} => "oob-outbound", + NetworkPolicy::Open => "open", + } + } +} + +impl Default for NetworkPolicy { + fn default() -> Self { + NetworkPolicy::None + } +} + #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum SandboxBackend { Auto, @@ -511,8 +619,7 @@ fn run_docker( // Determine the Python image from the harness command (first element). // Fall back to python:3-slim when the command is not recognised. let image = detect_image_for_harness(harness); - let oob_port = opts.oob_listener.as_ref().map(|l| l.port()); - start_container(&container_name, &harness.workdir, &image, oob_port)?; + start_container(&container_name, &harness.workdir, &image, &opts.network_policy)?; registry.insert(container_name.clone(), container_name.clone()); } @@ -553,15 +660,18 @@ fn is_container_running(name: &str) -> bool { /// - `--rm`: auto-remove on stop (no manual cleanup required). /// - `--cap-drop=ALL`: drop all Linux capabilities. /// - `--security-opt no-new-privileges:true`: block privilege escalation. -/// - `--network none`: no network access (loopback only), OR `bridge` when -/// `oob_port` is set so the harness can reach the host OOB listener. -/// - `--add-host=host-gateway:host-gateway`: host-gateway DNS alias when -/// using bridge mode (Docker ≥ 20.10). +/// - Network: derived from [`NetworkPolicy`] — +/// - [`NetworkPolicy::None`] ⇒ `--network none` (no egress). +/// - [`NetworkPolicy::OobOutbound`] ⇒ `bridge` + `--add-host=host-gateway` +/// + (on Linux) iptables OOB-port filter. +/// - [`NetworkPolicy::StubsOnly`] ⇒ `bridge` + one `--add-host` per +/// [`HostPort`] in the allow list so DNS resolves to the host bind. +/// - [`NetworkPolicy::Open`] ⇒ `bridge` with no egress filter. 
fn start_container( name: &str, workdir: &Path, image: &str, - oob_port: Option, + policy: &NetworkPolicy, ) -> Result<(), SandboxError> { let mut run_args: Vec = vec![ "run".into(), @@ -572,12 +682,26 @@ fn start_container( "--security-opt".into(), "no-new-privileges:true".into(), "--tmpfs".into(), "/tmp:size=128m,exec".into(), ]; - if oob_port.is_some() { - // Bridge mode: container can reach host via host-gateway. - run_args.extend(["--network".into(), "bridge".into()]); - run_args.extend(["--add-host=host-gateway:host-gateway".into()]); - } else { - run_args.extend(["--network".into(), "none".into()]); + match policy { + NetworkPolicy::None => { + run_args.extend(["--network".into(), "none".into()]); + } + NetworkPolicy::OobOutbound { .. } => { + run_args.extend(["--network".into(), "bridge".into()]); + run_args.extend(["--add-host=host-gateway:host-gateway".into()]); + } + NetworkPolicy::StubsOnly { allow } => { + run_args.extend(["--network".into(), "bridge".into()]); + // host-gateway alias still useful so stubs bound to 127.0.0.1 + // can be reached as host-gateway from inside the container. + run_args.extend(["--add-host=host-gateway:host-gateway".into()]); + for hp in allow { + run_args.push(format!("--add-host={}:host-gateway", hp.host)); + } + } + NetworkPolicy::Open => { + run_args.extend(["--network".into(), "bridge".into()]); + } } run_args.extend([image.into(), "sleep".into(), "300".into()]); @@ -625,9 +749,11 @@ fn start_container( // This restricts the bridge-networked container to only reach the host // on the OOB port; all other egress is dropped (§17.2). 
#[cfg(target_os = "linux")] - if let Some(port) = oob_port { - apply_oob_egress_filter(name, port); + if let NetworkPolicy::OobOutbound { listener } = policy { + apply_oob_egress_filter(name, listener.port()); } + #[cfg(not(target_os = "linux"))] + let _ = policy; // policy already consumed structurally above Ok(()) } else { Err(SandboxError::BackendUnavailable(SandboxBackend::Docker)) @@ -862,8 +988,12 @@ fn run_native_binary_docker( }; if !reused { - let oob_port = opts.oob_listener.as_ref().map(|l| l.port()); - start_container(&container_name, &harness.workdir, NATIVE_BINARY_IMAGE, oob_port)?; + start_container( + &container_name, + &harness.workdir, + NATIVE_BINARY_IMAGE, + &opts.network_policy, + )?; // Copy the compiled binary into the container as /workdir/nyx_harness. let cp_dst = format!("{container_name}:/workdir/nyx_harness"); diff --git a/src/dynamic/verify.rs b/src/dynamic/verify.rs index 1bd4d3e4..d7fc7ece 100644 --- a/src/dynamic/verify.rs +++ b/src/dynamic/verify.rs @@ -68,17 +68,24 @@ impl VerifyOptions { /// (`src/dynamic/runner.rs` `oob_nonce_slot` branch) while non-OOB /// payloads continue to run against their existing oracle. pub fn from_config(config: &Config) -> Self { - use crate::dynamic::sandbox::SandboxBackend; + use crate::dynamic::sandbox::{NetworkPolicy, SandboxBackend}; let backend = match config.scanner.verify_backend.as_str() { "docker" => SandboxBackend::Docker, "process" => SandboxBackend::Process, _ => SandboxBackend::Auto, }; - let oob_listener = OobListener::bind().ok().map(Arc::new); + // Phase 11 — Track D.5: surface the per-scan listener as a + // [`NetworkPolicy::OobOutbound`] so the docker backend turns on + // bridge networking + the iptables egress filter, and the process + // backend reaches the listener via the same accessor as before. 
+ let network_policy = match OobListener::bind().ok().map(Arc::new) { + Some(listener) => NetworkPolicy::OobOutbound { listener }, + None => NetworkPolicy::None, + }; Self { sandbox: SandboxOptions { backend, - oob_listener, + network_policy, ..SandboxOptions::default() }, project_root: None, diff --git a/tests/dynamic_fixtures/secret_injection/flask_secret/app.py b/tests/dynamic_fixtures/secret_injection/flask_secret/app.py new file mode 100644 index 00000000..e48eb130 --- /dev/null +++ b/tests/dynamic_fixtures/secret_injection/flask_secret/app.py @@ -0,0 +1,21 @@ +# Phase 11 fixture: Flask app that reads FLASK_SECRET at import time via +# the bare-index `os.environ["FLASK_SECRET"]` form (the canonical KeyError +# trap). The harness must populate the env *before* the module is +# imported or app.secret_key resolution raises. +# +# Phase 11 — Track D.4 acceptance bullet: +# "A Flask fixture with `app.secret_key = os.environ["FLASK_SECRET"]` +# boots without raising `KeyError`." + +import os +from flask import Flask + +app = Flask(__name__) +app.secret_key = os.environ["FLASK_SECRET"] + +API_TOKEN = os.environ.get("API_TOKEN", "default-token") + + +@app.route("/") +def index(): + return "ok" diff --git a/tests/dynamic_sandbox_escape.rs b/tests/dynamic_sandbox_escape.rs index c6b55f25..a55ed274 100644 --- a/tests/dynamic_sandbox_escape.rs +++ b/tests/dynamic_sandbox_escape.rs @@ -15,7 +15,9 @@ #[cfg(feature = "dynamic")] mod escape_tests { use nyx_scanner::dynamic::harness::BuiltHarness; - use nyx_scanner::dynamic::sandbox::{self, SandboxBackend, SandboxError, SandboxOptions}; + use nyx_scanner::dynamic::sandbox::{ + self, NetworkPolicy, SandboxBackend, SandboxError, SandboxOptions, + }; use std::fs; use std::path::{Path, PathBuf}; use std::time::Duration; @@ -58,7 +60,7 @@ mod escape_tests { backend: SandboxBackend::Docker, env_passthrough: vec![], output_limit: 65536, - oob_listener: None, + network_policy: NetworkPolicy::None, probe_channel: None, extra_env: 
vec![], stub_harness: None, diff --git a/tests/network_policy.rs b/tests/network_policy.rs new file mode 100644 index 00000000..2c68aaf0 --- /dev/null +++ b/tests/network_policy.rs @@ -0,0 +1,118 @@ +//! Phase 11 — Track D.5: [`NetworkPolicy`] acceptance. +//! +//! These tests exercise the public API surface; they do *not* drive a +//! real container. The docker backend's per-variant flag emission is +//! covered indirectly by `tests/dynamic_sandbox_escape.rs` (which still +//! pins `NetworkPolicy::None`), and the Linux iptables filter path is +//! covered by `src/dynamic/sandbox.rs` unit tests. +//! +//! Scope here is structural: each variant exposes the right accessor +//! shape, the default is `None`, and [`SandboxOptions::oob_listener`] +//! still resolves the legacy callsite without the runner caring which +//! variant fed it. + +#![cfg(feature = "dynamic")] + +use nyx_scanner::dynamic::oob::OobListener; +use nyx_scanner::dynamic::sandbox::{HostPort, NetworkPolicy, SandboxOptions}; +use std::sync::Arc; + +#[test] +fn default_policy_is_none() { + let opts = SandboxOptions::default(); + assert!(matches!(opts.network_policy, NetworkPolicy::None)); + assert!(opts.oob_listener().is_none()); +} + +#[test] +fn none_blocks_network() { + let p = NetworkPolicy::None; + assert!(!p.allows_network()); + assert!(p.oob_listener().is_none()); + assert!(p.stub_allow_list().is_none()); + assert_eq!(p.variant_tag(), "none"); +} + +#[test] +fn stubs_only_carries_allowlist() { + let p = NetworkPolicy::StubsOnly { + allow: vec![ + HostPort::new("db.local", 5432), + HostPort::new("redis.local", 6379), + ], + }; + assert!(p.allows_network()); + assert!(p.oob_listener().is_none()); + let allow = p.stub_allow_list().expect("allow list present"); + assert_eq!(allow.len(), 2); + assert_eq!(allow[0].host, "db.local"); + assert_eq!(allow[0].port, 5432); + assert_eq!(p.variant_tag(), "stubs-only"); +} + +#[test] +fn oob_outbound_carries_listener() { + // Skip on hosts where loopback bind is 
impossible (e.g. extremely + // locked-down sandboxes). All other CI hosts can bind 127.0.0.1. + let Ok(listener) = OobListener::bind() else { + eprintln!("OobListener::bind failed — skipping oob_outbound_carries_listener"); + return; + }; + let listener = Arc::new(listener); + let p = NetworkPolicy::OobOutbound { listener: Arc::clone(&listener) }; + assert!(p.allows_network()); + let got = p.oob_listener().expect("listener present"); + assert!( + Arc::ptr_eq(got, &listener), + "oob_listener() must return the same Arc" + ); + assert!(p.stub_allow_list().is_none()); + assert_eq!(p.variant_tag(), "oob-outbound"); +} + +#[test] +fn open_allows_network_with_no_filter() { + let p = NetworkPolicy::Open; + assert!(p.allows_network()); + assert!(p.oob_listener().is_none()); + assert!(p.stub_allow_list().is_none()); + assert_eq!(p.variant_tag(), "open"); +} + +#[test] +fn sandbox_options_oob_listener_accessor_finds_oob_variant() { + let Ok(listener) = OobListener::bind() else { + eprintln!("OobListener::bind failed — skipping accessor test"); + return; + }; + let listener = Arc::new(listener); + let opts = SandboxOptions { + network_policy: NetworkPolicy::OobOutbound { + listener: Arc::clone(&listener), + }, + ..SandboxOptions::default() + }; + let got = opts.oob_listener().expect("listener present"); + assert!(Arc::ptr_eq(got, &listener)); +} + +#[test] +fn sandbox_options_oob_listener_accessor_none_for_other_variants() { + let opts_none = SandboxOptions { + network_policy: NetworkPolicy::None, + ..SandboxOptions::default() + }; + assert!(opts_none.oob_listener().is_none()); + + let opts_open = SandboxOptions { + network_policy: NetworkPolicy::Open, + ..SandboxOptions::default() + }; + assert!(opts_open.oob_listener().is_none()); + + let opts_stubs = SandboxOptions { + network_policy: NetworkPolicy::StubsOnly { allow: vec![] }, + ..SandboxOptions::default() + }; + assert!(opts_stubs.oob_listener().is_none()); +} diff --git a/tests/secret_derivation.rs 
b/tests/secret_derivation.rs new file mode 100644 index 00000000..b8bd8231 --- /dev/null +++ b/tests/secret_derivation.rs @@ -0,0 +1,254 @@ +//! Phase 11 — Track D.4: deterministic secret derivation acceptance. +//! +//! Asserts: +//! +//! 1. [`derive_secret`] is byte-for-byte deterministic across runs with +//! identical (`spec_hash`, `env_var_name`) inputs. +//! 2. Distinct env-var names produce distinct values under the same +//! spec. +//! 3. Distinct spec hashes produce distinct values for the same env-var +//! name (no cross-spec aliasing). +//! 4. Every value carries the `nyx-stub-` prefix so a leaked harness +//! credential is recognisable. +//! 5. [`extract_env_var_references`] picks up every supported per-lang +//! env access pattern for the languages currently in scope. +//! 6. [`build_secret_bag`] returns one entry per literally-referenced +//! env var. +//! 7. End-to-end: the Phase 11 Flask fixture, when its captured env bag +//! is injected as process env vars, boots without raising +//! `KeyError: 'FLASK_SECRET'` (skipped on hosts without +//! `python3 -c 'import flask'`). + +#![cfg(feature = "dynamic")] + +use nyx_scanner::dynamic::environment::{ + build_secret_bag, derive_secret, extract_env_var_references, SECRET_VALUE_PREFIX, +}; +use nyx_scanner::symbol::Lang; +use std::path::{Path, PathBuf}; + +fn fixture_root() -> PathBuf { + Path::new(env!("CARGO_MANIFEST_DIR")) + .join("tests") + .join("dynamic_fixtures") + .join("secret_injection") + .join("flask_secret") +} + +#[test] +fn derive_secret_is_deterministic() { + let a = derive_secret("spec0001abcd1234", "FLASK_SECRET"); + let b = derive_secret("spec0001abcd1234", "FLASK_SECRET"); + assert_eq!(a, b, "same inputs must yield same output"); +} + +#[test] +fn derive_secret_has_stub_prefix() { + let v = derive_secret("any-spec-hash", "ANY_VAR"); + assert!( + v.as_str().starts_with(SECRET_VALUE_PREFIX), + "missing nyx-stub- prefix: {v}" + ); + // 32 hex chars after the prefix. 
+ assert_eq!(v.as_str().len(), SECRET_VALUE_PREFIX.len() + 32); +} + +#[test] +fn derive_secret_distinguishes_env_var_names() { + let a = derive_secret("specA", "FLASK_SECRET"); + let b = derive_secret("specA", "API_TOKEN"); + assert_ne!(a, b, "different env var names must produce distinct values"); +} + +#[test] +fn derive_secret_distinguishes_spec_hashes() { + let a = derive_secret("specA", "FLASK_SECRET"); + let b = derive_secret("specB", "FLASK_SECRET"); + assert_ne!(a, b, "different spec hashes must produce distinct values"); +} + +#[test] +fn extract_env_var_references_python_patterns() { + let tmp = tempfile::TempDir::new().unwrap(); + let path = tmp.path().join("app.py"); + std::fs::write( + &path, + r#" +import os +SECRET = os.environ["FLASK_SECRET"] +DB = os.environ.get("DATABASE_URL") +PORT = os.getenv("PORT", "8000") +DYNAMIC = os.environ.get(some_dynamic_var) # skipped (non-literal) +"#, + ) + .unwrap(); + let refs = extract_env_var_references(&path, Lang::Python); + assert!(refs.contains(&"FLASK_SECRET".to_owned()), "refs = {refs:?}"); + assert!(refs.contains(&"DATABASE_URL".to_owned()), "refs = {refs:?}"); + assert!(refs.contains(&"PORT".to_owned()), "refs = {refs:?}"); + // Dynamic arg must be skipped. 
+ assert!(!refs.iter().any(|r| r == "some_dynamic_var")); +} + +#[test] +fn extract_env_var_references_js_patterns() { + let tmp = tempfile::TempDir::new().unwrap(); + let path = tmp.path().join("app.js"); + std::fs::write( + &path, + r#" +const a = process.env.NODE_ENV; +const b = process.env["DATABASE_URL"]; +"#, + ) + .unwrap(); + let refs = extract_env_var_references(&path, Lang::JavaScript); + assert!(refs.contains(&"NODE_ENV".to_owned()), "refs = {refs:?}"); + assert!(refs.contains(&"DATABASE_URL".to_owned()), "refs = {refs:?}"); +} + +#[test] +fn extract_env_var_references_java_patterns() { + let tmp = tempfile::TempDir::new().unwrap(); + let path = tmp.path().join("App.java"); + std::fs::write( + &path, + r#" +public class App { + public static void main(String[] args) { + String s = System.getenv("JWT_SECRET"); + } +} +"#, + ) + .unwrap(); + let refs = extract_env_var_references(&path, Lang::Java); + assert!(refs.contains(&"JWT_SECRET".to_owned()), "refs = {refs:?}"); +} + +#[test] +fn extract_env_var_references_rust_patterns() { + let tmp = tempfile::TempDir::new().unwrap(); + let path = tmp.path().join("main.rs"); + std::fs::write( + &path, + r#" +fn main() { + let s = std::env::var("HOME").unwrap(); + let t = env::var("PATH").unwrap_or_default(); +} +"#, + ) + .unwrap(); + let refs = extract_env_var_references(&path, Lang::Rust); + assert!(refs.contains(&"HOME".to_owned()), "refs = {refs:?}"); + assert!(refs.contains(&"PATH".to_owned()), "refs = {refs:?}"); +} + +#[test] +fn extract_env_var_references_go_patterns() { + let tmp = tempfile::TempDir::new().unwrap(); + let path = tmp.path().join("main.go"); + std::fs::write( + &path, + r#" +package main + +import "os" + +func main() { + s := os.Getenv("HOME") + t, _ := os.LookupEnv("PATH") + _ = s + _ = t +} +"#, + ) + .unwrap(); + let refs = extract_env_var_references(&path, Lang::Go); + assert!(refs.contains(&"HOME".to_owned()), "refs = {refs:?}"); + assert!(refs.contains(&"PATH".to_owned()), "refs = 
{refs:?}"); +} + +#[test] +fn build_secret_bag_returns_one_entry_per_var() { + let path = fixture_root().join("app.py"); + let bag = build_secret_bag(&path, Lang::Python, "specphase11test1"); + + // FLASK_SECRET (bare index) + API_TOKEN (.get with literal arg). + let names: Vec<&str> = bag.iter().map(|(n, _)| n.as_str()).collect(); + assert!(names.contains(&"FLASK_SECRET"), "bag = {bag:?}"); + assert!(names.contains(&"API_TOKEN"), "bag = {bag:?}"); + + // Every value bears the stub prefix. + for (_, v) in &bag { + assert!( + v.starts_with(SECRET_VALUE_PREFIX), + "leaked unprefixed value: {v}" + ); + } +} + +/// End-to-end acceptance: the Phase 11 Flask fixture boots without +/// raising `KeyError: 'FLASK_SECRET'` once the derived secret bag is set +/// as process env vars. +/// +/// Skipped on hosts where `python3 -c 'import flask'` fails — the +/// dynamic verifier itself is gated on the same precondition (see +/// `tests/env_capture_flask.rs`). +#[test] +fn flask_fixture_boots_with_derived_secret_env() { + let has_python3 = std::process::Command::new("python3") + .arg("--version") + .output() + .map(|o| o.status.success()) + .unwrap_or(false); + if !has_python3 { + eprintln!("python3 not on PATH — Phase 11 boot check skipped"); + return; + } + let has_flask = std::process::Command::new("python3") + .args(["-c", "import flask"]) + .output() + .map(|o| o.status.success()) + .unwrap_or(false); + if !has_flask { + eprintln!("flask not installed on host — Phase 11 boot check skipped"); + return; + } + + let fixture = fixture_root(); + let app_py = fixture.join("app.py"); + let bag = build_secret_bag(&app_py, Lang::Python, "phase11specabcd1"); + assert!( + bag.iter().any(|(n, _)| n == "FLASK_SECRET"), + "fixture scan missed FLASK_SECRET: bag = {bag:?}" + ); + + // Spawn python3 in the fixture directory, env-clear, layer the bag + // on top, and confirm the module imports without raising. 
+ let mut cmd = std::process::Command::new("python3"); + cmd.args(["-c", "import sys; sys.path.insert(0, '.'); import app; print('OK')"]); + cmd.current_dir(&fixture); + cmd.env_clear(); + // PATH is required so python3 can re-locate its stdlib; the + // verifier's process backend preserves it via env_passthrough. + if let Ok(p) = std::env::var("PATH") { + cmd.env("PATH", p); + } + for (k, v) in &bag { + cmd.env(k, v); + } + let out = cmd.output().expect("invoke python3"); + let stdout = String::from_utf8_lossy(&out.stdout); + let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + out.status.success(), + "fixture did not boot with derived secret env: stdout={stdout} stderr={stderr}" + ); + assert!(stdout.contains("OK"), "missing OK marker: {stdout}"); + assert!( + !stderr.contains("KeyError"), + "Phase 11 acceptance violated — KeyError raised: {stderr}" + ); +}