[pitboss] phase 11: Track D.4 + D.5 — Deterministic secrets + NetworkPolicy

This commit is contained in:
pitboss 2026-05-14 14:39:29 -05:00
parent 50f0729d01
commit 523bd0c53a
8 changed files with 789 additions and 32 deletions

View file

@ -43,6 +43,218 @@ use std::collections::HashSet;
use std::io;
use std::path::{Path, PathBuf};
// ── Phase 11 — Track D.4: deterministic secret derivation ────────────────────
/// Prefix prepended to every derived secret so a leaked harness value is
/// immediately recognisable as a Nyx stub rather than a real credential.
///
/// [`derive_secret`] writes this prefix first and then appends the
/// truncated hash, so every value it returns starts with `nyx-stub-`.
pub const SECRET_VALUE_PREFIX: &str = "nyx-stub-";
/// Deterministic stand-in for the value of a secret env var.
///
/// Values are produced by [`derive_secret`], which hashes
/// `spec_hash || '|' || env_var_name` with BLAKE3, keeps a fixed-length hex
/// prefix of the digest and prepends [`SECRET_VALUE_PREFIX`]. The derivation
/// has no other inputs, so the same `(spec_hash, env_var_name)` pair always
/// yields the same placeholder — and the placeholder is never derived from
/// the user's real secret.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SecretValue(String);

impl SecretValue {
    /// Borrows the placeholder text, e.g. to use it as an env value.
    pub fn as_str(&self) -> &str {
        self.0.as_str()
    }

    /// Unwraps the placeholder into its owned `String`.
    pub fn into_string(self) -> String {
        let Self(inner) = self;
        inner
    }
}

impl std::fmt::Display for SecretValue {
    /// Writes the raw placeholder text verbatim; width / padding flags on
    /// the formatter are not applied.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(self.as_str())
    }
}
/// Compute the deterministic placeholder for `env_var_name` under
/// `spec_hash`.
///
/// The digest input is `spec_hash`, a single `|` byte, then `env_var_name`.
/// The `|` separator keeps `("abc", "DEF")` and `("abcDEF", "")` from
/// feeding identical bytes to the hash. The first 32 hex characters
/// (128 bits) of the BLAKE3 digest are appended to [`SECRET_VALUE_PREFIX`],
/// giving `"nyx-stub-{hex}"`.
///
/// Truncating to 32 hex characters keeps the value short enough for URLs,
/// JSON config blobs and argv without inflating the env footprint.
pub fn derive_secret(spec_hash: &str, env_var_name: &str) -> SecretValue {
    /// Number of leading hex characters of the digest that are kept.
    const HEX_LEN: usize = 32;

    let digest = blake3::Hasher::new()
        .update(spec_hash.as_bytes())
        .update(b"|")
        .update(env_var_name.as_bytes())
        .finalize();
    let hex = digest.to_hex();

    SecretValue([SECRET_VALUE_PREFIX, &hex.as_str()[..HEX_LEN]].concat())
}
/// Collect the env-var names that `entry_file` references through the
/// standard env-access API of `lang`.
///
/// The scan looks for these literal prefixes and then parses the name that
/// follows:
///
/// | Lang | Prefixes searched |
/// |---|---|
/// | Python | `os.environ.get(`, `os.environ[`, `os.getenv(`, `environ.get(`, `environ[`, `getenv(` |
/// | JS/TS | `process.env.` (bare identifier), `process.env[` |
/// | Java | `System.getenv(`, `getenv(` |
/// | Rust | `std::env::var(`, `env::var(`, `env::var_os(`, `std::env::var_os(` |
/// | Go | `os.Getenv(`, `os.LookupEnv(` |
/// | PHP | `getenv(`, `$_ENV[`, `$_SERVER[` |
/// | Ruby | `ENV[`, `ENV.fetch(`, `ENV.fetch ` |
/// | C/C++ | `getenv(` |
///
/// This is a plain substring scan over whatever [`read_bounded`] returns
/// (presumably capped at [`IMPORT_SCAN_LIMIT`], like the import extractor —
/// confirm against that helper). There is no AST: a call whose argument is
/// not a quoted literal, such as `os.environ.get(some_var)`, is skipped on
/// purpose so only literal references populate the secret bag.
///
/// Returns an empty vector when the file cannot be read or is not valid
/// UTF-8. Names are de-duplicated; they are emitted pattern by pattern (in
/// the table order above), and within one pattern in order of appearance.
pub fn extract_env_var_references(entry_file: &Path, lang: Lang) -> Vec<String> {
    let raw = match read_bounded(entry_file) {
        Some(raw) => raw,
        None => return Vec::new(),
    };
    let source = match std::str::from_utf8(&raw) {
        Ok(text) => text,
        Err(_) => return Vec::new(),
    };

    let patterns: &[&str] = match lang {
        Lang::Python => &[
            "os.environ.get(",
            "os.environ[",
            "os.getenv(",
            "environ.get(",
            "environ[",
            "getenv(",
        ],
        Lang::JavaScript | Lang::TypeScript => &["process.env.", "process.env["],
        Lang::Java => &["System.getenv(", "getenv("],
        Lang::Rust => &["std::env::var(", "env::var(", "env::var_os(", "std::env::var_os("],
        Lang::Go => &["os.Getenv(", "os.LookupEnv("],
        Lang::Php => &["getenv(", "$_ENV[", "$_SERVER["],
        Lang::Ruby => &["ENV[", "ENV.fetch(", "ENV.fetch "],
        Lang::C | Lang::Cpp => &["getenv("],
    };

    let mut names: Vec<String> = Vec::new();
    let mut seen: HashSet<String> = HashSet::new();

    for pat in patterns.iter().copied() {
        // Only the dotted JS/TS form is followed by a bare identifier; every
        // other prefix is followed by a quoted literal (or a dynamic
        // expression, which is skipped).
        let bare_identifier =
            matches!(lang, Lang::JavaScript | Lang::TypeScript) && pat == "process.env.";

        // `match_indices` yields successive non-overlapping hits, i.e. the
        // search resumes right after the end of the previous match.
        for (at, _) in source.match_indices(pat) {
            let tail = &source[at + pat.len()..];
            let candidate = if bare_identifier {
                extract_identifier_name(tail)
            } else {
                extract_quoted_arg(tail)
            };

            // `is_env_var_name` rejects the empty string as well as names
            // with characters outside `[A-Za-z0-9_]`.
            let name = match candidate {
                Some(name) if is_env_var_name(&name) => name,
                _ => continue,
            };
            if seen.insert(name.clone()) {
                names.push(name);
            }
        }
    }

    names
}
/// Pull a quoted literal argument off the front of `s`.
///
/// Leading spaces and tabs are skipped. The next character must be an
/// opening quote — `"`, `'` or a backtick — and the literal runs up to the
/// next occurrence of that same quote character (no escape handling).
///
/// Returns `None` when:
/// - nothing but spaces/tabs remains, or the first other character is not a
///   quote (the argument is dynamic and is deliberately skipped);
/// - a newline is reached before the closing quote;
/// - the input ends before the closing quote.
///
/// An empty literal (`""`) yields `Some(String::new())`.
fn extract_quoted_arg(s: &str) -> Option<String> {
    let rest = s.trim_start_matches(|c: char| c == ' ' || c == '\t');

    let mut chars = rest.chars();
    let quote = match chars.next()? {
        q @ ('"' | '\'' | '`') => q,
        _ => return None,
    };

    // Everything after the opening quote.
    let body = chars.as_str();

    // Whichever comes first decides the outcome: the closing quote ends the
    // literal, a newline aborts the parse.
    let stop = body.find(|c: char| c == quote || c == '\n')?;
    if body[stop..].starts_with('\n') {
        return None;
    }
    Some(body[..stop].to_owned())
}
/// Take the leading bare identifier off `s` (e.g. `FOO` from the tail of
/// `process.env.FOO`).
///
/// The identifier is the longest prefix made of ASCII letters, ASCII digits
/// and `_`. Returns `None` when that prefix is empty. No check is made here
/// that the first character is a non-digit.
fn extract_identifier_name(s: &str) -> Option<String> {
    let len = s
        .bytes()
        .take_while(|b| b.is_ascii_alphanumeric() || *b == b'_')
        .count();

    // Only ASCII bytes were counted, so `len` is always a char boundary.
    if len == 0 {
        None
    } else {
        Some(s[..len].to_owned())
    }
}
/// Check that `s` has the shape of an env-var name: one ASCII letter or
/// `_`, followed by zero or more of `[A-Za-z0-9_]`.
///
/// The empty string is rejected. This filters out bogus parses, e.g. when
/// the quoted scanner picks up `{`.
fn is_env_var_name(s: &str) -> bool {
    let mut bytes = s.bytes();
    match bytes.next() {
        Some(first) if first.is_ascii_alphabetic() || first == b'_' => {
            // Any byte of a non-ASCII character is >= 0x80 and fails here.
            bytes.all(|b| b.is_ascii_alphanumeric() || b == b'_')
        }
        _ => false,
    }
}
/// Assemble the per-spec secret bag for `entry_file`.
///
/// Every env-var name reported by [`extract_env_var_references`] is paired
/// with the placeholder from [`derive_secret`] for `(spec_hash, name)`.
///
/// Entries keep the order in which [`extract_env_var_references`] returns
/// them (grouped by scan pattern, then by position in the file). That order
/// depends only on the inputs, so two runs over the same inputs produce
/// byte-identical env layouts.
pub fn build_secret_bag(
    entry_file: &Path,
    lang: Lang,
    spec_hash: &str,
) -> Vec<(String, String)> {
    extract_env_var_references(entry_file, lang)
        .into_iter()
        .map(|name| {
            let value = derive_secret(spec_hash, &name).into_string();
            (name, value)
        })
        .collect()
}
/// Hard upper bound on the bytes a staged workdir may consume after
/// `stage_workdir` returns. Phase 09 acceptance pins this to 10 MiB so a
/// pathological full-tree copy regression is caught at the test boundary
@ -165,8 +377,12 @@ pub struct Environment {
/// to the workdir root (e.g. `"src/handler.py"`).
pub staged_sources: Vec<PathBuf>,
/// Environment variables the harness should set before invoking the
/// entry point. Phase 09 stops at the empty set; Phase 10+
/// extensions (stub injection) will populate these.
/// entry point. Populated by [`build_secret_bag`] during
/// [`stage_workdir_full`] (Phase 11 — Track D.4) with deterministic
/// stub values for every env var the entry file literally
/// references. Phase 10 stub endpoints (SQL DB path, HTTP origin
/// URL, etc.) are layered on top by the verifier via
/// [`crate::dynamic::sandbox::SandboxOptions::extra_env`].
pub env_vars: Vec<(String, String)>,
/// Stub registry handles. Reserved for the Phase 10 stub-injection
/// layer; Phase 09 stages no stubs so this is always empty.
@ -385,12 +601,21 @@ pub fn stage_workdir_full(
copy_into_workdir(cfg, workdir, &rel, running_bytes, &mut staged_sources)?;
}
// Phase 11 — Track D.4: populate the per-spec secret bag for every
// env var the entry file literally references. `spec_hash` is empty
// for the legacy [`stage_workdir`] entry point; in that case the
// derived values still hash deterministically (collisions are avoided
// by the env-var name component) but two distinct specs would alias.
// Callers with a real spec hash should use
// [`stage_workdir_full`] / [`stage_workdir_with_spec_hash`].
let env_vars = build_secret_bag(&captured.entry_file, lang, spec_hash);
Ok(Environment {
spec_hash: spec_hash.to_owned(),
workdir: workdir.to_path_buf(),
lockfile: lockfile_in_workdir,
staged_sources,
env_vars: Vec::new(),
env_vars,
stub_handles: Vec::new(),
toolchain: captured.toolchain.clone(),
direct_deps: captured.direct_deps.clone(),

View file

@ -254,7 +254,7 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result<RunOutcome,
for (i, payload) in vuln_payloads.iter().enumerate() {
// Materialise payload bytes (OOB nonce-slot payloads generate a URL).
let (oob_nonce, effective_bytes) = if payload.oob_nonce_slot {
if let Some(ref listener) = effective_opts.oob_listener {
if let Some(listener) = effective_opts.oob_listener() {
let nonce = generate_nonce();
let url = if uses_docker_backend(&effective_opts) {
listener.nonce_url_for_host("host-gateway", &nonce)
@ -280,7 +280,7 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result<RunOutcome,
let mut outcome = sandbox::run(&harness, &effective_bytes, &effective_opts)?;
// For OOB payloads, check the nonce listener and update the outcome flag.
if let (Some(nonce), Some(listener)) = (&oob_nonce, &effective_opts.oob_listener) {
if let (Some(nonce), Some(listener)) = (&oob_nonce, effective_opts.oob_listener()) {
// Poll until the nonce arrives or the budget expires. The sandbox run
// already waited for process exit so the callback should arrive quickly;
// 200 ms covers OS TCP delivery jitter without burning wall-clock at scale.

View file

@ -133,10 +133,13 @@ pub struct SandboxOptions {
pub env_passthrough: Vec<String>,
/// Maximum stdout/stderr bytes captured. Default: 65536 (64 KiB).
pub output_limit: usize,
/// Per-scan OOB listener. When set, the Docker backend uses bridge
/// networking so the harness can reach the listener on the host, and the
/// runner checks [`OobListener::was_nonce_hit`] after each sandbox run.
pub oob_listener: Option<Arc<OobListener>>,
/// Phase 11 (Track D.5): network reachability the harness is allowed
/// to exercise. Default [`NetworkPolicy::None`] — the previous
/// behaviour was equivalent to a binary `oob_listener: Option<...>`;
/// callers wanting OOB callbacks now set
/// [`NetworkPolicy::OobOutbound`]. See [`NetworkPolicy`] for the
/// per-variant backend wiring.
pub network_policy: NetworkPolicy,
/// Per-run structured-oracle [`ProbeChannel`] (Phase 06 — Track C.1).
/// When set, the sandbox forwards the channel's path to the harness via
/// the `NYX_PROBE_PATH` env var so the per-language `__nyx_probe` shim
@ -158,6 +161,19 @@ pub struct SandboxOptions {
pub stub_harness: Option<Arc<crate::dynamic::stubs::StubHarness>>,
}
impl SandboxOptions {
/// Borrow the OOB listener handle when the network policy carries
/// one. Returns `None` for every variant except
/// [`NetworkPolicy::OobOutbound`].
///
/// Kept stable across the Phase 11 cut-over so the runner can keep
/// poking at `effective_opts.oob_listener()` without caring whether
/// the policy machinery moves underneath it.
pub fn oob_listener(&self) -> Option<&Arc<OobListener>> {
self.network_policy.oob_listener()
}
}
impl Default for SandboxOptions {
fn default() -> Self {
Self {
@ -166,7 +182,7 @@ impl Default for SandboxOptions {
backend: SandboxBackend::Auto,
env_passthrough: vec![],
output_limit: 65536,
oob_listener: None,
network_policy: NetworkPolicy::None,
probe_channel: None,
extra_env: Vec::new(),
stub_harness: None,
@ -174,6 +190,98 @@ impl Default for SandboxOptions {
}
}
// ── Phase 11 — Track D.5: NetworkPolicy ──────────────────────────────────────
/// One host/port entry of the allowlist carried by
/// [`NetworkPolicy::StubsOnly`].
///
/// In the Docker container start-up code visible alongside this type, each
/// entry's `host` becomes an `--add-host=<host>:host-gateway` argument, so
/// the name resolves to the host side from inside the container.
///
/// NOTE(review): that code does not read `port` and installs no egress
/// filter for `StubsOnly`; per-entry port filtering appears to be deferred
/// — confirm before relying on this type for isolation.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HostPort {
    /// Host name the harness uses to reach the stub.
    pub host: String,
    /// Port the stub listens on.
    pub port: u16,
}

impl HostPort {
    /// Build an entry from anything convertible into an owned host string.
    pub fn new(host: impl Into<String>, port: u16) -> Self {
        let host: String = host.into();
        HostPort { host, port }
    }
}
/// Phase 11 (Track D.5): network reachability the harness is allowed to
/// exercise. Replaces the legacy `oob_listener: Option<Arc<OobListener>>`
/// binary flag with an enum that distinguishes the four operationally
/// meaningful stances:
///
/// - [`NetworkPolicy::None`] — no outbound network at all (default).
///   Docker: `--network none`. Process backend: caller-imposed; the
///   process backend has no network namespace facility so the policy is
///   structural here (the harness has whatever connectivity the host's
///   `lo`/routes provide; production runs should use the Docker backend
///   for real isolation).
/// - [`NetworkPolicy::StubsOnly`] — only the listed host/port pairs are
///   meant to be reachable. Docker: `bridge` network + `--add-host` per
///   allow-entry. Linux production hardening (netns + nftables) is staged
///   for a follow-up phase; today the variant carries the allowlist for the
///   harness emitter and is mechanically distinguished by the backend
///   selector.
/// - [`NetworkPolicy::OobOutbound`] — the legacy "OOB only" path: the
///   harness can reach the per-scan OOB listener (and only it via the
///   Linux iptables filter in [`apply_oob_egress_filter`]). Docker:
///   `bridge` + host-gateway + iptables OOB-port filter.
/// - [`NetworkPolicy::Open`] — unrestricted outbound. Docker: `bridge`
///   with no egress filter. Reserved for diagnostic / dev-only runs;
///   the verifier never sets this in production.
#[derive(Debug, Clone)]
pub enum NetworkPolicy {
/// No outbound network; the Docker backend passes `--network none`.
None,
/// Allowlist of stub endpoints; the Docker backend uses `bridge` and adds
/// one `--add-host` entry per listed host.
StubsOnly { allow: Vec<HostPort> },
/// Harness may call back to the per-scan OOB listener carried here; on
/// Linux the Docker backend also applies the OOB-port egress filter.
OobOutbound { listener: Arc<OobListener> },
/// Unrestricted outbound; the Docker backend uses `bridge` with no filter.
Open,
}
impl NetworkPolicy {
/// `true` when the docker backend should run the container with a
/// bridge network (i.e. with outbound reachability available, even
/// if filtered). `false` selects `--network none`.
pub fn allows_network(&self) -> bool {
!matches!(self, NetworkPolicy::None)
}
/// OOB listener handle when this policy carries one.
pub fn oob_listener(&self) -> Option<&Arc<OobListener>> {
match self {
NetworkPolicy::OobOutbound { listener } => Some(listener),
_ => None,
}
}
/// Stub allow-list entries when this policy carries one.
pub fn stub_allow_list(&self) -> Option<&[HostPort]> {
match self {
NetworkPolicy::StubsOnly { allow } => Some(allow.as_slice()),
_ => None,
}
}
/// Short tag used by the docker `--add-host` shaper / telemetry.
pub fn variant_tag(&self) -> &'static str {
match self {
NetworkPolicy::None => "none",
NetworkPolicy::StubsOnly { .. } => "stubs-only",
NetworkPolicy::OobOutbound { .. } => "oob-outbound",
NetworkPolicy::Open => "open",
}
}
}
impl Default for NetworkPolicy {
fn default() -> Self {
NetworkPolicy::None
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SandboxBackend {
Auto,
@ -511,8 +619,7 @@ fn run_docker(
// Determine the Python image from the harness command (first element).
// Fall back to python:3-slim when the command is not recognised.
let image = detect_image_for_harness(harness);
let oob_port = opts.oob_listener.as_ref().map(|l| l.port());
start_container(&container_name, &harness.workdir, &image, oob_port)?;
start_container(&container_name, &harness.workdir, &image, &opts.network_policy)?;
registry.insert(container_name.clone(), container_name.clone());
}
@ -553,15 +660,18 @@ fn is_container_running(name: &str) -> bool {
/// - `--rm`: auto-remove on stop (no manual cleanup required).
/// - `--cap-drop=ALL`: drop all Linux capabilities.
/// - `--security-opt no-new-privileges:true`: block privilege escalation.
/// - `--network none`: no network access (loopback only), OR `bridge` when
/// `oob_port` is set so the harness can reach the host OOB listener.
/// - `--add-host=host-gateway:host-gateway`: host-gateway DNS alias when
/// using bridge mode (Docker ≥ 20.10).
/// - Network: derived from [`NetworkPolicy`] —
/// - [`NetworkPolicy::None`] ⇒ `--network none` (no egress).
/// - [`NetworkPolicy::OobOutbound`] ⇒ `bridge` + `--add-host=host-gateway`
/// + (on Linux) iptables OOB-port filter.
/// - [`NetworkPolicy::StubsOnly`] ⇒ `bridge` + one `--add-host` per
/// [`HostPort`] in the allow list so DNS resolves to the host bind.
/// - [`NetworkPolicy::Open`] ⇒ `bridge` with no egress filter.
fn start_container(
name: &str,
workdir: &Path,
image: &str,
oob_port: Option<u16>,
policy: &NetworkPolicy,
) -> Result<(), SandboxError> {
let mut run_args: Vec<String> = vec![
"run".into(),
@ -572,12 +682,26 @@ fn start_container(
"--security-opt".into(), "no-new-privileges:true".into(),
"--tmpfs".into(), "/tmp:size=128m,exec".into(),
];
if oob_port.is_some() {
// Bridge mode: container can reach host via host-gateway.
run_args.extend(["--network".into(), "bridge".into()]);
run_args.extend(["--add-host=host-gateway:host-gateway".into()]);
} else {
run_args.extend(["--network".into(), "none".into()]);
match policy {
NetworkPolicy::None => {
run_args.extend(["--network".into(), "none".into()]);
}
NetworkPolicy::OobOutbound { .. } => {
run_args.extend(["--network".into(), "bridge".into()]);
run_args.extend(["--add-host=host-gateway:host-gateway".into()]);
}
NetworkPolicy::StubsOnly { allow } => {
run_args.extend(["--network".into(), "bridge".into()]);
// host-gateway alias still useful so stubs bound to 127.0.0.1
// can be reached as host-gateway from inside the container.
run_args.extend(["--add-host=host-gateway:host-gateway".into()]);
for hp in allow {
run_args.push(format!("--add-host={}:host-gateway", hp.host));
}
}
NetworkPolicy::Open => {
run_args.extend(["--network".into(), "bridge".into()]);
}
}
run_args.extend([image.into(), "sleep".into(), "300".into()]);
@ -625,9 +749,11 @@ fn start_container(
// This restricts the bridge-networked container to only reach the host
// on the OOB port; all other egress is dropped (§17.2).
#[cfg(target_os = "linux")]
if let Some(port) = oob_port {
apply_oob_egress_filter(name, port);
if let NetworkPolicy::OobOutbound { listener } = policy {
apply_oob_egress_filter(name, listener.port());
}
#[cfg(not(target_os = "linux"))]
let _ = policy; // policy already consumed structurally above
Ok(())
} else {
Err(SandboxError::BackendUnavailable(SandboxBackend::Docker))
@ -862,8 +988,12 @@ fn run_native_binary_docker(
};
if !reused {
let oob_port = opts.oob_listener.as_ref().map(|l| l.port());
start_container(&container_name, &harness.workdir, NATIVE_BINARY_IMAGE, oob_port)?;
start_container(
&container_name,
&harness.workdir,
NATIVE_BINARY_IMAGE,
&opts.network_policy,
)?;
// Copy the compiled binary into the container as /workdir/nyx_harness.
let cp_dst = format!("{container_name}:/workdir/nyx_harness");

View file

@ -68,17 +68,24 @@ impl VerifyOptions {
/// (`src/dynamic/runner.rs` `oob_nonce_slot` branch) while non-OOB
/// payloads continue to run against their existing oracle.
pub fn from_config(config: &Config) -> Self {
use crate::dynamic::sandbox::SandboxBackend;
use crate::dynamic::sandbox::{NetworkPolicy, SandboxBackend};
let backend = match config.scanner.verify_backend.as_str() {
"docker" => SandboxBackend::Docker,
"process" => SandboxBackend::Process,
_ => SandboxBackend::Auto,
};
let oob_listener = OobListener::bind().ok().map(Arc::new);
// Phase 11 — Track D.5: surface the per-scan listener as a
// [`NetworkPolicy::OobOutbound`] so the docker backend turns on
// bridge networking + the iptables egress filter, and the process
// backend reaches the listener via the same accessor as before.
let network_policy = match OobListener::bind().ok().map(Arc::new) {
Some(listener) => NetworkPolicy::OobOutbound { listener },
None => NetworkPolicy::None,
};
Self {
sandbox: SandboxOptions {
backend,
oob_listener,
network_policy,
..SandboxOptions::default()
},
project_root: None,