diff --git a/src/dynamic/lang/java.rs b/src/dynamic/lang/java.rs index 1466cbb7..77d6c81f 100644 --- a/src/dynamic/lang/java.rs +++ b/src/dynamic/lang/java.rs @@ -674,20 +674,33 @@ pub fn emit(spec: &HarnessSpec) -> Result { /// Phase 03 — Track J.1 deserialize harness for Java. /// -/// Emits a `NyxHarness.java` whose `main` wraps the sink in a -/// `RestrictedObjectInputStream` style guard. The shim parses the -/// payload (`NYX_GADGET_CLASS:`); any class outside the -/// allowlist (`java.lang.Integer`, `java.lang.String`) writes a +/// Forges a minimal valid Java serialization stream for the marker +/// class name carried by `NYX_PAYLOAD`, then runs it through a +/// `RestrictedObjectInputStream` subclass whose `resolveClass` override +/// enforces a static allowlist (`java.lang.Integer`, `java.lang.String`). +/// When `resolveClass` sees a non-allowlisted class it writes a /// [`crate::dynamic::probe::ProbeKind::Deserialize`] probe with -/// `gadget_chain_invoked: true` to `NYX_PROBE_PATH` and aborts the -/// chain — this is the resolveClass-driven boundary the brief calls -/// out. +/// `gadget_chain_invoked: true` and throws `InvalidClassException` to +/// abort — matching the JEP-290 / Look-Ahead-OIS hardening pattern +/// real applications use. The blob is built from raw stream bytes +/// (TC_OBJECT → TC_CLASSDESC → class name → SUID → flags → no +/// fields → TC_ENDBLOCKDATA → TC_NULL super) so the resolveClass +/// boundary fires for both vuln and benign payloads; downstream +/// instantiation failures (e.g. `serialVersionUID` mismatch on the +/// allow-listed payload) are caught and treated as non-probe paths. pub fn emit_deserialize_harness(_spec: &HarnessSpec) -> HarnessSource { let shim = probe_shim(); let source = format!( r#"// Nyx dynamic harness — deserialize (Phase 03 / Track J.1). 
+import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataOutputStream; import java.io.FileWriter; import java.io.IOException; +import java.io.InputStream; +import java.io.InvalidClassException; +import java.io.ObjectInputStream; +import java.io.ObjectStreamClass; import java.util.Arrays; import java.util.HashSet; import java.util.Set; @@ -720,16 +733,59 @@ public class NyxHarness {{ }} }} + static class NyxRestrictedOIS extends ObjectInputStream {{ + NyxRestrictedOIS(InputStream in) throws IOException {{ super(in); }} + @Override + protected Class resolveClass(ObjectStreamClass desc) + throws IOException, ClassNotFoundException {{ + String name = desc.getName(); + if (!NYX_ALLOWLIST.contains(name)) {{ + nyxDeserializeProbe(true); + throw new InvalidClassException( + "Nyx restricted-OIS blocked " + name); + }} + return super.resolveClass(desc); + }} + }} + + static byte[] nyxForgeClassDescriptor(String className) throws IOException {{ + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + DataOutputStream dos = new DataOutputStream(baos); + dos.writeShort((short) 0xACED); // STREAM_MAGIC + dos.writeShort((short) 0x0005); // STREAM_VERSION + dos.writeByte(0x73); // TC_OBJECT + dos.writeByte(0x72); // TC_CLASSDESC + dos.writeUTF(className); + dos.writeLong(0L); // serialVersionUID + dos.writeByte(0x02); // SC_SERIALIZABLE + dos.writeShort(0); // 0 fields + dos.writeByte(0x78); // TC_ENDBLOCKDATA + dos.writeByte(0x70); // TC_NULL (no super class) + return baos.toByteArray(); + }} + public static void main(String[] args) {{ String payload = System.getenv("NYX_PAYLOAD"); if (payload == null) payload = ""; String prefix = "NYX_GADGET_CLASS:"; if (payload.startsWith(prefix)) {{ String cls = payload.substring(prefix.length()); - if (!NYX_ALLOWLIST.contains(cls)) {{ - // RestrictedObjectInputStream.resolveClass would refuse - // here; record the gadget invocation before aborting. 
- nyxDeserializeProbe(true); + try {{ + byte[] blob = nyxForgeClassDescriptor(cls); + NyxRestrictedOIS ois = new NyxRestrictedOIS( + new ByteArrayInputStream(blob)); + try {{ + ois.readObject(); + }} finally {{ + try {{ ois.close(); }} catch (IOException ignored) {{}} + }} + }} catch (InvalidClassException e) {{ + // Restricted block — probe already written above. + }} catch (Throwable t) {{ + // Allow-listed but downstream instantiation fails (the + // minimal stream omits the field bytes the real class + // expects). resolveClass already fired; treat as a + // non-probe path. }} }} // Sink-reachability sentinel — runner's `vuln_fired && sink_hit` diff --git a/src/dynamic/lang/php.rs b/src/dynamic/lang/php.rs index 4d311a59..8fe1a0a6 100644 --- a/src/dynamic/lang/php.rs +++ b/src/dynamic/lang/php.rs @@ -522,12 +522,18 @@ pub fn emit(spec: &HarnessSpec) -> Result { /// Phase 03 — Track J.1 deserialize harness for PHP. /// -/// Wraps a call to `unserialize($input, ['allowed_classes' => false])`. -/// The shim parses the payload's `NYX_GADGET_CLASS:` marker; -/// when the marker class is outside the allowlist (`__primitive_int`) -/// the shim writes a [`crate::dynamic::probe::ProbeKind::Deserialize`] -/// probe with `gadget_chain_invoked: true` — simulating the -/// `__wakeup` observer firing on a `__PHP_Incomplete_Class`. +/// Forges a minimal valid PHP serialized object blob +/// (`O::"":0:{{}}`) from the marker carried by +/// `NYX_PAYLOAD`, then runs it through `unserialize` with the +/// `allowed_classes` option set to a static allowlist +/// (`__primitive_int`, `__primitive_string`). When the resulting +/// object is `__PHP_Incomplete_Class` and its preserved class name is +/// outside the allowlist, the shim writes a +/// [`crate::dynamic::probe::ProbeKind::Deserialize`] probe with +/// `gadget_chain_invoked: true` — matching the PHP 7+ hardening +/// pattern (`unserialize($s, ['allowed_classes' => […]])`). 
Both +/// vuln and benign payloads reach the real `unserialize` call; the +/// allowlist post-check distinguishes them. pub fn emit_deserialize_harness(_spec: &HarnessSpec) -> HarnessSource { let shim = probe_shim(); let body = format!( @@ -549,15 +555,32 @@ function _nyx_deserialize_probe(bool $invoked): void {{ @file_put_contents($p, json_encode($rec) . "\n", FILE_APPEND); }} +function _nyx_incomplete_class_name(object $o): string {{ + // __PHP_Incomplete_Class stores the original class name on a + // private-named property; casting to array surfaces it under the + // documented `__PHP_Incomplete_Class_Name` key. + $arr = (array) $o; + return (string) ($arr['__PHP_Incomplete_Class_Name'] ?? ''); +}} + $payload = (string) (getenv('NYX_PAYLOAD') ?: ''); $prefix = 'NYX_GADGET_CLASS:'; if (strncmp($payload, $prefix, strlen($prefix)) === 0) {{ $cls = substr($payload, strlen($prefix)); $allowed = ['__primitive_int', '__primitive_string']; - if (!in_array($cls, $allowed, true)) {{ - _nyx_deserialize_probe(true); + $blob = 'O:' . strlen($cls) . ':"' . $cls . '":0:{{}}'; + $result = @unserialize($blob, ['allowed_classes' => $allowed]); + if (is_object($result) && $result instanceof __PHP_Incomplete_Class) {{ + $name = _nyx_incomplete_class_name($result); + if (!in_array($name, $allowed, true)) {{ + _nyx_deserialize_probe(true); + }} }} }} +// Sink-reachability sentinel — runner's `vuln_fired && sink_hit` +// gate consumes this; without it differential confirmation cannot +// fire even when the probe was written. +echo "__NYX_SINK_HIT__\n"; "# ); HarnessSource { diff --git a/src/dynamic/lang/python.rs b/src/dynamic/lang/python.rs index 0942f21a..812d9abf 100644 --- a/src/dynamic/lang/python.rs +++ b/src/dynamic/lang/python.rs @@ -1265,24 +1265,36 @@ fn indent_lines(src: &str, prefix: &str) -> String { /// Phase 03 — Track J.1 deserialize harness for Python. 
/// -/// Reads the payload (`NYX_GADGET_CLASS:`), constructs a -/// `pickle.Unpickler` whose `find_class` override checks the requested -/// module/class against a static allowlist (`builtins.list`, -/// `builtins.dict`, `builtins.int`). Disallowed classes cause the -/// shim to write a [`crate::dynamic::probe::ProbeKind::Deserialize`] -/// probe with `gadget_chain_invoked: true` before aborting. Wraps the -/// probe shim so the probe channel infrastructure works uniformly +/// Reads the payload (`NYX_GADGET_CLASS:<module>.<name>`), forges a +/// minimal real pickle stream containing a `GLOBAL` opcode for that +/// class, and runs it through a `pickle.Unpickler` subclass whose +/// `find_class` override enforces a static allowlist (`builtins.list`, +/// `builtins.dict`, `builtins.int`, `builtins.str`). When the +/// override sees a non-allowlisted class it writes a +/// [`crate::dynamic::probe::ProbeKind::Deserialize`] probe with +/// `gadget_chain_invoked: true` and raises `UnpicklingError` to abort +/// the load — matching real-world `RestrictedUnpickler` hardening +/// (e.g. the `pickle` docs' "Restricting Globals" recipe, MITRE CWE-502 mitigation guidance). Wraps +/// the probe shim so the probe channel infrastructure works uniformly /// across caps. 
pub fn emit_deserialize_harness(_spec: &HarnessSpec) -> HarnessSource { let probe = probe_shim(); let body = format!( r#"#!/usr/bin/env python3 """Nyx dynamic harness — deserialize (Phase 03 / Track J.1).""" -import os, json, time +import io +import os +import pickle +import time {probe} -_NYX_ALLOWLIST = {{"builtins.list", "builtins.dict", "builtins.int", "builtins.str"}} +_NYX_ALLOWLIST = {{ + ("builtins", "list"), + ("builtins", "dict"), + ("builtins", "int"), + ("builtins", "str"), +}} def _nyx_deserialize_probe(invoked): rec = {{ @@ -1295,16 +1307,48 @@ def _nyx_deserialize_probe(invoked): }} __nyx_emit(rec) +class _NyxRestrictedUnpickler(pickle.Unpickler): + def find_class(self, module, name): + if (module, name) not in _NYX_ALLOWLIST: + _nyx_deserialize_probe(invoked=True) + raise pickle.UnpicklingError( + "Nyx restricted-unpickler blocked %s.%s" % (module, name) + ) + return super().find_class(module, name) + +def _nyx_forge_pickle_blob(qualified_class): + # GLOBAL (op `c`) is the protocol-0 instruction that drives + # `find_class(module, name)` lookup. Encoding: `c\n\n.` + # — the trailing `.` is STOP. rpartition on the last `.` splits a + # qualified name like `nyx.gadget.RCE` into module=`nyx.gadget`, + # name=`RCE`; a bare name without a dot lands in `builtins`. + module, sep, name = qualified_class.rpartition(".") + if not sep: + module, name = "builtins", qualified_class + return ( + b"c" + + module.encode("utf-8") + + b"\n" + + name.encode("utf-8") + + b"\n." + ) + def _nyx_run(): payload = os.environ.get("NYX_PAYLOAD", "") if not payload.startswith("NYX_GADGET_CLASS:"): return - cls = payload[len("NYX_GADGET_CLASS:"):] - if cls in _NYX_ALLOWLIST: - return - # Non-allowlisted class — the RestrictedUnpickler.find_class - # equivalent records the gadget invocation before aborting. 
- _nyx_deserialize_probe(invoked=True) + qualified = payload[len("NYX_GADGET_CLASS:"):] + blob = _nyx_forge_pickle_blob(qualified) + try: + _NyxRestrictedUnpickler(io.BytesIO(blob)).load() + except pickle.UnpicklingError: + # Restricted block — probe already written above. + pass + except (AttributeError, ModuleNotFoundError, ImportError): + # Allow-listed class that doesn't actually resolve at runtime + # (e.g. a stale benign payload) still reaches find_class but + # cannot import; treat as a non-probe path. + pass if __name__ == "__main__": _nyx_run() diff --git a/src/dynamic/lang/ruby.rs b/src/dynamic/lang/ruby.rs index 91f644a4..50def993 100644 --- a/src/dynamic/lang/ruby.rs +++ b/src/dynamic/lang/ruby.rs @@ -856,12 +856,43 @@ def _nyx_deserialize_probe(invoked) File.open(p, 'a') {{ |f| f.write(rec.to_json + "\n") }} end +# Forge a Marshal v4.8 class-reference blob for `name` (opcode `c` +# followed by a long-encoded symbol). Marshal.load resolves the class +# via `Object.const_get`-style lookup before any instantiation; an +# unknown class raises `ArgumentError: undefined class/module ...` — +# the same boundary `Marshal.const_defined?`-style hardening checks. +def _nyx_forge_marshal_class_ref(name) + bytes = name.bytesize + raise ArgumentError, 'class name too long' if bytes >= 256 + if bytes == 0 + len_byte = "\x00".b + elsif bytes < 123 + len_byte = [bytes + 5].pack('C') + else + len_byte = "\x01".b + [bytes].pack('C') + end + "\x04\x08c".b + len_byte + name.b +end + allowlist = ['Integer', 'String', 'Array'] payload = ENV['NYX_PAYLOAD'] || '' if payload.start_with?('NYX_GADGET_CLASS:') cls = payload[('NYX_GADGET_CLASS:'.length)..] - unless allowlist.include?(cls) - _nyx_deserialize_probe(true) + begin + Marshal.load(_nyx_forge_marshal_class_ref(cls)) + rescue ArgumentError => e + # `undefined class/module ` — the Marshal class-resolution + # boundary refused the lookup. 
Real hardening would surface this + # via a `Marshal.const_defined?` pre-check + reject; we record the + # gadget-class invocation here. + if e.message.start_with?('undefined class/module') + _nyx_deserialize_probe(true) + end + rescue TypeError, NameError + # Allow-listed class that exists at load time (e.g. `Integer`) + # resolves cleanly via `Object.const_get` and Marshal returns the + # class object — no rescue path. Other unexpected errors fall + # through without writing a probe. end end # Sink-reachability sentinel — runner's `vuln_fired && sink_hit` diff --git a/tests/python_frameworks_corpus.rs b/tests/python_frameworks_corpus.rs index e684f19d..a0b96efa 100644 --- a/tests/python_frameworks_corpus.rs +++ b/tests/python_frameworks_corpus.rs @@ -8,9 +8,19 @@ //! must produce the same adapter binding shape as the vuln fixtures //! — the adapter only models the route, the differential outcome of //! a verifier run is what distinguishes the two. +//! +//! The `e2e_phase_12` submodule drives `run_spec` on the vuln fixture +//! per framework and asserts `DifferentialVerdict::Confirmed`. These +//! tests rely on `prepare_python` installing the requirements.txt the +//! per-shape emitter stages (Flask / FastAPI+httpx / Django / +//! Starlette+httpx); on hosts where `python3 -m venv` + `pip install` +//! cannot reach a registry the harness build fails and the test +//! silently SKIPs via the established `BuildFailed` pattern. 
#![cfg(feature = "dynamic")] +mod common; + use nyx_scanner::dynamic::framework::{detect_binding, HttpMethod, ParamSource}; use nyx_scanner::evidence::EntryKind; use nyx_scanner::summary::FuncSummary; @@ -168,3 +178,138 @@ fn fastapi_adapter_runs_before_starlette_for_fastapi_files() { detect_binding(&summary, tree.root_node(), src, Lang::Python).expect("adapter fires"); assert_eq!(binding.adapter, "python-fastapi"); } + +// ── End-to-end Phase 12 acceptance via run_spec ───────────────────────────── +// +// Drives `run_spec` on the per-framework vuln fixtures with +// `Cap::CODE_EXEC` and asserts `DifferentialVerdict::Confirmed`. The +// Python harness emitter writes a `requirements.txt` carrying Flask / +// FastAPI+httpx / Django / Starlette+httpx; `prepare_python` runs +// `pip install -r requirements.txt` inside the per-spec venv before +// the harness boots. Hosts without network access or with pip +// install failures trip the established `RunError::BuildFailed` +// branch and the test silently SKIPs. 
+ +#[cfg(feature = "dynamic")] +mod e2e_phase_12 { + use crate::common::fixture_harness::FIXTURE_LOCK; + use nyx_scanner::dynamic::runner::{run_spec, RunError, RunOutcome}; + use nyx_scanner::dynamic::sandbox::SandboxOptions; + use nyx_scanner::dynamic::spec::{ + default_toolchain_id, EntryKind, HarnessSpec, PayloadSlot, SpecDerivationStrategy, + }; + use nyx_scanner::evidence::DifferentialVerdict; + use nyx_scanner::labels::Cap; + use nyx_scanner::symbol::Lang; + use std::path::PathBuf; + use std::process::Command; + use tempfile::TempDir; + + fn command_available(bin: &str) -> bool { + Command::new(bin) + .arg("--version") + .output() + .map(|o| o.status.success()) + .unwrap_or(false) + } + + fn build_spec(fixture_subdir: &str) -> (HarnessSpec, TempDir) { + let fixture_src = PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("tests/dynamic_fixtures/python_frameworks") + .join(fixture_subdir) + .join("vuln.py"); + let tmp = TempDir::new().expect("create tempdir"); + let dst = tmp.path().join("vuln.py"); + std::fs::copy(&fixture_src, &dst).expect("copy fixture into tempdir"); + + let entry_file = dst.to_string_lossy().into_owned(); + let mut digest = blake3::Hasher::new(); + digest.update(b"phase12-e2e-python-framework|"); + digest.update(fixture_subdir.as_bytes()); + let spec_hash = format!("{:016x}", { + let bytes = digest.finalize(); + u64::from_le_bytes(bytes.as_bytes()[..8].try_into().unwrap()) + }); + + let spec = HarnessSpec { + finding_id: spec_hash.clone(), + entry_file: entry_file.clone(), + entry_name: "run_cmd".to_owned(), + entry_kind: EntryKind::HttpRoute, + lang: Lang::Python, + toolchain_id: default_toolchain_id(Lang::Python).into(), + payload_slot: PayloadSlot::QueryParam("cmd".to_owned()), + expected_cap: Cap::CODE_EXEC, + constraint_hints: vec![], + sink_file: entry_file, + sink_line: 1, + spec_hash: spec_hash.clone(), + derivation: SpecDerivationStrategy::FromFlowSteps, + stubs_required: vec![], + framework: None, + java_toolchain: 
nyx_scanner::dynamic::spec::JavaToolchain::default(), + }; + + (spec, tmp) + } + + fn run(fixture_subdir: &str) -> Option { + if !command_available("python3") { + eprintln!("SKIP {fixture_subdir}: missing python3"); + return None; + } + let _guard = FIXTURE_LOCK.lock().unwrap_or_else(|e| e.into_inner()); + let (spec, _tmp) = build_spec(fixture_subdir); + let opts = SandboxOptions { + backend: nyx_scanner::dynamic::sandbox::SandboxBackend::Process, + ..SandboxOptions::default() + }; + match run_spec(&spec, &opts) { + Ok(outcome) => Some(outcome), + Err(RunError::BuildFailed { stderr, attempts }) => { + eprintln!( + "SKIP {fixture_subdir}: harness build failed after {attempts} attempts: {stderr}", + ); + None + } + Err(e) => panic!("run_spec({fixture_subdir}) errored: {e:?}"), + } + } + + fn assert_confirmed(fixture_subdir: &str) { + let Some(outcome) = run(fixture_subdir) else { return }; + assert!( + outcome.triggered_by.is_some(), + "{fixture_subdir} CODE_EXEC vuln must Confirm via run_spec; got {outcome:?}", + ); + let diff = outcome + .differential + .as_ref() + .expect("Confirmed run must carry a DifferentialOutcome"); + assert_eq!( + diff.verdict, + DifferentialVerdict::Confirmed, + "differential verdict must be Confirmed: {diff:?}", + ); + } + + #[test] + fn flask_vuln_confirms_via_run_spec() { + assert_confirmed("flask"); + } + + #[test] + fn fastapi_vuln_confirms_via_run_spec() { + assert_confirmed("fastapi"); + } + + #[test] + fn django_vuln_confirms_via_run_spec() { + assert_confirmed("django"); + } + + #[test] + fn starlette_vuln_confirms_via_run_spec() { + assert_confirmed("starlette"); + } +}