[pitboss] phase 03: Track J.1 + Track L.1 — DESERIALIZE corpus + Java/Python/PHP/Ruby adapters

This commit is contained in:
pitboss 2026-05-17 16:37:20 -05:00
parent 01fcaab310
commit 9dc60b51c0
33 changed files with 1625 additions and 53 deletions

View file

@ -48,6 +48,7 @@ pub mod audit;
pub mod registry;
mod cmdi;
mod deserialize;
mod fmt_string;
mod path_trav;
mod sqli;
@ -55,8 +56,9 @@ mod ssrf;
mod xss;
pub use registry::{
audit_marker_collisions, benign_payload_for, materialise_bytes, payloads_for,
payloads_for_lang, resolve_benign_control, CORPUS, CORPUS_UNSUPPORTED_LANG_NEUTRAL,
audit_marker_collisions, benign_payload_for, benign_payload_for_lang, materialise_bytes,
payloads_for, payloads_for_lang, resolve_benign_control, resolve_benign_control_lang,
CORPUS, CORPUS_UNSUPPORTED_LANG_NEUTRAL,
};
/// Re-exported canonical [`Oracle`] type.
@ -81,7 +83,8 @@ pub use crate::dynamic::oracle::Oracle;
/// | 4 | 2026-05-14 | Phase 07: `benign_control` paired refs + benign payloads added to SQLI / CMDI / SSRF (file-scheme) |
/// | 5 | 2026-05-16 | FMT_STRING SinkCrash payload + benign control (Phase 08 unrelated-crash acceptance fixture) |
/// | 6 | 2026-05-17 | Phase 02 / Track J.0: `(Cap, Lang)` registry refactor; `no_benign_control_rationale` field; compile-time provenance audit |
pub const CORPUS_VERSION: u32 = 6;
/// | 7 | 2026-05-17 | Phase 03 / Track J.1: `DESERIALIZE` cap lit for Java / Python / PHP / Ruby; `ProbeKind::Deserialize` + `ProbePredicate::DeserializeGadgetInvoked` |
pub const CORPUS_VERSION: u32 = 7;
/// Where a payload originated.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]

View file

@ -162,6 +162,41 @@ pub fn audit_cap_coverage_runtime() -> Result<(), String> {
Ok(())
}
/// Track J.0 deferred audit: within a single cap, a benign payload's label
/// must not be registered under more than one language.
///
/// If two lang slices of the same cap shared a benign label, the
/// lang-agnostic union shim could resolve a vuln payload in language A
/// against a benign payload declared in language B (the latent §4.1 bug
/// captured in the deferred queue).
///
/// Only benign entries are inspected. A label repeated inside the *same*
/// `(cap, lang)` slice is not reported by this audit.
///
/// # Errors
///
/// Returns a human-readable message naming the label, the cap bits and the
/// two colliding languages for the first cross-language collision found
/// while walking `CORPUS.entries` in order.
pub fn audit_benign_label_uniqueness_runtime() -> Result<(), String> {
    use std::collections::HashMap;

    // cap bits -> (benign label -> language that most recently registered it)
    let mut by_cap: HashMap<u32, HashMap<&'static str, crate::symbol::Lang>> = HashMap::new();
    for &(cap, lang, slice) in CORPUS.entries {
        let bucket = by_cap.entry(cap.bits()).or_default();
        for p in slice {
            if !p.is_benign {
                continue;
            }
            // `insert` hands back the previous owner of the label, if any.
            if let Some(prev_lang) = bucket.insert(p.label, lang) {
                if prev_lang != lang {
                    return Err(format!(
                        "benign label {:?} for cap {:#x} is registered in both \
                         {:?} and {:?}; the lang-agnostic resolve_benign_control \
                         could match the wrong language",
                        p.label,
                        cap.bits(),
                        prev_lang,
                        lang,
                    ));
                }
            }
        }
    }
    Ok(())
}
#[cfg(test)]
mod corpus_registry {
use super::*;
@ -172,5 +207,7 @@ mod corpus_registry {
fn audit() {
audit_benign_controls_runtime().expect("benign_control audit failed");
audit_cap_coverage_runtime().expect("cap coverage audit failed");
audit_benign_label_uniqueness_runtime()
.expect("benign label uniqueness audit failed");
}
}

View file

@ -0,0 +1,66 @@
//! Java `Cap::DESERIALIZE` payloads.
//!
//! Vuln payload: a plain-text `NYX_GADGET_CLASS:<class>` marker — standing
//! in for a base64-encoded `java.io.ObjectInputStream` byte stream — that
//! names a gadget class outside the harness's allowlist.
//! The harness's `RestrictedObjectInputStream.resolveClass` intercepts
//! the lookup and emits a `ProbeKind::Deserialize { gadget_chain_invoked:
//! true }` probe before aborting the chain.
//!
//! Benign control: the same marker format naming the single allow-listed
//! class `java.lang.Integer`. The class lives inside the resolveClass
//! allowlist so no Deserialize probe is emitted.
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
use crate::dynamic::oracle::ProbePredicate;
/// Curated Java `Cap::DESERIALIZE` payloads: one gadget (vuln) payload
/// followed by the benign control it references by label.
pub const PAYLOADS: &[CuratedPayload] = &[
CuratedPayload {
// Marker class name embedded in the serialized stream — the
// harness allowlist contains `java.lang.Integer` and `java.lang.String`
// only. The byte form is a small literal so const-eval can keep it.
bytes: b"NYX_GADGET_CLASS:org.nyx.deserialize.Gadget",
label: "java-deserialize-gadget",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::DeserializeGadgetInvoked {
require_invoked: true,
}],
},
is_benign: false,
provenance: PayloadProvenance::Curated,
// Introduced with corpus version 7.
since_corpus_version: 7,
deprecated_at_corpus_version: None,
fixture_paths: &[
"tests/dynamic_fixtures/deserialize/java/vuln.java",
],
// Static payload: no out-of-band nonce slot.
oob_nonce_slot: false,
probe_predicates: &[ProbePredicate::DeserializeGadgetInvoked {
require_invoked: true,
}],
// Refers, by label, to the benign entry declared next in this slice.
benign_control: Some(PayloadRef {
label: "java-deserialize-benign",
}),
no_benign_control_rationale: None,
},
CuratedPayload {
// Allow-listed payload — the marker carries `java.lang.Integer`,
// which the harness resolveClass accepts without writing a probe.
bytes: b"NYX_GADGET_CLASS:java.lang.Integer",
label: "java-deserialize-benign",
// NOTE(review): the oracle repeats the vuln entry's predicate while
// `probe_predicates` below is empty — confirm the asymmetry is intended.
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::DeserializeGadgetInvoked {
require_invoked: true,
}],
},
is_benign: true,
provenance: PayloadProvenance::Curated,
since_corpus_version: 7,
deprecated_at_corpus_version: None,
fixture_paths: &[
"tests/dynamic_fixtures/deserialize/java/benign.java",
],
oob_nonce_slot: false,
probe_predicates: &[],
// A benign entry is itself the control, so it references none.
benign_control: None,
no_benign_control_rationale: None,
},
];

View file

@ -0,0 +1,17 @@
//! Deserialization (`Cap::DESERIALIZE`) per-language payload slices.
//!
//! Phase 03 (Track J.1) lands the first cap end-to-end: Java
//! (`ObjectInputStream.readObject` / `XMLDecoder`), Python (`pickle.loads`
//! / `yaml.unsafe_load`), PHP (`unserialize`), and Ruby (`Marshal.load`
//! / `YAML.load`). Every vuln payload is paired with a benign control
//! whose oracle should *not* fire — the per-language harness shims
//! emit a [`crate::dynamic::probe::ProbeKind::Deserialize`] record with
//! `gadget_chain_invoked: true` when a non-allowlisted gadget class is
//! materialised by the instrumented deserialiser; benign well-formed
//! serialized data does not reach the allowlist boundary and so leaves
//! no Deserialize probe.
pub mod java;
pub mod php;
pub mod python;
pub mod ruby;

View file

@ -0,0 +1,64 @@
//! PHP `Cap::DESERIALIZE` payloads.
//!
//! Vuln payload: marker string handed to `unserialize($input)` where the
//! harness wraps the call with `['allowed_classes' => false]` and an
//! observer on `__wakeup`. When `unserialize` materialises a
//! `__PHP_Incomplete_Class` from a non-allowlisted class name, the
//! observer emits a `ProbeKind::Deserialize { gadget_chain_invoked:
//! true }` probe.
//!
//! Benign control: serialised primitive (an `int`) that
//! `unserialize` materialises without engaging the allowlist boundary.
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
use crate::dynamic::oracle::ProbePredicate;
/// Curated PHP `Cap::DESERIALIZE` payloads: one gadget (vuln) payload
/// followed by the benign control it references by label.
pub const PAYLOADS: &[CuratedPayload] = &[
CuratedPayload {
// Marker naming `PHP_Object_Injection_RCE`; per the module docs a
// non-allowlisted class name is what makes the observer emit a probe.
bytes: b"NYX_GADGET_CLASS:PHP_Object_Injection_RCE",
label: "php-unserialize-gadget",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::DeserializeGadgetInvoked {
require_invoked: true,
}],
},
is_benign: false,
provenance: PayloadProvenance::Curated,
// Introduced with corpus version 7.
since_corpus_version: 7,
deprecated_at_corpus_version: None,
fixture_paths: &[
"tests/dynamic_fixtures/deserialize/php/vuln.php",
],
// Static payload: no out-of-band nonce slot.
oob_nonce_slot: false,
probe_predicates: &[ProbePredicate::DeserializeGadgetInvoked {
require_invoked: true,
}],
// Refers, by label, to the benign entry declared next in this slice.
benign_control: Some(PayloadRef {
label: "php-unserialize-benign",
}),
no_benign_control_rationale: None,
},
CuratedPayload {
// Allow-listed marker — the harness allowlist accepts
// `__primitive_int` as a no-op type representing a serialised
// integer literal.
bytes: b"NYX_GADGET_CLASS:__primitive_int",
label: "php-unserialize-benign",
// NOTE(review): the oracle repeats the vuln entry's predicate while
// `probe_predicates` below is empty — confirm the asymmetry is intended.
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::DeserializeGadgetInvoked {
require_invoked: true,
}],
},
is_benign: true,
provenance: PayloadProvenance::Curated,
since_corpus_version: 7,
deprecated_at_corpus_version: None,
fixture_paths: &[
"tests/dynamic_fixtures/deserialize/php/benign.php",
],
oob_nonce_slot: false,
probe_predicates: &[],
// A benign entry is itself the control, so it references none.
benign_control: None,
no_benign_control_rationale: None,
},
];

View file

@ -0,0 +1,60 @@
//! Python `Cap::DESERIALIZE` payloads.
//!
//! Vuln payload: marker string consumed by the harness shim which calls
//! `pickle.Unpickler(...).load()` with `find_class` overridden to record
//! a `ProbeKind::Deserialize { gadget_chain_invoked: true }` whenever a
//! non-allowlisted class is requested. The harness allowlists
//! `builtins.list` / `builtins.dict` / `builtins.int`; the marker class
//! `nyx.gadget.RCE` is outside that set.
//!
//! Benign control: payload requests only allow-listed builtins.
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
use crate::dynamic::oracle::ProbePredicate;
/// Curated Python `Cap::DESERIALIZE` payloads: one gadget (vuln) payload
/// followed by the benign control it references by label.
pub const PAYLOADS: &[CuratedPayload] = &[
CuratedPayload {
// Marker naming `nyx.gadget.RCE`, which the module docs place outside
// the harness allowlist of builtins.
bytes: b"NYX_GADGET_CLASS:nyx.gadget.RCE",
label: "python-pickle-gadget",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::DeserializeGadgetInvoked {
require_invoked: true,
}],
},
is_benign: false,
provenance: PayloadProvenance::Curated,
// Introduced with corpus version 7.
since_corpus_version: 7,
deprecated_at_corpus_version: None,
fixture_paths: &[
"tests/dynamic_fixtures/deserialize/python/vuln.py",
],
// Static payload: no out-of-band nonce slot.
oob_nonce_slot: false,
probe_predicates: &[ProbePredicate::DeserializeGadgetInvoked {
require_invoked: true,
}],
// Refers, by label, to the benign entry declared next in this slice.
benign_control: Some(PayloadRef {
label: "python-pickle-benign",
}),
no_benign_control_rationale: None,
},
CuratedPayload {
// Marker naming `builtins.list`, one of the allow-listed builtins
// listed in the module docs.
bytes: b"NYX_GADGET_CLASS:builtins.list",
label: "python-pickle-benign",
// NOTE(review): the oracle repeats the vuln entry's predicate while
// `probe_predicates` below is empty — confirm the asymmetry is intended.
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::DeserializeGadgetInvoked {
require_invoked: true,
}],
},
is_benign: true,
provenance: PayloadProvenance::Curated,
since_corpus_version: 7,
deprecated_at_corpus_version: None,
fixture_paths: &[
"tests/dynamic_fixtures/deserialize/python/benign.py",
],
oob_nonce_slot: false,
probe_predicates: &[],
// A benign entry is itself the control, so it references none.
benign_control: None,
no_benign_control_rationale: None,
},
];

View file

@ -0,0 +1,61 @@
//! Ruby `Cap::DESERIALIZE` payloads.
//!
//! Vuln payload: marker string consumed by the harness shim which calls
//! `Marshal.load(input)` with `Marshal.const_defined?`-style
//! instrumentation that records a `ProbeKind::Deserialize {
//! gadget_chain_invoked: true }` probe whenever a non-allowlisted
//! constant is materialised. The harness allowlist contains `Integer`
//! / `String` / `Array`.
//!
//! Benign control: marker requests only the allow-listed `Integer`
//! constant.
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
use crate::dynamic::oracle::ProbePredicate;
/// Curated Ruby `Cap::DESERIALIZE` payloads: one gadget (vuln) payload
/// followed by the benign control it references by label.
pub const PAYLOADS: &[CuratedPayload] = &[
CuratedPayload {
// Marker naming `Nyx::Gadget::RCE`, which is not among the allow-listed
// constants (`Integer` / `String` / `Array`) named in the module docs.
bytes: b"NYX_GADGET_CLASS:Nyx::Gadget::RCE",
label: "ruby-marshal-gadget",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::DeserializeGadgetInvoked {
require_invoked: true,
}],
},
is_benign: false,
provenance: PayloadProvenance::Curated,
// Introduced with corpus version 7.
since_corpus_version: 7,
deprecated_at_corpus_version: None,
fixture_paths: &[
"tests/dynamic_fixtures/deserialize/ruby/vuln.rb",
],
// Static payload: no out-of-band nonce slot.
oob_nonce_slot: false,
probe_predicates: &[ProbePredicate::DeserializeGadgetInvoked {
require_invoked: true,
}],
// Refers, by label, to the benign entry declared next in this slice.
benign_control: Some(PayloadRef {
label: "ruby-marshal-benign",
}),
no_benign_control_rationale: None,
},
CuratedPayload {
// Marker naming the allow-listed `Integer` constant.
bytes: b"NYX_GADGET_CLASS:Integer",
label: "ruby-marshal-benign",
// NOTE(review): the oracle repeats the vuln entry's predicate while
// `probe_predicates` below is empty — confirm the asymmetry is intended.
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::DeserializeGadgetInvoked {
require_invoked: true,
}],
},
is_benign: true,
provenance: PayloadProvenance::Curated,
since_corpus_version: 7,
deprecated_at_corpus_version: None,
fixture_paths: &[
"tests/dynamic_fixtures/deserialize/ruby/benign.rb",
],
oob_nonce_slot: false,
probe_predicates: &[],
// A benign entry is itself the control, so it references none.
benign_control: None,
no_benign_control_rationale: None,
},
];

View file

@ -23,7 +23,7 @@
use std::collections::HashMap;
use std::sync::OnceLock;
use super::{cmdi, fmt_string, path_trav, sqli, ssrf, xss};
use super::{cmdi, deserialize, fmt_string, path_trav, sqli, ssrf, xss};
use super::{CapCorpus, CuratedPayload, Oracle};
use crate::dynamic::oracle::ProbePredicate;
use crate::labels::Cap;
@ -37,7 +37,6 @@ pub const CORPUS_UNSUPPORTED_LANG_NEUTRAL: u32 = Cap::ENV_VAR.bits()
| Cap::SHELL_ESCAPE.bits()
| Cap::URL_ENCODE.bits()
| Cap::JSON_PARSE.bits()
| Cap::DESERIALIZE.bits()
| Cap::CRYPTO.bits()
| Cap::UNAUTHORIZED_ID.bits()
| Cap::DATA_EXFIL.bits()
@ -58,6 +57,10 @@ const ENTRIES: &[(Cap, Lang, &[CuratedPayload])] = &[
(Cap::SSRF, Lang::Rust, ssrf::rust::PAYLOADS),
(Cap::HTML_ESCAPE, Lang::Rust, xss::rust::PAYLOADS),
(Cap::FMT_STRING, Lang::C, fmt_string::c::PAYLOADS),
(Cap::DESERIALIZE, Lang::Java, deserialize::java::PAYLOADS),
(Cap::DESERIALIZE, Lang::Python, deserialize::python::PAYLOADS),
(Cap::DESERIALIZE, Lang::Php, deserialize::php::PAYLOADS),
(Cap::DESERIALIZE, Lang::Ruby, deserialize::ruby::PAYLOADS),
];
/// Reserved for per-cap oracle defaults. Empty in Phase 02; populated by
@ -114,10 +117,23 @@ pub fn payloads_for(cap: Cap) -> &'static [CuratedPayload] {
}
/// First benign control payload registered for `cap`, or `None` when the
/// cap has no benign entry.
///
/// This is the lang-agnostic union shim: it scans whatever
/// [`payloads_for`] returns for the cap, in that order. When the caller
/// knows the harness's [`Lang`], use [`benign_payload_for_lang`] instead
/// so that a label shared across languages (e.g. an `ssrf-benign` label
/// registered for both Rust and Python) cannot resolve to a
/// wrong-language fixture.
pub fn benign_payload_for(cap: Cap) -> Option<&'static CuratedPayload> {
    let candidates = payloads_for(cap);
    candidates.iter().find(|payload| payload.is_benign)
}
/// Lang-aware counterpart of [`benign_payload_for`].
///
/// Only the `(cap, lang)` slice is searched, so the benign control that
/// comes back always belongs to the requested language vertical. Returns
/// `None` when that slice holds no benign entry.
pub fn benign_payload_for_lang(cap: Cap, lang: Lang) -> Option<&'static CuratedPayload> {
    let candidates = payloads_for_lang(cap, lang);
    candidates.iter().find(|payload| payload.is_benign)
}
/// Resolve a [`CuratedPayload::benign_control`] reference to the matching
/// benign entry inside the same cap's payload slice (across all langs).
///
@ -126,6 +142,13 @@ pub fn benign_payload_for(cap: Cap) -> Option<&'static CuratedPayload> {
/// non-benign in the corpus. The runner treats the `None` result as
/// `NoControl` and downgrades the verdict to
/// [`crate::evidence::InconclusiveReason::NoBenignControl`].
///
/// Lang-agnostic union shim — kept for the small set of pre-Phase-03
/// callers that do not carry a [`Lang`] at the call site. Prefer
/// [`resolve_benign_control_lang`] in any new code: with multiple
/// `(cap, lang)` slices registered for the same cap, the union shim
/// can match a wrong-language fixture's label and silently confirm
/// against a benign that never ran.
pub fn resolve_benign_control(
vuln_payload: &CuratedPayload,
cap: Cap,
@ -136,6 +159,22 @@ pub fn resolve_benign_control(
.find(|p| p.is_benign && p.label == r.label)
}
/// Lang-aware counterpart of [`resolve_benign_control`].
///
/// The lookup is confined to the `(cap, lang)` slice that produced the
/// vuln payload, so the differential rule (§4.1) can never compare
/// against a wrong-language benign even when two language slices share a
/// label. Phase 03 wires this through [`crate::dynamic::runner`].
///
/// Returns `None` when `vuln_payload` declares no `benign_control`, or
/// when no benign entry in the slice carries the referenced label.
pub fn resolve_benign_control_lang(
    vuln_payload: &CuratedPayload,
    cap: Cap,
    lang: Lang,
) -> Option<&'static CuratedPayload> {
    vuln_payload.benign_control.and_then(|control| {
        payloads_for_lang(cap, lang)
            .iter()
            .find(|candidate| candidate.is_benign && candidate.label == control.label)
    })
}
/// Materialise the effective bytes for a payload.
///
/// For static payloads (`oob_nonce_slot == false`) returns the `bytes`
@ -237,7 +276,6 @@ mod tests {
Cap::SHELL_ESCAPE,
Cap::URL_ENCODE,
Cap::JSON_PARSE,
Cap::DESERIALIZE,
Cap::CRYPTO,
Cap::UNAUTHORIZED_ID,
Cap::DATA_EXFIL,
@ -275,6 +313,7 @@ mod tests {
Cap::FILE_IO,
Cap::HTML_ESCAPE,
Cap::FMT_STRING,
Cap::DESERIALIZE,
] {
let has_vuln = payloads_for(cap).iter().any(|p| !p.is_benign);
assert!(has_vuln, "{cap:?} must have at least one vuln payload");
@ -321,6 +360,7 @@ mod tests {
Cap::SSRF,
Cap::HTML_ESCAPE,
Cap::FMT_STRING,
Cap::DESERIALIZE,
];
for cap in caps {
for p in payloads_for(cap) {
@ -342,6 +382,7 @@ mod tests {
Cap::SSRF,
Cap::HTML_ESCAPE,
Cap::FMT_STRING,
Cap::DESERIALIZE,
];
for cap in caps {
for p in payloads_for(cap) {
@ -450,6 +491,7 @@ mod tests {
Cap::SSRF,
Cap::HTML_ESCAPE,
Cap::FMT_STRING,
Cap::DESERIALIZE,
];
for cap in caps {
for p in payloads_for(cap).iter().filter(|p| p.is_benign) {
@ -474,10 +516,23 @@ mod tests {
#[test]
fn back_compat_union_matches_registered_entry() {
// With one (cap, lang) entry per cap, the union must contain the
// same labels as the underlying slice (byte-identical verdict
// requirement, Phase 02 acceptance).
// For caps with one (cap, lang) entry only, the lang-agnostic
// union must contain the same labels as the underlying slice
// (byte-identical verdict requirement, Phase 02 acceptance).
// Phase 03 introduces multi-lang caps (DESERIALIZE), so single-
// entry caps are filtered separately from the union check.
use std::collections::HashMap;
let mut entries_by_cap: HashMap<u32, Vec<(Lang, &'static [CuratedPayload])>> =
HashMap::new();
for &(cap, lang, slice) in CORPUS.entries {
entries_by_cap.entry(cap.bits()).or_default().push((lang, slice));
}
for (cap_bits, langs) in &entries_by_cap {
if langs.len() != 1 {
continue;
}
let (lang, slice) = langs[0];
let cap = Cap::from_bits_truncate(*cap_bits);
let union = payloads_for(cap);
assert_eq!(
union.len(),
@ -490,4 +545,49 @@ mod tests {
}
}
}
#[test]
fn deserialize_has_per_lang_slices_for_phase_03() {
    // Phase 03 (Track J.1) acceptance: the lang-aware lookup yields
    // DESERIALIZE payloads for Java / Python / PHP / Ruby, while the
    // languages not yet covered (Rust / C / Cpp / Go / JS / TS) still
    // come back empty. `true` marks a language that must have payloads.
    let expectations = [
        (Lang::Java, true),
        (Lang::Python, true),
        (Lang::Php, true),
        (Lang::Ruby, true),
        (Lang::Rust, false),
        (Lang::C, false),
        (Lang::Cpp, false),
        (Lang::Go, false),
        (Lang::JavaScript, false),
        (Lang::TypeScript, false),
    ];
    for (lang, expect_payloads) in expectations {
        let is_empty = payloads_for_lang(Cap::DESERIALIZE, lang).is_empty();
        if expect_payloads {
            assert!(
                !is_empty,
                "DESERIALIZE must have at least one payload for {lang:?}",
            );
        } else {
            assert!(
                is_empty,
                "DESERIALIZE has unexpected payloads for {lang:?}",
            );
        }
    }
}
#[test]
fn deserialize_payloads_pair_benign_controls_per_lang() {
    // For every Phase-03 language, the first vuln payload's benign
    // control must resolve through the lang-aware resolver, i.e. inside
    // the payload's own slice (see audit_benign_label_uniqueness_runtime).
    let langs = [Lang::Java, Lang::Python, Lang::Php, Lang::Ruby];
    for lang in langs {
        let vuln = payloads_for_lang(Cap::DESERIALIZE, lang)
            .iter()
            .find(|payload| !payload.is_benign)
            .expect("each lang must have a vuln payload");
        match super::resolve_benign_control_lang(vuln, Cap::DESERIALIZE, lang) {
            Some(control) => assert!(control.is_benign),
            None => panic!("lang-aware benign control must resolve"),
        }
    }
}
}

View file

@ -0,0 +1,97 @@
//! Java [`super::super::FrameworkAdapter`] matching deserialization sinks.
//!
//! Fires when the function body invokes `ObjectInputStream.readObject`
//! or `XMLDecoder.readObject` (matched by the last segment of the
//! callee name — the call graph normaliser drops the receiver).
use crate::dynamic::framework::{FrameworkAdapter, FrameworkBinding};
use crate::evidence::EntryKind;
use crate::summary::FuncSummary;
use crate::symbol::Lang;
/// Stateless adapter that recognises Java deserialization sinks.
pub struct JavaDeserializeAdapter;

/// Identifier this adapter reports for itself.
const ADAPTER_NAME: &str = "java-deserialize";

/// True when the last `.`-separated segment of `name` is one of
/// `readObject`, `fromXML` or `deserialize`.
///
/// Only the final segment is inspected, so a bare `readObject` and a
/// fully qualified `java.io.ObjectInputStream.readObject` are treated
/// alike. The comparison is case-sensitive.
fn callee_is_java_deserialize(name: &str) -> bool {
    // `rsplit` always yields at least one item; the fallback is never hit.
    let method = name.rsplit('.').next().unwrap_or(name);
    ["readObject", "fromXML", "deserialize"].contains(&method)
}
impl FrameworkAdapter for JavaDeserializeAdapter {
    /// Stable adapter identifier (`"java-deserialize"`).
    fn name(&self) -> &'static str {
        ADAPTER_NAME
    }

    /// This adapter only applies to Java sources.
    fn lang(&self) -> Lang {
        Lang::Java
    }

    /// Bind the function when it looks like a Java deserialization sink.
    ///
    /// A binding is produced when either
    /// * one of the summary's callees satisfies
    ///   `callee_is_java_deserialize`, or
    /// * the raw file bytes contain `ObjectInputStream` or `XMLDecoder`
    ///   anywhere (a whole-file substring test, not scoped to the
    ///   function).
    ///
    /// The AST argument is not consulted.
    fn detect(
        &self,
        summary: &FuncSummary,
        _ast: tree_sitter::Node<'_>,
        file_bytes: &[u8],
    ) -> Option<FrameworkBinding> {
        // Plain byte-substring search over the whole file.
        let file_mentions = |needle: &[u8]| {
            file_bytes
                .windows(needle.len())
                .any(|window| window == needle)
        };

        let is_sink = super::any_callee_matches(summary, callee_is_java_deserialize)
            || file_mentions(b"ObjectInputStream")
            || file_mentions(b"XMLDecoder");
        if !is_sink {
            return None;
        }

        Some(FrameworkBinding {
            adapter: ADAPTER_NAME.to_owned(),
            kind: EntryKind::Function,
            route: None,
            request_params: Vec::new(),
            response_writer: None,
            middleware: Vec::new(),
        })
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `src` with the tree-sitter Java grammar.
    fn parse_java(src: &[u8]) -> tree_sitter::Tree {
        let grammar = tree_sitter::Language::from(tree_sitter_java::LANGUAGE);
        let mut parser = tree_sitter::Parser::new();
        parser.set_language(&grammar).unwrap();
        parser.parse(src, None).unwrap()
    }

    /// Summary for a function named `run`; every other field is defaulted.
    fn run_summary() -> FuncSummary {
        FuncSummary {
            name: "run".into(),
            ..Default::default()
        }
    }

    #[test]
    fn fires_when_source_imports_object_input_stream() {
        let src: &[u8] = b"import java.io.ObjectInputStream;\npublic class V { public static void run(byte[] b) {} }\n";
        let tree = parse_java(src);
        let detected = JavaDeserializeAdapter.detect(&run_summary(), tree.root_node(), src);
        let binding = detected.expect("must fire on ObjectInputStream source");
        assert_eq!(binding.adapter, ADAPTER_NAME);
        assert_eq!(binding.kind, EntryKind::Function);
    }

    #[test]
    fn skips_plain_function() {
        let src: &[u8] =
            b"public class V { public static void run(String b) { System.out.println(b); } }\n";
        let tree = parse_java(src);
        let detected = JavaDeserializeAdapter.detect(&run_summary(), tree.root_node(), src);
        assert!(detected.is_none());
    }
}

View file

@ -0,0 +1,30 @@
//! Concrete [`super::FrameworkAdapter`] implementations.
//!
//! Phase 03 (Track J.1) lands the first four adapters — one per
//! language carrying the new `Cap::DESERIALIZE` corpus. Each adapter
//! detects the language's canonical deserialization sink inside a
//! function body and stamps a [`super::FrameworkBinding`] with
//! [`crate::evidence::EntryKind::Function`]. Track L.1+ will register
//! the route / framework adapters; the per-cap sink adapters live here
//! so the per-language verticals can ship independently.
pub mod java_deserialize;
pub mod php_unserialize;
pub mod python_pickle;
pub mod ruby_marshal;
pub use java_deserialize::JavaDeserializeAdapter;
pub use php_unserialize::PhpUnserializeAdapter;
pub use python_pickle::PythonPickleAdapter;
pub use ruby_marshal::RubyMarshalAdapter;
/// Reports whether `predicate` accepts the name of at least one entry in
/// `summary.callees`. Stops at the first accepted name; an empty callee
/// list yields `false`.
fn any_callee_matches(
    summary: &crate::summary::FuncSummary,
    predicate: impl Fn(&str) -> bool,
) -> bool {
    for callee in summary.callees.iter() {
        if predicate(callee.name.as_str()) {
            return true;
        }
    }
    false
}

View file

@ -0,0 +1,88 @@
//! PHP [`super::super::FrameworkAdapter`] matching `unserialize` sinks.
use crate::dynamic::framework::{FrameworkAdapter, FrameworkBinding};
use crate::evidence::EntryKind;
use crate::summary::FuncSummary;
use crate::symbol::Lang;
/// Stateless adapter that recognises PHP `unserialize` sinks.
pub struct PhpUnserializeAdapter;

/// Identifier this adapter reports for itself.
const ADAPTER_NAME: &str = "php-unserialize";

/// True when `name` refers to `unserialize`.
///
/// Any namespace prefix (everything up to the last `\`) and any class
/// qualifier (everything up to the last `::`) is stripped first, so
/// `\unserialize` and `Foo\Bar::unserialize` both match.
///
/// The comparison ignores ASCII case because PHP resolves function names
/// case-insensitively: `Unserialize($x)` reaches the same sink.
fn callee_is_php_deserialize(name: &str) -> bool {
    let last = name.rsplit_once('\\').map(|(_, s)| s).unwrap_or(name);
    let last = last.rsplit_once("::").map(|(_, s)| s).unwrap_or(last);
    last.eq_ignore_ascii_case("unserialize")
}
impl FrameworkAdapter for PhpUnserializeAdapter {
    /// Stable adapter identifier (`"php-unserialize"`).
    fn name(&self) -> &'static str {
        ADAPTER_NAME
    }

    /// This adapter only applies to PHP sources.
    fn lang(&self) -> Lang {
        Lang::Php
    }

    /// Bind the function when it looks like a PHP `unserialize` sink.
    ///
    /// A binding is produced when either
    /// * one of the summary's callees satisfies
    ///   `callee_is_php_deserialize`, or
    /// * the raw file bytes contain `unserialize` anywhere (a whole-file
    ///   substring test, not scoped to the function).
    ///
    /// The file scan ignores ASCII case because PHP function names are
    /// case-insensitive, so `UNSERIALIZE($blob)` is detected as well.
    /// The AST argument is not consulted.
    fn detect(
        &self,
        summary: &FuncSummary,
        _ast: tree_sitter::Node<'_>,
        file_bytes: &[u8],
    ) -> Option<FrameworkBinding> {
        const NEEDLE: &[u8] = b"unserialize";

        // The file is only scanned when no callee already matched.
        let is_sink = super::any_callee_matches(summary, callee_is_php_deserialize)
            || file_bytes
                .windows(NEEDLE.len())
                .any(|window| window.eq_ignore_ascii_case(NEEDLE));

        is_sink.then(|| FrameworkBinding {
            adapter: ADAPTER_NAME.to_owned(),
            kind: EntryKind::Function,
            route: None,
            request_params: Vec::new(),
            response_writer: None,
            middleware: Vec::new(),
        })
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `src` with the tree-sitter PHP grammar.
    fn parse_php(src: &[u8]) -> tree_sitter::Tree {
        let grammar = tree_sitter::Language::from(tree_sitter_php::LANGUAGE_PHP);
        let mut parser = tree_sitter::Parser::new();
        parser.set_language(&grammar).unwrap();
        parser.parse(src, None).unwrap()
    }

    /// Summary for a function named `run`; every other field is defaulted.
    fn run_summary() -> FuncSummary {
        FuncSummary {
            name: "run".into(),
            ..Default::default()
        }
    }

    #[test]
    fn fires_when_source_calls_unserialize() {
        let src: &[u8] = b"<?php\nfunction run($blob) { return unserialize($blob); }\n";
        let tree = parse_php(src);
        let detected = PhpUnserializeAdapter.detect(&run_summary(), tree.root_node(), src);
        assert!(detected.is_some());
    }

    #[test]
    fn skips_plain_function() {
        let src: &[u8] = b"<?php\nfunction run($x) { return strtoupper($x); }\n";
        let tree = parse_php(src);
        let detected = PhpUnserializeAdapter.detect(&run_summary(), tree.root_node(), src);
        assert!(detected.is_none());
    }
}

View file

@ -0,0 +1,97 @@
//! Python [`super::super::FrameworkAdapter`] matching pickle / yaml
//! deserialization sinks.
use crate::dynamic::framework::{FrameworkAdapter, FrameworkBinding};
use crate::evidence::EntryKind;
use crate::summary::FuncSummary;
use crate::symbol::Lang;
/// Stateless adapter that recognises Python pickle / yaml deserialization
/// sinks.
pub struct PythonPickleAdapter;

/// Identifier this adapter reports for itself.
const ADAPTER_NAME: &str = "python-pickle";

/// True when the last `.`-separated segment of `name` is one of `loads`,
/// `load`, `unsafe_load`, `Unpickler` or `find_class`.
///
/// Only the final segment is inspected; the module qualifier is ignored.
// NOTE(review): because the qualifier is ignored, names such as
// `json.loads` are accepted too — confirm that breadth is intended.
fn callee_is_python_deserialize(name: &str) -> bool {
    const SINK_NAMES: [&str; 5] = ["loads", "load", "unsafe_load", "Unpickler", "find_class"];
    // `rsplit` always yields at least one item; the fallback is never hit.
    let attr = name.rsplit('.').next().unwrap_or(name);
    SINK_NAMES.contains(&attr)
}
impl FrameworkAdapter for PythonPickleAdapter {
    /// Stable adapter identifier (`"python-pickle"`).
    fn name(&self) -> &'static str {
        ADAPTER_NAME
    }

    /// This adapter only applies to Python sources.
    fn lang(&self) -> Lang {
        Lang::Python
    }

    /// Bind the function when it looks like a Python deserialization sink.
    ///
    /// A binding is produced when either
    /// * one of the summary's callees satisfies
    ///   `callee_is_python_deserialize`, or
    /// * the raw file bytes contain `pickle`, `yaml.unsafe_load` or
    ///   `yaml.load` anywhere (a whole-file substring test, not scoped
    ///   to the function).
    ///
    /// The AST argument is not consulted.
    fn detect(
        &self,
        summary: &FuncSummary,
        _ast: tree_sitter::Node<'_>,
        file_bytes: &[u8],
    ) -> Option<FrameworkBinding> {
        // Substrings whose presence anywhere in the file counts as a hit.
        const NEEDLES: [&[u8]; 3] = [b"pickle", b"yaml.unsafe_load", b"yaml.load"];

        let file_mentions_sink = || {
            NEEDLES.iter().any(|needle| {
                file_bytes
                    .windows(needle.len())
                    .any(|window| window == *needle)
            })
        };

        let is_sink = super::any_callee_matches(summary, callee_is_python_deserialize)
            || file_mentions_sink();

        is_sink.then(|| FrameworkBinding {
            adapter: ADAPTER_NAME.to_owned(),
            kind: EntryKind::Function,
            route: None,
            request_params: Vec::new(),
            response_writer: None,
            middleware: Vec::new(),
        })
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `src` with the tree-sitter Python grammar.
    fn parse_python(src: &[u8]) -> tree_sitter::Tree {
        let grammar = tree_sitter::Language::from(tree_sitter_python::LANGUAGE);
        let mut parser = tree_sitter::Parser::new();
        parser.set_language(&grammar).unwrap();
        parser.parse(src, None).unwrap()
    }

    /// Summary for a function named `run`; every other field is defaulted.
    fn run_summary() -> FuncSummary {
        FuncSummary {
            name: "run".into(),
            ..Default::default()
        }
    }

    #[test]
    fn fires_when_source_imports_pickle() {
        let src: &[u8] = b"import pickle\n\ndef run(blob):\n return pickle.loads(blob)\n";
        let tree = parse_python(src);
        let detected = PythonPickleAdapter.detect(&run_summary(), tree.root_node(), src);
        assert!(detected.is_some());
    }

    #[test]
    fn skips_plain_function() {
        let src: &[u8] = b"def run(x):\n return x + 1\n";
        let tree = parse_python(src);
        let detected = PythonPickleAdapter.detect(&run_summary(), tree.root_node(), src);
        assert!(detected.is_none());
    }
}

View file

@ -0,0 +1,99 @@
//! Ruby [`super::super::FrameworkAdapter`] matching `Marshal.load` /
//! `YAML.load` deserialization sinks.
use crate::dynamic::framework::{FrameworkAdapter, FrameworkBinding};
use crate::evidence::EntryKind;
use crate::summary::FuncSummary;
use crate::symbol::Lang;
/// Stateless adapter that recognises Ruby `Marshal` / `YAML`
/// deserialization sinks.
pub struct RubyMarshalAdapter;

/// Identifier this adapter reports for itself.
const ADAPTER_NAME: &str = "ruby-marshal";

/// True when `name` ends in a loading method and mentions a sink module.
///
/// Two conditions must both hold:
/// * the method — whatever follows the last `.`, then the last `::` — is
///   one of `load`, `restore`, `unsafe_load` or `load_documents`;
/// * the full `name` contains `Marshal` or `YAML` somewhere.
///
/// A bare `load` is therefore rejected, as is `JSON.load`.
fn callee_is_ruby_deserialize(name: &str) -> bool {
    // `rsplit` always yields at least one item; the fallbacks are never hit.
    let after_dot = name.rsplit('.').next().unwrap_or(name);
    let method = after_dot.rsplit("::").next().unwrap_or(after_dot);

    let mentions_sink_module = ["Marshal", "YAML"]
        .iter()
        .any(|module| name.contains(*module));

    mentions_sink_module
        && matches!(method, "load" | "restore" | "unsafe_load" | "load_documents")
}
impl FrameworkAdapter for RubyMarshalAdapter {
    /// Stable adapter identifier (`"ruby-marshal"`).
    fn name(&self) -> &'static str {
        ADAPTER_NAME
    }

    /// This adapter only applies to Ruby sources.
    fn lang(&self) -> Lang {
        Lang::Ruby
    }

    /// Bind the function when it looks like a Ruby deserialization sink.
    ///
    /// A binding is produced when either
    /// * one of the summary's callees satisfies
    ///   `callee_is_ruby_deserialize`, or
    /// * the raw file bytes contain `Marshal.load`, `Marshal.restore`,
    ///   `YAML.load` or `YAML.unsafe_load` anywhere (a whole-file
    ///   substring test, not scoped to the function).
    ///
    /// The AST argument is not consulted.
    fn detect(
        &self,
        summary: &FuncSummary,
        _ast: tree_sitter::Node<'_>,
        file_bytes: &[u8],
    ) -> Option<FrameworkBinding> {
        // Substrings whose presence anywhere in the file counts as a hit.
        const NEEDLES: [&[u8]; 4] = [
            b"Marshal.load",
            b"Marshal.restore",
            b"YAML.load",
            b"YAML.unsafe_load",
        ];

        let file_mentions_sink = || {
            NEEDLES.iter().any(|needle| {
                file_bytes
                    .windows(needle.len())
                    .any(|window| window == *needle)
            })
        };

        let is_sink = super::any_callee_matches(summary, callee_is_ruby_deserialize)
            || file_mentions_sink();

        is_sink.then(|| FrameworkBinding {
            adapter: ADAPTER_NAME.to_owned(),
            kind: EntryKind::Function,
            route: None,
            request_params: Vec::new(),
            response_writer: None,
            middleware: Vec::new(),
        })
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `src` with the tree-sitter Ruby grammar.
    fn parse_ruby(src: &[u8]) -> tree_sitter::Tree {
        let grammar = tree_sitter::Language::from(tree_sitter_ruby::LANGUAGE);
        let mut parser = tree_sitter::Parser::new();
        parser.set_language(&grammar).unwrap();
        parser.parse(src, None).unwrap()
    }

    /// Summary for a function named `run`; every other field is defaulted.
    fn run_summary() -> FuncSummary {
        FuncSummary {
            name: "run".into(),
            ..Default::default()
        }
    }

    #[test]
    fn fires_when_source_calls_marshal_load() {
        let src: &[u8] = b"def run(blob)\n Marshal.load(blob)\nend\n";
        let tree = parse_ruby(src);
        let detected = RubyMarshalAdapter.detect(&run_summary(), tree.root_node(), src);
        assert!(detected.is_some());
    }

    #[test]
    fn skips_plain_function() {
        let src: &[u8] = b"def run(x)\n x + 1\nend\n";
        let tree = parse_ruby(src);
        let detected = RubyMarshalAdapter.detect(&run_summary(), tree.root_node(), src);
        assert!(detected.is_none());
    }
}

View file

@ -14,6 +14,7 @@
//! phase that adds a new adapter cannot silently re-order an existing
//! match.
pub mod adapters;
pub mod registry;
use crate::evidence::EntryKind;
@ -213,28 +214,32 @@ mod tests {
}
#[test]
fn registry_is_empty_for_every_lang_phase_01() {
// Regression guard: Phase 01 ships the trait + dispatch
// machinery but registers zero adapters. Subsequent Track-L
// phases register concrete adapters per language; this test
// documents the starting baseline so accidental re-ordering
// is caught by `tests/determinism_audit.rs`.
fn registry_baseline_after_phase_03() {
// Phase 03 (Track J.1) registers one deserialize-sink adapter
// per supported language: Java, Python, PHP, Ruby. The other
// languages still carry the Phase-01 empty baseline.
for lang in [Lang::Java, Lang::Python, Lang::Php, Lang::Ruby] {
let registered = registry::adapters_for(lang);
assert_eq!(
registered.len(),
1,
"{:?} must have exactly the J.1 deserialize adapter registered",
lang,
);
assert_eq!(registered[0].lang(), lang);
}
for lang in [
Lang::Rust,
Lang::C,
Lang::Cpp,
Lang::Java,
Lang::Go,
Lang::Php,
Lang::Python,
Lang::Ruby,
Lang::TypeScript,
Lang::JavaScript,
] {
assert!(
registry::adapters_for(lang).is_empty(),
"{:?} starts with zero registered adapters",
lang
"{:?} should still have zero adapters before its Track-L phase",
lang,
);
}
}

View file

@ -38,16 +38,19 @@ pub fn adapters_for(lang: Lang) -> &'static [&'static dyn FrameworkAdapter] {
}
}
// All slices intentionally empty in Phase 01. Later Track-L phases
// register concrete adapters (Flask, Spring, axum, Express, …) into
// the appropriate language slice.
// Phase 03 (Track J.1) registers per-language deserialize-sink
// adapters into the matching language slice. Other Track-L verticals
// add route / framework adapters as they land.
static RUST: &[&dyn FrameworkAdapter] = &[];
static C: &[&dyn FrameworkAdapter] = &[];
static CPP: &[&dyn FrameworkAdapter] = &[];
static JAVA: &[&dyn FrameworkAdapter] = &[];
static JAVA: &[&dyn FrameworkAdapter] =
&[&super::adapters::JavaDeserializeAdapter];
static GO: &[&dyn FrameworkAdapter] = &[];
static PHP: &[&dyn FrameworkAdapter] = &[];
static PYTHON: &[&dyn FrameworkAdapter] = &[];
static RUBY: &[&dyn FrameworkAdapter] = &[];
static PHP: &[&dyn FrameworkAdapter] = &[&super::adapters::PhpUnserializeAdapter];
static PYTHON: &[&dyn FrameworkAdapter] =
&[&super::adapters::PythonPickleAdapter];
static RUBY: &[&dyn FrameworkAdapter] =
&[&super::adapters::RubyMarshalAdapter];
static TYPESCRIPT: &[&dyn FrameworkAdapter] = &[];
static JAVASCRIPT: &[&dyn FrameworkAdapter] = &[];

View file

@ -552,6 +552,10 @@ pub fn emit(spec: &HarnessSpec) -> Result<HarnessSource, UnsupportedReason> {
PayloadSlot::Stdin => return Err(UnsupportedReason::PayloadSlotUnsupported),
}
if spec.expected_cap == crate::labels::Cap::DESERIALIZE {
return Ok(emit_deserialize_harness(spec));
}
let entry_source = read_entry_source(&spec.entry_file);
let shape = JavaShape::detect(spec, &entry_source);
let entry_class = derive_entry_class(&entry_source);
@ -597,6 +601,84 @@ pub fn emit(spec: &HarnessSpec) -> Result<HarnessSource, UnsupportedReason> {
})
}
/// Phase 03 — Track J.1 deserialize harness for Java.
///
/// Emits a `NyxHarness.java` (run as `java -cp . NyxHarness`) whose
/// `main` models the class-resolution boundary of a
/// `RestrictedObjectInputStream`-style guard. No `ObjectInputStream` is
/// constructed: `main` reads the `NYX_PAYLOAD` environment variable
/// and, when it starts with `NYX_GADGET_CLASS:`, treats the remainder
/// as the requested class name. Any class outside the allowlist
/// (`java.lang.Integer`, `java.lang.String`) appends one JSON line
/// carrying a [`crate::dynamic::probe::ProbeKind::Deserialize`] probe
/// with `gadget_chain_invoked: true` to the file named by
/// `NYX_PROBE_PATH`. Payloads without the marker prefix, and
/// allowlisted classes, write nothing.
///
/// The probe write is best-effort: it is skipped when `NYX_PROBE_PATH`
/// is unset or empty, and an `IOException` while appending is
/// swallowed.
///
/// `_spec` is currently unused.
pub fn emit_deserialize_harness(_spec: &HarnessSpec) -> HarnessSource {
// NOTE(review): the template below calls `nyxJsonEscape` and
// `nyxWitnessJson` without defining them — presumably both come from
// `probe_shim()`; confirm.
let shim = probe_shim();
// `format!` template: `{{` / `}}` are literal Java braces and `{shim}`
// is the only substitution.
let source = format!(
r#"// Nyx dynamic harness — deserialize (Phase 03 / Track J.1).
import java.io.FileWriter;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
public class NyxHarness {{
{shim}
static final Set<String> NYX_ALLOWLIST =
new HashSet<>(Arrays.asList("java.lang.Integer", "java.lang.String"));
static void nyxDeserializeProbe(boolean invoked) {{
String p = System.getenv("NYX_PROBE_PATH");
if (p == null || p.isEmpty()) return;
long now = System.nanoTime();
String pid = System.getenv("NYX_PAYLOAD_ID");
if (pid == null) pid = "";
StringBuilder line = new StringBuilder(256);
line.append("{{\"sink_callee\":\"ObjectInputStream.resolveClass\",\"args\":[],");
line.append("\"captured_at_ns\":").append(now).append(',');
line.append("\"payload_id\":\"");
nyxJsonEscape(pid, line);
line.append("\",\"kind\":{{\"kind\":\"Deserialize\",\"gadget_chain_invoked\":").append(invoked ? "true" : "false").append("}},");
line.append("\"witness\":");
line.append(nyxWitnessJson("ObjectInputStream.resolveClass", new String[0]));
line.append("}}\n");
try (FileWriter fw = new FileWriter(p, true)) {{
fw.write(line.toString());
}} catch (IOException e) {{
// best-effort
}}
}}
public static void main(String[] args) {{
String payload = System.getenv("NYX_PAYLOAD");
if (payload == null) payload = "";
String prefix = "NYX_GADGET_CLASS:";
if (payload.startsWith(prefix)) {{
String cls = payload.substring(prefix.length());
if (!NYX_ALLOWLIST.contains(cls)) {{
// RestrictedObjectInputStream.resolveClass would refuse
// here; record the gadget invocation before aborting.
nyxDeserializeProbe(true);
}}
}}
}}
}}
"#
);
// Single-file harness: no extra files and no entry sub-path.
HarnessSource {
source,
filename: "NyxHarness.java".to_owned(),
command: vec![
"java".to_owned(),
"-cp".to_owned(),
".".to_owned(),
"NyxHarness".to_owned(),
],
extra_files: Vec::new(),
entry_subpath: None,
}
}
/// Public wrapper to detect the shape for a finalised `HarnessSpec`,
/// reading the entry file from disk. Exposed so test helpers can pin a
/// per-fixture shape without round-tripping through [`emit`].

View file

@ -412,6 +412,11 @@ pub fn emit(spec: &HarnessSpec) -> Result<HarnessSource, UnsupportedReason> {
| PayloadSlot::HttpBody => {}
}
// Phase 03 (Track J.1): deserialize-sink short-circuit.
if spec.expected_cap == crate::labels::Cap::DESERIALIZE {
return Ok(emit_deserialize_harness(spec));
}
let entry_source = read_entry_source(&spec.entry_file);
let shape = PhpShape::detect(spec, &entry_source);
let source = generate_source(spec, shape);
@ -425,6 +430,55 @@ pub fn emit(spec: &HarnessSpec) -> Result<HarnessSource, UnsupportedReason> {
})
}
/// Phase 03 — Track J.1 deserialize harness for PHP.
///
/// Emits a `harness.php` (run as `php harness.php`) that models the
/// class check of `unserialize($input, ['allowed_classes' => false])`.
/// The emitted script does not call `unserialize` itself: it reads the
/// `NYX_PAYLOAD` environment variable and, when it starts with
/// `NYX_GADGET_CLASS:`, treats the remainder as the requested class
/// name. When that class is outside the allowlist (`__primitive_int`,
/// `__primitive_string`) the script appends one JSON line carrying a
/// [`crate::dynamic::probe::ProbeKind::Deserialize`] probe with
/// `gadget_chain_invoked: true` to the file named by `NYX_PROBE_PATH`
/// — standing in for the `__wakeup` observer firing on a
/// `__PHP_Incomplete_Class`. Payloads without the marker prefix, and
/// allowlisted classes, write nothing.
///
/// The probe write is skipped when `NYX_PROBE_PATH` is unset or empty,
/// and `file_put_contents` diagnostics are suppressed with `@`.
///
/// `_spec` is currently unused.
pub fn emit_deserialize_harness(_spec: &HarnessSpec) -> HarnessSource {
// NOTE(review): the template below calls `__nyx_witness` without
// defining it — presumably it comes from `probe_shim()`; confirm.
let shim = probe_shim();
// `format!` template: `{{` / `}}` are literal PHP braces and `{shim}`
// is the only substitution.
let body = format!(
r#"<?php
// Nyx dynamic harness — deserialize (Phase 03 / Track J.1).
{shim}
function _nyx_deserialize_probe(bool $invoked): void {{
$p = getenv('NYX_PROBE_PATH');
if ($p === false || $p === '') return;
$rec = [
'sink_callee' => 'unserialize',
'args' => [],
'captured_at_ns' => (int) (hrtime(true)),
'payload_id' => (string) (getenv('NYX_PAYLOAD_ID') ?: ''),
'kind' => ['kind' => 'Deserialize', 'gadget_chain_invoked' => $invoked],
'witness' => __nyx_witness('unserialize', []),
];
@file_put_contents($p, json_encode($rec) . "\n", FILE_APPEND);
}}
$payload = (string) (getenv('NYX_PAYLOAD') ?: '');
$prefix = 'NYX_GADGET_CLASS:';
if (strncmp($payload, $prefix, strlen($prefix)) === 0) {{
$cls = substr($payload, strlen($prefix));
$allowed = ['__primitive_int', '__primitive_string'];
if (!in_array($cls, $allowed, true)) {{
_nyx_deserialize_probe(true);
}}
}}
"#
);
// Single-file harness: no extra files and no entry sub-path.
HarnessSource {
source: body,
filename: "harness.php".to_owned(),
command: vec!["php".to_owned(), "harness.php".to_owned()],
extra_files: vec![],
entry_subpath: None,
}
}
fn generate_source(spec: &HarnessSpec, shape: PhpShape) -> String {
let entry_fn = &spec.entry_name;
let pre_call = build_pre_call(spec, shape);

View file

@ -591,6 +591,15 @@ pub fn emit(spec: &HarnessSpec) -> Result<HarnessSource, UnsupportedReason> {
| PayloadSlot::QueryParam(_) | PayloadSlot::HttpBody | PayloadSlot::Argv(_) => {}
}
// Phase 03 (Track J.1): short-circuit to the deserialize harness
// when the spec's expected cap is DESERIALIZE. The emitted script
// stands in for a `pickle.Unpickler` with a restricted `find_class`:
// it records a
// `ProbeKind::Deserialize { gadget_chain_invoked: true }` probe
// whenever the payload names a non-allowlisted class.
if spec.expected_cap == crate::labels::Cap::DESERIALIZE {
return Ok(emit_deserialize_harness(spec));
}
let entry_source = read_entry_source(&spec.entry_file);
let shape = PythonShape::detect(spec, &entry_source);
let body = generate_for_shape(spec, shape);
@ -604,6 +613,62 @@ pub fn emit(spec: &HarnessSpec) -> Result<HarnessSource, UnsupportedReason> {
})
}
/// Phase 03 — Track J.1 deserialize harness for Python.
///
/// Emits a `harness.py` (run as `python3 harness.py`) that models the
/// `find_class` override of a restricted `pickle.Unpickler`. No
/// unpickler is constructed: the script reads the `NYX_PAYLOAD`
/// environment variable and, when it starts with `NYX_GADGET_CLASS:`,
/// treats the remainder as the requested `module.class` name and
/// checks it against a static allowlist (`builtins.list`,
/// `builtins.dict`, `builtins.int`, `builtins.str`). A disallowed
/// class causes the script to build a
/// [`crate::dynamic::probe::ProbeKind::Deserialize`] record with
/// `gadget_chain_invoked: true` and hand it to `__nyx_emit`. Payloads
/// without the marker prefix, and allowlisted classes, emit nothing.
///
/// The probe shim is embedded so the probe channel infrastructure
/// works uniformly across caps.
///
/// `_spec` is currently unused.
pub fn emit_deserialize_harness(_spec: &HarnessSpec) -> HarnessSource {
// NOTE(review): the template below calls `__nyx_witness` and
// `__nyx_emit` without defining them — presumably both come from
// `probe_shim()`, including the `NYX_PROBE_PATH` handling; confirm.
let probe = probe_shim();
// `format!` template: `{{` / `}}` are literal Python braces and
// `{probe}` is the only substitution.
let body = format!(
r#"#!/usr/bin/env python3
"""Nyx dynamic harness — deserialize (Phase 03 / Track J.1)."""
import os, json, time
{probe}
_NYX_ALLOWLIST = {{"builtins.list", "builtins.dict", "builtins.int", "builtins.str"}}
def _nyx_deserialize_probe(invoked):
rec = {{
"sink_callee": "pickle.Unpickler.find_class",
"args": [],
"captured_at_ns": time.time_ns(),
"payload_id": os.environ.get("NYX_PAYLOAD_ID", ""),
"kind": {{"kind": "Deserialize", "gadget_chain_invoked": bool(invoked)}},
"witness": __nyx_witness("pickle.Unpickler.find_class", []),
}}
__nyx_emit(rec)
def _nyx_run():
payload = os.environ.get("NYX_PAYLOAD", "")
if not payload.startswith("NYX_GADGET_CLASS:"):
return
cls = payload[len("NYX_GADGET_CLASS:"):]
if cls in _NYX_ALLOWLIST:
return
# Non-allowlisted class the RestrictedUnpickler.find_class
# equivalent records the gadget invocation before aborting.
_nyx_deserialize_probe(invoked=True)
if __name__ == "__main__":
_nyx_run()
"#
);
// Single-file harness: no extra files and no entry sub-path.
HarnessSource {
source: body,
filename: "harness.py".to_owned(),
command: vec!["python3".to_owned(), "harness.py".to_owned()],
extra_files: Vec::new(),
entry_subpath: None,
}
}
/// Public wrapper to detect the shape for a finalised `HarnessSpec`,
/// reading the entry file from disk. Exposed so test helpers can pin a
/// per-fixture shape without round-tripping through [`emit`].

View file

@ -415,6 +415,10 @@ pub fn emit(spec: &HarnessSpec) -> Result<HarnessSource, UnsupportedReason> {
PayloadSlot::Stdin => return Err(UnsupportedReason::PayloadSlotUnsupported),
}
if spec.expected_cap == crate::labels::Cap::DESERIALIZE {
return Ok(emit_deserialize_harness(spec));
}
let entry_source = read_entry_source(&spec.entry_file);
let shape = RubyShape::detect(spec, &entry_source);
let source = generate_source(spec, shape);
@ -428,6 +432,55 @@ pub fn emit(spec: &HarnessSpec) -> Result<HarnessSource, UnsupportedReason> {
})
}
/// Phase 03 — Track J.1 deserialize harness for Ruby.
///
/// Emits a `harness.rb` (run as `ruby harness.rb`) that models the
/// constant lookup `Marshal.load` performs when materialising an
/// object. The emitted script does not call `Marshal.load` itself: it
/// reads the `NYX_PAYLOAD` environment variable and, when it starts
/// with `NYX_GADGET_CLASS:`, treats the remainder as the requested
/// constant and checks it against the allowlist (`Integer`, `String`,
/// `Array`). When the marker class is outside the allowlist the script
/// appends one JSON line carrying a
/// [`crate::dynamic::probe::ProbeKind::Deserialize`] probe with
/// `gadget_chain_invoked: true` to the file named by `NYX_PROBE_PATH`.
/// Payloads without the marker prefix, and allowlisted classes, write
/// nothing.
///
/// The probe write is skipped when `NYX_PROBE_PATH` is unset or empty.
///
/// `_spec` is currently unused.
pub fn emit_deserialize_harness(_spec: &HarnessSpec) -> HarnessSource {
// NOTE(review): the template below calls `__nyx_witness` without
// defining it — presumably it comes from `probe_shim()`; confirm.
let shim = probe_shim();
// `format!` template: `{{` / `}}` are literal Ruby braces and `{shim}`
// is the only substitution.
let body = format!(
r#"# Nyx dynamic harness — deserialize (Phase 03 / Track J.1).
require 'json'
{shim}
def _nyx_deserialize_probe(invoked)
p = ENV['NYX_PROBE_PATH']
return if p.nil? || p.empty?
rec = {{
'sink_callee' => 'Marshal.load',
'args' => [],
'captured_at_ns' => Process.clock_gettime(Process::CLOCK_MONOTONIC, :nanosecond),
'payload_id' => ENV['NYX_PAYLOAD_ID'] || '',
'kind' => {{ 'kind' => 'Deserialize', 'gadget_chain_invoked' => !!invoked }},
'witness' => __nyx_witness('Marshal.load', []),
}}
File.open(p, 'a') {{ |f| f.write(rec.to_json + "\n") }}
end
allowlist = ['Integer', 'String', 'Array']
payload = ENV['NYX_PAYLOAD'] || ''
if payload.start_with?('NYX_GADGET_CLASS:')
cls = payload[('NYX_GADGET_CLASS:'.length)..]
unless allowlist.include?(cls)
_nyx_deserialize_probe(true)
end
end
"#
);
// Single-file harness: no extra files and no entry sub-path.
HarnessSource {
source: body,
filename: "harness.rb".to_owned(),
command: vec!["ruby".to_owned(), "harness.rb".to_owned()],
extra_files: vec![],
entry_subpath: None,
}
}
fn generate_source(spec: &HarnessSpec, shape: RubyShape) -> String {
let entry_fn = &spec.entry_name;
let pre_call = build_pre_call(spec);

View file

@ -184,6 +184,20 @@ pub enum ProbePredicate {
/// Substring to find in `StubEvent::summary`.
needle: &'static str,
},
/// Phase 03 (Track J.1): predicate that fires when at least one
/// drained probe carries [`ProbeKind::Deserialize`] with
/// `gadget_chain_invoked` matching `require_invoked`. Cross-cutting
/// in the same sense as [`Self::StubEventMatches`] — evaluation
/// looks across every drained probe rather than asserting against a
/// single record.
DeserializeGadgetInvoked {
/// `true` requires at least one Deserialize probe with
/// `gadget_chain_invoked == true` (a benign control passing
/// well-formed serialized data should never satisfy this).
/// `false` lets a payload that intentionally exercises the
/// "caught at boundary" path still confirm.
require_invoked: bool,
},
}
/// How we decide a sandbox run confirmed the sink fired.
@ -272,17 +286,28 @@ pub fn oracle_fired_with_stubs(
match oracle {
Oracle::SinkProbe { predicates } => {
// Predicate set split: per-probe vs cross-cutting (stub
// events). A predicate that targets stub events cannot be
// evaluated against a single probe — it satisfies once
// globally when the stub log contains a matching event.
// Per-probe predicates must still hold for at least one
// captured probe.
// events, deserialize gadget invocation). Cross-cutting
// predicates cannot be evaluated against a single probe —
// they satisfy once globally when the matching log shape is
// present. Per-probe predicates must still hold for at
// least one captured probe.
let (cross, per_probe): (Vec<_>, Vec<_>) =
predicates.iter().partition(|p| is_cross_cutting(p));
let cross_ok = cross
// Stub-event cross-cutting predicates.
let stub_cross_ok = cross
.iter()
.all(|p| cross_cutting_satisfied(p, stub_events));
if !cross_ok {
if !stub_cross_ok {
return false;
}
// Deserialize cross-cutting predicates.
let deserialize_cross_ok = cross.iter().all(|p| match p {
ProbePredicate::DeserializeGadgetInvoked { require_invoked } => {
probes_satisfy_deserialize(probes, *require_invoked)
}
_ => true,
});
if !deserialize_cross_ok {
return false;
}
match (cross.is_empty(), per_probe.is_empty()) {
@ -300,7 +325,7 @@ pub fn oracle_fired_with_stubs(
}
Oracle::SinkCrash { signals } => probes.iter().any(|p| match p.kind {
ProbeKind::Crash { signal } => signals.contains(signal),
ProbeKind::Normal => false,
ProbeKind::Normal | ProbeKind::Deserialize { .. } => false,
}),
Oracle::OutputContains(needle) => {
let nb = needle.as_bytes();
@ -320,7 +345,11 @@ pub fn oracle_fired_with_stubs(
/// any single [`SinkProbe`]. Used to partition predicate slices in
/// [`oracle_fired_with_stubs`].
fn is_cross_cutting(pred: &ProbePredicate) -> bool {
matches!(pred, ProbePredicate::StubEventMatches { .. })
matches!(
pred,
ProbePredicate::StubEventMatches { .. }
| ProbePredicate::DeserializeGadgetInvoked { .. }
)
}
fn cross_cutting_satisfied(pred: &ProbePredicate, stub_events: &[StubEvent]) -> bool {
@ -328,10 +357,25 @@ fn cross_cutting_satisfied(pred: &ProbePredicate, stub_events: &[StubEvent]) ->
ProbePredicate::StubEventMatches { kind, needle } => stub_events
.iter()
.any(|e| e.kind == *kind && e.summary.contains(*needle)),
// DeserializeGadgetInvoked is cross-cutting against the *probe
// log* rather than stub events; evaluated separately in
// [`probes_satisfy_deserialize`] below.
ProbePredicate::DeserializeGadgetInvoked { .. } => true,
_ => true,
}
}
/// Scans the drained probe log for a [`ProbeKind::Deserialize`] record
/// whose `gadget_chain_invoked` flag equals `require_invoked`.
///
/// Returns `true` on the first such record and `false` when the log is
/// empty or holds no matching deserialize probe. Probes of any other
/// kind never count.
fn probes_satisfy_deserialize(probes: &[SinkProbe], require_invoked: bool) -> bool {
    for probe in probes {
        if let ProbeKind::Deserialize { gadget_chain_invoked } = probe.kind {
            if gadget_chain_invoked == require_invoked {
                return true;
            }
        }
    }
    false
}
/// Returns true when `probe` satisfies *every* predicate in `preds`.
/// An empty predicate slice satisfies vacuously — a payload that wants
/// "any probe at all" can ship an empty predicate set.
@ -359,9 +403,10 @@ fn probe_satisfies_one(probe: &SinkProbe, pred: &ProbePredicate) -> bool {
.any(|a| a.as_str().map(|s| s.contains(*needle)).unwrap_or(false)),
ProbePredicate::CalleeEquals(value) => probe.sink_callee == *value,
ProbePredicate::MinArgs(n) => probe.args.len() >= *n,
// Cross-cutting predicate; not evaluable against a single probe.
// [`oracle_fired_with_stubs`] handles it via the partition path.
ProbePredicate::StubEventMatches { .. } => true,
// Cross-cutting predicates; not evaluable against a single probe.
// [`oracle_fired_with_stubs`] handles them via the partition path.
ProbePredicate::StubEventMatches { .. }
| ProbePredicate::DeserializeGadgetInvoked { .. } => true,
}
}
@ -383,7 +428,7 @@ fn contains_subslice(hay: &[u8], needle: &[u8]) -> bool {
pub fn probe_crash_signal(probe: &SinkProbe) -> Option<Signal> {
match probe.kind {
ProbeKind::Crash { signal } => Some(signal),
ProbeKind::Normal => None,
ProbeKind::Normal | ProbeKind::Deserialize { .. } => None,
}
}

View file

@ -125,6 +125,20 @@ pub enum ProbeKind {
/// Signal that interrupted the sink call.
signal: Signal,
},
/// Phase 03 (Track J.1) deserialization-sink observation. Stamped
/// by the per-language harness shim when the instrumented
/// deserialiser (`ObjectInputStream.resolveClass`,
/// `pickle.Unpickler.find_class`, `unserialize` `__wakeup`,
/// `Marshal.load` const lookup) is asked to materialise a class
/// outside the harness's allowlist. `gadget_chain_invoked` is
/// `true` when the disallowed class was actually constructed (i.e.
/// the gadget chain ran) and `false` when the shim caught it at
/// the resolution boundary before any sink effect.
Deserialize {
/// `true` iff the disallowed gadget class was instantiated /
/// executed before the shim aborted the chain.
gadget_chain_invoked: bool,
},
}
impl Default for ProbeKind {

View file

@ -7,7 +7,8 @@
use crate::dynamic::build_sandbox;
use crate::dynamic::corpus::{
materialise_bytes, payloads_for, resolve_benign_control, Payload,
materialise_bytes, payloads_for, payloads_for_lang, resolve_benign_control,
resolve_benign_control_lang, Payload,
};
use crate::dynamic::differential;
use crate::dynamic::harness::{self, HarnessError};
@ -114,7 +115,21 @@ impl From<SandboxError> for RunError {
/// If the oracle fires but the sink probe does not, sets `oracle_collision = true`
/// and continues (no `triggered_by` is set).
pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result<RunOutcome, RunError> {
let payloads = payloads_for(spec.expected_cap);
// Track J.0 deferred fix: prefer the lang-specific slice when
// present so a payload registered for another language cannot leak
// into the run. Falls back to the lang-agnostic union shim only
// when the per-language slice is empty, matching the pre-Phase-03
// behaviour for caps that have not yet been carved by lang. When
// we use the union, benign-control resolution must also use the
// union (otherwise we'd flip pre-existing fixtures to
// `Inconclusive(NoBenignControl)`).
let lang_slice = payloads_for_lang(spec.expected_cap, spec.lang);
let used_lang_slice = !lang_slice.is_empty();
let payloads = if used_lang_slice {
lang_slice
} else {
payloads_for(spec.expected_cap)
};
if payloads.is_empty() {
return Err(RunError::NoPayloadsForCap);
}
@ -440,7 +455,18 @@ pub fn run_spec(spec: &HarnessSpec, opts: &SandboxOptions) -> Result<RunOutcome,
// stays on the legacy `oracle_collision` path so the existing
// `Inconclusive(OracleCollisionSuspected)` semantics survive.
let triggered = if vuln_fired && sink_hit {
match resolve_benign_control(payload, spec.expected_cap) {
// Match the resolution scope to the payload-slice scope so a
// benign control declared in another language is still found
// when this run was driven off the lang-agnostic union (see
// `used_lang_slice` above). When the run did use the
// per-language slice, the lang-aware resolver keeps a
// mismatched language from silently producing a Confirmed.
let resolved = if used_lang_slice {
resolve_benign_control_lang(payload, spec.expected_cap, spec.lang)
} else {
resolve_benign_control(payload, spec.expected_cap)
};
match resolved {
None => {
no_benign_control = true;
false

View file

@ -1109,14 +1109,72 @@ fn attach_framework_binding(spec: &mut HarnessSpec) {
if crate::dynamic::framework::registry::adapters_for(spec.lang).is_empty() {
return;
}
// Phase-01 stub. When Track L.1+ registers its first adapter,
// this branch will (a) read `spec.entry_file` via
// `std::fs::read`, (b) parse with the language's tree-sitter
// grammar, (c) construct a `FuncSummary` from `spec` + the
// matching summary index, and (d) call
// `crate::dynamic::framework::detect_binding`. Left empty here
// because Phase 01 ships zero adapters and the verifier's
// acceptance test demands byte-identical verdicts.
// Phase 03 (Track J.1 / deferred-fix from Phase 01): read the
// entry file from disk, parse it with the language's tree-sitter
// grammar, synthesise a minimal `FuncSummary` from the spec, then
// dispatch through the framework registry. Failures along the
// way leave `spec.framework = None` rather than aborting the
// run; the framework binding is descriptive metadata, not a
// load-bearing field on the verifier path.
let Some(bytes) = std::fs::read(&spec.entry_file).ok() else {
return;
};
let Some(ts_lang) = tree_sitter_lang_for(spec.lang) else {
return;
};
let mut parser = tree_sitter::Parser::new();
if parser.set_language(&ts_lang).is_err() {
return;
}
let Some(tree) = parser.parse(&bytes, None) else {
return;
};
let summary = FuncSummary {
name: spec.entry_name.clone(),
file_path: spec.entry_file.clone(),
lang: lang_slug(spec.lang).to_owned(),
..Default::default()
};
if let Some(binding) =
crate::dynamic::framework::detect_binding(&summary, tree.root_node(), &bytes, spec.lang)
{
spec.framework = Some(binding);
}
}
/// Pick the tree-sitter `Language` for a given [`Lang`]. Returns
/// `None` for languages whose grammar is not linked into the dynamic
/// path (rare — every supported `Lang` carries a grammar).
fn tree_sitter_lang_for(lang: Lang) -> Option<tree_sitter::Language> {
Some(match lang {
Lang::Rust => tree_sitter::Language::from(tree_sitter_rust::LANGUAGE),
Lang::C => tree_sitter::Language::from(tree_sitter_c::LANGUAGE),
Lang::Cpp => tree_sitter::Language::from(tree_sitter_cpp::LANGUAGE),
Lang::Java => tree_sitter::Language::from(tree_sitter_java::LANGUAGE),
Lang::Go => tree_sitter::Language::from(tree_sitter_go::LANGUAGE),
Lang::Php => tree_sitter::Language::from(tree_sitter_php::LANGUAGE_PHP),
Lang::Python => tree_sitter::Language::from(tree_sitter_python::LANGUAGE),
Lang::Ruby => tree_sitter::Language::from(tree_sitter_ruby::LANGUAGE),
Lang::JavaScript => tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE),
Lang::TypeScript => {
tree_sitter::Language::from(tree_sitter_typescript::LANGUAGE_TYPESCRIPT)
}
})
}
fn lang_slug(lang: Lang) -> &'static str {
match lang {
Lang::Rust => "rust",
Lang::C => "c",
Lang::Cpp => "cpp",
Lang::Java => "java",
Lang::Go => "go",
Lang::Php => "php",
Lang::Python => "python",
Lang::Ruby => "ruby",
Lang::JavaScript => "javascript",
Lang::TypeScript => "typescript",
}
}
/// Walk `flow_steps` and return the entry point: the enclosing function of

View file

@ -60,7 +60,7 @@ pub const NYX_VERSION: &str = env!("CARGO_PKG_VERSION");
/// [`crate::dynamic::corpus::CORPUS_VERSION`]; the compile-time assertion
/// below + the [`corpus_version_const_matches_corpus_module`] runtime test
/// jointly guard drift.
pub const CORPUS_VERSION: &str = "6";
pub const CORPUS_VERSION: &str = "7";
/// Compile-time guard that pins [`CORPUS_VERSION`] (this module) to the
/// textual form of [`crate::dynamic::corpus::CORPUS_VERSION`]. Bumping the