[pitboss] phase 03: Track J.1 + Track L.1 — DESERIALIZE corpus + Java/Python/PHP/Ruby adapters

This commit is contained in:
pitboss 2026-05-17 16:37:20 -05:00
parent 01fcaab310
commit 9dc60b51c0
33 changed files with 1625 additions and 53 deletions

View file

@ -162,6 +162,41 @@ pub fn audit_cap_coverage_runtime() -> Result<(), String> {
Ok(())
}
/// Track J.0 deferred audit: a non-benign payload's `benign_control.label`
/// must be unique *within its own `(cap, lang)` slice* — and a benign
/// payload's label may not collide with any other benign label inside the
/// same cap across lang slices, otherwise the lang-agnostic union shim
/// could resolve a vuln payload in language A against a benign payload
/// declared in language B (the latent §4.1 bug captured in the deferred
/// queue).
pub fn audit_benign_label_uniqueness_runtime() -> Result<(), String> {
use std::collections::HashMap;
let mut by_cap: HashMap<u32, HashMap<&'static str, crate::symbol::Lang>> = HashMap::new();
for &(cap, lang, slice) in CORPUS.entries {
let bucket = by_cap.entry(cap.bits()).or_default();
for p in slice {
if !p.is_benign {
continue;
}
if let Some(prev_lang) = bucket.insert(p.label, lang) {
if prev_lang != lang {
return Err(format!(
"benign label {:?} for cap {:#x} is registered in both \
{:?} and {:?} lang-agnostic resolve_benign_control \
could match the wrong language",
p.label,
cap.bits(),
prev_lang,
lang,
));
}
}
}
}
Ok(())
}
#[cfg(test)]
mod corpus_registry {
use super::*;
@ -172,5 +207,7 @@ mod corpus_registry {
fn audit() {
audit_benign_controls_runtime().expect("benign_control audit failed");
audit_cap_coverage_runtime().expect("cap coverage audit failed");
audit_benign_label_uniqueness_runtime()
.expect("benign label uniqueness audit failed");
}
}

View file

@ -0,0 +1,66 @@
//! Java `Cap::DESERIALIZE` payloads.
//!
//! Vuln payload: a base64-encoded `java.io.ObjectInputStream` byte stream
//! that materialises a gadget class outside the harness's allowlist.
//! The harness's `RestrictedObjectInputStream.resolveClass` intercepts
//! the lookup and emits a `ProbeKind::Deserialize { gadget_chain_invoked
//! = true }` probe before aborting the chain.
//!
//! Benign control: a base64-encoded `ObjectInputStream` byte stream of a
//! single allow-listed `java.lang.Integer`. The class lives inside the
//! resolveClass allowlist so no Deserialize probe is emitted.
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
use crate::dynamic::oracle::ProbePredicate;
pub const PAYLOADS: &[CuratedPayload] = &[
CuratedPayload {
// Marker class name embedded in the serialized stream — the
// harness allowlist contains `java.lang.Integer` and `java.lang.String`
// only. The byte form is a small literal so const-eval can keep it.
bytes: b"NYX_GADGET_CLASS:org.nyx.deserialize.Gadget",
label: "java-deserialize-gadget",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::DeserializeGadgetInvoked {
require_invoked: true,
}],
},
is_benign: false,
provenance: PayloadProvenance::Curated,
since_corpus_version: 7,
deprecated_at_corpus_version: None,
fixture_paths: &[
"tests/dynamic_fixtures/deserialize/java/vuln.java",
],
oob_nonce_slot: false,
probe_predicates: &[ProbePredicate::DeserializeGadgetInvoked {
require_invoked: true,
}],
benign_control: Some(PayloadRef {
label: "java-deserialize-benign",
}),
no_benign_control_rationale: None,
},
CuratedPayload {
// Allow-listed payload — the marker carries `java.lang.Integer`,
// which the harness resolveClass accepts without writing a probe.
bytes: b"NYX_GADGET_CLASS:java.lang.Integer",
label: "java-deserialize-benign",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::DeserializeGadgetInvoked {
require_invoked: true,
}],
},
is_benign: true,
provenance: PayloadProvenance::Curated,
since_corpus_version: 7,
deprecated_at_corpus_version: None,
fixture_paths: &[
"tests/dynamic_fixtures/deserialize/java/benign.java",
],
oob_nonce_slot: false,
probe_predicates: &[],
benign_control: None,
no_benign_control_rationale: None,
},
];

View file

@ -0,0 +1,17 @@
//! Deserialization (`Cap::DESERIALIZE`) per-language payload slices.
//!
//! Phase 03 (Track J.1) lands the first cap end-to-end: Java
//! (`ObjectInputStream.readObject` / `XMLDecoder`), Python (`pickle.loads`
//! / `yaml.unsafe_load`), PHP (`unserialize`), and Ruby (`Marshal.load`
//! / `YAML.load`). Every vuln payload is paired with a benign control
//! whose oracle should *not* fire — the per-language harness shims
//! emit a [`crate::dynamic::probe::ProbeKind::Deserialize`] record with
//! `gadget_chain_invoked: true` when a non-allowlisted gadget class is
//! materialised by the instrumented deserialiser; benign well-formed
//! serialized data does not reach the allowlist boundary and so leaves
//! no Deserialize probe.
pub mod java;
pub mod php;
pub mod python;
pub mod ruby;

View file

@ -0,0 +1,64 @@
//! PHP `Cap::DESERIALIZE` payloads.
//!
//! Vuln payload: marker string handed to `unserialize($input)` where the
//! harness wraps the call with `['allowed_classes' => false]` and an
//! observer on `__wakeup`. When `unserialize` materialises a
//! `__PHP_Incomplete_Class` from a non-allowlisted class name, the
//! observer emits a `ProbeKind::Deserialize { gadget_chain_invoked:
//! true }` probe.
//!
//! Benign control: serialised primitive (an `int`) that
//! `unserialize` materialises without engaging the allowlist boundary.
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
use crate::dynamic::oracle::ProbePredicate;
pub const PAYLOADS: &[CuratedPayload] = &[
CuratedPayload {
bytes: b"NYX_GADGET_CLASS:PHP_Object_Injection_RCE",
label: "php-unserialize-gadget",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::DeserializeGadgetInvoked {
require_invoked: true,
}],
},
is_benign: false,
provenance: PayloadProvenance::Curated,
since_corpus_version: 7,
deprecated_at_corpus_version: None,
fixture_paths: &[
"tests/dynamic_fixtures/deserialize/php/vuln.php",
],
oob_nonce_slot: false,
probe_predicates: &[ProbePredicate::DeserializeGadgetInvoked {
require_invoked: true,
}],
benign_control: Some(PayloadRef {
label: "php-unserialize-benign",
}),
no_benign_control_rationale: None,
},
CuratedPayload {
// Allow-listed marker — the harness allowlist accepts
// `__primitive_int` as a no-op type representing a serialised
// integer literal.
bytes: b"NYX_GADGET_CLASS:__primitive_int",
label: "php-unserialize-benign",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::DeserializeGadgetInvoked {
require_invoked: true,
}],
},
is_benign: true,
provenance: PayloadProvenance::Curated,
since_corpus_version: 7,
deprecated_at_corpus_version: None,
fixture_paths: &[
"tests/dynamic_fixtures/deserialize/php/benign.php",
],
oob_nonce_slot: false,
probe_predicates: &[],
benign_control: None,
no_benign_control_rationale: None,
},
];

View file

@ -0,0 +1,60 @@
//! Python `Cap::DESERIALIZE` payloads.
//!
//! Vuln payload: marker string consumed by the harness shim which calls
//! `pickle.Unpickler(...).load()` with `find_class` overridden to record
//! a `ProbeKind::Deserialize { gadget_chain_invoked: true }` whenever a
//! non-allowlisted class is requested. The harness allowlists
//! `builtins.list` / `builtins.dict` / `builtins.int`; the marker class
//! `nyx.gadget.RCE` is outside that set.
//!
//! Benign control: payload requests only allow-listed builtins.
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
use crate::dynamic::oracle::ProbePredicate;
pub const PAYLOADS: &[CuratedPayload] = &[
CuratedPayload {
bytes: b"NYX_GADGET_CLASS:nyx.gadget.RCE",
label: "python-pickle-gadget",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::DeserializeGadgetInvoked {
require_invoked: true,
}],
},
is_benign: false,
provenance: PayloadProvenance::Curated,
since_corpus_version: 7,
deprecated_at_corpus_version: None,
fixture_paths: &[
"tests/dynamic_fixtures/deserialize/python/vuln.py",
],
oob_nonce_slot: false,
probe_predicates: &[ProbePredicate::DeserializeGadgetInvoked {
require_invoked: true,
}],
benign_control: Some(PayloadRef {
label: "python-pickle-benign",
}),
no_benign_control_rationale: None,
},
CuratedPayload {
bytes: b"NYX_GADGET_CLASS:builtins.list",
label: "python-pickle-benign",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::DeserializeGadgetInvoked {
require_invoked: true,
}],
},
is_benign: true,
provenance: PayloadProvenance::Curated,
since_corpus_version: 7,
deprecated_at_corpus_version: None,
fixture_paths: &[
"tests/dynamic_fixtures/deserialize/python/benign.py",
],
oob_nonce_slot: false,
probe_predicates: &[],
benign_control: None,
no_benign_control_rationale: None,
},
];

View file

@ -0,0 +1,61 @@
//! Ruby `Cap::DESERIALIZE` payloads.
//!
//! Vuln payload: marker string consumed by the harness shim which calls
//! `Marshal.load(input)` with `Marshal.const_defined?`-style
//! instrumentation that records a `ProbeKind::Deserialize {
//! gadget_chain_invoked: true }` probe whenever a non-allowlisted
//! constant is materialised. The harness allowlist contains `Integer`
//! / `String` / `Array`.
//!
//! Benign control: marker requests only the allow-listed `Integer`
//! constant.
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
use crate::dynamic::oracle::ProbePredicate;
pub const PAYLOADS: &[CuratedPayload] = &[
CuratedPayload {
bytes: b"NYX_GADGET_CLASS:Nyx::Gadget::RCE",
label: "ruby-marshal-gadget",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::DeserializeGadgetInvoked {
require_invoked: true,
}],
},
is_benign: false,
provenance: PayloadProvenance::Curated,
since_corpus_version: 7,
deprecated_at_corpus_version: None,
fixture_paths: &[
"tests/dynamic_fixtures/deserialize/ruby/vuln.rb",
],
oob_nonce_slot: false,
probe_predicates: &[ProbePredicate::DeserializeGadgetInvoked {
require_invoked: true,
}],
benign_control: Some(PayloadRef {
label: "ruby-marshal-benign",
}),
no_benign_control_rationale: None,
},
CuratedPayload {
bytes: b"NYX_GADGET_CLASS:Integer",
label: "ruby-marshal-benign",
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::DeserializeGadgetInvoked {
require_invoked: true,
}],
},
is_benign: true,
provenance: PayloadProvenance::Curated,
since_corpus_version: 7,
deprecated_at_corpus_version: None,
fixture_paths: &[
"tests/dynamic_fixtures/deserialize/ruby/benign.rb",
],
oob_nonce_slot: false,
probe_predicates: &[],
benign_control: None,
no_benign_control_rationale: None,
},
];

View file

@ -23,7 +23,7 @@
use std::collections::HashMap;
use std::sync::OnceLock;
use super::{cmdi, fmt_string, path_trav, sqli, ssrf, xss};
use super::{cmdi, deserialize, fmt_string, path_trav, sqli, ssrf, xss};
use super::{CapCorpus, CuratedPayload, Oracle};
use crate::dynamic::oracle::ProbePredicate;
use crate::labels::Cap;
@ -37,7 +37,6 @@ pub const CORPUS_UNSUPPORTED_LANG_NEUTRAL: u32 = Cap::ENV_VAR.bits()
| Cap::SHELL_ESCAPE.bits()
| Cap::URL_ENCODE.bits()
| Cap::JSON_PARSE.bits()
| Cap::DESERIALIZE.bits()
| Cap::CRYPTO.bits()
| Cap::UNAUTHORIZED_ID.bits()
| Cap::DATA_EXFIL.bits()
@ -58,6 +57,10 @@ const ENTRIES: &[(Cap, Lang, &[CuratedPayload])] = &[
(Cap::SSRF, Lang::Rust, ssrf::rust::PAYLOADS),
(Cap::HTML_ESCAPE, Lang::Rust, xss::rust::PAYLOADS),
(Cap::FMT_STRING, Lang::C, fmt_string::c::PAYLOADS),
(Cap::DESERIALIZE, Lang::Java, deserialize::java::PAYLOADS),
(Cap::DESERIALIZE, Lang::Python, deserialize::python::PAYLOADS),
(Cap::DESERIALIZE, Lang::Php, deserialize::php::PAYLOADS),
(Cap::DESERIALIZE, Lang::Ruby, deserialize::ruby::PAYLOADS),
];
/// Reserved for per-cap oracle defaults. Empty in Phase 02; populated by
@ -114,10 +117,23 @@ pub fn payloads_for(cap: Cap) -> &'static [CuratedPayload] {
}
/// Return the (first) benign control payload for a cap, if one exists.
///
/// Lang-agnostic union shim — searches every registered `(cap, lang)`
/// slice in declaration order. Prefer [`benign_payload_for_lang`] when
/// the caller knows the harness's [`Lang`] so cross-language label
/// collisions (e.g. an `ssrf-benign` label registered for both Rust and
/// Python) cannot resolve to a wrong-language fixture.
pub fn benign_payload_for(cap: Cap) -> Option<&'static CuratedPayload> {
payloads_for(cap).iter().find(|p| p.is_benign)
}
/// Lang-aware [`benign_payload_for`]. Restricts the search to the
/// requested `(cap, lang)` slice so a payload's benign control is
/// always resolved inside the same language vertical.
pub fn benign_payload_for_lang(cap: Cap, lang: Lang) -> Option<&'static CuratedPayload> {
payloads_for_lang(cap, lang).iter().find(|p| p.is_benign)
}
/// Resolve a [`CuratedPayload::benign_control`] reference to the matching
/// benign entry inside the same cap's payload slice (across all langs).
///
@ -126,6 +142,13 @@ pub fn benign_payload_for(cap: Cap) -> Option<&'static CuratedPayload> {
/// non-benign in the corpus. The runner treats the `None` result as
/// `NoControl` and downgrades the verdict to
/// [`crate::evidence::InconclusiveReason::NoBenignControl`].
///
/// Lang-agnostic union shim — kept for the small set of pre-Phase-03
/// callers that do not carry a [`Lang`] at the call site. Prefer
/// [`resolve_benign_control_lang`] in any new code: with multiple
/// `(cap, lang)` slices registered for the same cap, the union shim
/// can match a wrong-language fixture's label and silently confirm
/// against a benign that never ran.
pub fn resolve_benign_control(
vuln_payload: &CuratedPayload,
cap: Cap,
@ -136,6 +159,22 @@ pub fn resolve_benign_control(
.find(|p| p.is_benign && p.label == r.label)
}
/// Lang-aware [`resolve_benign_control`]. Restricts the search to the
/// `(cap, lang)` slice that produced the vuln payload so the
/// differential rule (§4.1) can never compare against a wrong-language
/// benign even when two language slices share a label. Phase 03 wires
/// this through [`crate::dynamic::runner`].
pub fn resolve_benign_control_lang(
vuln_payload: &CuratedPayload,
cap: Cap,
lang: Lang,
) -> Option<&'static CuratedPayload> {
let r = vuln_payload.benign_control?;
payloads_for_lang(cap, lang)
.iter()
.find(|p| p.is_benign && p.label == r.label)
}
/// Materialise the effective bytes for a payload.
///
/// For static payloads (`oob_nonce_slot == false`) returns the `bytes`
@ -237,7 +276,6 @@ mod tests {
Cap::SHELL_ESCAPE,
Cap::URL_ENCODE,
Cap::JSON_PARSE,
Cap::DESERIALIZE,
Cap::CRYPTO,
Cap::UNAUTHORIZED_ID,
Cap::DATA_EXFIL,
@ -275,6 +313,7 @@ mod tests {
Cap::FILE_IO,
Cap::HTML_ESCAPE,
Cap::FMT_STRING,
Cap::DESERIALIZE,
] {
let has_vuln = payloads_for(cap).iter().any(|p| !p.is_benign);
assert!(has_vuln, "{cap:?} must have at least one vuln payload");
@ -321,6 +360,7 @@ mod tests {
Cap::SSRF,
Cap::HTML_ESCAPE,
Cap::FMT_STRING,
Cap::DESERIALIZE,
];
for cap in caps {
for p in payloads_for(cap) {
@ -342,6 +382,7 @@ mod tests {
Cap::SSRF,
Cap::HTML_ESCAPE,
Cap::FMT_STRING,
Cap::DESERIALIZE,
];
for cap in caps {
for p in payloads_for(cap) {
@ -450,6 +491,7 @@ mod tests {
Cap::SSRF,
Cap::HTML_ESCAPE,
Cap::FMT_STRING,
Cap::DESERIALIZE,
];
for cap in caps {
for p in payloads_for(cap).iter().filter(|p| p.is_benign) {
@ -474,10 +516,23 @@ mod tests {
#[test]
fn back_compat_union_matches_registered_entry() {
// With one (cap, lang) entry per cap, the union must contain the
// same labels as the underlying slice (byte-identical verdict
// requirement, Phase 02 acceptance).
// For caps with one (cap, lang) entry only, the lang-agnostic
// union must contain the same labels as the underlying slice
// (byte-identical verdict requirement, Phase 02 acceptance).
// Phase 03 introduces multi-lang caps (DESERIALIZE), so single-
// entry caps are filtered separately from the union check.
use std::collections::HashMap;
let mut entries_by_cap: HashMap<u32, Vec<(Lang, &'static [CuratedPayload])>> =
HashMap::new();
for &(cap, lang, slice) in CORPUS.entries {
entries_by_cap.entry(cap.bits()).or_default().push((lang, slice));
}
for (cap_bits, langs) in &entries_by_cap {
if langs.len() != 1 {
continue;
}
let (lang, slice) = langs[0];
let cap = Cap::from_bits_truncate(*cap_bits);
let union = payloads_for(cap);
assert_eq!(
union.len(),
@ -490,4 +545,49 @@ mod tests {
}
}
}
#[test]
fn deserialize_has_per_lang_slices_for_phase_03() {
// Phase 03 (Track J.1) acceptance: DESERIALIZE registers
// payloads in Java / Python / PHP / Ruby and the lang-aware
// lookup never returns empty for any of them.
for lang in [Lang::Java, Lang::Python, Lang::Php, Lang::Ruby] {
assert!(
!payloads_for_lang(Cap::DESERIALIZE, lang).is_empty(),
"DESERIALIZE must have at least one payload for {lang:?}",
);
}
// Rust / C / Go / JS / TS / Cpp not yet covered — those slices
// remain empty.
for lang in [
Lang::Rust,
Lang::C,
Lang::Cpp,
Lang::Go,
Lang::JavaScript,
Lang::TypeScript,
] {
assert!(
payloads_for_lang(Cap::DESERIALIZE, lang).is_empty(),
"DESERIALIZE has unexpected payloads for {lang:?}",
);
}
}
#[test]
fn deserialize_payloads_pair_benign_controls_per_lang() {
// The lang-aware resolver must find the paired benign control
// inside its own slice — proves the Phase-03 deferred-fix
// wiring (see audit_benign_label_uniqueness_runtime).
for lang in [Lang::Java, Lang::Python, Lang::Php, Lang::Ruby] {
let slice = payloads_for_lang(Cap::DESERIALIZE, lang);
let vuln = slice
.iter()
.find(|p| !p.is_benign)
.expect("each lang must have a vuln payload");
let resolved = super::resolve_benign_control_lang(vuln, Cap::DESERIALIZE, lang)
.expect("lang-aware benign control must resolve");
assert!(resolved.is_benign);
}
}
}