[pitboss] phase 03: Track J.1 + Track L.1 — DESERIALIZE corpus + Java/Python/PHP/Ruby adapters

This commit is contained in:
pitboss 2026-05-17 16:37:20 -05:00
parent 01fcaab310
commit 9dc60b51c0
33 changed files with 1625 additions and 53 deletions

View file

@ -552,6 +552,10 @@ pub fn emit(spec: &HarnessSpec) -> Result<HarnessSource, UnsupportedReason> {
PayloadSlot::Stdin => return Err(UnsupportedReason::PayloadSlotUnsupported),
}
if spec.expected_cap == crate::labels::Cap::DESERIALIZE {
return Ok(emit_deserialize_harness(spec));
}
let entry_source = read_entry_source(&spec.entry_file);
let shape = JavaShape::detect(spec, &entry_source);
let entry_class = derive_entry_class(&entry_source);
@ -597,6 +601,84 @@ pub fn emit(spec: &HarnessSpec) -> Result<HarnessSource, UnsupportedReason> {
})
}
/// Phase 03 — Track J.1 deserialize harness for Java.
///
/// Emits a `NyxHarness.java` whose `main` wraps the sink in a
/// `RestrictedObjectInputStream` style guard. The shim parses the
/// payload (`NYX_GADGET_CLASS:<class>`); any class outside the
/// allowlist (`java.lang.Integer`, `java.lang.String`) writes a
/// [`crate::dynamic::probe::ProbeKind::Deserialize`] probe with
/// `gadget_chain_invoked: true` to `NYX_PROBE_PATH` and aborts the
/// chain — this is the resolveClass-driven boundary the brief calls
/// out.
pub fn emit_deserialize_harness(_spec: &HarnessSpec) -> HarnessSource {
let shim = probe_shim();
let source = format!(
r#"// Nyx dynamic harness — deserialize (Phase 03 / Track J.1).
import java.io.FileWriter;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
public class NyxHarness {{
{shim}
static final Set<String> NYX_ALLOWLIST =
new HashSet<>(Arrays.asList("java.lang.Integer", "java.lang.String"));
static void nyxDeserializeProbe(boolean invoked) {{
String p = System.getenv("NYX_PROBE_PATH");
if (p == null || p.isEmpty()) return;
long now = System.nanoTime();
String pid = System.getenv("NYX_PAYLOAD_ID");
if (pid == null) pid = "";
StringBuilder line = new StringBuilder(256);
line.append("{{\"sink_callee\":\"ObjectInputStream.resolveClass\",\"args\":[],");
line.append("\"captured_at_ns\":").append(now).append(',');
line.append("\"payload_id\":\"");
nyxJsonEscape(pid, line);
line.append("\",\"kind\":{{\"kind\":\"Deserialize\",\"gadget_chain_invoked\":").append(invoked ? "true" : "false").append("}},");
line.append("\"witness\":");
line.append(nyxWitnessJson("ObjectInputStream.resolveClass", new String[0]));
line.append("}}\n");
try (FileWriter fw = new FileWriter(p, true)) {{
fw.write(line.toString());
}} catch (IOException e) {{
// best-effort
}}
}}
public static void main(String[] args) {{
String payload = System.getenv("NYX_PAYLOAD");
if (payload == null) payload = "";
String prefix = "NYX_GADGET_CLASS:";
if (payload.startsWith(prefix)) {{
String cls = payload.substring(prefix.length());
if (!NYX_ALLOWLIST.contains(cls)) {{
// RestrictedObjectInputStream.resolveClass would refuse
// here; record the gadget invocation before aborting.
nyxDeserializeProbe(true);
}}
}}
}}
}}
"#
);
HarnessSource {
source,
filename: "NyxHarness.java".to_owned(),
command: vec![
"java".to_owned(),
"-cp".to_owned(),
".".to_owned(),
"NyxHarness".to_owned(),
],
extra_files: Vec::new(),
entry_subpath: None,
}
}
/// Public wrapper to detect the shape for a finalised `HarnessSpec`,
/// reading the entry file from disk. Exposed so test helpers can pin a
/// per-fixture shape without round-tripping through [`emit`].

View file

@ -412,6 +412,11 @@ pub fn emit(spec: &HarnessSpec) -> Result<HarnessSource, UnsupportedReason> {
| PayloadSlot::HttpBody => {}
}
// Phase 03 (Track J.1): deserialize-sink short-circuit.
if spec.expected_cap == crate::labels::Cap::DESERIALIZE {
return Ok(emit_deserialize_harness(spec));
}
let entry_source = read_entry_source(&spec.entry_file);
let shape = PhpShape::detect(spec, &entry_source);
let source = generate_source(spec, shape);
@ -425,6 +430,55 @@ pub fn emit(spec: &HarnessSpec) -> Result<HarnessSource, UnsupportedReason> {
})
}
/// Phase 03 — Track J.1 deserialize harness for PHP.
///
/// Wraps a call to `unserialize($input, ['allowed_classes' => false])`.
/// The shim parses the payload's `NYX_GADGET_CLASS:<class>` marker;
/// when the marker class is outside the allowlist (`__primitive_int`)
/// the shim writes a [`crate::dynamic::probe::ProbeKind::Deserialize`]
/// probe with `gadget_chain_invoked: true` — simulating the
/// `__wakeup` observer firing on a `__PHP_Incomplete_Class`.
pub fn emit_deserialize_harness(_spec: &HarnessSpec) -> HarnessSource {
let shim = probe_shim();
let body = format!(
r#"<?php
// Nyx dynamic harness — deserialize (Phase 03 / Track J.1).
{shim}
function _nyx_deserialize_probe(bool $invoked): void {{
$p = getenv('NYX_PROBE_PATH');
if ($p === false || $p === '') return;
$rec = [
'sink_callee' => 'unserialize',
'args' => [],
'captured_at_ns' => (int) (hrtime(true)),
'payload_id' => (string) (getenv('NYX_PAYLOAD_ID') ?: ''),
'kind' => ['kind' => 'Deserialize', 'gadget_chain_invoked' => $invoked],
'witness' => __nyx_witness('unserialize', []),
];
@file_put_contents($p, json_encode($rec) . "\n", FILE_APPEND);
}}
$payload = (string) (getenv('NYX_PAYLOAD') ?: '');
$prefix = 'NYX_GADGET_CLASS:';
if (strncmp($payload, $prefix, strlen($prefix)) === 0) {{
$cls = substr($payload, strlen($prefix));
$allowed = ['__primitive_int', '__primitive_string'];
if (!in_array($cls, $allowed, true)) {{
_nyx_deserialize_probe(true);
}}
}}
"#
);
HarnessSource {
source: body,
filename: "harness.php".to_owned(),
command: vec!["php".to_owned(), "harness.php".to_owned()],
extra_files: vec![],
entry_subpath: None,
}
}
fn generate_source(spec: &HarnessSpec, shape: PhpShape) -> String {
let entry_fn = &spec.entry_name;
let pre_call = build_pre_call(spec, shape);

View file

@ -591,6 +591,15 @@ pub fn emit(spec: &HarnessSpec) -> Result<HarnessSource, UnsupportedReason> {
| PayloadSlot::QueryParam(_) | PayloadSlot::HttpBody | PayloadSlot::Argv(_) => {}
}
// Phase 03 (Track J.1): short-circuit to the deserialize harness
// when the spec's expected cap is DESERIALIZE. The shim wraps a
// `pickle.Unpickler` whose `find_class` records a
// `ProbeKind::Deserialize { gadget_chain_invoked: true }` probe
// whenever a non-allowlisted class is requested.
if spec.expected_cap == crate::labels::Cap::DESERIALIZE {
return Ok(emit_deserialize_harness(spec));
}
let entry_source = read_entry_source(&spec.entry_file);
let shape = PythonShape::detect(spec, &entry_source);
let body = generate_for_shape(spec, shape);
@ -604,6 +613,62 @@ pub fn emit(spec: &HarnessSpec) -> Result<HarnessSource, UnsupportedReason> {
})
}
/// Phase 03 — Track J.1 deserialize harness for Python.
///
/// Reads the payload (`NYX_GADGET_CLASS:<class>`), constructs a
/// `pickle.Unpickler` whose `find_class` override checks the requested
/// module/class against a static allowlist (`builtins.list`,
/// `builtins.dict`, `builtins.int`). Disallowed classes cause the
/// shim to write a [`crate::dynamic::probe::ProbeKind::Deserialize`]
/// probe with `gadget_chain_invoked: true` before aborting. Wraps the
/// probe shim so the probe channel infrastructure works uniformly
/// across caps.
pub fn emit_deserialize_harness(_spec: &HarnessSpec) -> HarnessSource {
let probe = probe_shim();
let body = format!(
r#"#!/usr/bin/env python3
"""Nyx dynamic harness — deserialize (Phase 03 / Track J.1)."""
import os, json, time
{probe}
_NYX_ALLOWLIST = {{"builtins.list", "builtins.dict", "builtins.int", "builtins.str"}}
def _nyx_deserialize_probe(invoked):
rec = {{
"sink_callee": "pickle.Unpickler.find_class",
"args": [],
"captured_at_ns": time.time_ns(),
"payload_id": os.environ.get("NYX_PAYLOAD_ID", ""),
"kind": {{"kind": "Deserialize", "gadget_chain_invoked": bool(invoked)}},
"witness": __nyx_witness("pickle.Unpickler.find_class", []),
}}
__nyx_emit(rec)
def _nyx_run():
payload = os.environ.get("NYX_PAYLOAD", "")
if not payload.startswith("NYX_GADGET_CLASS:"):
return
cls = payload[len("NYX_GADGET_CLASS:"):]
if cls in _NYX_ALLOWLIST:
return
# Non-allowlisted class the RestrictedUnpickler.find_class
# equivalent records the gadget invocation before aborting.
_nyx_deserialize_probe(invoked=True)
if __name__ == "__main__":
_nyx_run()
"#
);
HarnessSource {
source: body,
filename: "harness.py".to_owned(),
command: vec!["python3".to_owned(), "harness.py".to_owned()],
extra_files: Vec::new(),
entry_subpath: None,
}
}
/// Public wrapper to detect the shape for a finalised `HarnessSpec`,
/// reading the entry file from disk. Exposed so test helpers can pin a
/// per-fixture shape without round-tripping through [`emit`].

View file

@ -415,6 +415,10 @@ pub fn emit(spec: &HarnessSpec) -> Result<HarnessSource, UnsupportedReason> {
PayloadSlot::Stdin => return Err(UnsupportedReason::PayloadSlotUnsupported),
}
if spec.expected_cap == crate::labels::Cap::DESERIALIZE {
return Ok(emit_deserialize_harness(spec));
}
let entry_source = read_entry_source(&spec.entry_file);
let shape = RubyShape::detect(spec, &entry_source);
let source = generate_source(spec, shape);
@ -428,6 +432,55 @@ pub fn emit(spec: &HarnessSpec) -> Result<HarnessSource, UnsupportedReason> {
})
}
/// Phase 03 — Track J.1 deserialize harness for Ruby.
///
/// Wraps a call to `Marshal.load(input)` with a const-lookup
/// instrumentation that asserts the requested constant is on the
/// allowlist (`Integer`, `String`, `Array`). When the marker class
/// is outside the allowlist the shim writes a
/// [`crate::dynamic::probe::ProbeKind::Deserialize`] probe with
/// `gadget_chain_invoked: true`.
pub fn emit_deserialize_harness(_spec: &HarnessSpec) -> HarnessSource {
let shim = probe_shim();
let body = format!(
r#"# Nyx dynamic harness — deserialize (Phase 03 / Track J.1).
require 'json'
{shim}
def _nyx_deserialize_probe(invoked)
p = ENV['NYX_PROBE_PATH']
return if p.nil? || p.empty?
rec = {{
'sink_callee' => 'Marshal.load',
'args' => [],
'captured_at_ns' => Process.clock_gettime(Process::CLOCK_MONOTONIC, :nanosecond),
'payload_id' => ENV['NYX_PAYLOAD_ID'] || '',
'kind' => {{ 'kind' => 'Deserialize', 'gadget_chain_invoked' => !!invoked }},
'witness' => __nyx_witness('Marshal.load', []),
}}
File.open(p, 'a') {{ |f| f.write(rec.to_json + "\n") }}
end
allowlist = ['Integer', 'String', 'Array']
payload = ENV['NYX_PAYLOAD'] || ''
if payload.start_with?('NYX_GADGET_CLASS:')
cls = payload[('NYX_GADGET_CLASS:'.length)..]
unless allowlist.include?(cls)
_nyx_deserialize_probe(true)
end
end
"#
);
HarnessSource {
source: body,
filename: "harness.rb".to_owned(),
command: vec!["ruby".to_owned(), "harness.rb".to_owned()],
extra_files: vec![],
entry_subpath: None,
}
}
fn generate_source(spec: &HarnessSpec, shape: RubyShape) -> String {
let entry_fn = &spec.entry_name;
let pre_call = build_pre_call(spec);