[pitboss/grind] deferred session-0010 (20260520T233019Z-6958)

This commit is contained in:
pitboss 2026-05-21 05:48:48 -05:00
parent 38cc0ce05f
commit 280121607e
12 changed files with 483 additions and 28 deletions

View file

@ -848,9 +848,14 @@ mod tests {
fn xxe_payloads_pair_benign_controls_per_lang() {
for lang in [Lang::Java, Lang::Python, Lang::Php, Lang::Ruby, Lang::Go] {
let slice = payloads_for_lang(Cap::XXE, lang);
// Skip OOB-nonce variants: they self-confirm via the per-finding
// listener callback (see `xxe-<lang>-oob-nonce` in
// `src/dynamic/corpus/xxe/<lang>.rs`) and carry no paired benign
// control because a benign URL structurally cannot hit the nonce
// path. The doctype-entity vuln is the one that pairs.
let vuln = slice
.iter()
.find(|p| !p.is_benign)
.find(|p| !p.is_benign && !p.oob_nonce_slot)
.expect("each lang must have an XXE vuln payload");
let resolved = super::resolve_benign_control_lang(vuln, Cap::XXE, lang)
.expect("lang-aware benign control must resolve");

View file

@ -9,11 +9,38 @@
//! Benign control: a well-formed XML document with no doctype, so the
//! decoder has no entity to resolve and the shim writes
//! `entity_expanded: false`.
//!
//! OOB-nonce variant (added 2026-05-21): when the runner attaches an
//! [`crate::dynamic::oob::OobListener`] the harness's
//! `nyxBuildXxeDocument` helper performs a real `http.Client.Get`
//! against the loopback URL so the listener records the per-finding
//! nonce. Ordered first so iteration exercises OOB before the
//! doctype-entity vuln triggers and short-circuits.
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
use crate::dynamic::oracle::ProbePredicate;
pub const PAYLOADS: &[CuratedPayload] = &[
CuratedPayload {
bytes: b"",
label: "xxe-go-oob-nonce",
oracle: Oracle::OobCallback { host: "127.0.0.1" },
is_benign: false,
provenance: PayloadProvenance::Curated,
since_corpus_version: 15,
deprecated_at_corpus_version: None,
fixture_paths: &[
"tests/dynamic_fixtures/xxe/go/vuln.go",
],
oob_nonce_slot: true,
probe_predicates: &[],
benign_control: None,
no_benign_control_rationale: Some(
"OOB-nonce XXE payload self-confirms via the per-finding listener \
callback when the Go harness performs the loopback GET before \
building the DTD; no benign URL can hit the nonce path.",
),
},
CuratedPayload {
bytes: br#"<?xml version="1.0"?>
<!DOCTYPE data [

View file

@ -10,11 +10,39 @@
//! declaration so the parser has no entity to resolve. The harness's
//! instrumented parser writes `entity_expanded: false`, the oracle
//! does not fire, and the differential rule (§4.1) stays clean.
//!
//! OOB-nonce variant (added 2026-05-21): when the runner attaches an
//! [`crate::dynamic::oob::OobListener`] the harness's `EntityResolver`
//! hook opens an `HttpURLConnection` via `URL.openConnection()` and reads
//! `getInputStream()` against the loopback URL so the listener records the
//! per-finding nonce.
//! Ordered first so the runner exercises the OOB observation path before
//! the doctype-entity vuln below triggers and short-circuits iteration;
//! runs without a listener skip cleanly (runner `oob_nonce_slot` branch).
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
use crate::dynamic::oracle::ProbePredicate;
pub const PAYLOADS: &[CuratedPayload] = &[
CuratedPayload {
bytes: b"",
label: "xxe-java-oob-nonce",
oracle: Oracle::OobCallback { host: "127.0.0.1" },
is_benign: false,
provenance: PayloadProvenance::Curated,
since_corpus_version: 15,
deprecated_at_corpus_version: None,
fixture_paths: &[
"tests/dynamic_fixtures/xxe/java/Vuln.java",
],
oob_nonce_slot: true,
probe_predicates: &[],
benign_control: None,
no_benign_control_rationale: Some(
"OOB-nonce XXE payload self-confirms via the per-finding listener \
callback when DocumentBuilder's EntityResolver fetches the \
loopback URL; no benign URL can hit the nonce path.",
),
},
CuratedPayload {
bytes: br#"<?xml version="1.0"?>
<!DOCTYPE data [

View file

@ -9,11 +9,38 @@
//! Benign control: a well-formed XML document with no doctype, so
//! the parser has no entity to resolve and the shim writes
//! `entity_expanded: false`.
//!
//! OOB-nonce variant (added 2026-05-21): when the runner attaches an
//! [`crate::dynamic::oob::OobListener`] the harness's
//! `libxml_set_external_entity_loader` callback performs a real
//! `file_get_contents` against the loopback URL so the listener records
//! the per-finding nonce. Ordered first so iteration exercises OOB
//! before the doctype-entity vuln triggers and short-circuits.
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
use crate::dynamic::oracle::ProbePredicate;
pub const PAYLOADS: &[CuratedPayload] = &[
CuratedPayload {
bytes: b"",
label: "xxe-php-oob-nonce",
oracle: Oracle::OobCallback { host: "127.0.0.1" },
is_benign: false,
provenance: PayloadProvenance::Curated,
since_corpus_version: 15,
deprecated_at_corpus_version: None,
fixture_paths: &[
"tests/dynamic_fixtures/xxe/php/vuln.php",
],
oob_nonce_slot: true,
probe_predicates: &[],
benign_control: None,
no_benign_control_rationale: Some(
"OOB-nonce XXE payload self-confirms via the per-finding listener \
callback when libxml's external-entity loader fetches the \
loopback URL; no benign URL can hit the nonce path.",
),
},
CuratedPayload {
bytes: br#"<?xml version="1.0"?>
<!DOCTYPE data [

View file

@ -9,11 +9,49 @@
//! Benign control: a well-formed XML document with no doctype, so the
//! parser has nothing to resolve and the shim writes
//! `entity_expanded: false`.
//!
//! OOB-nonce variant (added 2026-05-21): when the runner attaches an
//! [`crate::dynamic::oob::OobListener`], the runner materialises this
//! payload's bytes as a loopback URL and the Python harness wraps the
//! URL into `<!ENTITY xxe SYSTEM "URL">`. Expat's external-entity hook
//! performs a real `urllib.request.urlopen` against the URL so the
//! listener records the per-finding nonce. Ordered first so the runner
//! exercises the OOB observation path before the doctype-entity vuln
//! triggers and short-circuits the iteration; runs without a listener
//! skip cleanly (the runner's `oob_nonce_slot` branch `continue`s when
//! [`crate::dynamic::sandbox::SandboxOptions::oob_listener`] is None).
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
use crate::dynamic::oracle::ProbePredicate;
pub const PAYLOADS: &[CuratedPayload] = &[
// OOB-nonce XXE variant. Ordered first so the harness exercises the
// OOB observation path before the doctype-entity vuln below triggers
// and breaks iteration. Self-confirming via [`Oracle::OobCallback`];
// no paired benign control because a benign URL can never hit the
// per-finding nonce path. Runs only when an [`OobListener`] is
// attached; the runner's `oob_nonce_slot` branch skips otherwise.
CuratedPayload {
bytes: b"",
label: "xxe-python-oob-nonce",
oracle: Oracle::OobCallback { host: "127.0.0.1" },
is_benign: false,
provenance: PayloadProvenance::Curated,
since_corpus_version: 15,
deprecated_at_corpus_version: None,
fixture_paths: &[
"tests/dynamic_fixtures/xxe/python/vuln.py",
],
oob_nonce_slot: true,
probe_predicates: &[],
benign_control: None,
no_benign_control_rationale: Some(
"OOB-nonce XXE payload self-confirms via the per-finding listener \
callback when expat's external-entity hook fetches the loopback \
URL; no benign URL can hit the nonce path so no paired control \
is meaningful.",
),
},
CuratedPayload {
bytes: br#"<?xml version="1.0"?>
<!DOCTYPE data [

View file

@ -8,11 +8,38 @@
//! Benign control: a well-formed XML document with no doctype, so
//! the parser has no entity to resolve and the shim writes
//! `entity_expanded: false`.
//!
//! OOB-nonce variant (added 2026-05-21): when the runner attaches an
//! [`crate::dynamic::oob::OobListener`] the harness's
//! `_nyx_build_xxe_document` helper performs a real `Net::HTTP.start`
//! against the loopback URL so the listener records the per-finding
//! nonce. Ordered first so iteration exercises OOB before the
//! doctype-entity vuln triggers and short-circuits.
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
use crate::dynamic::oracle::ProbePredicate;
pub const PAYLOADS: &[CuratedPayload] = &[
CuratedPayload {
bytes: b"",
label: "xxe-ruby-oob-nonce",
oracle: Oracle::OobCallback { host: "127.0.0.1" },
is_benign: false,
provenance: PayloadProvenance::Curated,
since_corpus_version: 15,
deprecated_at_corpus_version: None,
fixture_paths: &[
"tests/dynamic_fixtures/xxe/ruby/vuln.rb",
],
oob_nonce_slot: true,
probe_predicates: &[],
benign_control: None,
no_benign_control_rationale: Some(
"OOB-nonce XXE payload self-confirms via the per-finding listener \
callback when the Ruby harness performs the loopback GET before \
building the DTD; no benign URL can hit the nonce path.",
),
},
CuratedPayload {
bytes: br#"<?xml version="1.0"?>
<!DOCTYPE data [

View file

@ -645,6 +645,7 @@ import (
"encoding/xml"
"fmt"
"io"
"net/http"
"os"
"os/signal"
"strings"
@ -654,6 +655,33 @@ import (
{shim}
// nyxBuildXxeDocument builds the XML document fed into the decoder.
// Two shapes (Phase 05 OOB closure, 2026-05-21):
// - URL-form NYX_PAYLOAD (`http://...` / `https://...`): treat as
// the SYSTEM URL of an external entity and wrap into a canonical
// XXE DTD. When the URL points at loopback, perform a real GET so
// the OOB listener observes the per-finding nonce callback.
// - Anything else: treat as the full XML document (existing Phase 05
// shape).
func nyxBuildXxeDocument(payload string) string {{
if strings.HasPrefix(payload, "http://") || strings.HasPrefix(payload, "https://") {{
if strings.HasPrefix(payload, "http://127.0.0.1") ||
strings.HasPrefix(payload, "http://host-gateway") ||
strings.HasPrefix(payload, "http://localhost") {{
client := &http.Client{{Timeout: 2 * time.Second}}
if resp, err := client.Get(payload); err == nil {{
_, _ = io.Copy(io.Discard, resp.Body)
resp.Body.Close()
}}
}}
escaped := strings.ReplaceAll(payload, "&", "&amp;")
escaped = strings.ReplaceAll(escaped, "\"", "&quot;")
escaped = strings.ReplaceAll(escaped, "<", "&lt;")
return "<?xml version=\"1.0\"?>\n<!DOCTYPE data [\n <!ENTITY xxe SYSTEM \"" + escaped + "\">\n]>\n<data>&xxe;</data>"
}}
return payload
}}
func nyxXmlParse(payload string) bool {{
// Real parser hook: walk Go's encoding/xml.Decoder token stream.
// The decoder parses <!DOCTYPE name [<!ENTITY x SYSTEM "uri">]>
@ -664,7 +692,8 @@ func nyxXmlParse(payload string) bool {{
// resolution boundary firing.
expanded := false
sawSystem := false
decoder := xml.NewDecoder(strings.NewReader(payload))
doc := nyxBuildXxeDocument(payload)
decoder := xml.NewDecoder(strings.NewReader(doc))
for {{
tok, err := decoder.Token()
if err != nil {{

View file

@ -952,6 +952,8 @@ pub fn emit_xxe_harness(_spec: &HarnessSpec) -> HarnessSource {
import java.io.FileWriter;
import java.io.IOException;
import java.io.StringReader;
import java.net.HttpURLConnection;
import java.net.URL;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.xml.sax.EntityResolver;
@ -963,6 +965,21 @@ public class NyxHarness {{
static boolean nyxLastExpanded = false;
// Build the XML document fed into the parser. Two shapes (Phase 05
// OOB closure, 2026-05-21):
//   - URL-form NYX_PAYLOAD (`http://...` / `https://...`): treat as
//     the SYSTEM URL of an external entity and wrap into a canonical
//     XXE DTD. The entity-resolver hook will perform the loopback
//     GET so the OOB listener observes the per-finding nonce.
//   - Anything else: treat as the full XML document (existing shape)
//     and return it unchanged.
//
// XML does not expand references inside a SYSTEM literal, so the
// resolver receives the literal verbatim as systemId. XML-escaping the
// URL would therefore make the resolver fetch a URL containing a
// literal `&amp;`. Only `"` and `<` are rewritten, using URI
// percent-encoding, so the quoted literal stays well-formed and the
// fetched URL is unchanged.
static String nyxBuildXxeDocument(String payload) {{
    boolean urlForm = payload.startsWith("http://") || payload.startsWith("https://");
    if (!urlForm) {{
        return payload;
    }}
    String literal = payload.replace("\"", "%22").replace("<", "%3C");
    return "<?xml version=\"1.0\"?>\n<!DOCTYPE data [\n <!ENTITY xxe SYSTEM \"" + literal + "\">\n]>\n<data>&xxe;</data>";
}}
static void nyxXmlParse(String payload) {{
nyxLastExpanded = false;
try {{
@ -971,20 +988,36 @@ public class NyxHarness {{
// entity resolution enabled" target: leave the factory at
// default settings (which historically permit doctype +
// external entities) and rely on the EntityResolver hook
// to short-circuit the actual fetch.
// to control fetch behaviour.
DocumentBuilder db = dbf.newDocumentBuilder();
db.setEntityResolver(new EntityResolver() {{
public InputSource resolveEntity(String publicId, String systemId) {{
// Real parser hook: fired by the SAX/DOM parser for
// every `<!ENTITY x SYSTEM "...">` reference. Mark
// expanded and return an empty replacement so we
// never actually fetch the SYSTEM resource.
// expanded. When the SYSTEM URL points at loopback
// HTTP, perform a real GET so the OOB listener can
// observe the callback (Phase 05 OOB closure). Any
// other scheme returns an empty replacement (no fetch).
nyxLastExpanded = true;
if (systemId != null && (systemId.startsWith("http://127.0.0.1")
|| systemId.startsWith("http://host-gateway")
|| systemId.startsWith("http://localhost"))) {{
try {{
HttpURLConnection conn = (HttpURLConnection) new URL(systemId).openConnection();
conn.setConnectTimeout(2000);
conn.setReadTimeout(2000);
conn.getInputStream().close();
conn.disconnect();
}} catch (Exception ignored) {{
// best-effort OOB fetch
}}
}}
return new InputSource(new StringReader(""));
}}
}});
try {{
db.parse(new InputSource(new StringReader(payload)));
String doc = nyxBuildXxeDocument(payload);
db.parse(new InputSource(new StringReader(doc)));
}} catch (SAXException | IOException e) {{
// Malformed XML still counts as a parser invocation;
// expanded flag reflects whatever the hook saw before

View file

@ -685,13 +685,38 @@ pub fn emit_xxe_harness(_spec: &HarnessSpec) -> HarnessSource {
// Nyx dynamic harness — XXE simplexml_load_string (Phase 05 / Track J.3).
{shim}
// Build the XML document fed into the parser. Two shapes (Phase 05 OOB
// closure, 2026-05-21):
//   - URL-form NYX_PAYLOAD (`http://...` / `https://...`): treat as the
//     SYSTEM URL of an external entity and wrap into a canonical XXE
//     DTD. The external-entity loader hook below performs the loopback
//     GET so the OOB listener observes the per-finding nonce.
//   - Anything else: treat as the full XML document (existing shape)
//     and return it unchanged.
//
// XML does not expand references inside a SYSTEM literal, so the loader
// receives the literal verbatim. XML-escaping the URL would make the
// loader fetch a URL containing a literal `&amp;`. Only `"` and `<` are
// rewritten, using URI percent-encoding, so the quoted literal stays
// well-formed and the fetched URL is unchanged.
function _nyx_build_xxe_document(string $payload): string {{
    $is_url = str_starts_with($payload, 'http://') || str_starts_with($payload, 'https://');
    if (!$is_url) {{
        return $payload;
    }}
    $literal = str_replace(['"', '<'], ['%22', '%3C'], $payload);
    return "<?xml version=\"1.0\"?>\n<!DOCTYPE data [\n <!ENTITY xxe SYSTEM \"" . $literal . "\">\n]>\n<data>&xxe;</data>";
}}
function _nyx_libxml_parse(string $payload): bool {{
$expanded = false;
// Real parser hook: libxml calls this for every <!ENTITY name SYSTEM "uri">
// reference resolved in the document. We mark expanded and
// return null so the parser does not actually fetch the resource.
// reference resolved in the document. Mark expanded. When the
// SYSTEM URL points at loopback HTTP, perform a real fetch so the
// OOB listener observes the callback (Phase 05 OOB closure); other
// schemes return null so the parser substitutes empty.
libxml_set_external_entity_loader(function ($public, $system, $context) use (&$expanded) {{
$expanded = true;
if (is_string($system) && (
str_starts_with($system, 'http://127.0.0.1')
|| str_starts_with($system, 'http://host-gateway')
|| str_starts_with($system, 'http://localhost')
)) {{
$ctx = stream_context_create(['http' => ['timeout' => 2, 'ignore_errors' => true]]);
@file_get_contents($system, false, $ctx);
}}
return null;
}});
$prev_errors = libxml_use_internal_errors(true);
@ -699,7 +724,8 @@ function _nyx_libxml_parse(string $payload): bool {{
// the resolved body) and LIBXML_DTDLOAD allows the parser to load
// the DTD declarations — the combination real XXE-vulnerable PHP
// code passes to `simplexml_load_string`.
@simplexml_load_string($payload, 'SimpleXMLElement', LIBXML_NOENT | LIBXML_DTDLOAD);
$doc = _nyx_build_xxe_document($payload);
@simplexml_load_string($doc, 'SimpleXMLElement', LIBXML_NOENT | LIBXML_DTDLOAD);
libxml_clear_errors();
libxml_use_internal_errors($prev_errors);
// Reset the loader to default so nothing leaks across runs.

View file

@ -1452,10 +1452,32 @@ pub fn emit_xxe_harness(_spec: &HarnessSpec) -> HarnessSource {
r#"#!/usr/bin/env python3
"""Nyx dynamic harness — XXE xml.parsers.expat (Phase 05 / Track J.3)."""
import os, json, sys, time
import urllib.request as _nyx_urlreq
import xml.parsers.expat as _nyx_expat
{probe}
# Build the XML document fed into expat. Two shapes:
# - URL-form NYX_PAYLOAD (`http://...` or `https://...`): treat as the
# SYSTEM URL of an external entity and wrap into a canonical XXE DTD.
# The OOB-nonce payload variant emits a loopback URL here so the
# external-ref hook performs a real HTTP GET that the OOB listener
# observes (Phase 05 OOB closure, 2026-05-21).
# - Anything else: treat NYX_PAYLOAD as the full XML document
# (existing Phase 05 shape).
def _nyx_xxe_document(payload):
p = payload if isinstance(payload, str) else payload.decode("utf-8", "replace")
if p.startswith("http://") or p.startswith("https://"):
url = p.replace("&", "&amp;").replace('"', "&quot;").replace("<", "&lt;")
return (
"<?xml version=\"1.0\"?>\n"
"<!DOCTYPE data [\n"
" <!ENTITY xxe SYSTEM \"" + url + "\">\n"
"]>\n"
"<data>&xxe;</data>"
)
return p
def _nyx_xxe_parse(payload):
expanded = [False]
parser = _nyx_expat.ParserCreate()
@ -1469,9 +1491,18 @@ def _nyx_xxe_parse(payload):
def _external_ref(context, base, system_id, public_id):
# Real parser hook: fired by expat for every `<!ENTITY x SYSTEM "...">`
# reference inside element bodies / DTD. Mark expanded and return an
# empty replacement so we never actually fetch the SYSTEM resource.
# reference inside element bodies / DTD. Mark expanded. When the
# SYSTEM URL points at loopback HTTP, perform a real GET so the OOB
# listener can observe the callback (Phase 05 OOB closure). Any
# other scheme returns an empty replacement (no fetch).
expanded[0] = True
if system_id and (system_id.startswith("http://127.0.0.1")
or system_id.startswith("http://host-gateway")
or system_id.startswith("http://localhost")):
try:
_nyx_urlreq.urlopen(system_id, timeout=2).read()
except Exception:
pass
sub = parser.ExternalEntityParserCreate(context, "utf-8")
try:
sub.Parse("", 1)
@ -1480,9 +1511,9 @@ def _nyx_xxe_parse(payload):
return 1
parser.ExternalEntityRefHandler = _external_ref
payload_bytes = payload.encode("utf-8", "replace") if isinstance(payload, str) else payload
doc = _nyx_xxe_document(payload)
try:
parser.Parse(payload_bytes, 1)
parser.Parse(doc.encode("utf-8", "replace"), 1)
except _nyx_expat.ExpatError:
# Malformed XML still counts as a parser invocation; expanded
# flag reflects whatever the hook saw before the error.

View file

@ -985,20 +985,50 @@ STDOUT.flush
pub fn emit_xxe_harness(_spec: &HarnessSpec) -> HarnessSource {
let shim = probe_shim();
let body = format!(
r#"# Nyx dynamic harness — XXE REXML (Phase 05 / Track J.3).
r##"# Nyx dynamic harness — XXE REXML (Phase 05 / Track J.3).
require 'json'
require 'net/http'
require 'rexml/document'
require 'stringio'
require 'uri'
{shim}
# Build the XML document fed into REXML. Two shapes (Phase 05 OOB
# closure, 2026-05-21):
#   - URL-form NYX_PAYLOAD (`http://...` / `https://...`): treat as the
#     SYSTEM URL of an external entity and wrap into a canonical XXE
#     DTD. When the URL starts with one of the loopback prefixes below,
#     perform a real GET (2s timeouts, errors ignored) so the OOB
#     listener observes the per-finding nonce callback.
#   - Anything else: treat as the full XML document (existing shape)
#     and return it unchanged.
#
# XML does not expand references inside a SYSTEM literal, so the URL is
# not XML-escaped: `&amp;` would become part of the declared system
# identifier. Only `"` and `<` are rewritten, using URI
# percent-encoding, so the quoted literal stays well-formed.
def _nyx_build_xxe_document(payload)
  return payload unless payload.start_with?('http://', 'https://')

  if payload.start_with?('http://127.0.0.1', 'http://host-gateway', 'http://localhost')
    begin
      uri = URI.parse(payload)
      Net::HTTP.start(uri.host, uri.port, open_timeout: 2, read_timeout: 2) do |http|
        http.request_get(uri.request_uri)
      end
    rescue StandardError
      # best-effort OOB fetch
    end
  end
  literal = payload.gsub('"', '%22').gsub('<', '%3C')
  "<?xml version=\"1.0\"?>\n<!DOCTYPE data [\n <!ENTITY xxe SYSTEM \"" + literal + "\">\n]>\n<data>&xxe;</data>"
end
def _nyx_libxml_parse(payload)
# Real parser hook: REXML parses `<!ENTITY name SYSTEM "uri">` declarations
# into Entity objects on the doctype. Inspect the entities table to
# detect every external-entity reference the parser registered.
expanded = false
begin
doc = REXML::Document.new(payload)
doc = REXML::Document.new(_nyx_build_xxe_document(payload))
if doc.doctype
doc.doctype.entities.each_value do |ent|
s = ent.to_s
@ -1042,7 +1072,7 @@ _nyx_xxe_probe(payload, expanded)
STDOUT.puts '__NYX_SINK_HIT__'
STDOUT.puts JSON.generate({{"entity_expanded" => expanded}})
STDOUT.flush
"#
"##
);
HarnessSource {
source: body,

View file

@ -83,7 +83,12 @@ fn xxe_unsupported_caps_unchanged_for_other_langs() {
fn benign_control_resolves_within_lang_slice() {
for lang in LANGS {
let slice = payloads_for_lang(Cap::XXE, *lang);
let vuln = slice.iter().find(|p| !p.is_benign).unwrap();
// Skip the OOB-nonce variant — it self-confirms via
// [`Oracle::OobCallback`] and carries no paired benign control.
let vuln = slice
.iter()
.find(|p| !p.is_benign && !p.oob_nonce_slot)
.unwrap();
let resolved =
resolve_benign_control_lang(vuln, Cap::XXE, *lang).expect("paired control");
assert!(resolved.is_benign);
@ -96,7 +101,13 @@ fn benign_control_resolves_within_lang_slice() {
fn payload_oracle_carries_xxe_entity_expanded_predicate() {
for lang in LANGS {
let slice = payloads_for_lang(Cap::XXE, *lang);
let vuln = slice.iter().find(|p| !p.is_benign).unwrap();
// The doctype-entity vuln carries the XxeEntityExpanded predicate.
// The OOB-nonce variant uses [`Oracle::OobCallback`] and is exercised
// by the per-language `<lang>_xxe_oob_loopback_records_callback` tests
// instead.
let vuln = slice
.iter()
.find(|p| !p.is_benign && !p.oob_nonce_slot)
.unwrap();
match &vuln.oracle {
Oracle::SinkProbe { predicates } => {
assert!(
@ -117,10 +128,15 @@ fn vuln_payload_bytes_contain_doctype_entity_declaration() {
// The whole differential rule rests on the vuln payload carrying
// an `<!ENTITY … SYSTEM "…">` decl and the benign control NOT
// carrying one — pin both invariants so a future corpus tweak
// does not silently break the oracle.
// does not silently break the oracle. The OOB-nonce variant's
// `bytes` field is unused (the runner materialises a URL at call
// time and the harness wraps it into the DTD), so skip it here.
for lang in LANGS {
let slice = payloads_for_lang(Cap::XXE, *lang);
let vuln = slice.iter().find(|p| !p.is_benign).unwrap();
let vuln = slice
.iter()
.find(|p| !p.is_benign && !p.oob_nonce_slot)
.unwrap();
let benign = slice.iter().find(|p| p.is_benign).unwrap();
let vuln_text = std::str::from_utf8(vuln.bytes).unwrap();
let benign_text = std::str::from_utf8(benign.bytes).unwrap();
@ -429,16 +445,42 @@ mod e2e_phase_05 {
backend: SandboxBackend::Process,
..SandboxOptions::default()
};
match run_spec(&spec, &opts) {
Ok(outcome) => Some(outcome),
Err(RunError::BuildFailed { stderr, attempts }) => {
eprintln!(
"SKIP {lang:?} {fixture}: harness build failed after {attempts} attempts: {stderr}",
);
None
// JVM startup occasionally fails under heavy cross-binary nextest
// load with "Error occurred during initialization of VM: Properties
// init: Could not determine current working directory." This is a
// macOS getcwd() race under massive fork() churn, not a regression.
// Retry up to 3 times; the second attempt almost always succeeds.
for attempt in 0..3 {
match run_spec(&spec, &opts) {
Ok(outcome) => {
if is_jvm_cwd_flake(&outcome) && attempt < 2 {
eprintln!(
"RETRY {lang:?} {fixture}: JVM cwd flake on attempt {attempt}",
);
std::thread::sleep(std::time::Duration::from_millis(200));
continue;
}
return Some(outcome);
}
Err(RunError::BuildFailed { stderr, attempts }) => {
eprintln!(
"SKIP {lang:?} {fixture}: harness build failed after {attempts} attempts: {stderr}",
);
return None;
}
Err(e) => panic!("run_spec({lang:?} {fixture}) errored: {e:?}"),
}
Err(e) => panic!("run_spec({lang:?} {fixture}) errored: {e:?}"),
}
None
}
/// Reports whether any attempt in `outcome` printed the JVM start-up
/// failure "Could not determine current working directory" on stdout or
/// stderr.
///
/// Streams are decoded lossily: a single invalid UTF-8 byte anywhere in
/// the captured output must not hide the marker and defeat the retry.
fn is_jvm_cwd_flake(outcome: &RunOutcome) -> bool {
    const MARKER: &str = "Could not determine current working directory";

    fn has_marker(bytes: &[u8]) -> bool {
        String::from_utf8_lossy(bytes).contains(MARKER)
    }

    outcome
        .attempts
        .iter()
        .any(|a| has_marker(&a.outcome.stdout) || has_marker(&a.outcome.stderr))
}
#[test]
@ -510,4 +552,116 @@ mod e2e_phase_05 {
.expect("Confirmed run must carry a DifferentialOutcome");
assert_eq!(diff.verdict, DifferentialVerdict::Confirmed);
}
/// Runs `fixture` for `lang` with an
/// [`nyx_scanner::dynamic::oob::OobListener`] attached through
/// [`NetworkPolicy::OobOutbound`], so the `xxe-<lang>-oob-nonce` payload
/// is exercised (Phase 05 OOB-loopback observation).
///
/// Returns `None` (after printing a `SKIP` line) when the language
/// toolchain is missing or the harness build fails; panics on any other
/// `run_spec` error. A run that shows the JVM cwd flake is retried, up to
/// three attempts in total.
///
/// This covers only the observation half of the Phase 05 OOB closure; the
/// verdict-tier promotion (Confirmed → Confirmed+ProvenOob) is broader
/// runner rework tracked separately in `.pitboss/play/deferred.md`.
fn run_oob(lang: Lang, fixture: &str, entry_name: &str) -> Option<RunOutcome> {
    use nyx_scanner::dynamic::oob::OobListener;
    use nyx_scanner::dynamic::sandbox::NetworkPolicy;
    use std::sync::Arc;

    const MAX_ATTEMPTS: usize = 3;

    let bin = toolchain_for(lang);
    if !command_available(bin) {
        eprintln!("SKIP {lang:?} {fixture} (oob): missing toolchain {bin}");
        return None;
    }

    let _guard = FIXTURE_LOCK.lock().unwrap_or_else(|e| e.into_inner());
    let listener = Arc::new(OobListener::bind().expect("bind OOB listener on loopback"));
    let (mut spec, _tmp) = build_spec(lang, fixture, entry_name);

    // The non-OOB e2e tests use the same fixture, so the default spec_hash
    // would resolve to the same
    // `/tmp/nyx-harness/<spec_hash>/__nyx_probes.jsonl`; a distinct hash
    // keeps the two runs from clobbering each other's probe drains under
    // parallel nextest.
    let oob_hash = format!("{}-oob", spec.spec_hash);
    spec.finding_id = oob_hash.clone();
    spec.spec_hash = oob_hash;

    if let Lang::Java = lang {
        // Best-effort cleanup of any leftover Java workdir for this hash.
        let stale = std::path::Path::new("/tmp/nyx-harness").join(&spec.spec_hash);
        std::fs::remove_dir_all(stale).ok();
    }

    let opts = SandboxOptions {
        backend: SandboxBackend::Process,
        network_policy: NetworkPolicy::OobOutbound {
            listener: Arc::clone(&listener),
        },
        ..SandboxOptions::default()
    };

    for attempt in 0..MAX_ATTEMPTS {
        let outcome = match run_spec(&spec, &opts) {
            Ok(outcome) => outcome,
            Err(RunError::BuildFailed { stderr, attempts }) => {
                eprintln!(
                    "SKIP {lang:?} {fixture} (oob): build failed after {attempts}: {stderr}",
                );
                return None;
            }
            Err(e) => panic!("run_spec({lang:?} {fixture} oob) errored: {e:?}"),
        };

        // The final attempt is returned as-is, flake or not.
        let retries_left = attempt + 1 < MAX_ATTEMPTS;
        if !(retries_left && is_jvm_cwd_flake(&outcome)) {
            return Some(outcome);
        }

        eprintln!("RETRY {lang:?} {fixture} (oob): JVM cwd flake on attempt {attempt}");
        std::thread::sleep(std::time::Duration::from_millis(200));
    }
    None
}
/// Asserts that `outcome` contains an attempt whose payload label equals
/// `label` and that this attempt has `oob_callback_seen` set.
///
/// # Panics
///
/// Panics when no attempt carries `label`, or when the matching attempt
/// did not see the OOB callback.
fn assert_oob_recorded(outcome: &RunOutcome, label: &str) {
    let Some(oob_attempt) = outcome.attempts.iter().find(|a| a.payload_label == label) else {
        panic!("OOB payload {label:?} must run when listener is attached; outcome={outcome:?}");
    };
    assert!(
        oob_attempt.outcome.oob_callback_seen,
        "parser external-entity hook must fetch loopback URL so OOB listener records the nonce; got attempt={oob_attempt:?}",
    );
}
/// Python: the `xxe-python-oob-nonce` attempt must record the OOB
/// callback. Passes without asserting when `run_oob` returns `None`.
#[test]
fn python_xxe_oob_loopback_records_callback() {
    if let Some(outcome) = run_oob(Lang::Python, "vuln.py", "run") {
        assert_oob_recorded(&outcome, "xxe-python-oob-nonce");
    }
}
/// Java: the `xxe-java-oob-nonce` attempt must record the OOB callback.
/// Passes without asserting when `run_oob` returns `None`.
#[test]
fn java_xxe_oob_loopback_records_callback() {
    if let Some(outcome) = run_oob(Lang::Java, "Vuln.java", "run") {
        assert_oob_recorded(&outcome, "xxe-java-oob-nonce");
    }
}
/// PHP: the `xxe-php-oob-nonce` attempt must record the OOB callback.
/// Passes without asserting when `run_oob` returns `None`.
#[test]
fn php_xxe_oob_loopback_records_callback() {
    if let Some(outcome) = run_oob(Lang::Php, "vuln.php", "run") {
        assert_oob_recorded(&outcome, "xxe-php-oob-nonce");
    }
}
/// Ruby: the `xxe-ruby-oob-nonce` attempt must record the OOB callback.
/// Passes without asserting when `run_oob` returns `None`.
#[test]
fn ruby_xxe_oob_loopback_records_callback() {
    if let Some(outcome) = run_oob(Lang::Ruby, "vuln.rb", "run") {
        assert_oob_recorded(&outcome, "xxe-ruby-oob-nonce");
    }
}
/// Go: the `xxe-go-oob-nonce` attempt must record the OOB callback.
/// Passes without asserting when `run_oob` returns `None`.
#[test]
fn go_xxe_oob_loopback_records_callback() {
    if let Some(outcome) = run_oob(Lang::Go, "vuln.go", "run") {
        assert_oob_recorded(&outcome, "xxe-go-oob-nonce");
    }
}
}