[pitboss/grind] deferred session-0010 (20260520T233019Z-6958)

This commit is contained in:
pitboss 2026-05-21 05:48:48 -05:00
parent 38cc0ce05f
commit 280121607e
12 changed files with 483 additions and 28 deletions

View file

@ -848,9 +848,14 @@ mod tests {
fn xxe_payloads_pair_benign_controls_per_lang() {
for lang in [Lang::Java, Lang::Python, Lang::Php, Lang::Ruby, Lang::Go] {
let slice = payloads_for_lang(Cap::XXE, lang);
// Skip OOB-nonce variants: they self-confirm via the per-finding
// listener callback (see `xxe-<lang>-oob-nonce` in
// `src/dynamic/corpus/xxe/<lang>.rs`) and carry no paired benign
// control because a benign URL structurally cannot hit the nonce
// path. The doctype-entity vuln is the one that pairs.
let vuln = slice
.iter()
.find(|p| !p.is_benign)
.find(|p| !p.is_benign && !p.oob_nonce_slot)
.expect("each lang must have an XXE vuln payload");
let resolved = super::resolve_benign_control_lang(vuln, Cap::XXE, lang)
.expect("lang-aware benign control must resolve");

View file

@ -9,11 +9,38 @@
//! Benign control: a well-formed XML document with no doctype, so the
//! decoder has no entity to resolve and the shim writes
//! `entity_expanded: false`.
//!
//! OOB-nonce variant (added 2026-05-21): when the runner attaches an
//! [`crate::dynamic::oob::OobListener`] the harness's
//! `nyxBuildXxeDocument` helper performs a real `http.Client.Get`
//! against the loopback URL so the listener records the per-finding
//! nonce. Ordered first so iteration exercises OOB before the
//! doctype-entity vuln triggers and short-circuits.
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
use crate::dynamic::oracle::ProbePredicate;
pub const PAYLOADS: &[CuratedPayload] = &[
CuratedPayload {
bytes: b"",
label: "xxe-go-oob-nonce",
oracle: Oracle::OobCallback { host: "127.0.0.1" },
is_benign: false,
provenance: PayloadProvenance::Curated,
since_corpus_version: 15,
deprecated_at_corpus_version: None,
fixture_paths: &[
"tests/dynamic_fixtures/xxe/go/vuln.go",
],
oob_nonce_slot: true,
probe_predicates: &[],
benign_control: None,
no_benign_control_rationale: Some(
"OOB-nonce XXE payload self-confirms via the per-finding listener \
callback when the Go harness performs the loopback GET before \
building the DTD; no benign URL can hit the nonce path.",
),
},
CuratedPayload {
bytes: br#"<?xml version="1.0"?>
<!DOCTYPE data [

View file

@ -10,11 +10,39 @@
//! declaration so the parser has no entity to resolve. The harness's
//! instrumented parser writes `entity_expanded: false`, the oracle
//! does not fire, and the differential rule (§4.1) stays clean.
//!
//! OOB-nonce variant (added 2026-05-21): when the runner attaches an
//! [`crate::dynamic::oob::OobListener`] the harness's `EntityResolver`
//! hook performs a real `URL.openConnection().getInputStream()` (with the
//! connection cast to `HttpURLConnection`) against the loopback URL so the
//! listener records the per-finding nonce.
//! Ordered first so the runner exercises the OOB observation path before
//! the doctype-entity vuln below triggers and short-circuits iteration;
//! runs without a listener skip cleanly (runner `oob_nonce_slot` branch).
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
use crate::dynamic::oracle::ProbePredicate;
pub const PAYLOADS: &[CuratedPayload] = &[
CuratedPayload {
bytes: b"",
label: "xxe-java-oob-nonce",
oracle: Oracle::OobCallback { host: "127.0.0.1" },
is_benign: false,
provenance: PayloadProvenance::Curated,
since_corpus_version: 15,
deprecated_at_corpus_version: None,
fixture_paths: &[
"tests/dynamic_fixtures/xxe/java/Vuln.java",
],
oob_nonce_slot: true,
probe_predicates: &[],
benign_control: None,
no_benign_control_rationale: Some(
"OOB-nonce XXE payload self-confirms via the per-finding listener \
callback when DocumentBuilder's EntityResolver fetches the \
loopback URL; no benign URL can hit the nonce path.",
),
},
CuratedPayload {
bytes: br#"<?xml version="1.0"?>
<!DOCTYPE data [

View file

@ -9,11 +9,38 @@
//! Benign control: a well-formed XML document with no doctype, so
//! the parser has no entity to resolve and the shim writes
//! `entity_expanded: false`.
//!
//! OOB-nonce variant (added 2026-05-21): when the runner attaches an
//! [`crate::dynamic::oob::OobListener`] the harness's
//! `libxml_set_external_entity_loader` callback performs a real
//! `file_get_contents` against the loopback URL so the listener records
//! the per-finding nonce. Ordered first so iteration exercises OOB
//! before the doctype-entity vuln triggers and short-circuits.
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
use crate::dynamic::oracle::ProbePredicate;
pub const PAYLOADS: &[CuratedPayload] = &[
CuratedPayload {
bytes: b"",
label: "xxe-php-oob-nonce",
oracle: Oracle::OobCallback { host: "127.0.0.1" },
is_benign: false,
provenance: PayloadProvenance::Curated,
since_corpus_version: 15,
deprecated_at_corpus_version: None,
fixture_paths: &[
"tests/dynamic_fixtures/xxe/php/vuln.php",
],
oob_nonce_slot: true,
probe_predicates: &[],
benign_control: None,
no_benign_control_rationale: Some(
"OOB-nonce XXE payload self-confirms via the per-finding listener \
callback when libxml's external-entity loader fetches the \
loopback URL; no benign URL can hit the nonce path.",
),
},
CuratedPayload {
bytes: br#"<?xml version="1.0"?>
<!DOCTYPE data [

View file

@ -9,11 +9,49 @@
//! Benign control: a well-formed XML document with no doctype, so the
//! parser has nothing to resolve and the shim writes
//! `entity_expanded: false`.
//!
//! OOB-nonce variant (added 2026-05-21): when the runner attaches an
//! [`crate::dynamic::oob::OobListener`], the runner materialises this
//! payload's bytes as a loopback URL and the Python harness wraps the
//! URL into `<!ENTITY xxe SYSTEM "URL">`. Expat's external-entity hook
//! performs a real `urllib.request.urlopen` against the URL so the
//! listener records the per-finding nonce. Ordered first so the runner
//! exercises the OOB observation path before the doctype-entity vuln
//! triggers and short-circuits the iteration; runs without a listener
//! skip cleanly (the runner's `oob_nonce_slot` branch `continue`s when
//! [`crate::dynamic::sandbox::SandboxOptions::oob_listener`] is None).
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
use crate::dynamic::oracle::ProbePredicate;
pub const PAYLOADS: &[CuratedPayload] = &[
// OOB-nonce XXE variant. Ordered first so the harness exercises the
// OOB observation path before the doctype-entity vuln below triggers
// and breaks iteration. Self-confirming via [`Oracle::OobCallback`];
// no paired benign control because a benign URL can never hit the
// per-finding nonce path. Runs only when an [`OobListener`] is
// attached; the runner's `oob_nonce_slot` branch skips otherwise.
CuratedPayload {
bytes: b"",
label: "xxe-python-oob-nonce",
oracle: Oracle::OobCallback { host: "127.0.0.1" },
is_benign: false,
provenance: PayloadProvenance::Curated,
since_corpus_version: 15,
deprecated_at_corpus_version: None,
fixture_paths: &[
"tests/dynamic_fixtures/xxe/python/vuln.py",
],
oob_nonce_slot: true,
probe_predicates: &[],
benign_control: None,
no_benign_control_rationale: Some(
"OOB-nonce XXE payload self-confirms via the per-finding listener \
callback when expat's external-entity hook fetches the loopback \
URL; no benign URL can hit the nonce path so no paired control \
is meaningful.",
),
},
CuratedPayload {
bytes: br#"<?xml version="1.0"?>
<!DOCTYPE data [

View file

@ -8,11 +8,38 @@
//! Benign control: a well-formed XML document with no doctype, so
//! the parser has no entity to resolve and the shim writes
//! `entity_expanded: false`.
//!
//! OOB-nonce variant (added 2026-05-21): when the runner attaches an
//! [`crate::dynamic::oob::OobListener`] the harness's
//! `_nyx_build_xxe_document` helper performs a real `Net::HTTP.start`
//! against the loopback URL so the listener records the per-finding
//! nonce. Ordered first so iteration exercises OOB before the
//! doctype-entity vuln triggers and short-circuits.
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
use crate::dynamic::oracle::ProbePredicate;
pub const PAYLOADS: &[CuratedPayload] = &[
CuratedPayload {
bytes: b"",
label: "xxe-ruby-oob-nonce",
oracle: Oracle::OobCallback { host: "127.0.0.1" },
is_benign: false,
provenance: PayloadProvenance::Curated,
since_corpus_version: 15,
deprecated_at_corpus_version: None,
fixture_paths: &[
"tests/dynamic_fixtures/xxe/ruby/vuln.rb",
],
oob_nonce_slot: true,
probe_predicates: &[],
benign_control: None,
no_benign_control_rationale: Some(
"OOB-nonce XXE payload self-confirms via the per-finding listener \
callback when the Ruby harness performs the loopback GET before \
building the DTD; no benign URL can hit the nonce path.",
),
},
CuratedPayload {
bytes: br#"<?xml version="1.0"?>
<!DOCTYPE data [

View file

@ -645,6 +645,7 @@ import (
"encoding/xml"
"fmt"
"io"
"net/http"
"os"
"os/signal"
"strings"
@ -654,6 +655,33 @@ import (
{shim}
// nyxBuildXxeDocument builds the XML document fed into the decoder.
// Two shapes (Phase 05 OOB closure, 2026-05-21):
// - URL-form NYX_PAYLOAD (`http://...` / `https://...`): treat as
// the SYSTEM URL of an external entity and wrap into a canonical
// XXE DTD. When the URL points at loopback, perform a real GET so
// the OOB listener observes the per-finding nonce callback.
// - Anything else: treat as the full XML document (existing Phase 05
// shape).
//
// The loopback GET is best-effort: a failed request is ignored and the
// wrapped document is returned either way. Only plain `http://` URLs are
// ever requested; an `https://` payload is wrapped but not fetched.
func nyxBuildXxeDocument(payload string) string {{
if strings.HasPrefix(payload, "http://") || strings.HasPrefix(payload, "https://") {{
// NOTE(review): these are plain string-prefix checks, so a host such
// as `localhost.example.com` also matches `http://localhost`. Confirm
// that URL-form payloads only ever come from the runner.
if strings.HasPrefix(payload, "http://127.0.0.1") ||
strings.HasPrefix(payload, "http://host-gateway") ||
strings.HasPrefix(payload, "http://localhost") {{
// Short timeout so an unreachable listener cannot stall the harness.
client := &http.Client{{Timeout: 2 * time.Second}}
if resp, err := client.Get(payload); err == nil {{
// The response content is not used; drain it and close the body.
_, _ = io.Copy(io.Discard, resp.Body)
resp.Body.Close()
}}
}}
// Escape the characters that could break the double-quoted SYSTEM
// literal or the surrounding markup. The GET above already used the
// raw payload, so this only changes the text handed to the decoder.
// NOTE(review): XML does not expand references inside a SYSTEM
// literal, so `&amp;` stays verbatim in the declaration. Confirm that
// nothing downstream expects the raw URL there.
escaped := strings.ReplaceAll(payload, "&", "&amp;")
escaped = strings.ReplaceAll(escaped, "\"", "&quot;")
escaped = strings.ReplaceAll(escaped, "<", "&lt;")
return "<?xml version=\"1.0\"?>\n<!DOCTYPE data [\n <!ENTITY xxe SYSTEM \"" + escaped + "\">\n]>\n<data>&xxe;</data>"
}}
return payload
}}
func nyxXmlParse(payload string) bool {{
// Real parser hook: walk Go's encoding/xml.Decoder token stream.
// The decoder parses <!DOCTYPE name [<!ENTITY x SYSTEM "uri">]>
@ -664,7 +692,8 @@ func nyxXmlParse(payload string) bool {{
// resolution boundary firing.
expanded := false
sawSystem := false
decoder := xml.NewDecoder(strings.NewReader(payload))
doc := nyxBuildXxeDocument(payload)
decoder := xml.NewDecoder(strings.NewReader(doc))
for {{
tok, err := decoder.Token()
if err != nil {{

View file

@ -952,6 +952,8 @@ pub fn emit_xxe_harness(_spec: &HarnessSpec) -> HarnessSource {
import java.io.FileWriter;
import java.io.IOException;
import java.io.StringReader;
import java.net.HttpURLConnection;
import java.net.URL;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.xml.sax.EntityResolver;
@ -963,6 +965,21 @@ public class NyxHarness {{
static boolean nyxLastExpanded = false;
// Build the XML document fed into the parser. Two shapes (Phase 05
// OOB closure, 2026-05-21):
// - URL-form NYX_PAYLOAD (`http://...` / `https://...`): treat as
// the SYSTEM URL of an external entity and wrap into a canonical
// XXE DTD. The entity-resolver hook will perform the loopback
// GET so the OOB listener observes the per-finding nonce.
// - Anything else: treat as the full XML document (existing shape).
//
// XML does not recognise entity or character references inside a
// SYSTEM literal, so the parser hands the literal to the resolver
// verbatim. Entity-escaping the URL (`&` -> `&amp;`) would therefore
// corrupt any query string before the resolver fetches it. Only the
// two characters that could break the wrapper are rewritten, and they
// are percent-encoded so the URL keeps its meaning; `&` is legal in a
// system literal and is left untouched.
static String nyxBuildXxeDocument(String payload) {{
    if (payload.startsWith("http://") || payload.startsWith("https://")) {{
        // A double quote would terminate the literal; `<` is encoded defensively.
        String systemUrl = payload.replace("\"", "%22").replace("<", "%3C");
        return "<?xml version=\"1.0\"?>\n<!DOCTYPE data [\n <!ENTITY xxe SYSTEM \"" + systemUrl + "\">\n]>\n<data>&xxe;</data>";
    }}
    return payload;
}}
static void nyxXmlParse(String payload) {{
nyxLastExpanded = false;
try {{
@ -971,20 +988,36 @@ public class NyxHarness {{
// entity resolution enabled" target: leave the factory at
// default settings (which historically permit doctype +
// external entities) and rely on the EntityResolver hook
// to short-circuit the actual fetch.
// to control fetch behaviour.
DocumentBuilder db = dbf.newDocumentBuilder();
db.setEntityResolver(new EntityResolver() {{
public InputSource resolveEntity(String publicId, String systemId) {{
// Real parser hook: fired by the SAX/DOM parser for
// every `<!ENTITY x SYSTEM "...">` reference. Mark
// expanded. When the SYSTEM URL starts with one of the
// loopback HTTP prefixes below, perform a real GET so the
// OOB listener can observe the callback (Phase 05 OOB
// closure). The replacement text handed back to the parser
// is always empty, whether or not a fetch happened.
nyxLastExpanded = true;
if (systemId != null && (systemId.startsWith("http://127.0.0.1")
|| systemId.startsWith("http://host-gateway")
|| systemId.startsWith("http://localhost"))) {{
try {{
HttpURLConnection conn = (HttpURLConnection) new URL(systemId).openConnection();
// Short timeouts so an unreachable listener cannot stall the harness.
conn.setConnectTimeout(2000);
conn.setReadTimeout(2000);
// Opening the input stream is what issues the request; the
// response content itself is not read.
conn.getInputStream().close();
conn.disconnect();
}} catch (Exception ignored) {{
// best-effort OOB fetch
}}
}}
return new InputSource(new StringReader(""));
}}
}});
try {{
db.parse(new InputSource(new StringReader(payload)));
String doc = nyxBuildXxeDocument(payload);
db.parse(new InputSource(new StringReader(doc)));
}} catch (SAXException | IOException e) {{
// Malformed XML still counts as a parser invocation;
// expanded flag reflects whatever the hook saw before

View file

@ -685,13 +685,38 @@ pub fn emit_xxe_harness(_spec: &HarnessSpec) -> HarnessSource {
// Nyx dynamic harness — XXE simplexml_load_string (Phase 05 / Track J.3).
{shim}
// Build the XML document fed into the parser. Two shapes (Phase 05 OOB
// closure, 2026-05-21):
// - URL-form NYX_PAYLOAD (`http://...` / `https://...`): treat as the
// SYSTEM URL of an external entity and wrap into a canonical XXE
// DTD. The external-entity loader hook below performs the loopback
// GET so the OOB listener observes the per-finding nonce.
// - Anything else: treat as the full XML document (existing shape).
//
// XML does not recognise entity or character references inside a SYSTEM
// literal, so the parser hands the literal to the entity loader verbatim.
// Entity-escaping the URL (`&` -> `&amp;`) would therefore corrupt any
// query string before the loader fetches it. Only the two characters
// that could break the wrapper are rewritten, and they are
// percent-encoded so the URL keeps its meaning; `&` is legal in a system
// literal and is left untouched.
function _nyx_build_xxe_document(string $payload): string {{
    if (str_starts_with($payload, 'http://') || str_starts_with($payload, 'https://')) {{
        // A double quote would terminate the literal; `<` is encoded defensively.
        $system_url = str_replace(['"', '<'], ['%22', '%3C'], $payload);
        return "<?xml version=\"1.0\"?>\n<!DOCTYPE data [\n <!ENTITY xxe SYSTEM \"" . $system_url . "\">\n]>\n<data>&xxe;</data>";
    }}
    return $payload;
}}
function _nyx_libxml_parse(string $payload): bool {{
$expanded = false;
// Real parser hook: libxml calls this for every <!ENTITY name SYSTEM "uri">
// reference resolved in the document. Mark expanded. When the
// SYSTEM URL starts with one of the loopback HTTP prefixes below,
// perform a real fetch so the OOB listener observes the callback
// (Phase 05 OOB closure). The loader returns null on every path,
// whether or not a fetch happened.
libxml_set_external_entity_loader(function ($public, $system, $context) use (&$expanded) {{
$expanded = true;
if (is_string($system) && (
str_starts_with($system, 'http://127.0.0.1')
|| str_starts_with($system, 'http://host-gateway')
|| str_starts_with($system, 'http://localhost')
)) {{
// Best-effort fetch: 2-second timeout, warnings suppressed with `@`,
// and the response body is discarded.
$ctx = stream_context_create(['http' => ['timeout' => 2, 'ignore_errors' => true]]);
@file_get_contents($system, false, $ctx);
}}
return null;
}});
$prev_errors = libxml_use_internal_errors(true);
@ -699,7 +724,8 @@ function _nyx_libxml_parse(string $payload): bool {{
// the resolved body) and LIBXML_DTDLOAD allows the parser to load
// the DTD declarations — the combination real XXE-vulnerable PHP
// code passes to `simplexml_load_string`.
@simplexml_load_string($payload, 'SimpleXMLElement', LIBXML_NOENT | LIBXML_DTDLOAD);
$doc = _nyx_build_xxe_document($payload);
@simplexml_load_string($doc, 'SimpleXMLElement', LIBXML_NOENT | LIBXML_DTDLOAD);
libxml_clear_errors();
libxml_use_internal_errors($prev_errors);
// Reset the loader to default so nothing leaks across runs.

View file

@ -1452,10 +1452,32 @@ pub fn emit_xxe_harness(_spec: &HarnessSpec) -> HarnessSource {
r#"#!/usr/bin/env python3
"""Nyx dynamic harness — XXE xml.parsers.expat (Phase 05 / Track J.3)."""
import os, json, sys, time
import urllib.request as _nyx_urlreq
import xml.parsers.expat as _nyx_expat
{probe}
# Build the XML document fed into expat. Two shapes:
# - URL-form NYX_PAYLOAD (`http://...` or `https://...`): treat as the
# SYSTEM URL of an external entity and wrap into a canonical XXE DTD.
# The OOB-nonce payload variant emits a loopback URL here so the
# external-ref hook performs a real HTTP GET that the OOB listener
# observes (Phase 05 OOB closure, 2026-05-21).
# - Anything else: treat NYX_PAYLOAD as the full XML document
# (existing Phase 05 shape).
def _nyx_xxe_document(payload):
p = payload if isinstance(payload, str) else payload.decode("utf-8", "replace")
if p.startswith("http://") or p.startswith("https://"):
url = p.replace("&", "&amp;").replace('"', "&quot;").replace("<", "&lt;")
return (
"<?xml version=\"1.0\"?>\n"
"<!DOCTYPE data [\n"
" <!ENTITY xxe SYSTEM \"" + url + "\">\n"
"]>\n"
"<data>&xxe;</data>"
)
return p
def _nyx_xxe_parse(payload):
expanded = [False]
parser = _nyx_expat.ParserCreate()
@ -1469,9 +1491,18 @@ def _nyx_xxe_parse(payload):
def _external_ref(context, base, system_id, public_id):
# Real parser hook: fired by expat for every `<!ENTITY x SYSTEM "...">`
# reference inside element bodies / DTD. Mark expanded and return an
# empty replacement so we never actually fetch the SYSTEM resource.
# reference inside element bodies / DTD. Mark expanded. When the
# SYSTEM URL points at loopback HTTP, perform a real GET so the OOB
# listener can observe the callback (Phase 05 OOB closure). Any
# other scheme returns an empty replacement (no fetch).
expanded[0] = True
if system_id and (system_id.startswith("http://127.0.0.1")
or system_id.startswith("http://host-gateway")
or system_id.startswith("http://localhost")):
try:
_nyx_urlreq.urlopen(system_id, timeout=2).read()
except Exception:
pass
sub = parser.ExternalEntityParserCreate(context, "utf-8")
try:
sub.Parse("", 1)
@ -1480,9 +1511,9 @@ def _nyx_xxe_parse(payload):
return 1
parser.ExternalEntityRefHandler = _external_ref
payload_bytes = payload.encode("utf-8", "replace") if isinstance(payload, str) else payload
doc = _nyx_xxe_document(payload)
try:
parser.Parse(payload_bytes, 1)
parser.Parse(doc.encode("utf-8", "replace"), 1)
except _nyx_expat.ExpatError:
# Malformed XML still counts as a parser invocation; expanded
# flag reflects whatever the hook saw before the error.

View file

@ -985,20 +985,50 @@ STDOUT.flush
pub fn emit_xxe_harness(_spec: &HarnessSpec) -> HarnessSource {
let shim = probe_shim();
let body = format!(
r#"# Nyx dynamic harness — XXE REXML (Phase 05 / Track J.3).
r##"# Nyx dynamic harness — XXE REXML (Phase 05 / Track J.3).
require 'json'
require 'net/http'
require 'rexml/document'
require 'stringio'
require 'uri'
{shim}
# Build the XML document fed into REXML. Two shapes (Phase 05 OOB
# closure, 2026-05-21):
# - URL-form NYX_PAYLOAD (`http://...` / `https://...`): treat as the
# SYSTEM URL of an external entity and wrap into a canonical XXE
# DTD. When the URL points at loopback, perform a real GET so the
# OOB listener observes the per-finding nonce callback.
# - Anything else: treat as the full XML document (existing shape).
#
# The loopback GET is best-effort: any StandardError is swallowed and the
# wrapped document is returned either way. Only plain `http://` URLs are
# ever requested; an `https://` payload is wrapped but not fetched.
def _nyx_build_xxe_document(payload)
if payload.start_with?('http://') || payload.start_with?('https://')
# NOTE(review): these are plain string-prefix checks, so a host such as
# `localhost.example.com` also matches `http://localhost`. Confirm that
# URL-form payloads only ever come from the runner.
if payload.start_with?('http://127.0.0.1') ||
payload.start_with?('http://host-gateway') ||
payload.start_with?('http://localhost')
begin
uri = URI.parse(payload)
# Short timeouts so an unreachable listener cannot stall the harness.
Net::HTTP.start(uri.host, uri.port, open_timeout: 2, read_timeout: 2) do |http|
http.request_get(uri.request_uri)
end
rescue StandardError
# best-effort OOB fetch
end
end
# Escape the characters that could break the double-quoted SYSTEM literal
# or the surrounding markup. The GET above already used the raw payload,
# so this only changes the text handed to REXML.
# NOTE(review): XML does not expand references inside a SYSTEM literal,
# so `&amp;` stays verbatim in the declaration. Confirm that nothing
# downstream expects the raw URL there.
escaped = payload.gsub('&', '&amp;').gsub('"', '&quot;').gsub('<', '&lt;')
"<?xml version=\"1.0\"?>\n<!DOCTYPE data [\n <!ENTITY xxe SYSTEM \"#{{escaped}}\">\n]>\n<data>&xxe;</data>"
else
payload
end
end
def _nyx_libxml_parse(payload)
# Real parser hook: REXML parses `<!ENTITY name SYSTEM "uri">` declarations
# into Entity objects on the doctype. Inspect the entities table to
# detect every external-entity reference the parser registered.
expanded = false
begin
doc = REXML::Document.new(payload)
doc = REXML::Document.new(_nyx_build_xxe_document(payload))
if doc.doctype
doc.doctype.entities.each_value do |ent|
s = ent.to_s
@ -1042,7 +1072,7 @@ _nyx_xxe_probe(payload, expanded)
STDOUT.puts '__NYX_SINK_HIT__'
STDOUT.puts JSON.generate({{"entity_expanded" => expanded}})
STDOUT.flush
"#
"##
);
HarnessSource {
source: body,