mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-09 19:45:13 +02:00
[pitboss/grind] deferred session-0010 (20260520T233019Z-6958)
This commit is contained in:
parent
38cc0ce05f
commit
280121607e
12 changed files with 483 additions and 28 deletions
|
|
@ -848,9 +848,14 @@ mod tests {
|
|||
fn xxe_payloads_pair_benign_controls_per_lang() {
|
||||
for lang in [Lang::Java, Lang::Python, Lang::Php, Lang::Ruby, Lang::Go] {
|
||||
let slice = payloads_for_lang(Cap::XXE, lang);
|
||||
// Skip OOB-nonce variants: they self-confirm via the per-finding
|
||||
// listener callback (see `xxe-<lang>-oob-nonce` in
|
||||
// `src/dynamic/corpus/xxe/<lang>.rs`) and carry no paired benign
|
||||
// control because a benign URL structurally cannot hit the nonce
|
||||
// path. The doctype-entity vuln is the one that pairs.
|
||||
let vuln = slice
|
||||
.iter()
|
||||
.find(|p| !p.is_benign)
|
||||
.find(|p| !p.is_benign && !p.oob_nonce_slot)
|
||||
.expect("each lang must have an XXE vuln payload");
|
||||
let resolved = super::resolve_benign_control_lang(vuln, Cap::XXE, lang)
|
||||
.expect("lang-aware benign control must resolve");
|
||||
|
|
|
|||
|
|
@ -9,11 +9,38 @@
|
|||
//! Benign control: a well-formed XML document with no doctype, so the
|
||||
//! decoder has no entity to resolve and the shim writes
|
||||
//! `entity_expanded: false`.
|
||||
//!
|
||||
//! OOB-nonce variant (added 2026-05-21): when the runner attaches an
|
||||
//! [`crate::dynamic::oob::OobListener`] the harness's
|
||||
//! `nyxBuildXxeDocument` helper performs a real `http.Client.Get`
|
||||
//! against the loopback URL so the listener records the per-finding
|
||||
//! nonce. Ordered first so iteration exercises OOB before the
|
||||
//! doctype-entity vuln triggers and short-circuits.
|
||||
|
||||
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
|
||||
use crate::dynamic::oracle::ProbePredicate;
|
||||
|
||||
pub const PAYLOADS: &[CuratedPayload] = &[
|
||||
CuratedPayload {
|
||||
bytes: b"",
|
||||
label: "xxe-go-oob-nonce",
|
||||
oracle: Oracle::OobCallback { host: "127.0.0.1" },
|
||||
is_benign: false,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 15,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &[
|
||||
"tests/dynamic_fixtures/xxe/go/vuln.go",
|
||||
],
|
||||
oob_nonce_slot: true,
|
||||
probe_predicates: &[],
|
||||
benign_control: None,
|
||||
no_benign_control_rationale: Some(
|
||||
"OOB-nonce XXE payload self-confirms via the per-finding listener \
|
||||
callback when the Go harness performs the loopback GET before \
|
||||
building the DTD; no benign URL can hit the nonce path.",
|
||||
),
|
||||
},
|
||||
CuratedPayload {
|
||||
bytes: br#"<?xml version="1.0"?>
|
||||
<!DOCTYPE data [
|
||||
|
|
|
|||
|
|
@ -10,11 +10,39 @@
|
|||
//! declaration so the parser has no entity to resolve. The harness's
|
||||
//! instrumented parser writes `entity_expanded: false`, the oracle
|
||||
//! does not fire, and the differential rule (§4.1) stays clean.
|
||||
//!
|
||||
//! OOB-nonce variant (added 2026-05-21): when the runner attaches an
|
||||
//! [`crate::dynamic::oob::OobListener`] the harness's `EntityResolver`
|
||||
//! hook performs a real `HttpURLConnection.openConnection().getInputStream()`
|
||||
//! against the loopback URL so the listener records the per-finding nonce.
|
||||
//! Ordered first so the runner exercises the OOB observation path before
|
||||
//! the doctype-entity vuln below triggers and short-circuits iteration;
|
||||
//! runs without a listener skip cleanly (runner `oob_nonce_slot` branch).
|
||||
|
||||
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
|
||||
use crate::dynamic::oracle::ProbePredicate;
|
||||
|
||||
pub const PAYLOADS: &[CuratedPayload] = &[
|
||||
CuratedPayload {
|
||||
bytes: b"",
|
||||
label: "xxe-java-oob-nonce",
|
||||
oracle: Oracle::OobCallback { host: "127.0.0.1" },
|
||||
is_benign: false,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 15,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &[
|
||||
"tests/dynamic_fixtures/xxe/java/Vuln.java",
|
||||
],
|
||||
oob_nonce_slot: true,
|
||||
probe_predicates: &[],
|
||||
benign_control: None,
|
||||
no_benign_control_rationale: Some(
|
||||
"OOB-nonce XXE payload self-confirms via the per-finding listener \
|
||||
callback when DocumentBuilder's EntityResolver fetches the \
|
||||
loopback URL; no benign URL can hit the nonce path.",
|
||||
),
|
||||
},
|
||||
CuratedPayload {
|
||||
bytes: br#"<?xml version="1.0"?>
|
||||
<!DOCTYPE data [
|
||||
|
|
|
|||
|
|
@ -9,11 +9,38 @@
|
|||
//! Benign control: a well-formed XML document with no doctype, so
|
||||
//! the parser has no entity to resolve and the shim writes
|
||||
//! `entity_expanded: false`.
|
||||
//!
|
||||
//! OOB-nonce variant (added 2026-05-21): when the runner attaches an
|
||||
//! [`crate::dynamic::oob::OobListener`] the harness's
|
||||
//! `libxml_set_external_entity_loader` callback performs a real
|
||||
//! `file_get_contents` against the loopback URL so the listener records
|
||||
//! the per-finding nonce. Ordered first so iteration exercises OOB
|
||||
//! before the doctype-entity vuln triggers and short-circuits.
|
||||
|
||||
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
|
||||
use crate::dynamic::oracle::ProbePredicate;
|
||||
|
||||
pub const PAYLOADS: &[CuratedPayload] = &[
|
||||
CuratedPayload {
|
||||
bytes: b"",
|
||||
label: "xxe-php-oob-nonce",
|
||||
oracle: Oracle::OobCallback { host: "127.0.0.1" },
|
||||
is_benign: false,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 15,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &[
|
||||
"tests/dynamic_fixtures/xxe/php/vuln.php",
|
||||
],
|
||||
oob_nonce_slot: true,
|
||||
probe_predicates: &[],
|
||||
benign_control: None,
|
||||
no_benign_control_rationale: Some(
|
||||
"OOB-nonce XXE payload self-confirms via the per-finding listener \
|
||||
callback when libxml's external-entity loader fetches the \
|
||||
loopback URL; no benign URL can hit the nonce path.",
|
||||
),
|
||||
},
|
||||
CuratedPayload {
|
||||
bytes: br#"<?xml version="1.0"?>
|
||||
<!DOCTYPE data [
|
||||
|
|
|
|||
|
|
@ -9,11 +9,49 @@
|
|||
//! Benign control: a well-formed XML document with no doctype, so the
|
||||
//! parser has nothing to resolve and the shim writes
|
||||
//! `entity_expanded: false`.
|
||||
//!
|
||||
//! OOB-nonce variant (added 2026-05-21): when the runner attaches an
|
||||
//! [`crate::dynamic::oob::OobListener`], the runner materialises this
|
||||
//! payload's bytes as a loopback URL and the Python harness wraps the
|
||||
//! URL into `<!ENTITY xxe SYSTEM "URL">`. Expat's external-entity hook
|
||||
//! performs a real `urllib.request.urlopen` against the URL so the
|
||||
//! listener records the per-finding nonce. Ordered first so the runner
|
||||
//! exercises the OOB observation path before the doctype-entity vuln
|
||||
//! triggers and short-circuits the iteration; runs without a listener
|
||||
//! skip cleanly (the runner's `oob_nonce_slot` branch `continue`s when
|
||||
//! [`crate::dynamic::sandbox::SandboxOptions::oob_listener`] is None).
|
||||
|
||||
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
|
||||
use crate::dynamic::oracle::ProbePredicate;
|
||||
|
||||
pub const PAYLOADS: &[CuratedPayload] = &[
|
||||
// OOB-nonce XXE variant. Ordered first so the harness exercises the
|
||||
// OOB observation path before the doctype-entity vuln below triggers
|
||||
// and breaks iteration. Self-confirming via [`Oracle::OobCallback`];
|
||||
// no paired benign control because a benign URL can never hit the
|
||||
// per-finding nonce path. Runs only when an [`OobListener`] is
|
||||
// attached; the runner's `oob_nonce_slot` branch skips otherwise.
|
||||
CuratedPayload {
|
||||
bytes: b"",
|
||||
label: "xxe-python-oob-nonce",
|
||||
oracle: Oracle::OobCallback { host: "127.0.0.1" },
|
||||
is_benign: false,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 15,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &[
|
||||
"tests/dynamic_fixtures/xxe/python/vuln.py",
|
||||
],
|
||||
oob_nonce_slot: true,
|
||||
probe_predicates: &[],
|
||||
benign_control: None,
|
||||
no_benign_control_rationale: Some(
|
||||
"OOB-nonce XXE payload self-confirms via the per-finding listener \
|
||||
callback when expat's external-entity hook fetches the loopback \
|
||||
URL; no benign URL can hit the nonce path so no paired control \
|
||||
is meaningful.",
|
||||
),
|
||||
},
|
||||
CuratedPayload {
|
||||
bytes: br#"<?xml version="1.0"?>
|
||||
<!DOCTYPE data [
|
||||
|
|
|
|||
|
|
@ -8,11 +8,38 @@
|
|||
//! Benign control: a well-formed XML document with no doctype, so
|
||||
//! the parser has no entity to resolve and the shim writes
|
||||
//! `entity_expanded: false`.
|
||||
//!
|
||||
//! OOB-nonce variant (added 2026-05-21): when the runner attaches an
|
||||
//! [`crate::dynamic::oob::OobListener`] the harness's
|
||||
//! `_nyx_build_xxe_document` helper performs a real `Net::HTTP.start`
|
||||
//! against the loopback URL so the listener records the per-finding
|
||||
//! nonce. Ordered first so iteration exercises OOB before the
|
||||
//! doctype-entity vuln triggers and short-circuits.
|
||||
|
||||
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
|
||||
use crate::dynamic::oracle::ProbePredicate;
|
||||
|
||||
pub const PAYLOADS: &[CuratedPayload] = &[
|
||||
CuratedPayload {
|
||||
bytes: b"",
|
||||
label: "xxe-ruby-oob-nonce",
|
||||
oracle: Oracle::OobCallback { host: "127.0.0.1" },
|
||||
is_benign: false,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 15,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &[
|
||||
"tests/dynamic_fixtures/xxe/ruby/vuln.rb",
|
||||
],
|
||||
oob_nonce_slot: true,
|
||||
probe_predicates: &[],
|
||||
benign_control: None,
|
||||
no_benign_control_rationale: Some(
|
||||
"OOB-nonce XXE payload self-confirms via the per-finding listener \
|
||||
callback when the Ruby harness performs the loopback GET before \
|
||||
building the DTD; no benign URL can hit the nonce path.",
|
||||
),
|
||||
},
|
||||
CuratedPayload {
|
||||
bytes: br#"<?xml version="1.0"?>
|
||||
<!DOCTYPE data [
|
||||
|
|
|
|||
|
|
@ -645,6 +645,7 @@ import (
|
|||
"encoding/xml"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
"os/signal"
|
||||
"strings"
|
||||
|
|
@ -654,6 +655,33 @@ import (
|
|||
|
||||
{shim}
|
||||
|
||||
// nyxBuildXxeDocument builds the XML document fed into the decoder.
|
||||
// Two shapes (Phase 05 OOB closure, 2026-05-21):
|
||||
// - URL-form NYX_PAYLOAD (`http://...` / `https://...`): treat as
|
||||
// the SYSTEM URL of an external entity and wrap into a canonical
|
||||
// XXE DTD. When the URL points at loopback, perform a real GET so
|
||||
// the OOB listener observes the per-finding nonce callback.
|
||||
// - Anything else: treat as the full XML document (existing Phase 05
|
||||
// shape).
|
||||
func nyxBuildXxeDocument(payload string) string {{
|
||||
if strings.HasPrefix(payload, "http://") || strings.HasPrefix(payload, "https://") {{
|
||||
if strings.HasPrefix(payload, "http://127.0.0.1") ||
|
||||
strings.HasPrefix(payload, "http://host-gateway") ||
|
||||
strings.HasPrefix(payload, "http://localhost") {{
|
||||
client := &http.Client{{Timeout: 2 * time.Second}}
|
||||
if resp, err := client.Get(payload); err == nil {{
|
||||
_, _ = io.Copy(io.Discard, resp.Body)
|
||||
resp.Body.Close()
|
||||
}}
|
||||
}}
|
||||
escaped := strings.ReplaceAll(payload, "&", "&amp;")
|
||||
escaped = strings.ReplaceAll(escaped, "\"", "&quot;")
|
||||
escaped = strings.ReplaceAll(escaped, "<", "&lt;")
|
||||
return "<?xml version=\"1.0\"?>\n<!DOCTYPE data [\n <!ENTITY xxe SYSTEM \"" + escaped + "\">\n]>\n<data>&xxe;</data>"
|
||||
}}
|
||||
return payload
|
||||
}}
|
||||
|
||||
func nyxXmlParse(payload string) bool {{
|
||||
// Real parser hook: walk Go's encoding/xml.Decoder token stream.
|
||||
// The decoder parses <!DOCTYPE name [<!ENTITY x SYSTEM "uri">]>
|
||||
|
|
@ -664,7 +692,8 @@ func nyxXmlParse(payload string) bool {{
|
|||
// resolution boundary firing.
|
||||
expanded := false
|
||||
sawSystem := false
|
||||
decoder := xml.NewDecoder(strings.NewReader(payload))
|
||||
doc := nyxBuildXxeDocument(payload)
|
||||
decoder := xml.NewDecoder(strings.NewReader(doc))
|
||||
for {{
|
||||
tok, err := decoder.Token()
|
||||
if err != nil {{
|
||||
|
|
|
|||
|
|
@ -952,6 +952,8 @@ pub fn emit_xxe_harness(_spec: &HarnessSpec) -> HarnessSource {
|
|||
import java.io.FileWriter;
|
||||
import java.io.IOException;
|
||||
import java.io.StringReader;
|
||||
import java.net.HttpURLConnection;
|
||||
import java.net.URL;
|
||||
import javax.xml.parsers.DocumentBuilder;
|
||||
import javax.xml.parsers.DocumentBuilderFactory;
|
||||
import org.xml.sax.EntityResolver;
|
||||
|
|
@ -963,6 +965,21 @@ public class NyxHarness {{
|
|||
|
||||
static boolean nyxLastExpanded = false;
|
||||
|
||||
// Build the XML document fed into the parser. Two shapes (Phase 05
|
||||
// OOB closure, 2026-05-21):
|
||||
// - URL-form NYX_PAYLOAD (`http://...` / `https://...`): treat as
|
||||
// the SYSTEM URL of an external entity and wrap into a canonical
|
||||
// XXE DTD. The entity-resolver hook will perform the loopback
|
||||
// GET so the OOB listener observes the per-finding nonce.
|
||||
// - Anything else: treat as the full XML document (existing shape).
|
||||
static String nyxBuildXxeDocument(String payload) {{
|
||||
if (payload.startsWith("http://") || payload.startsWith("https://")) {{
|
||||
String escaped = payload.replace("&", "&amp;").replace("\"", "&quot;").replace("<", "&lt;");
|
||||
return "<?xml version=\"1.0\"?>\n<!DOCTYPE data [\n <!ENTITY xxe SYSTEM \"" + escaped + "\">\n]>\n<data>&xxe;</data>";
|
||||
}}
|
||||
return payload;
|
||||
}}
|
||||
|
||||
static void nyxXmlParse(String payload) {{
|
||||
nyxLastExpanded = false;
|
||||
try {{
|
||||
|
|
@ -971,20 +988,36 @@ public class NyxHarness {{
|
|||
// entity resolution enabled" target: leave the factory at
|
||||
// default settings (which historically permit doctype +
|
||||
// external entities) and rely on the EntityResolver hook
|
||||
// to short-circuit the actual fetch.
|
||||
// to control fetch behaviour.
|
||||
DocumentBuilder db = dbf.newDocumentBuilder();
|
||||
db.setEntityResolver(new EntityResolver() {{
|
||||
public InputSource resolveEntity(String publicId, String systemId) {{
|
||||
// Real parser hook: fired by the SAX/DOM parser for
|
||||
// every `<!ENTITY x SYSTEM "...">` reference. Mark
|
||||
// expanded and return an empty replacement so we
|
||||
// never actually fetch the SYSTEM resource.
|
||||
// expanded. When the SYSTEM URL points at loopback
|
||||
// HTTP, perform a real GET so the OOB listener can
|
||||
// observe the callback (Phase 05 OOB closure). Any
|
||||
// other scheme returns an empty replacement (no fetch).
|
||||
nyxLastExpanded = true;
|
||||
if (systemId != null && (systemId.startsWith("http://127.0.0.1")
|
||||
|| systemId.startsWith("http://host-gateway")
|
||||
|| systemId.startsWith("http://localhost"))) {{
|
||||
try {{
|
||||
HttpURLConnection conn = (HttpURLConnection) new URL(systemId).openConnection();
|
||||
conn.setConnectTimeout(2000);
|
||||
conn.setReadTimeout(2000);
|
||||
conn.getInputStream().close();
|
||||
conn.disconnect();
|
||||
}} catch (Exception ignored) {{
|
||||
// best-effort OOB fetch
|
||||
}}
|
||||
}}
|
||||
return new InputSource(new StringReader(""));
|
||||
}}
|
||||
}});
|
||||
try {{
|
||||
db.parse(new InputSource(new StringReader(payload)));
|
||||
String doc = nyxBuildXxeDocument(payload);
|
||||
db.parse(new InputSource(new StringReader(doc)));
|
||||
}} catch (SAXException | IOException e) {{
|
||||
// Malformed XML still counts as a parser invocation;
|
||||
// expanded flag reflects whatever the hook saw before
|
||||
|
|
|
|||
|
|
@ -685,13 +685,38 @@ pub fn emit_xxe_harness(_spec: &HarnessSpec) -> HarnessSource {
|
|||
// Nyx dynamic harness — XXE simplexml_load_string (Phase 05 / Track J.3).
|
||||
{shim}
|
||||
|
||||
// Build the XML document fed into the parser. Two shapes (Phase 05 OOB
|
||||
// closure, 2026-05-21):
|
||||
// - URL-form NYX_PAYLOAD (`http://...` / `https://...`): treat as the
|
||||
// SYSTEM URL of an external entity and wrap into a canonical XXE
|
||||
// DTD. The external-entity loader hook below performs the loopback
|
||||
// GET so the OOB listener observes the per-finding nonce.
|
||||
// - Anything else: treat as the full XML document (existing shape).
|
||||
function _nyx_build_xxe_document(string $payload): string {{
|
||||
if (str_starts_with($payload, 'http://') || str_starts_with($payload, 'https://')) {{
|
||||
$escaped = str_replace(['&', '"', '<'], ['&amp;', '&quot;', '&lt;'], $payload);
|
||||
return "<?xml version=\"1.0\"?>\n<!DOCTYPE data [\n <!ENTITY xxe SYSTEM \"" . $escaped . "\">\n]>\n<data>&xxe;</data>";
|
||||
}}
|
||||
return $payload;
|
||||
}}
|
||||
|
||||
function _nyx_libxml_parse(string $payload): bool {{
|
||||
$expanded = false;
|
||||
// Real parser hook: libxml calls this for every <!ENTITY name SYSTEM "uri">
|
||||
// reference resolved in the document. We mark expanded and
|
||||
// return null so the parser does not actually fetch the resource.
|
||||
// reference resolved in the document. Mark expanded. When the
|
||||
// SYSTEM URL points at loopback HTTP, perform a real fetch so the
|
||||
// OOB listener observes the callback (Phase 05 OOB closure); other
|
||||
// schemes return null so the parser substitutes empty.
|
||||
libxml_set_external_entity_loader(function ($public, $system, $context) use (&$expanded) {{
|
||||
$expanded = true;
|
||||
if (is_string($system) && (
|
||||
str_starts_with($system, 'http://127.0.0.1')
|
||||
|| str_starts_with($system, 'http://host-gateway')
|
||||
|| str_starts_with($system, 'http://localhost')
|
||||
)) {{
|
||||
$ctx = stream_context_create(['http' => ['timeout' => 2, 'ignore_errors' => true]]);
|
||||
@file_get_contents($system, false, $ctx);
|
||||
}}
|
||||
return null;
|
||||
}});
|
||||
$prev_errors = libxml_use_internal_errors(true);
|
||||
|
|
@ -699,7 +724,8 @@ function _nyx_libxml_parse(string $payload): bool {{
|
|||
// the resolved body) and LIBXML_DTDLOAD allows the parser to load
|
||||
// the DTD declarations — the combination real XXE-vulnerable PHP
|
||||
// code passes to `simplexml_load_string`.
|
||||
@simplexml_load_string($payload, 'SimpleXMLElement', LIBXML_NOENT | LIBXML_DTDLOAD);
|
||||
$doc = _nyx_build_xxe_document($payload);
|
||||
@simplexml_load_string($doc, 'SimpleXMLElement', LIBXML_NOENT | LIBXML_DTDLOAD);
|
||||
libxml_clear_errors();
|
||||
libxml_use_internal_errors($prev_errors);
|
||||
// Reset the loader to default so nothing leaks across runs.
|
||||
|
|
|
|||
|
|
@ -1452,10 +1452,32 @@ pub fn emit_xxe_harness(_spec: &HarnessSpec) -> HarnessSource {
|
|||
r#"#!/usr/bin/env python3
|
||||
"""Nyx dynamic harness — XXE xml.parsers.expat (Phase 05 / Track J.3)."""
|
||||
import os, json, sys, time
|
||||
import urllib.request as _nyx_urlreq
|
||||
import xml.parsers.expat as _nyx_expat
|
||||
|
||||
{probe}
|
||||
|
||||
# Build the XML document fed into expat. Two shapes:
|
||||
# - URL-form NYX_PAYLOAD (`http://...` or `https://...`): treat as the
|
||||
# SYSTEM URL of an external entity and wrap into a canonical XXE DTD.
|
||||
# The OOB-nonce payload variant emits a loopback URL here so the
|
||||
# external-ref hook performs a real HTTP GET that the OOB listener
|
||||
# observes (Phase 05 OOB closure, 2026-05-21).
|
||||
# - Anything else: treat NYX_PAYLOAD as the full XML document
|
||||
# (existing Phase 05 shape).
|
||||
def _nyx_xxe_document(payload):
|
||||
p = payload if isinstance(payload, str) else payload.decode("utf-8", "replace")
|
||||
if p.startswith("http://") or p.startswith("https://"):
|
||||
url = p.replace("&", "&amp;").replace('"', "&quot;").replace("<", "&lt;")
|
||||
return (
|
||||
"<?xml version=\"1.0\"?>\n"
|
||||
"<!DOCTYPE data [\n"
|
||||
" <!ENTITY xxe SYSTEM \"" + url + "\">\n"
|
||||
"]>\n"
|
||||
"<data>&xxe;</data>"
|
||||
)
|
||||
return p
|
||||
|
||||
def _nyx_xxe_parse(payload):
|
||||
expanded = [False]
|
||||
parser = _nyx_expat.ParserCreate()
|
||||
|
|
@ -1469,9 +1491,18 @@ def _nyx_xxe_parse(payload):
|
|||
|
||||
def _external_ref(context, base, system_id, public_id):
|
||||
# Real parser hook: fired by expat for every `<!ENTITY x SYSTEM "...">`
|
||||
# reference inside element bodies / DTD. Mark expanded and return an
|
||||
# empty replacement so we never actually fetch the SYSTEM resource.
|
||||
# reference inside element bodies / DTD. Mark expanded. When the
|
||||
# SYSTEM URL points at loopback HTTP, perform a real GET so the OOB
|
||||
# listener can observe the callback (Phase 05 OOB closure). Any
|
||||
# other scheme returns an empty replacement (no fetch).
|
||||
expanded[0] = True
|
||||
if system_id and (system_id.startswith("http://127.0.0.1")
|
||||
or system_id.startswith("http://host-gateway")
|
||||
or system_id.startswith("http://localhost")):
|
||||
try:
|
||||
_nyx_urlreq.urlopen(system_id, timeout=2).read()
|
||||
except Exception:
|
||||
pass
|
||||
sub = parser.ExternalEntityParserCreate(context, "utf-8")
|
||||
try:
|
||||
sub.Parse("", 1)
|
||||
|
|
@ -1480,9 +1511,9 @@ def _nyx_xxe_parse(payload):
|
|||
return 1
|
||||
|
||||
parser.ExternalEntityRefHandler = _external_ref
|
||||
payload_bytes = payload.encode("utf-8", "replace") if isinstance(payload, str) else payload
|
||||
doc = _nyx_xxe_document(payload)
|
||||
try:
|
||||
parser.Parse(payload_bytes, 1)
|
||||
parser.Parse(doc.encode("utf-8", "replace"), 1)
|
||||
except _nyx_expat.ExpatError:
|
||||
# Malformed XML still counts as a parser invocation; expanded
|
||||
# flag reflects whatever the hook saw before the error.
|
||||
|
|
|
|||
|
|
@ -985,20 +985,50 @@ STDOUT.flush
|
|||
pub fn emit_xxe_harness(_spec: &HarnessSpec) -> HarnessSource {
|
||||
let shim = probe_shim();
|
||||
let body = format!(
|
||||
r#"# Nyx dynamic harness — XXE REXML (Phase 05 / Track J.3).
|
||||
r##"# Nyx dynamic harness — XXE REXML (Phase 05 / Track J.3).
|
||||
require 'json'
|
||||
require 'net/http'
|
||||
require 'rexml/document'
|
||||
require 'stringio'
|
||||
require 'uri'
|
||||
|
||||
{shim}
|
||||
|
||||
# Build the XML document fed into REXML. Two shapes (Phase 05 OOB
|
||||
# closure, 2026-05-21):
|
||||
# - URL-form NYX_PAYLOAD (`http://...` / `https://...`): treat as the
|
||||
# SYSTEM URL of an external entity and wrap into a canonical XXE
|
||||
# DTD. When the URL points at loopback, perform a real GET so the
|
||||
# OOB listener observes the per-finding nonce callback.
|
||||
# - Anything else: treat as the full XML document (existing shape).
|
||||
def _nyx_build_xxe_document(payload)
|
||||
if payload.start_with?('http://') || payload.start_with?('https://')
|
||||
if payload.start_with?('http://127.0.0.1') ||
|
||||
payload.start_with?('http://host-gateway') ||
|
||||
payload.start_with?('http://localhost')
|
||||
begin
|
||||
uri = URI.parse(payload)
|
||||
Net::HTTP.start(uri.host, uri.port, open_timeout: 2, read_timeout: 2) do |http|
|
||||
http.request_get(uri.request_uri)
|
||||
end
|
||||
rescue StandardError
|
||||
# best-effort OOB fetch
|
||||
end
|
||||
end
|
||||
escaped = payload.gsub('&', '&amp;').gsub('"', '&quot;').gsub('<', '&lt;')
|
||||
"<?xml version=\"1.0\"?>\n<!DOCTYPE data [\n <!ENTITY xxe SYSTEM \"#{{escaped}}\">\n]>\n<data>&xxe;</data>"
|
||||
else
|
||||
payload
|
||||
end
|
||||
end
|
||||
|
||||
def _nyx_libxml_parse(payload)
|
||||
# Real parser hook: REXML parses `<!ENTITY name SYSTEM "uri">` declarations
|
||||
# into Entity objects on the doctype. Inspect the entities table to
|
||||
# detect every external-entity reference the parser registered.
|
||||
expanded = false
|
||||
begin
|
||||
doc = REXML::Document.new(payload)
|
||||
doc = REXML::Document.new(_nyx_build_xxe_document(payload))
|
||||
if doc.doctype
|
||||
doc.doctype.entities.each_value do |ent|
|
||||
s = ent.to_s
|
||||
|
|
@ -1042,7 +1072,7 @@ _nyx_xxe_probe(payload, expanded)
|
|||
STDOUT.puts '__NYX_SINK_HIT__'
|
||||
STDOUT.puts JSON.generate({{"entity_expanded" => expanded}})
|
||||
STDOUT.flush
|
||||
"#
|
||||
"##
|
||||
);
|
||||
HarnessSource {
|
||||
source: body,
|
||||
|
|
|
|||
|
|
@ -83,7 +83,12 @@ fn xxe_unsupported_caps_unchanged_for_other_langs() {
|
|||
fn benign_control_resolves_within_lang_slice() {
|
||||
for lang in LANGS {
|
||||
let slice = payloads_for_lang(Cap::XXE, *lang);
|
||||
let vuln = slice.iter().find(|p| !p.is_benign).unwrap();
|
||||
// Skip the OOB-nonce variant — it self-confirms via
|
||||
// [`Oracle::OobCallback`] and carries no paired benign control.
|
||||
let vuln = slice
|
||||
.iter()
|
||||
.find(|p| !p.is_benign && !p.oob_nonce_slot)
|
||||
.unwrap();
|
||||
let resolved =
|
||||
resolve_benign_control_lang(vuln, Cap::XXE, *lang).expect("paired control");
|
||||
assert!(resolved.is_benign);
|
||||
|
|
@ -96,7 +101,13 @@ fn benign_control_resolves_within_lang_slice() {
|
|||
fn payload_oracle_carries_xxe_entity_expanded_predicate() {
|
||||
for lang in LANGS {
|
||||
let slice = payloads_for_lang(Cap::XXE, *lang);
|
||||
let vuln = slice.iter().find(|p| !p.is_benign).unwrap();
|
||||
// The doctype-entity vuln carries the XxeEntityExpanded predicate.
|
||||
// The OOB-nonce variant uses [`Oracle::OobCallback`] and is exercised
|
||||
// by `python_xxe_oob_loopback_records_callback` instead.
|
||||
let vuln = slice
|
||||
.iter()
|
||||
.find(|p| !p.is_benign && !p.oob_nonce_slot)
|
||||
.unwrap();
|
||||
match &vuln.oracle {
|
||||
Oracle::SinkProbe { predicates } => {
|
||||
assert!(
|
||||
|
|
@ -117,10 +128,15 @@ fn vuln_payload_bytes_contain_doctype_entity_declaration() {
|
|||
// The whole differential rule rests on the vuln payload carrying
|
||||
// an `<!ENTITY … SYSTEM "…">` decl and the benign control NOT
|
||||
// carrying one — pin both invariants so a future corpus tweak
|
||||
// does not silently break the oracle.
|
||||
// does not silently break the oracle. The OOB-nonce variant's
|
||||
// `bytes` field is unused (the runner materialises a URL at call
|
||||
// time and the harness wraps it into the DTD), so skip it here.
|
||||
for lang in LANGS {
|
||||
let slice = payloads_for_lang(Cap::XXE, *lang);
|
||||
let vuln = slice.iter().find(|p| !p.is_benign).unwrap();
|
||||
let vuln = slice
|
||||
.iter()
|
||||
.find(|p| !p.is_benign && !p.oob_nonce_slot)
|
||||
.unwrap();
|
||||
let benign = slice.iter().find(|p| p.is_benign).unwrap();
|
||||
let vuln_text = std::str::from_utf8(vuln.bytes).unwrap();
|
||||
let benign_text = std::str::from_utf8(benign.bytes).unwrap();
|
||||
|
|
@ -429,16 +445,42 @@ mod e2e_phase_05 {
|
|||
backend: SandboxBackend::Process,
|
||||
..SandboxOptions::default()
|
||||
};
|
||||
match run_spec(&spec, &opts) {
|
||||
Ok(outcome) => Some(outcome),
|
||||
Err(RunError::BuildFailed { stderr, attempts }) => {
|
||||
eprintln!(
|
||||
"SKIP {lang:?} {fixture}: harness build failed after {attempts} attempts: {stderr}",
|
||||
);
|
||||
None
|
||||
// JVM startup occasionally fails under heavy cross-binary nextest
|
||||
// load with "Error occurred during initialization of VM: Properties
|
||||
// init: Could not determine current working directory." This is a
|
||||
// macOS getcwd() race under massive fork() churn, not a regression.
|
||||
// Retry up to 3 times; the second attempt almost always succeeds.
|
||||
for attempt in 0..3 {
|
||||
match run_spec(&spec, &opts) {
|
||||
Ok(outcome) => {
|
||||
if is_jvm_cwd_flake(&outcome) && attempt < 2 {
|
||||
eprintln!(
|
||||
"RETRY {lang:?} {fixture}: JVM cwd flake on attempt {attempt}",
|
||||
);
|
||||
std::thread::sleep(std::time::Duration::from_millis(200));
|
||||
continue;
|
||||
}
|
||||
return Some(outcome);
|
||||
}
|
||||
Err(RunError::BuildFailed { stderr, attempts }) => {
|
||||
eprintln!(
|
||||
"SKIP {lang:?} {fixture}: harness build failed after {attempts} attempts: {stderr}",
|
||||
);
|
||||
return None;
|
||||
}
|
||||
Err(e) => panic!("run_spec({lang:?} {fixture}) errored: {e:?}"),
|
||||
}
|
||||
Err(e) => panic!("run_spec({lang:?} {fixture}) errored: {e:?}"),
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn is_jvm_cwd_flake(outcome: &RunOutcome) -> bool {
|
||||
outcome.attempts.iter().any(|a| {
|
||||
let stdout = std::str::from_utf8(&a.outcome.stdout).unwrap_or("");
|
||||
let stderr = std::str::from_utf8(&a.outcome.stderr).unwrap_or("");
|
||||
stdout.contains("Could not determine current working directory")
|
||||
|| stderr.contains("Could not determine current working directory")
|
||||
})
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -510,4 +552,116 @@ mod e2e_phase_05 {
|
|||
.expect("Confirmed run must carry a DifferentialOutcome");
|
||||
assert_eq!(diff.verdict, DifferentialVerdict::Confirmed);
|
||||
}
|
||||
|
||||
/// Phase 05 OOB-loopback observation: when an [`nyx_scanner::dynamic::oob::OobListener`]
|
||||
/// is attached and the runner exercises the `xxe-<lang>-oob-nonce`
|
||||
/// payload, the parser's external-entity hook performs a real HTTP
|
||||
/// GET against the loopback nonce URL and the listener records the
|
||||
/// hit. Asserts the observation half of the Phase 05 OOB closure;
|
||||
/// the verdict-tier promotion (Confirmed → Confirmed+ProvenOob) is
|
||||
/// broader runner-rework tracked separately in
|
||||
/// `.pitboss/play/deferred.md`.
|
||||
fn run_oob(lang: Lang, fixture: &str, entry_name: &str) -> Option<RunOutcome> {
|
||||
use nyx_scanner::dynamic::oob::OobListener;
|
||||
use nyx_scanner::dynamic::sandbox::NetworkPolicy;
|
||||
use std::sync::Arc;
|
||||
|
||||
let bin = toolchain_for(lang);
|
||||
if !command_available(bin) {
|
||||
eprintln!("SKIP {lang:?} {fixture} (oob): missing toolchain {bin}");
|
||||
return None;
|
||||
}
|
||||
let _guard = FIXTURE_LOCK.lock().unwrap_or_else(|e| e.into_inner());
|
||||
|
||||
let listener = Arc::new(OobListener::bind().expect("bind OOB listener on loopback"));
|
||||
let (mut spec, _tmp) = build_spec(lang, fixture, entry_name);
|
||||
// Use a distinct workdir from the non-OOB e2e tests so the probe
|
||||
// channel files do not collide (both tests use the same fixture, so
|
||||
// the default spec_hash would resolve to the same
|
||||
// `/tmp/nyx-harness/<spec_hash>/__nyx_probes.jsonl` and the two runs
|
||||
// could clobber each other's drains under parallel nextest).
|
||||
spec.spec_hash = format!("{}-oob", spec.spec_hash);
|
||||
spec.finding_id = spec.spec_hash.clone();
|
||||
if matches!(lang, Lang::Java) {
|
||||
let workdir = std::path::PathBuf::from("/tmp/nyx-harness").join(&spec.spec_hash);
|
||||
let _ = std::fs::remove_dir_all(&workdir);
|
||||
}
|
||||
|
||||
let opts = SandboxOptions {
|
||||
backend: SandboxBackend::Process,
|
||||
network_policy: NetworkPolicy::OobOutbound {
|
||||
listener: Arc::clone(&listener),
|
||||
},
|
||||
..SandboxOptions::default()
|
||||
};
|
||||
|
||||
for attempt in 0..3 {
|
||||
match run_spec(&spec, &opts) {
|
||||
Ok(outcome) => {
|
||||
if is_jvm_cwd_flake(&outcome) && attempt < 2 {
|
||||
eprintln!(
|
||||
"RETRY {lang:?} {fixture} (oob): JVM cwd flake on attempt {attempt}",
|
||||
);
|
||||
std::thread::sleep(std::time::Duration::from_millis(200));
|
||||
continue;
|
||||
}
|
||||
return Some(outcome);
|
||||
}
|
||||
Err(RunError::BuildFailed { stderr, attempts }) => {
|
||||
eprintln!(
|
||||
"SKIP {lang:?} {fixture} (oob): build failed after {attempts}: {stderr}",
|
||||
);
|
||||
return None;
|
||||
}
|
||||
Err(e) => panic!("run_spec({lang:?} {fixture} oob) errored: {e:?}"),
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn assert_oob_recorded(outcome: &RunOutcome, label: &str) {
|
||||
let oob_attempt = outcome
|
||||
.attempts
|
||||
.iter()
|
||||
.find(|a| a.payload_label == label)
|
||||
.unwrap_or_else(|| {
|
||||
panic!(
|
||||
"OOB payload {label:?} must run when listener is attached; outcome={outcome:?}"
|
||||
)
|
||||
});
|
||||
assert!(
|
||||
oob_attempt.outcome.oob_callback_seen,
|
||||
"parser external-entity hook must fetch loopback URL so OOB listener records the nonce; got attempt={oob_attempt:?}",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn python_xxe_oob_loopback_records_callback() {
|
||||
let Some(outcome) = run_oob(Lang::Python, "vuln.py", "run") else { return };
|
||||
assert_oob_recorded(&outcome, "xxe-python-oob-nonce");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn java_xxe_oob_loopback_records_callback() {
|
||||
let Some(outcome) = run_oob(Lang::Java, "Vuln.java", "run") else { return };
|
||||
assert_oob_recorded(&outcome, "xxe-java-oob-nonce");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn php_xxe_oob_loopback_records_callback() {
|
||||
let Some(outcome) = run_oob(Lang::Php, "vuln.php", "run") else { return };
|
||||
assert_oob_recorded(&outcome, "xxe-php-oob-nonce");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ruby_xxe_oob_loopback_records_callback() {
|
||||
let Some(outcome) = run_oob(Lang::Ruby, "vuln.rb", "run") else { return };
|
||||
assert_oob_recorded(&outcome, "xxe-ruby-oob-nonce");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn go_xxe_oob_loopback_records_callback() {
|
||||
let Some(outcome) = run_oob(Lang::Go, "vuln.go", "run") else { return };
|
||||
assert_oob_recorded(&outcome, "xxe-go-oob-nonce");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue