mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-12 19:55:14 +02:00
[pitboss/grind] deferred session-0009 (20260520T233019Z-6958)
This commit is contained in:
parent
a6f34554db
commit
38cc0ce05f
60 changed files with 509 additions and 541 deletions
|
|
@ -622,12 +622,16 @@ pub fn emit(spec: &HarnessSpec) -> Result<HarnessSource, UnsupportedReason> {
|
|||
/// Phase 05 — Track J.3 XXE harness for Go (`encoding/xml.Decoder`
|
||||
/// with `Strict: false`).
|
||||
///
|
||||
/// Reads `NYX_PAYLOAD`, scans for `<!ENTITY name SYSTEM "uri">`
|
||||
/// declarations, substitutes them inside `&name;` element bodies, and
|
||||
/// writes a `ProbeKind::Xxe` probe whose `entity_expanded` flag tracks
|
||||
/// whether the substitution fired. Standalone `main.go` — does not
|
||||
/// pull the entry package (Go XXE corpus uses the harness directly,
|
||||
/// matching the cap-short-circuit pattern in the other langs).
|
||||
/// Reads `NYX_PAYLOAD`, parses it with stdlib `encoding/xml.Decoder`,
|
||||
/// captures the DOCTYPE `Directive` token, and walks the parser's
|
||||
/// `Token()` stream. Go's stdlib decoder does not auto-resolve
|
||||
/// external entities (safe-by-default), so we detect the resolution
|
||||
/// boundary by observing the parser's reaction: an `&xxx;` reference
|
||||
/// to a SYSTEM entity declared in the DOCTYPE either errors out
|
||||
/// (strict mode) or surfaces in `CharData` — both are real parser
|
||||
/// hooks. Writes a `ProbeKind::Xxe` probe whose `entity_expanded`
|
||||
/// flag tracks whether the parser saw such a reference. Standalone
|
||||
/// `main.go` — does not pull the entry package.
|
||||
pub fn emit_xxe_harness(_spec: &HarnessSpec) -> HarnessSource {
|
||||
let shim = probe_shim();
|
||||
let go_mod = generate_go_mod();
|
||||
|
|
@ -636,11 +640,13 @@ pub fn emit_xxe_harness(_spec: &HarnessSpec) -> HarnessSource {
|
|||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"encoding/xml"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"os/signal"
|
||||
"regexp"
|
||||
"strings"
|
||||
"syscall"
|
||||
"time"
|
||||
|
|
@ -648,37 +654,43 @@ import (
|
|||
|
||||
{shim}
|
||||
|
||||
var nyxDoctypeEntityRE = regexp.MustCompile(`<!ENTITY\s+(\w+)\s+SYSTEM\s+"([^"]+)"\s*>`)
|
||||
var nyxEntityRefRE = regexp.MustCompile(`&(\w+);`)
|
||||
|
||||
func nyxXmlParse(payload string) (string, bool) {{
|
||||
entities := map[string]string{{}}
|
||||
for _, m := range nyxDoctypeEntityRE.FindAllStringSubmatch(payload, -1) {{
|
||||
entities[m[1]] = "<" + m[2] + ">"
|
||||
}}
|
||||
func nyxXmlParse(payload string) bool {{
|
||||
// Real parser hook: walk Go's encoding/xml.Decoder token stream.
|
||||
// The decoder parses <!DOCTYPE name [<!ENTITY x SYSTEM "uri">]>
|
||||
// as an xml.Directive token whose bytes carry the literal ENTITY
|
||||
// declaration. When the body subsequently references `&x;` and
|
||||
// no Entity map is registered, the decoder raises an
|
||||
// "invalid character entity" error — that error IS the parser's
|
||||
// resolution boundary firing.
|
||||
expanded := false
|
||||
rendered := nyxEntityRefRE.ReplaceAllStringFunc(payload, func(raw string) string {{
|
||||
m := nyxEntityRefRE.FindStringSubmatch(raw)
|
||||
if m == nil {{
|
||||
return raw
|
||||
sawSystem := false
|
||||
decoder := xml.NewDecoder(strings.NewReader(payload))
|
||||
for {{
|
||||
tok, err := decoder.Token()
|
||||
if err != nil {{
|
||||
if err != io.EOF && sawSystem && strings.Contains(err.Error(), "entity") {{
|
||||
expanded = true
|
||||
}}
|
||||
break
|
||||
}}
|
||||
if body, ok := entities[m[1]]; ok {{
|
||||
expanded = true
|
||||
return body
|
||||
if d, ok := tok.(xml.Directive); ok {{
|
||||
b := []byte(d)
|
||||
if bytes.Contains(b, []byte("ENTITY")) && bytes.Contains(b, []byte("SYSTEM")) {{
|
||||
sawSystem = true
|
||||
}}
|
||||
}}
|
||||
return raw
|
||||
}})
|
||||
return rendered, expanded
|
||||
}}
|
||||
return expanded
|
||||
}}
|
||||
|
||||
func nyxWriteXxeProbe(rendered string, expanded bool) {{
|
||||
func nyxWriteXxeProbe(payload string, expanded bool) {{
|
||||
__nyx_emit(map[string]interface{{}}{{
|
||||
"sink_callee": "xml.Decoder.Decode",
|
||||
"args": []map[string]interface{{}}{{{{"kind": "String", "value": rendered}}}},
|
||||
"args": []map[string]interface{{}}{{{{"kind": "String", "value": payload}}}},
|
||||
"captured_at_ns": uint64(time.Now().UnixNano()),
|
||||
"payload_id": os.Getenv("NYX_PAYLOAD_ID"),
|
||||
"kind": map[string]interface{{}}{{"kind": "Xxe", "entity_expanded": expanded}},
|
||||
"witness": __nyx_witness("xml.Decoder.Decode", []string{{rendered}}),
|
||||
"witness": __nyx_witness("xml.Decoder.Decode", []string{{payload}}),
|
||||
}})
|
||||
}}
|
||||
|
||||
|
|
@ -686,10 +698,10 @@ func main() {{
|
|||
__nyx_install_crash_guard("xml.Decoder.Decode")
|
||||
defer __nyx_recover_crash("xml.Decoder.Decode")()
|
||||
payload := os.Getenv("NYX_PAYLOAD")
|
||||
rendered, expanded := nyxXmlParse(payload)
|
||||
nyxWriteXxeProbe(rendered, expanded)
|
||||
expanded := nyxXmlParse(payload)
|
||||
nyxWriteXxeProbe(payload, expanded)
|
||||
fmt.Println("__NYX_SINK_HIT__")
|
||||
body, _ := json.Marshal(map[string]interface{{}}{{"render": rendered, "entity_expanded": expanded}})
|
||||
body, _ := json.Marshal(map[string]interface{{}}{{"entity_expanded": expanded}})
|
||||
fmt.Println(string(body))
|
||||
}}
|
||||
"##
|
||||
|
|
@ -940,7 +952,7 @@ fn pre_call_setup(spec: &HarnessSpec) -> String {
|
|||
PayloadSlot::Argv(n) => {
|
||||
let pads = (0..*n).map(|_| "\"\"".to_owned()).collect::<Vec<_>>().join(", ");
|
||||
if pads.is_empty() {
|
||||
format!("\tos.Args = []string{{\"nyx_harness\", payload}}\n")
|
||||
"\tos.Args = []string{\"nyx_harness\", payload}\n".to_string()
|
||||
} else {
|
||||
format!("\tos.Args = []string{{\"nyx_harness\", {pads}, payload}}\n")
|
||||
}
|
||||
|
|
|
|||
|
|
@ -938,57 +938,64 @@ fn ssti_thymeleaf_pom() -> &'static str {
|
|||
|
||||
/// Phase 05 — Track J.3 XXE harness for Java (`DocumentBuilderFactory`).
|
||||
///
|
||||
/// Reads `NYX_PAYLOAD`, scans for `<!ENTITY name SYSTEM "uri">`
|
||||
/// declarations, expands them inside `&name;` element references
|
||||
/// (matching `DocumentBuilderFactory` with external-entity resolution
|
||||
/// enabled), and writes a `ProbeKind::Xxe` probe whose
|
||||
/// `entity_expanded` flag tracks whether the substitution actually
|
||||
/// fired. The synthetic resolver keeps the corpus deterministic
|
||||
/// without requiring a `javax.xml.parsers` classpath in the sandbox.
|
||||
/// Reads `NYX_PAYLOAD`, parses it with `javax.xml.parsers.DocumentBuilder`
|
||||
/// (JDK stdlib) configured with a custom `EntityResolver` that records
|
||||
/// every `resolveEntity` invocation. The resolver returns an empty
|
||||
/// `InputSource` so the harness never actually fetches the SYSTEM
|
||||
/// resource, but the resolution boundary fires at the real parser
|
||||
/// hook the brief calls out. Writes a `ProbeKind::Xxe` probe whose
|
||||
/// `entity_expanded` flag tracks whether the resolver fired.
|
||||
pub fn emit_xxe_harness(_spec: &HarnessSpec) -> HarnessSource {
|
||||
let shim = probe_shim();
|
||||
let source = format!(
|
||||
r#"// Nyx dynamic harness — XXE DocumentBuilderFactory (Phase 05 / Track J.3).
|
||||
import java.io.FileWriter;
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
import java.io.StringReader;
|
||||
import javax.xml.parsers.DocumentBuilder;
|
||||
import javax.xml.parsers.DocumentBuilderFactory;
|
||||
import org.xml.sax.EntityResolver;
|
||||
import org.xml.sax.InputSource;
|
||||
import org.xml.sax.SAXException;
|
||||
|
||||
public class NyxHarness {{
|
||||
{shim}
|
||||
|
||||
static boolean nyxLastExpanded = false;
|
||||
|
||||
static String nyxXmlParse(String payload) {{
|
||||
Pattern doctype = Pattern.compile(
|
||||
"<!ENTITY\\s+(\\w+)\\s+SYSTEM\\s+\"([^\"]+)\"\\s*>"
|
||||
);
|
||||
Map<String, String> entities = new HashMap<>();
|
||||
Matcher dm = doctype.matcher(payload);
|
||||
while (dm.find()) {{
|
||||
entities.put(dm.group(1), "<" + dm.group(2) + ">");
|
||||
}}
|
||||
static void nyxXmlParse(String payload) {{
|
||||
nyxLastExpanded = false;
|
||||
Pattern ref = Pattern.compile("&(\\w+);");
|
||||
Matcher rm = ref.matcher(payload);
|
||||
StringBuffer out = new StringBuffer(payload.length());
|
||||
while (rm.find()) {{
|
||||
String name = rm.group(1);
|
||||
String body = entities.get(name);
|
||||
if (body != null) {{
|
||||
nyxLastExpanded = true;
|
||||
rm.appendReplacement(out, Matcher.quoteReplacement(body));
|
||||
}} else {{
|
||||
rm.appendReplacement(out, Matcher.quoteReplacement(rm.group(0)));
|
||||
try {{
|
||||
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
|
||||
// Mirror the brief's "DocumentBuilderFactory with external
|
||||
// entity resolution enabled" target: leave the factory at
|
||||
// default settings (which historically permit doctype +
|
||||
// external entities) and rely on the EntityResolver hook
|
||||
// to short-circuit the actual fetch.
|
||||
DocumentBuilder db = dbf.newDocumentBuilder();
|
||||
db.setEntityResolver(new EntityResolver() {{
|
||||
public InputSource resolveEntity(String publicId, String systemId) {{
|
||||
// Real parser hook: fired by the SAX/DOM parser for
|
||||
// every `<!ENTITY x SYSTEM "...">` reference. Mark
|
||||
// expanded and return an empty replacement so we
|
||||
// never actually fetch the SYSTEM resource.
|
||||
nyxLastExpanded = true;
|
||||
return new InputSource(new StringReader(""));
|
||||
}}
|
||||
}});
|
||||
try {{
|
||||
db.parse(new InputSource(new StringReader(payload)));
|
||||
}} catch (SAXException | IOException e) {{
|
||||
// Malformed XML still counts as a parser invocation;
|
||||
// expanded flag reflects whatever the hook saw before
|
||||
// the error.
|
||||
}}
|
||||
}} catch (Exception e) {{
|
||||
// builder construction failed — leave expanded=false
|
||||
}}
|
||||
rm.appendTail(out);
|
||||
return out.toString();
|
||||
}}
|
||||
|
||||
static void nyxXxeProbe(String rendered, boolean expanded) {{
|
||||
static void nyxXxeProbe(String payload, boolean expanded) {{
|
||||
String p = System.getenv("NYX_PROBE_PATH");
|
||||
if (p == null || p.isEmpty()) return;
|
||||
long now = System.nanoTime();
|
||||
|
|
@ -996,14 +1003,14 @@ public class NyxHarness {{
|
|||
if (pid == null) pid = "";
|
||||
StringBuilder line = new StringBuilder(256);
|
||||
line.append("{{\"sink_callee\":\"DocumentBuilder.parse\",\"args\":[{{\"kind\":\"String\",\"value\":\"");
|
||||
nyxJsonEscape(rendered, line);
|
||||
nyxJsonEscape(payload, line);
|
||||
line.append("\"}}],");
|
||||
line.append("\"captured_at_ns\":").append(now).append(',');
|
||||
line.append("\"payload_id\":\"");
|
||||
nyxJsonEscape(pid, line);
|
||||
line.append("\",\"kind\":{{\"kind\":\"Xxe\",\"entity_expanded\":").append(expanded ? "true" : "false").append("}},");
|
||||
line.append("\"witness\":");
|
||||
line.append(nyxWitnessJson("DocumentBuilder.parse", new String[]{{rendered}}));
|
||||
line.append(nyxWitnessJson("DocumentBuilder.parse", new String[]{{payload}}));
|
||||
line.append("}}\n");
|
||||
try (FileWriter fw = new FileWriter(p, true)) {{
|
||||
fw.write(line.toString());
|
||||
|
|
@ -1015,13 +1022,11 @@ public class NyxHarness {{
|
|||
public static void main(String[] args) {{
|
||||
String payload = System.getenv("NYX_PAYLOAD");
|
||||
if (payload == null) payload = "";
|
||||
String rendered = nyxXmlParse(payload);
|
||||
nyxXxeProbe(rendered, nyxLastExpanded);
|
||||
nyxXmlParse(payload);
|
||||
nyxXxeProbe(payload, nyxLastExpanded);
|
||||
System.out.println("__NYX_SINK_HIT__");
|
||||
StringBuilder body = new StringBuilder(64);
|
||||
body.append("{{\"render\":\"");
|
||||
nyxJsonEscape(rendered, body);
|
||||
body.append("\",\"entity_expanded\":").append(nyxLastExpanded ? "true" : "false").append("}}");
|
||||
body.append("{{\"entity_expanded\":").append(nyxLastExpanded ? "true" : "false").append("}}");
|
||||
System.out.println(body.toString());
|
||||
}}
|
||||
}}
|
||||
|
|
|
|||
|
|
@ -373,11 +373,10 @@ pub fn materialize_node(env: &Environment) -> RuntimeArtifacts {
|
|||
}
|
||||
}
|
||||
for fw in &env.frameworks {
|
||||
if let Some(name) = node_framework_pkg_name(*fw) {
|
||||
if seen.insert(name.to_owned()) {
|
||||
if let Some(name) = node_framework_pkg_name(*fw)
|
||||
&& seen.insert(name.to_owned()) {
|
||||
deps.push((name.to_owned(), "*"));
|
||||
}
|
||||
}
|
||||
}
|
||||
deps.sort_by(|a, b| a.0.cmp(&b.0));
|
||||
|
||||
|
|
|
|||
|
|
@ -667,14 +667,17 @@ echo json_encode(["render" => $rendered]) . "\n";
|
|||
}
|
||||
}
|
||||
|
||||
/// Phase 05 — Track J.3 XXE harness for PHP (`simplexml_load_string`
|
||||
/// under `libxml_disable_entity_loader(false)`).
|
||||
/// Phase 05 — Track J.3 XXE harness for PHP (`simplexml_load_string`).
|
||||
///
|
||||
/// Reads `NYX_PAYLOAD`, scans for `<!ENTITY name SYSTEM "uri">`
|
||||
/// declarations, expands them inside `&name;` element references
|
||||
/// (matching `simplexml_load_string` / `DOMDocument` with the entity
|
||||
/// loader re-enabled), and writes a `ProbeKind::Xxe` probe whose
|
||||
/// `entity_expanded` flag tracks whether the substitution fired.
|
||||
/// Reads `NYX_PAYLOAD`, registers a real `libxml_set_external_entity_loader`
|
||||
/// callback (the canonical PHP hook for external entity resolution),
|
||||
/// parses the payload with `simplexml_load_string` under
|
||||
/// `LIBXML_NOENT | LIBXML_DTDLOAD` (the configuration real XXE-prone
|
||||
/// code uses), and writes a `ProbeKind::Xxe` probe whose
|
||||
/// `entity_expanded` flag tracks whether the loader fired. The
|
||||
/// loader returns `null` so the harness never fetches the SYSTEM
|
||||
/// resource, but the resolution boundary fires at the real parser
|
||||
/// hook the brief calls out.
|
||||
pub fn emit_xxe_harness(_spec: &HarnessSpec) -> HarnessSource {
|
||||
let shim = probe_shim();
|
||||
let body = format!(
|
||||
|
|
@ -682,43 +685,47 @@ pub fn emit_xxe_harness(_spec: &HarnessSpec) -> HarnessSource {
|
|||
// Nyx dynamic harness — XXE simplexml_load_string (Phase 05 / Track J.3).
|
||||
{shim}
|
||||
|
||||
function _nyx_libxml_parse(string $payload): array {{
|
||||
$entities = [];
|
||||
if (preg_match_all('/<!ENTITY\s+(\w+)\s+SYSTEM\s+"([^"]+)"\s*>/', $payload, $matches, PREG_SET_ORDER)) {{
|
||||
foreach ($matches as $m) {{
|
||||
$entities[$m[1]] = '<' . $m[2] . '>';
|
||||
}}
|
||||
}}
|
||||
function _nyx_libxml_parse(string $payload): bool {{
|
||||
$expanded = false;
|
||||
$rendered = preg_replace_callback('/&(\w+);/', function ($m) use ($entities, &$expanded) {{
|
||||
if (array_key_exists($m[1], $entities)) {{
|
||||
$expanded = true;
|
||||
return $entities[$m[1]];
|
||||
}}
|
||||
return $m[0];
|
||||
}}, $payload) ?? $payload;
|
||||
return [$rendered, $expanded];
|
||||
// Real parser hook: libxml calls this for every <!ENTITY name SYSTEM "uri">
|
||||
// reference resolved in the document. We mark expanded and
|
||||
// return null so the parser does not actually fetch the resource.
|
||||
libxml_set_external_entity_loader(function ($public, $system, $context) use (&$expanded) {{
|
||||
$expanded = true;
|
||||
return null;
|
||||
}});
|
||||
$prev_errors = libxml_use_internal_errors(true);
|
||||
// LIBXML_NOENT enables entity substitution (turning `&xxe;` into
|
||||
// the resolved body) and LIBXML_DTDLOAD allows the parser to load
|
||||
// the DTD declarations — the combination real XXE-vulnerable PHP
|
||||
// code passes to `simplexml_load_string`.
|
||||
@simplexml_load_string($payload, 'SimpleXMLElement', LIBXML_NOENT | LIBXML_DTDLOAD);
|
||||
libxml_clear_errors();
|
||||
libxml_use_internal_errors($prev_errors);
|
||||
// Reset the loader to default so nothing leaks across runs.
|
||||
libxml_set_external_entity_loader(null);
|
||||
return $expanded;
|
||||
}}
|
||||
|
||||
function _nyx_xxe_probe(string $rendered, bool $expanded): void {{
|
||||
function _nyx_xxe_probe(string $payload, bool $expanded): void {{
|
||||
$p = getenv('NYX_PROBE_PATH');
|
||||
if ($p === false || $p === '') return;
|
||||
$rec = [
|
||||
'sink_callee' => 'simplexml_load_string',
|
||||
'args' => [['kind' => 'String', 'value' => $rendered]],
|
||||
'args' => [['kind' => 'String', 'value' => $payload]],
|
||||
'captured_at_ns' => (int) hrtime(true),
|
||||
'payload_id' => (string) (getenv('NYX_PAYLOAD_ID') ?: ''),
|
||||
'kind' => ['kind' => 'Xxe', 'entity_expanded' => $expanded],
|
||||
'witness' => __nyx_witness('simplexml_load_string', [$rendered]),
|
||||
'witness' => __nyx_witness('simplexml_load_string', [$payload]),
|
||||
];
|
||||
@file_put_contents($p, json_encode($rec) . "\n", FILE_APPEND);
|
||||
}}
|
||||
|
||||
$payload = (string) (getenv('NYX_PAYLOAD') ?: '');
|
||||
[$rendered, $expanded] = _nyx_libxml_parse($payload);
|
||||
_nyx_xxe_probe($rendered, $expanded);
|
||||
$expanded = _nyx_libxml_parse($payload);
|
||||
_nyx_xxe_probe($payload, $expanded);
|
||||
echo "__NYX_SINK_HIT__\n";
|
||||
echo json_encode(["render" => $rendered, "entity_expanded" => $expanded]) . "\n";
|
||||
echo json_encode(["entity_expanded" => $expanded]) . "\n";
|
||||
"#
|
||||
);
|
||||
HarnessSource {
|
||||
|
|
|
|||
|
|
@ -1438,65 +1438,76 @@ if __name__ == "__main__":
|
|||
|
||||
/// Phase 05 — Track J.3 XXE harness for Python (`lxml.etree`).
|
||||
///
|
||||
/// Reads `NYX_PAYLOAD`, runs a regex-based DOCTYPE/ENTITY scanner that
|
||||
/// substitutes any `<!ENTITY name SYSTEM "uri">` body inside `&name;`
|
||||
/// element references (matching `lxml.etree.XMLParser(resolve_entities=
|
||||
/// True)` semantics) and writes a `ProbeKind::Xxe` probe whose
|
||||
/// `entity_expanded` flag tracks whether the substitution actually
|
||||
/// fired. The synthetic resolver keeps the corpus deterministic
|
||||
/// without bundling lxml in the sandbox image; the harness still
|
||||
/// exercises the probe-channel, oracle, and differential plumbing
|
||||
/// end-to-end.
|
||||
/// Reads `NYX_PAYLOAD`, parses it with `xml.parsers.expat` (the stdlib
|
||||
/// XML parser backing `xml.etree.ElementTree` and `lxml`), installs a
|
||||
/// real `ExternalEntityRefHandler` to detect external-entity resolution
|
||||
/// at the parser hook, and writes a `ProbeKind::Xxe` probe whose
|
||||
/// `entity_expanded` flag tracks whether the handler actually fired.
|
||||
/// The handler returns an empty replacement so the harness never
|
||||
/// fetches the SYSTEM resource (sandbox safety) but the resolution
|
||||
/// boundary is exercised at the parser level.
|
||||
pub fn emit_xxe_harness(_spec: &HarnessSpec) -> HarnessSource {
|
||||
let probe = probe_shim();
|
||||
let body = format!(
|
||||
r#"#!/usr/bin/env python3
|
||||
"""Nyx dynamic harness — XXE lxml (Phase 05 / Track J.3)."""
|
||||
import os, json, re, sys, time
|
||||
"""Nyx dynamic harness — XXE xml.parsers.expat (Phase 05 / Track J.3)."""
|
||||
import os, json, sys, time
|
||||
import xml.parsers.expat as _nyx_expat
|
||||
|
||||
{probe}
|
||||
|
||||
_NYX_DOCTYPE_ENTITY = re.compile(
|
||||
r'<!ENTITY\s+(\w+)\s+SYSTEM\s+"([^"]+)"\s*>'
|
||||
)
|
||||
def _nyx_xxe_parse(payload):
|
||||
expanded = [False]
|
||||
parser = _nyx_expat.ParserCreate()
|
||||
# Enable parameter-entity parsing so `%name;` references in the DTD
|
||||
# also flow through the external-ref hook, matching what lxml does
|
||||
# under `resolve_entities=True`.
|
||||
try:
|
||||
parser.SetParamEntityParsing(_nyx_expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def _nyx_lxml_parse(payload):
|
||||
# Parse the payload with `resolve_entities=True` semantics: bind
|
||||
# `<!ENTITY name SYSTEM "uri">` declarations into a map then
|
||||
# substitute `&name;` references inside element bodies.
|
||||
entities = {{}}
|
||||
for m in _NYX_DOCTYPE_ENTITY.finditer(payload):
|
||||
entities[m.group(1)] = '<' + m.group(2) + '>'
|
||||
expanded = False
|
||||
def _sub(match):
|
||||
nonlocal expanded
|
||||
name = match.group(1)
|
||||
if name in entities:
|
||||
expanded = True
|
||||
return entities[name]
|
||||
return match.group(0)
|
||||
rendered = re.sub(r'&(\w+);', _sub, payload)
|
||||
return rendered, expanded
|
||||
def _external_ref(context, base, system_id, public_id):
|
||||
# Real parser hook: fired by expat for every `<!ENTITY x SYSTEM "...">`
|
||||
# reference inside element bodies / DTD. Mark expanded and return an
|
||||
# empty replacement so we never actually fetch the SYSTEM resource.
|
||||
expanded[0] = True
|
||||
sub = parser.ExternalEntityParserCreate(context, "utf-8")
|
||||
try:
|
||||
sub.Parse("", 1)
|
||||
except _nyx_expat.ExpatError:
|
||||
pass
|
||||
return 1
|
||||
|
||||
def _nyx_xxe_probe(rendered, expanded):
|
||||
parser.ExternalEntityRefHandler = _external_ref
|
||||
payload_bytes = payload.encode("utf-8", "replace") if isinstance(payload, str) else payload
|
||||
try:
|
||||
parser.Parse(payload_bytes, 1)
|
||||
except _nyx_expat.ExpatError:
|
||||
# Malformed XML still counts as a parser invocation; expanded
|
||||
# flag reflects whatever the hook saw before the error.
|
||||
pass
|
||||
return expanded[0]
|
||||
|
||||
def _nyx_xxe_probe(payload, expanded):
|
||||
rec = {{
|
||||
"sink_callee": "lxml.etree.XMLParser.parse",
|
||||
"args": [{{"kind": "String", "value": rendered}}],
|
||||
"args": [{{"kind": "String", "value": payload}}],
|
||||
"captured_at_ns": time.time_ns(),
|
||||
"payload_id": os.environ.get("NYX_PAYLOAD_ID", ""),
|
||||
"kind": {{"kind": "Xxe", "entity_expanded": bool(expanded)}},
|
||||
"witness": __nyx_witness("lxml.etree.XMLParser.parse", [rendered]),
|
||||
"witness": __nyx_witness("lxml.etree.XMLParser.parse", [payload]),
|
||||
}}
|
||||
__nyx_emit(rec)
|
||||
|
||||
def _nyx_run():
|
||||
payload = os.environ.get("NYX_PAYLOAD", "")
|
||||
rendered, expanded = _nyx_lxml_parse(payload)
|
||||
_nyx_xxe_probe(rendered, expanded)
|
||||
expanded = _nyx_xxe_parse(payload)
|
||||
_nyx_xxe_probe(payload, expanded)
|
||||
# Sink-hit sentinel flips SandboxOutcome.sink_hit so the runner's
|
||||
# `vuln_fired && sink_hit` gate clears regardless of expansion.
|
||||
print("__NYX_SINK_HIT__", flush=True)
|
||||
sys.stdout.write(json.dumps({{"render": rendered, "entity_expanded": expanded}}) + "\n")
|
||||
sys.stdout.write(json.dumps({{"entity_expanded": bool(expanded)}}) + "\n")
|
||||
sys.stdout.flush()
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
|||
|
|
@ -972,57 +972,75 @@ STDOUT.flush
|
|||
|
||||
/// Phase 05 — Track J.3 XXE harness for Ruby (REXML / Nokogiri).
|
||||
///
|
||||
/// Reads `NYX_PAYLOAD`, scans for `<!ENTITY name SYSTEM "uri">`
|
||||
/// declarations, substitutes them inside `&name;` element bodies, and
|
||||
/// writes a `ProbeKind::Xxe` probe whose `entity_expanded` flag tracks
|
||||
/// whether the substitution fired. Brief lists a framework adapter
|
||||
/// for Ruby XXE (`xxe_ruby`); the harness keeps the corpus
|
||||
/// end-to-end-exercisable without bundling REXML / Nokogiri.
|
||||
/// Reads `NYX_PAYLOAD`, parses it with stdlib `REXML::Document.new`,
|
||||
/// inspects the resulting `doctype.entities` table for SYSTEM/PUBLIC
|
||||
/// external-entity declarations the parser actually parsed and
|
||||
/// registered, and writes a `ProbeKind::Xxe` probe whose
|
||||
/// `entity_expanded` flag tracks whether REXML registered any
|
||||
/// external entity. REXML never fetches the SYSTEM resource by
|
||||
/// default (safe-by-default), so the harness does not need a network
|
||||
/// shim — but the detection runs at the real parser hook the brief
|
||||
/// calls out: the parser parses the DOCTYPE declarations and exposes
|
||||
/// them in the document's entities table.
|
||||
pub fn emit_xxe_harness(_spec: &HarnessSpec) -> HarnessSource {
|
||||
let shim = probe_shim();
|
||||
let body = format!(
|
||||
r#"# Nyx dynamic harness — XXE REXML / Nokogiri (Phase 05 / Track J.3).
|
||||
r#"# Nyx dynamic harness — XXE REXML (Phase 05 / Track J.3).
|
||||
require 'json'
|
||||
require 'rexml/document'
|
||||
require 'stringio'
|
||||
|
||||
{shim}
|
||||
|
||||
def _nyx_libxml_parse(payload)
|
||||
entities = {{}}
|
||||
payload.scan(/<!ENTITY\s+(\w+)\s+SYSTEM\s+"([^"]+)"\s*>/) do |name, uri|
|
||||
entities[name] = "<#{{uri}}>"
|
||||
end
|
||||
# Real parser hook: REXML parses `<!ENTITY name SYSTEM "uri">` declarations
|
||||
# into Entity objects on the doctype. Inspect the entities table to
|
||||
# detect every external-entity reference the parser registered.
|
||||
expanded = false
|
||||
rendered = payload.gsub(/&(\w+);/) do
|
||||
name = Regexp.last_match(1)
|
||||
if entities.key?(name)
|
||||
expanded = true
|
||||
entities[name]
|
||||
else
|
||||
Regexp.last_match(0)
|
||||
begin
|
||||
doc = REXML::Document.new(payload)
|
||||
if doc.doctype
|
||||
doc.doctype.entities.each_value do |ent|
|
||||
s = ent.to_s
|
||||
if s =~ /SYSTEM|PUBLIC/
|
||||
expanded = true
|
||||
end
|
||||
end
|
||||
# REXML serialization raises on unresolved external entity refs
|
||||
# in element bodies — catch the raise as a secondary signal that
|
||||
# the parser saw an external reference past the declaration.
|
||||
begin
|
||||
doc.write(StringIO.new)
|
||||
rescue StandardError
|
||||
expanded = true
|
||||
end
|
||||
end
|
||||
rescue StandardError
|
||||
# Malformed XML still counts as a parser invocation; expanded
|
||||
# reflects whatever the parser saw before the error.
|
||||
end
|
||||
[rendered, expanded]
|
||||
expanded
|
||||
end
|
||||
|
||||
def _nyx_xxe_probe(rendered, expanded)
|
||||
def _nyx_xxe_probe(payload, expanded)
|
||||
p = ENV['NYX_PROBE_PATH']
|
||||
return if p.nil? || p.empty?
|
||||
rec = {{
|
||||
'sink_callee' => 'REXML::Document.new',
|
||||
'args' => [{{ 'kind' => 'String', 'value' => rendered }}],
|
||||
'args' => [{{ 'kind' => 'String', 'value' => payload }}],
|
||||
'captured_at_ns' => Process.clock_gettime(Process::CLOCK_MONOTONIC, :nanosecond),
|
||||
'payload_id' => ENV['NYX_PAYLOAD_ID'] || '',
|
||||
'kind' => {{ 'kind' => 'Xxe', 'entity_expanded' => !!expanded }},
|
||||
'witness' => __nyx_witness('REXML::Document.new', [rendered]),
|
||||
'witness' => __nyx_witness('REXML::Document.new', [payload]),
|
||||
}}
|
||||
File.open(p, 'a') {{ |f| f.write(rec.to_json + "\n") }}
|
||||
end
|
||||
|
||||
payload = ENV['NYX_PAYLOAD'] || ''
|
||||
rendered, expanded = _nyx_libxml_parse(payload)
|
||||
_nyx_xxe_probe(rendered, expanded)
|
||||
expanded = _nyx_libxml_parse(payload)
|
||||
_nyx_xxe_probe(payload, expanded)
|
||||
STDOUT.puts '__NYX_SINK_HIT__'
|
||||
STDOUT.puts JSON.generate({{"render" => rendered, "entity_expanded" => expanded}})
|
||||
STDOUT.puts JSON.generate({{"entity_expanded" => expanded}})
|
||||
STDOUT.flush
|
||||
"#
|
||||
);
|
||||
|
|
|
|||
|
|
@ -1078,8 +1078,8 @@ fn class_derives_default(entry_src: &str, class: &str) -> bool {
|
|||
if boundary_ok {
|
||||
let window_start = decl_pos.saturating_sub(256);
|
||||
let window = &entry_src[window_start..decl_pos];
|
||||
if let Some(derive_pos) = window.rfind("#[derive(") {
|
||||
if let Some(end_rel) = window[derive_pos..].find(")]") {
|
||||
if let Some(derive_pos) = window.rfind("#[derive(")
|
||||
&& let Some(end_rel) = window[derive_pos..].find(")]") {
|
||||
let end = derive_pos + end_rel;
|
||||
let derive_list = &window[derive_pos + "#[derive(".len()..end];
|
||||
let between = &window[end + ")]".len()..];
|
||||
|
|
@ -1102,7 +1102,6 @@ fn class_derives_default(entry_src: &str, class: &str) -> bool {
|
|||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
search_from = decl_pos + 1;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue