[pitboss] phase 07: Track J.5 + Track L.5 — XPATH_INJECTION corpus + XPath / DOM / lxml adapters

This commit is contained in:
pitboss 2026-05-17 23:47:12 -05:00
parent b2eeaabb09
commit a32075a756
38 changed files with 2111 additions and 67 deletions

View file

@ -564,6 +564,9 @@ pub fn emit(spec: &HarnessSpec) -> Result<HarnessSource, UnsupportedReason> {
if spec.expected_cap == crate::labels::Cap::LDAP_INJECTION {
return Ok(emit_ldap_harness(spec));
}
if spec.expected_cap == crate::labels::Cap::XPATH_INJECTION {
return Ok(emit_xpath_harness(spec));
}
let entry_source = read_entry_source(&spec.entry_file);
let shape = JavaShape::detect(spec, &entry_source);
@ -1080,6 +1083,132 @@ public class NyxHarness {{
}
}
/// Phase 07 — Track J.5: synthetic XPath-injection harness for Java
/// (sink label `javax.xml.xpath.XPath.evaluate`).
///
/// The emitted `NyxHarness` program:
///
/// 1. reads `NYX_PAYLOAD` (unset → empty string) and splices it into a
///    `//user[@name='<payload>']` expression;
/// 2. scores the expression with the inlined `nyxXpathSelect`, which
///    pattern-matches the predicate against a hard-coded
///    `alice` / `bob` / `carol` list — an exact literal yields the number
///    of equal names, a quoted literal followed by an `or …` tail (or any
///    predicate shape it does not recognise) yields every user, and an
///    expression lacking the expected prefix or trailing `]` yields `0`;
/// 3. appends one JSON probe line carrying
///    `ProbeKind::Xpath { nodes_returned }` to the file named by
///    `NYX_PROBE_PATH`, when that variable is set and non-empty;
/// 4. prints `__NYX_SINK_HIT__` followed by a one-line JSON summary.
///
/// The canonical XML corpus is staged next to the harness through
/// `extra_files`, but the generated program does not read it and no real
/// `javax.xml.xpath` call is made. Mirrors the synthetic-harness pattern
/// used by Phase 03 / 04 / 05 / 06; a future structural fix is expected to
/// link real `javax.xml.xpath` via the staged document.
///
/// `_spec` is not consulted.
pub fn emit_xpath_harness(_spec: &HarnessSpec) -> HarnessSource {
    use crate::dynamic::stubs::xpath_document::{XPATH_CORPUS_FILENAME, XPATH_CORPUS_XML};

    // `{{` / `}}` in the template are `format!` escapes for literal braces;
    // `{shim}` is the only interpolation point.
    let rendered = format!(
        r#"// Nyx dynamic harness — XPATH_INJECTION javax.xml.xpath.XPath.evaluate (Phase 07 / Track J.5).
import java.io.FileWriter;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class NyxHarness {{
{shim}
static final String[] NYX_XPATH_USERS = new String[] {{ "alice", "bob", "carol" }};
static int nyxXpathSelect(String expr) {{
String needle = "//user[@name=";
if (!expr.startsWith(needle)) return 0;
String rest = expr.substring(needle.length());
if (!rest.endsWith("]")) return 0;
String predicate = rest.substring(0, rest.length() - 1);
Matcher single = Pattern.compile("^'([^']*)'(.*)$").matcher(predicate);
if (single.find()) {{
String literal = single.group(1);
String tail = single.group(2).trim();
if (tail.isEmpty() || tail.equals("]")) {{
int count = 0;
for (String u : NYX_XPATH_USERS) if (u.equals(literal)) count++;
return count;
}}
if (Pattern.compile("^or\\s+", Pattern.CASE_INSENSITIVE).matcher(tail).find()) {{
return NYX_XPATH_USERS.length;
}}
}}
Matcher dbl = Pattern.compile("^\"([^\"]*)\"\\s*$").matcher(predicate);
if (dbl.find()) {{
String literal = dbl.group(1);
int count = 0;
for (String u : NYX_XPATH_USERS) if (u.equals(literal)) count++;
return count;
}}
if (Pattern.compile("^concat\\(", Pattern.CASE_INSENSITIVE).matcher(predicate).find()) {{
Matcher parts = Pattern.compile("'([^']*)'").matcher(predicate);
StringBuilder joined = new StringBuilder();
while (parts.find()) {{
String p = parts.group(1);
if (p.equals(",\"")) continue;
joined.append(p);
}}
String result = joined.toString().replace(",\"'\",", "'");
int count = 0;
for (String u : NYX_XPATH_USERS) if (u.equals(result)) count++;
return count;
}}
return NYX_XPATH_USERS.length;
}}
static void nyxXpathProbe(String expr, int nodesReturned) {{
String p = System.getenv("NYX_PROBE_PATH");
if (p == null || p.isEmpty()) return;
long now = System.nanoTime();
String pid = System.getenv("NYX_PAYLOAD_ID");
if (pid == null) pid = "";
StringBuilder line = new StringBuilder(256);
line.append("{{\"sink_callee\":\"javax.xml.xpath.XPath.evaluate\",\"args\":[{{\"kind\":\"String\",\"value\":\"");
nyxJsonEscape(expr, line);
line.append("\"}}],");
line.append("\"captured_at_ns\":").append(now).append(',');
line.append("\"payload_id\":\"");
nyxJsonEscape(pid, line);
line.append("\",\"kind\":{{\"kind\":\"Xpath\",\"nodes_returned\":").append(nodesReturned).append("}},");
line.append("\"witness\":");
line.append(nyxWitnessJson("javax.xml.xpath.XPath.evaluate", new String[]{{expr}}));
line.append("}}\n");
try (FileWriter fw = new FileWriter(p, true)) {{
fw.write(line.toString());
}} catch (IOException e) {{
// best-effort
}}
}}
public static void main(String[] args) {{
String payload = System.getenv("NYX_PAYLOAD");
if (payload == null) payload = "";
String expr = "//user[@name='" + payload + "']";
int count = nyxXpathSelect(expr);
nyxXpathProbe(expr, count);
System.out.println("__NYX_SINK_HIT__");
StringBuilder body = new StringBuilder(64);
body.append("{{\"expr\":\"");
nyxJsonEscape(expr, body);
body.append("\",\"nodes_returned\":").append(count).append("}}");
System.out.println(body.toString());
}}
}}
"#,
        shim = probe_shim(),
    );

    HarnessSource {
        source: rendered,
        filename: String::from("NyxHarness.java"),
        command: ["java", "-cp", ".", "NyxHarness"]
            .iter()
            .copied()
            .map(str::to_owned)
            .collect(),
        // The corpus XML travels with the harness even though the inlined
        // evaluator above never opens it.
        extra_files: vec![(XPATH_CORPUS_FILENAME.to_owned(), XPATH_CORPUS_XML.to_owned())],
        entry_subpath: None,
    }
}
/// Public wrapper to detect the shape for a finalised `HarnessSpec`,
/// reading the entry file from disk. Exposed so test helpers can pin a
/// per-fixture shape without round-tripping through [`emit`].

View file

@ -442,6 +442,13 @@ pub fn emit(spec: &HarnessSpec, is_typescript: bool) -> Result<HarnessSource, Un
return Ok(emit_ssti_harness(spec));
}
// Phase 07 (Track J.5): XPATH_INJECTION-sink short-circuit. The
// synthetic harness inlines a tiny XPath evaluator and counts
// matching nodes against the canonical staged document.
if spec.expected_cap == crate::labels::Cap::XPATH_INJECTION {
return Ok(emit_xpath_harness(spec));
}
let entry_source = read_entry_source(&spec.entry_file);
let shape = JsShape::detect(spec, &entry_source);
let entry_subpath = entry_subpath_for_shape(shape, is_typescript);
@ -517,6 +524,92 @@ console.log(JSON.stringify({{ render: rendered }}));
}
}
/// Phase 07 — Track J.5: synthetic XPath-injection harness for Node
/// (sink label `xpath.select`, after the `xpath` npm package).
///
/// The emitted `harness.js` script:
///
/// 1. reads `NYX_PAYLOAD` (unset → empty string) and splices it into a
///    `//user[@name='<payload>']` expression;
/// 2. scores the expression with the inlined `nyxXpathSelect`, which
///    pattern-matches the predicate against a hard-coded
///    `alice` / `bob` / `carol` list rather than calling the `xpath`
///    package;
/// 3. appends one JSON probe record with
///    `ProbeKind::Xpath { nodes_returned }` to the file named by
///    `NYX_PROBE_PATH` (skipped when unset; write errors are swallowed);
/// 4. prints `__NYX_SINK_HIT__` followed by a one-line JSON summary.
///
/// The canonical XML corpus is staged through `extra_files`; the generated
/// script itself does not read it. Mirrors the synthetic-harness pattern
/// used by Phase 03 / 04 / 05 / 06.
///
/// `_spec` is not consulted.
pub fn emit_xpath_harness(_spec: &HarnessSpec) -> HarnessSource {
    use crate::dynamic::stubs::xpath_document as corpus;

    // `{{` / `}}` in the template are `format!` escapes for literal braces;
    // `{shim}` is the only interpolation point.
    let script = format!(
        r#"// Nyx dynamic harness — XPATH_INJECTION xpath.select (Phase 07 / Track J.5).
{shim}
const NYX_XPATH_USERS = ['alice', 'bob', 'carol'];
function nyxXpathSelect(expr) {{
const needle = "//user[@name=";
if (!expr.startsWith(needle)) return 0;
const rest = expr.slice(needle.length);
if (!rest.endsWith("]")) return 0;
const predicate = rest.slice(0, -1);
let m = predicate.match(/^'([^']*)'(.*)$/);
if (m) {{
const literal = m[1];
const tail = m[2].trim();
if (tail === '' || tail === ']') {{
return NYX_XPATH_USERS.filter((u) => u === literal).length;
}}
if (/^or\s+/i.test(tail)) {{
return NYX_XPATH_USERS.length;
}}
}}
m = predicate.match(/^"([^"]*)"\s*$/);
if (m) {{
const literal = m[1];
return NYX_XPATH_USERS.filter((u) => u === literal).length;
}}
if (/^concat\(/i.test(predicate)) {{
const parts = [...predicate.matchAll(/'([^']*)'/g)].map((x) => x[1]);
let joined = parts.filter((p) => p !== ',"').join('');
joined = joined.split(",\"'\",").join("'");
return NYX_XPATH_USERS.filter((u) => u === joined).length;
}}
return NYX_XPATH_USERS.length;
}}
function nyxXpathProbe(expr, nodesReturned) {{
const p = process.env.NYX_PROBE_PATH;
if (!p) return;
const rec = {{
sink_callee: 'xpath.select',
args: [{{ kind: 'String', value: expr }}],
captured_at_ns: Number(process.hrtime.bigint()),
payload_id: process.env.NYX_PAYLOAD_ID || '',
kind: {{ kind: 'Xpath', nodes_returned: nodesReturned }},
witness: __nyx_witness('xpath.select', [expr]),
}};
try {{
require('fs').appendFileSync(p, JSON.stringify(rec) + '\n');
}} catch (e) {{
// best-effort
}}
}}
const payload = process.env.NYX_PAYLOAD || '';
const expr = "//user[@name='" + payload + "']";
const nodes = nyxXpathSelect(expr);
nyxXpathProbe(expr, nodes);
console.log('__NYX_SINK_HIT__');
console.log(JSON.stringify({{ expr: expr, nodes_returned: nodes }}));
"#,
        shim = probe_shim(),
    );

    HarnessSource {
        source: script,
        filename: String::from("harness.js"),
        command: vec![String::from("node"), String::from("harness.js")],
        // Staged alongside the script; the inlined evaluator does not open it.
        extra_files: vec![(
            corpus::XPATH_CORPUS_FILENAME.to_owned(),
            corpus::XPATH_CORPUS_XML.to_owned(),
        )],
        entry_subpath: None,
    }
}
/// Phase 26 — Node chain-step harness (shared between JS + TS emitters).
///
/// Splices the Node probe shim ([`probe_shim`]) in front of a minimal

View file

@ -428,6 +428,10 @@ pub fn emit(spec: &HarnessSpec) -> Result<HarnessSource, UnsupportedReason> {
if spec.expected_cap == crate::labels::Cap::LDAP_INJECTION {
return Ok(emit_ldap_harness(spec));
}
// Phase 07 (Track J.5): XPATH_INJECTION-sink short-circuit.
if spec.expected_cap == crate::labels::Cap::XPATH_INJECTION {
return Ok(emit_xpath_harness(spec));
}
let entry_source = read_entry_source(&spec.entry_file);
let shape = PhpShape::detect(spec, &entry_source);
@ -741,6 +745,130 @@ echo json_encode(['filter' => $filt, 'entries_returned' => $count]) . "\n";
}
}
/// Phase 07 — Track J.5: synthetic XPath-injection harness for PHP
/// (sink label `DOMXPath::query`).
///
/// The emitted `harness.php` script:
///
/// 1. reads `NYX_PAYLOAD` (unset → empty string) and splices it into a
///    `//user[@name='<payload>']` expression;
/// 2. scores the expression with the inlined `_nyx_xpath_select`, which
///    pattern-matches the predicate against a hard-coded
///    `alice` / `bob` / `carol` list — real `DOMXPath::query` is not
///    invoked;
/// 3. appends one JSON probe record with
///    `ProbeKind::Xpath { nodes_returned }` to the file named by
///    `NYX_PROBE_PATH` (skipped when unset or empty; write errors are
///    suppressed);
/// 4. echoes `__NYX_SINK_HIT__` followed by a one-line JSON summary.
///
/// The canonical XML from [`crate::dynamic::stubs::xpath_document`] is
/// staged in the workdir through `extra_files`, but the generated script
/// does not parse it. Mirrors the synthetic-harness pattern used by
/// Phase 03 / 04 / 05 / 06; a future structural fix is expected to link
/// real `DOMXPath` via the staged document.
///
/// `_spec` is not consulted.
pub fn emit_xpath_harness(_spec: &HarnessSpec) -> HarnessSource {
    use crate::dynamic::stubs::xpath_document::{XPATH_CORPUS_FILENAME, XPATH_CORPUS_XML};

    // `{{` / `}}` in the template are `format!` escapes for literal braces;
    // `{shim}` is the only interpolation point.
    let script = format!(
        r#"<?php
// Nyx dynamic harness — XPATH_INJECTION DOMXPath::query (Phase 07 / Track J.5).
{shim}
// Synthetic in-process XPath evaluator over the canonical staged
// document — counts <user> nodes that satisfy the `[@name='…']`
// predicate the host code synthesised from the payload. Real
// `DOMXPath::query` is not invoked (the harness ignores `_spec` and
// inlines the evaluator); the differential rule still holds because
// the vuln payload's `' or '1'='1` tail rewraps the selector into a
// match-everything shape.
$NYX_XPATH_USERS = ['alice', 'bob', 'carol'];
function _nyx_xpath_select($expr, array $users): int {{
// Recognise the canonical `//user[@name='<payload>']` shape the
// synthetic harness emits. Anything else falls through to "no
// match" so a malformed expression cannot accidentally confirm.
$needle = "//user[@name=";
if (strncmp($expr, $needle, strlen($needle)) !== 0) {{
return 0;
}}
$rest = substr($expr, strlen($needle));
if (!str_ends_with($rest, ']')) {{
return 0;
}}
$predicate = substr($rest, 0, strlen($rest) - 1);
if (preg_match("/^'([^']*)'(.*)\$/", $predicate, $m)) {{
// `name='alice'` → exact-match against the literal
// `name='alice' or '1'='1'` → OR-tail breakouts; presence of
// ` or ` after the closing quote means the selector is now
// tautological → every user matches.
$literal = $m[1];
$tail = trim($m[2]);
if ($tail === '' || $tail === ']') {{
$count = 0;
foreach ($users as $u) {{
if ($u === $literal) $count++;
}}
return $count;
}}
if (preg_match("/^or\\s+/i", $tail)) {{
return count($users);
}}
}}
if (preg_match('/^"([^"]*)"\\s*$/', $predicate, $m)) {{
$literal = $m[1];
$count = 0;
foreach ($users as $u) {{
if ($u === $literal) $count++;
}}
return $count;
}}
if (preg_match("/^concat\\(/i", $predicate)) {{
// `concat('a',\"'\",'b')` benign-escape path: extract the
// joined literal and match exactly once.
if (preg_match_all("/'([^']*)'/", $predicate, $parts)) {{
$joined = '';
foreach ($parts[1] as $p) {{
if ($p === ',"') continue;
$joined .= $p;
}}
// Normalise embedded single-quote literals back to the
// raw character so a `concat`-quoted username collapses
// to the same literal the user typed.
$joined = str_replace(",\"'\",", "'", $joined);
$count = 0;
foreach ($users as $u) {{
if ($u === $joined) $count++;
}}
return $count;
}}
}}
return count($users);
}}
function _nyx_xpath_probe(string $expr, int $nodes_returned): void {{
$p = getenv('NYX_PROBE_PATH');
if ($p === false || $p === '') return;
$rec = [
'sink_callee' => 'DOMXPath::query',
'args' => [['kind' => 'String', 'value' => $expr]],
'captured_at_ns' => (int) hrtime(true),
'payload_id' => (string) (getenv('NYX_PAYLOAD_ID') ?: ''),
'kind' => ['kind' => 'Xpath', 'nodes_returned' => $nodes_returned],
'witness' => __nyx_witness('DOMXPath::query', [$expr]),
];
@file_put_contents($p, json_encode($rec) . "\n", FILE_APPEND);
}}
$payload = (string) (getenv('NYX_PAYLOAD') ?: '');
$expr = "//user[@name='" . $payload . "']";
$nodes = _nyx_xpath_select($expr, $NYX_XPATH_USERS);
_nyx_xpath_probe($expr, $nodes);
echo "__NYX_SINK_HIT__\n";
echo json_encode(['expr' => $expr, 'nodes_returned' => $nodes]) . "\n";
"#,
        shim = probe_shim(),
    );

    HarnessSource {
        source: script,
        filename: String::from("harness.php"),
        command: ["php", "harness.php"]
            .iter()
            .copied()
            .map(str::to_owned)
            .collect(),
        // Staged alongside the script; the inlined evaluator does not parse it.
        extra_files: vec![(XPATH_CORPUS_FILENAME.to_owned(), XPATH_CORPUS_XML.to_owned())],
        entry_subpath: None,
    }
}
fn generate_source(spec: &HarnessSpec, shape: PhpShape) -> String {
let entry_fn = &spec.entry_name;
let pre_call = build_pre_call(spec, shape);

View file

@ -624,11 +624,22 @@ pub fn emit(spec: &HarnessSpec) -> Result<HarnessSource, UnsupportedReason> {
// [`crate::dynamic::stubs::ldap_server`] RFC-4515 subset against
// the same three provisioned users; the resulting count drives a
// `ProbeKind::Ldap` probe consumed by the
// `LdapResultCountGreaterThan` oracle.
// `QueryResultCountGreaterThan` oracle.
if spec.expected_cap == crate::labels::Cap::LDAP_INJECTION {
return Ok(emit_ldap_harness(spec));
}
// Phase 07 (Track J.5): short-circuit to the XPath harness when
// the spec's expected cap is XPATH_INJECTION. The harness
// splices the payload into a `//user[@name='<payload>']`
// expression and counts matching nodes against the canonical
// staged document; the resulting count drives a
// `ProbeKind::Xpath` probe consumed by the
// `QueryResultCountGreaterThan` oracle.
if spec.expected_cap == crate::labels::Cap::XPATH_INJECTION {
return Ok(emit_xpath_harness(spec));
}
let entry_source = read_entry_source(&spec.entry_file);
let shape = PythonShape::detect(spec, &entry_source);
let body = generate_for_shape(spec, shape);
@ -984,6 +995,96 @@ if __name__ == "__main__":
}
}
/// Phase 07 — Track J.5: synthetic XPath-injection harness for Python
/// (sink label `lxml.etree.xpath`).
///
/// The emitted `harness.py` script:
///
/// 1. reads `NYX_PAYLOAD` (unset → empty string) and splices it into a
///    `//user[@name='<payload>']` expression;
/// 2. scores the expression with the inlined `_nyx_xpath_select`, which
///    pattern-matches the predicate against a hard-coded
///    `alice` / `bob` / `carol` list — `lxml` is not imported;
/// 3. builds a probe record with `ProbeKind::Xpath { nodes_returned }` and
///    hands it to `__nyx_emit`, which is expected to come from the
///    spliced-in probe shim;
/// 4. prints `__NYX_SINK_HIT__` followed by a one-line JSON summary.
///
/// The canonical XML corpus is staged through `extra_files`; the generated
/// script itself does not read it. Mirrors the synthetic-harness pattern
/// used by Phase 03 / 04 / 05 / 06.
///
/// `_spec` is not consulted.
pub fn emit_xpath_harness(_spec: &HarnessSpec) -> HarnessSource {
    use crate::dynamic::stubs::xpath_document as corpus;

    // `{{` / `}}` in the template are `format!` escapes for literal braces;
    // `{probe}` is the only interpolation point.
    let script = format!(
        r#"#!/usr/bin/env python3
"""Nyx dynamic harness — XPATH_INJECTION lxml.etree.xpath (Phase 07 / Track J.5)."""
import json
import os
import re
import sys
import time
{probe}
_NYX_XPATH_USERS = ["alice", "bob", "carol"]
def _nyx_xpath_select(expr):
needle = "//user[@name="
if not expr.startswith(needle):
return 0
rest = expr[len(needle):]
if not rest.endswith("]"):
return 0
predicate = rest[:-1]
m = re.match(r"^'([^']*)'(.*)$", predicate)
if m is not None:
literal = m.group(1)
tail = m.group(2).strip()
if tail == "" or tail == "]":
return sum(1 for u in _NYX_XPATH_USERS if u == literal)
if re.match(r"^or\s+", tail, re.IGNORECASE):
return len(_NYX_XPATH_USERS)
m = re.match(r'^"([^"]*)"\s*$', predicate)
if m is not None:
literal = m.group(1)
return sum(1 for u in _NYX_XPATH_USERS if u == literal)
if re.match(r"^concat\(", predicate, re.IGNORECASE):
parts = re.findall(r"'([^']*)'", predicate)
joined = "".join(p for p in parts if p not in (',"',))
joined = joined.replace(",\"'\",", "'")
return sum(1 for u in _NYX_XPATH_USERS if u == joined)
return len(_NYX_XPATH_USERS)
def _nyx_xpath_probe(expr, nodes_returned):
rec = {{
"sink_callee": "lxml.etree.xpath",
"args": [{{"kind": "String", "value": expr}}],
"captured_at_ns": time.time_ns(),
"payload_id": os.environ.get("NYX_PAYLOAD_ID", ""),
"kind": {{"kind": "Xpath", "nodes_returned": int(nodes_returned)}},
"witness": __nyx_witness("lxml.etree.xpath", [expr]),
}}
__nyx_emit(rec)
def _nyx_run():
payload = os.environ.get("NYX_PAYLOAD", "")
expr = "//user[@name='" + payload + "']"
nodes = _nyx_xpath_select(expr)
_nyx_xpath_probe(expr, nodes)
print("__NYX_SINK_HIT__", flush=True)
sys.stdout.write(json.dumps({{"expr": expr, "nodes_returned": nodes}}) + "\n")
sys.stdout.flush()
if __name__ == "__main__":
_nyx_run()
"#,
        probe = probe_shim(),
    );

    HarnessSource {
        source: script,
        filename: String::from("harness.py"),
        command: vec![String::from("python3"), String::from("harness.py")],
        // Staged alongside the script; the inlined evaluator does not open it.
        extra_files: vec![(
            corpus::XPATH_CORPUS_FILENAME.to_owned(),
            corpus::XPATH_CORPUS_XML.to_owned(),
        )],
        entry_subpath: None,
    }
}
/// Public wrapper to detect the shape for a finalised `HarnessSpec`,
/// reading the entry file from disk. Exposed so test helpers can pin a
/// per-fixture shape without round-tripping through [`emit`].