[pitboss] phase 05: Track J.3 + Track L.3 — XXE corpus + DocumentBuilder / lxml / libxml / SimpleXML adapters

This commit is contained in:
pitboss 2026-05-17 20:39:12 -05:00
parent 637b733928
commit 4de925c3ef
35 changed files with 1985 additions and 23 deletions

View file

@ -23,7 +23,7 @@
use std::collections::HashMap;
use std::sync::OnceLock;
use super::{cmdi, deserialize, fmt_string, path_trav, sqli, ssrf, ssti, xss};
use super::{cmdi, deserialize, fmt_string, path_trav, sqli, ssrf, ssti, xss, xxe};
use super::{CapCorpus, CuratedPayload, Oracle};
use crate::dynamic::oracle::ProbePredicate;
use crate::labels::Cap;
@ -44,7 +44,6 @@ pub const CORPUS_UNSUPPORTED_LANG_NEUTRAL: u32 = Cap::ENV_VAR.bits()
| Cap::XPATH_INJECTION.bits()
| Cap::HEADER_INJECTION.bits()
| Cap::OPEN_REDIRECT.bits()
| Cap::XXE.bits()
| Cap::PROTOTYPE_POLLUTION.bits();
/// Flat `(Cap, Lang, slice)` table. A single cap can carry per-language
@ -65,6 +64,11 @@ const ENTRIES: &[(Cap, Lang, &[CuratedPayload])] = &[
(Cap::SSTI, Lang::Php, ssti::php_twig::PAYLOADS),
(Cap::SSTI, Lang::Java, ssti::java_thymeleaf::PAYLOADS),
(Cap::SSTI, Lang::JavaScript, ssti::js_handlebars::PAYLOADS),
(Cap::XXE, Lang::Java, xxe::java::PAYLOADS),
(Cap::XXE, Lang::Python, xxe::python::PAYLOADS),
(Cap::XXE, Lang::Php, xxe::php::PAYLOADS),
(Cap::XXE, Lang::Ruby, xxe::ruby::PAYLOADS),
(Cap::XXE, Lang::Go, xxe::go::PAYLOADS),
];
/// Reserved for per-cap oracle defaults. Empty in Phase 02; populated by
@ -273,6 +277,7 @@ mod tests {
assert!(!payloads_for(Cap::FMT_STRING).is_empty());
assert!(!payloads_for(Cap::DESERIALIZE).is_empty());
assert!(!payloads_for(Cap::SSTI).is_empty());
assert!(!payloads_for(Cap::XXE).is_empty());
}
#[test]
@ -289,7 +294,6 @@ mod tests {
Cap::XPATH_INJECTION,
Cap::HEADER_INJECTION,
Cap::OPEN_REDIRECT,
Cap::XXE,
Cap::PROTOTYPE_POLLUTION,
];
for cap in unsupported {
@ -320,6 +324,7 @@ mod tests {
Cap::FMT_STRING,
Cap::DESERIALIZE,
Cap::SSTI,
Cap::XXE,
] {
let has_vuln = payloads_for(cap).iter().any(|p| !p.is_benign);
assert!(has_vuln, "{cap:?} must have at least one vuln payload");
@ -368,6 +373,7 @@ mod tests {
Cap::FMT_STRING,
Cap::DESERIALIZE,
Cap::SSTI,
Cap::XXE,
];
for cap in caps {
for p in payloads_for(cap) {
@ -391,6 +397,7 @@ mod tests {
Cap::FMT_STRING,
Cap::DESERIALIZE,
Cap::SSTI,
Cap::XXE,
];
for cap in caps {
for p in payloads_for(cap) {
@ -501,6 +508,7 @@ mod tests {
Cap::FMT_STRING,
Cap::DESERIALIZE,
Cap::SSTI,
Cap::XXE,
];
for cap in caps {
for p in payloads_for(cap).iter().filter(|p| p.is_benign) {
@ -629,6 +637,46 @@ mod tests {
}
}
#[test]
fn xxe_has_per_lang_slices_for_phase_05() {
    // True when the lang-aware lookup yields at least one XXE payload.
    let has_xxe_payloads = |lang: Lang| !payloads_for_lang(Cap::XXE, lang).is_empty();

    // Phase 05 (Track J.3) acceptance: every language that registers an
    // XXE slice must come back non-empty from the lang-aware lookup.
    let registered = [Lang::Java, Lang::Python, Lang::Php, Lang::Ruby, Lang::Go];
    for lang in registered {
        assert!(
            has_xxe_payloads(lang),
            "XXE must have at least one payload for {lang:?}",
        );
    }

    // Rust / C / Cpp / JS / TS carry no XXE slice yet, so the lookup
    // must come back empty for each of them.
    let unregistered = [
        Lang::Rust,
        Lang::C,
        Lang::Cpp,
        Lang::JavaScript,
        Lang::TypeScript,
    ];
    for lang in unregistered {
        assert!(
            !has_xxe_payloads(lang),
            "XXE has unexpected payloads for {lang:?}",
        );
    }
}
#[test]
fn xxe_payloads_pair_benign_controls_per_lang() {
    // For each language with an XXE slice, the first vuln payload must
    // resolve (via the lang-aware resolver) to a payload flagged benign.
    let langs = [Lang::Java, Lang::Python, Lang::Php, Lang::Ruby, Lang::Go];
    for lang in langs {
        let candidates = payloads_for_lang(Cap::XXE, lang);

        let first_vuln = candidates.iter().find(|entry| !entry.is_benign);
        let first_vuln = first_vuln.expect("each lang must have an XXE vuln payload");

        let control = super::resolve_benign_control_lang(first_vuln, Cap::XXE, lang);
        let control = control.expect("lang-aware benign control must resolve");

        assert!(control.is_benign);
    }
}
#[test]
fn deserialize_payloads_pair_benign_controls_per_lang() {
// The lang-aware resolver must find the paired benign control

View file

@ -0,0 +1,66 @@
//! Go `Cap::XXE` payloads — `encoding/xml.Decoder` with `Strict: false`.
//!
//! Vuln payload: an XML document declaring an external entity that
//! the harness's instrumented `xml.Decoder` (running non-strict so
//! the doctype is parsed at all) expands inside `<data>`; the shim
//! writes `ProbeKind::Xxe { entity_expanded: true }` once it sees the
//! entity body substitute into the decoded element value.
//!
//! Benign control: a well-formed XML document with no doctype, so the
//! decoder has no entity to resolve and the shim writes
//! `entity_expanded: false`.
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
use crate::dynamic::oracle::ProbePredicate;
/// Curated Go `Cap::XXE` payloads: one entity-declaring document and the
/// doctype-free benign control it is paired with.
pub const PAYLOADS: &[CuratedPayload] = &[
// Vuln payload: the DOCTYPE declares external entity `xxe` pointing at
// `file:///etc/hostname`, and the `<data>` element body references it.
CuratedPayload {
bytes: br#"<?xml version="1.0"?>
<!DOCTYPE data [
<!ENTITY xxe SYSTEM "file:///etc/hostname">
]>
<data>&xxe;</data>"#,
label: "xxe-go-doctype-entity",
// Oracle predicate: requires the probe to report the entity as expanded.
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::XxeEntityExpanded {
require_expanded: true,
}],
},
is_benign: false,
provenance: PayloadProvenance::Curated,
since_corpus_version: 9,
deprecated_at_corpus_version: None,
fixture_paths: &[
"tests/dynamic_fixtures/xxe/go/vuln.go",
],
oob_nonce_slot: false,
// Carries the same predicate as `oracle` above.
probe_predicates: &[ProbePredicate::XxeEntityExpanded {
require_expanded: true,
}],
// References the benign entry below by its label.
benign_control: Some(PayloadRef {
label: "xxe-go-benign",
}),
no_benign_control_rationale: None,
},
// Benign control: same `<data>` element, but no DOCTYPE and no entity
// reference in the document.
CuratedPayload {
bytes: br#"<?xml version="1.0"?>
<data>hello</data>"#,
label: "xxe-go-benign",
// Same `require_expanded: true` predicate as the vuln entry.
// NOTE(review): presumably intentional so the oracle stays unsatisfied
// on this input (module docs say the shim writes
// `entity_expanded: false`) — confirm.
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::XxeEntityExpanded {
require_expanded: true,
}],
},
is_benign: true,
provenance: PayloadProvenance::Curated,
since_corpus_version: 9,
deprecated_at_corpus_version: None,
fixture_paths: &[
"tests/dynamic_fixtures/xxe/go/benign.go",
],
oob_nonce_slot: false,
// Empty here, unlike the vuln entry.
probe_predicates: &[],
// NOTE(review): a control presumably needs neither a control of its
// own nor a rationale for lacking one — confirm.
benign_control: None,
no_benign_control_rationale: None,
},
];

View file

@ -0,0 +1,67 @@
//! Java `Cap::XXE` payloads — `DocumentBuilderFactory` / `SAXParser`.
//!
//! Vuln payload: an XML document declaring an external entity that
//! the harness's instrumented `DocumentBuilder.parse` resolves and
//! substitutes inside `<data>` — the parser writes a
//! `ProbeKind::Xxe { entity_expanded: true }` record once it sees the
//! entity body materialise.
//!
//! Benign control: a well-formed XML document with no doctype
//! declaration so the parser has no entity to resolve. The harness's
//! instrumented parser writes `entity_expanded: false`, the oracle
//! does not fire, and the differential rule (§4.1) stays clean.
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
use crate::dynamic::oracle::ProbePredicate;
/// Curated Java `Cap::XXE` payloads: one entity-declaring document and the
/// doctype-free benign control it is paired with.
pub const PAYLOADS: &[CuratedPayload] = &[
// Vuln payload: the DOCTYPE declares external entity `xxe` pointing at
// `file:///etc/hostname`, and the `<data>` element body references it.
CuratedPayload {
bytes: br#"<?xml version="1.0"?>
<!DOCTYPE data [
<!ENTITY xxe SYSTEM "file:///etc/hostname">
]>
<data>&xxe;</data>"#,
label: "xxe-java-doctype-entity",
// Oracle predicate: requires the probe to report the entity as expanded.
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::XxeEntityExpanded {
require_expanded: true,
}],
},
is_benign: false,
provenance: PayloadProvenance::Curated,
since_corpus_version: 9,
deprecated_at_corpus_version: None,
fixture_paths: &[
"tests/dynamic_fixtures/xxe/java/vuln.java",
],
oob_nonce_slot: false,
// Carries the same predicate as `oracle` above.
probe_predicates: &[ProbePredicate::XxeEntityExpanded {
require_expanded: true,
}],
// References the benign entry below by its label.
benign_control: Some(PayloadRef {
label: "xxe-java-benign",
}),
no_benign_control_rationale: None,
},
// Benign control: same `<data>` element, but no DOCTYPE and no entity
// reference in the document.
CuratedPayload {
bytes: br#"<?xml version="1.0"?>
<data>hello</data>"#,
label: "xxe-java-benign",
// Same `require_expanded: true` predicate as the vuln entry.
// NOTE(review): presumably intentional so the oracle does not fire on
// this input (module docs say the parser writes
// `entity_expanded: false`) — confirm.
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::XxeEntityExpanded {
require_expanded: true,
}],
},
is_benign: true,
provenance: PayloadProvenance::Curated,
since_corpus_version: 9,
deprecated_at_corpus_version: None,
fixture_paths: &[
"tests/dynamic_fixtures/xxe/java/benign.java",
],
oob_nonce_slot: false,
// Empty here, unlike the vuln entry.
probe_predicates: &[],
// NOTE(review): a control presumably needs neither a control of its
// own nor a rationale for lacking one — confirm.
benign_control: None,
no_benign_control_rationale: None,
},
];

View file

@ -0,0 +1,24 @@
//! XML External Entity expansion (`Cap::XXE`) per-language payload slices.
//!
//! Phase 05 (Track J.3) carves XXE across the five most-common XML
//! parser stacks: Java (`DocumentBuilderFactory`), Python
//! (`lxml.etree.XMLParser`), PHP (`simplexml_load_string` under
//! `libxml_disable_entity_loader(false)`), Ruby (REXML / Nokogiri), and
//! Go (`encoding/xml.Decoder`). Every vuln payload ships an XML
//! document declaring an external entity (`<!ENTITY xxe SYSTEM "…">`)
//! that the engine expands inside an element body. The paired benign
//! control omits the doctype + entity so the parser has nothing to
//! resolve; the oracle's
//! [`crate::dynamic::oracle::ProbePredicate::XxeEntityExpanded`] check
//! is satisfied on the vuln run (`entity_expanded: true`) and stays clear
//! on the benign run, fulfilling the §4.1 differential rule.
//!
//! C# is intentionally omitted: the [`crate::symbol::Lang`] enum has
//! no `CSharp` variant, so the corpus has nowhere to register it.
//! Tracked in `.pitboss/play/deferred.md`.
pub mod go;
pub mod java;
pub mod php;
pub mod python;
pub mod ruby;

View file

@ -0,0 +1,66 @@
//! PHP `Cap::XXE` payloads — `simplexml_load_string` under
//! `libxml_disable_entity_loader(false)`.
//!
//! Vuln payload: an XML document declaring an external entity that
//! the harness's instrumented parser expands inside `<data>`; the
//! shim writes `ProbeKind::Xxe { entity_expanded: true }` once it
//! sees the entity body substitute into the parsed output.
//!
//! Benign control: a well-formed XML document with no doctype, so
//! the parser has no entity to resolve and the shim writes
//! `entity_expanded: false`.
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
use crate::dynamic::oracle::ProbePredicate;
/// Curated PHP `Cap::XXE` payloads: one entity-declaring document and the
/// doctype-free benign control it is paired with.
pub const PAYLOADS: &[CuratedPayload] = &[
// Vuln payload: the DOCTYPE declares external entity `xxe` pointing at
// `file:///etc/hostname`, and the `<data>` element body references it.
CuratedPayload {
bytes: br#"<?xml version="1.0"?>
<!DOCTYPE data [
<!ENTITY xxe SYSTEM "file:///etc/hostname">
]>
<data>&xxe;</data>"#,
label: "xxe-php-doctype-entity",
// Oracle predicate: requires the probe to report the entity as expanded.
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::XxeEntityExpanded {
require_expanded: true,
}],
},
is_benign: false,
provenance: PayloadProvenance::Curated,
since_corpus_version: 9,
deprecated_at_corpus_version: None,
fixture_paths: &[
"tests/dynamic_fixtures/xxe/php/vuln.php",
],
oob_nonce_slot: false,
// Carries the same predicate as `oracle` above.
probe_predicates: &[ProbePredicate::XxeEntityExpanded {
require_expanded: true,
}],
// References the benign entry below by its label.
benign_control: Some(PayloadRef {
label: "xxe-php-benign",
}),
no_benign_control_rationale: None,
},
// Benign control: same `<data>` element, but no DOCTYPE and no entity
// reference in the document.
CuratedPayload {
bytes: br#"<?xml version="1.0"?>
<data>hello</data>"#,
label: "xxe-php-benign",
// Same `require_expanded: true` predicate as the vuln entry.
// NOTE(review): presumably intentional so the oracle stays unsatisfied
// on this input (module docs say the shim writes
// `entity_expanded: false`) — confirm.
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::XxeEntityExpanded {
require_expanded: true,
}],
},
is_benign: true,
provenance: PayloadProvenance::Curated,
since_corpus_version: 9,
deprecated_at_corpus_version: None,
fixture_paths: &[
"tests/dynamic_fixtures/xxe/php/benign.php",
],
oob_nonce_slot: false,
// Empty here, unlike the vuln entry.
probe_predicates: &[],
// NOTE(review): a control presumably needs neither a control of its
// own nor a rationale for lacking one — confirm.
benign_control: None,
no_benign_control_rationale: None,
},
];

View file

@ -0,0 +1,66 @@
//! Python `Cap::XXE` payloads — `lxml.etree.XMLParser(resolve_entities=True)`.
//!
//! Vuln payload: an XML document declaring an external entity that
//! the harness's instrumented parser (`resolve_entities=True`)
//! expands inside `<data>`; the shim writes
//! `ProbeKind::Xxe { entity_expanded: true }` once it sees the entity
//! body substitute into the parsed tree.
//!
//! Benign control: a well-formed XML document with no doctype, so the
//! parser has nothing to resolve and the shim writes
//! `entity_expanded: false`.
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
use crate::dynamic::oracle::ProbePredicate;
/// Curated Python `Cap::XXE` payloads: one entity-declaring document and
/// the doctype-free benign control it is paired with.
pub const PAYLOADS: &[CuratedPayload] = &[
// Vuln payload: the DOCTYPE declares external entity `xxe` pointing at
// `file:///etc/hostname`, and the `<data>` element body references it.
CuratedPayload {
bytes: br#"<?xml version="1.0"?>
<!DOCTYPE data [
<!ENTITY xxe SYSTEM "file:///etc/hostname">
]>
<data>&xxe;</data>"#,
label: "xxe-python-doctype-entity",
// Oracle predicate: requires the probe to report the entity as expanded.
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::XxeEntityExpanded {
require_expanded: true,
}],
},
is_benign: false,
provenance: PayloadProvenance::Curated,
since_corpus_version: 9,
deprecated_at_corpus_version: None,
fixture_paths: &[
"tests/dynamic_fixtures/xxe/python/vuln.py",
],
oob_nonce_slot: false,
// Carries the same predicate as `oracle` above.
probe_predicates: &[ProbePredicate::XxeEntityExpanded {
require_expanded: true,
}],
// References the benign entry below by its label.
benign_control: Some(PayloadRef {
label: "xxe-python-benign",
}),
no_benign_control_rationale: None,
},
// Benign control: same `<data>` element, but no DOCTYPE and no entity
// reference in the document.
CuratedPayload {
bytes: br#"<?xml version="1.0"?>
<data>hello</data>"#,
label: "xxe-python-benign",
// Same `require_expanded: true` predicate as the vuln entry.
// NOTE(review): presumably intentional so the oracle stays unsatisfied
// on this input (module docs say the shim writes
// `entity_expanded: false`) — confirm.
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::XxeEntityExpanded {
require_expanded: true,
}],
},
is_benign: true,
provenance: PayloadProvenance::Curated,
since_corpus_version: 9,
deprecated_at_corpus_version: None,
fixture_paths: &[
"tests/dynamic_fixtures/xxe/python/benign.py",
],
oob_nonce_slot: false,
// Empty here, unlike the vuln entry.
probe_predicates: &[],
// NOTE(review): a control presumably needs neither a control of its
// own nor a rationale for lacking one — confirm.
benign_control: None,
no_benign_control_rationale: None,
},
];

View file

@ -0,0 +1,65 @@
//! Ruby `Cap::XXE` payloads — REXML / Nokogiri document parsers.
//!
//! Vuln payload: an XML document declaring an external entity that
//! the harness's instrumented parser expands inside `<data>`; the
//! shim writes `ProbeKind::Xxe { entity_expanded: true }` once it
//! sees the entity body substitute into the parsed output.
//!
//! Benign control: a well-formed XML document with no doctype, so
//! the parser has no entity to resolve and the shim writes
//! `entity_expanded: false`.
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
use crate::dynamic::oracle::ProbePredicate;
/// Curated Ruby `Cap::XXE` payloads: one entity-declaring document and the
/// doctype-free benign control it is paired with.
pub const PAYLOADS: &[CuratedPayload] = &[
// Vuln payload: the DOCTYPE declares external entity `xxe` pointing at
// `file:///etc/hostname`, and the `<data>` element body references it.
CuratedPayload {
bytes: br#"<?xml version="1.0"?>
<!DOCTYPE data [
<!ENTITY xxe SYSTEM "file:///etc/hostname">
]>
<data>&xxe;</data>"#,
label: "xxe-ruby-doctype-entity",
// Oracle predicate: requires the probe to report the entity as expanded.
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::XxeEntityExpanded {
require_expanded: true,
}],
},
is_benign: false,
provenance: PayloadProvenance::Curated,
since_corpus_version: 9,
deprecated_at_corpus_version: None,
fixture_paths: &[
"tests/dynamic_fixtures/xxe/ruby/vuln.rb",
],
oob_nonce_slot: false,
// Carries the same predicate as `oracle` above.
probe_predicates: &[ProbePredicate::XxeEntityExpanded {
require_expanded: true,
}],
// References the benign entry below by its label.
benign_control: Some(PayloadRef {
label: "xxe-ruby-benign",
}),
no_benign_control_rationale: None,
},
// Benign control: same `<data>` element, but no DOCTYPE and no entity
// reference in the document.
CuratedPayload {
bytes: br#"<?xml version="1.0"?>
<data>hello</data>"#,
label: "xxe-ruby-benign",
// Same `require_expanded: true` predicate as the vuln entry.
// NOTE(review): presumably intentional so the oracle stays unsatisfied
// on this input (module docs say the shim writes
// `entity_expanded: false`) — confirm.
oracle: Oracle::SinkProbe {
predicates: &[ProbePredicate::XxeEntityExpanded {
require_expanded: true,
}],
},
is_benign: true,
provenance: PayloadProvenance::Curated,
since_corpus_version: 9,
deprecated_at_corpus_version: None,
fixture_paths: &[
"tests/dynamic_fixtures/xxe/ruby/benign.rb",
],
oob_nonce_slot: false,
// Empty here, unlike the vuln entry.
probe_predicates: &[],
// NOTE(review): a control presumably needs neither a control of its
// own nor a rationale for lacking one — confirm.
benign_control: None,
no_benign_control_rationale: None,
},
];