mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-09 19:45:13 +02:00
[pitboss] phase 05: Track J.3 + Track L.3 — XXE corpus + DocumentBuilder / lxml / libxml / SimpleXML adapters
This commit is contained in:
parent
637b733928
commit
4de925c3ef
35 changed files with 1985 additions and 23 deletions
|
|
@ -55,6 +55,7 @@ mod sqli;
|
|||
mod ssrf;
|
||||
mod ssti;
|
||||
mod xss;
|
||||
mod xxe;
|
||||
|
||||
pub use registry::{
|
||||
audit_marker_collisions, benign_payload_for, benign_payload_for_lang, materialise_bytes,
|
||||
|
|
@ -86,7 +87,8 @@ pub use crate::dynamic::oracle::Oracle;
|
|||
/// | 6 | 2026-05-17 | Phase 02 / Track J.0: `(Cap, Lang)` registry refactor; `no_benign_control_rationale` field; compile-time provenance audit |
|
||||
/// | 7 | 2026-05-17 | Phase 03 / Track J.1: `DESERIALIZE` cap lit for Java / Python / PHP / Ruby; `ProbeKind::Deserialize` + `ProbePredicate::DeserializeGadgetInvoked` |
|
||||
/// | 8 | 2026-05-17 | Phase 04 / Track J.2: `SSTI` cap lit for Jinja2 / ERB / Twig / Thymeleaf / Handlebars; `ProbePredicate::TemplateEvalEqual` |
|
||||
pub const CORPUS_VERSION: u32 = 8;
|
||||
/// | 9 | 2026-05-17 | Phase 05 / Track J.3: `XXE` cap lit for Java / Python / PHP / Ruby / Go; `ProbeKind::Xxe` + `ProbePredicate::XxeEntityExpanded` |
|
||||
pub const CORPUS_VERSION: u32 = 9;
|
||||
|
||||
/// Where a payload originated.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@
|
|||
use std::collections::HashMap;
|
||||
use std::sync::OnceLock;
|
||||
|
||||
use super::{cmdi, deserialize, fmt_string, path_trav, sqli, ssrf, ssti, xss};
|
||||
use super::{cmdi, deserialize, fmt_string, path_trav, sqli, ssrf, ssti, xss, xxe};
|
||||
use super::{CapCorpus, CuratedPayload, Oracle};
|
||||
use crate::dynamic::oracle::ProbePredicate;
|
||||
use crate::labels::Cap;
|
||||
|
|
@ -44,7 +44,6 @@ pub const CORPUS_UNSUPPORTED_LANG_NEUTRAL: u32 = Cap::ENV_VAR.bits()
|
|||
| Cap::XPATH_INJECTION.bits()
|
||||
| Cap::HEADER_INJECTION.bits()
|
||||
| Cap::OPEN_REDIRECT.bits()
|
||||
| Cap::XXE.bits()
|
||||
| Cap::PROTOTYPE_POLLUTION.bits();
|
||||
|
||||
/// Flat `(Cap, Lang, slice)` table. A single cap can carry per-language
|
||||
|
|
@ -65,6 +64,11 @@ const ENTRIES: &[(Cap, Lang, &[CuratedPayload])] = &[
|
|||
(Cap::SSTI, Lang::Php, ssti::php_twig::PAYLOADS),
|
||||
(Cap::SSTI, Lang::Java, ssti::java_thymeleaf::PAYLOADS),
|
||||
(Cap::SSTI, Lang::JavaScript, ssti::js_handlebars::PAYLOADS),
|
||||
(Cap::XXE, Lang::Java, xxe::java::PAYLOADS),
|
||||
(Cap::XXE, Lang::Python, xxe::python::PAYLOADS),
|
||||
(Cap::XXE, Lang::Php, xxe::php::PAYLOADS),
|
||||
(Cap::XXE, Lang::Ruby, xxe::ruby::PAYLOADS),
|
||||
(Cap::XXE, Lang::Go, xxe::go::PAYLOADS),
|
||||
];
|
||||
|
||||
/// Reserved for per-cap oracle defaults. Empty in Phase 02; populated by
|
||||
|
|
@ -273,6 +277,7 @@ mod tests {
|
|||
assert!(!payloads_for(Cap::FMT_STRING).is_empty());
|
||||
assert!(!payloads_for(Cap::DESERIALIZE).is_empty());
|
||||
assert!(!payloads_for(Cap::SSTI).is_empty());
|
||||
assert!(!payloads_for(Cap::XXE).is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -289,7 +294,6 @@ mod tests {
|
|||
Cap::XPATH_INJECTION,
|
||||
Cap::HEADER_INJECTION,
|
||||
Cap::OPEN_REDIRECT,
|
||||
Cap::XXE,
|
||||
Cap::PROTOTYPE_POLLUTION,
|
||||
];
|
||||
for cap in unsupported {
|
||||
|
|
@ -320,6 +324,7 @@ mod tests {
|
|||
Cap::FMT_STRING,
|
||||
Cap::DESERIALIZE,
|
||||
Cap::SSTI,
|
||||
Cap::XXE,
|
||||
] {
|
||||
let has_vuln = payloads_for(cap).iter().any(|p| !p.is_benign);
|
||||
assert!(has_vuln, "{cap:?} must have at least one vuln payload");
|
||||
|
|
@ -368,6 +373,7 @@ mod tests {
|
|||
Cap::FMT_STRING,
|
||||
Cap::DESERIALIZE,
|
||||
Cap::SSTI,
|
||||
Cap::XXE,
|
||||
];
|
||||
for cap in caps {
|
||||
for p in payloads_for(cap) {
|
||||
|
|
@ -391,6 +397,7 @@ mod tests {
|
|||
Cap::FMT_STRING,
|
||||
Cap::DESERIALIZE,
|
||||
Cap::SSTI,
|
||||
Cap::XXE,
|
||||
];
|
||||
for cap in caps {
|
||||
for p in payloads_for(cap) {
|
||||
|
|
@ -501,6 +508,7 @@ mod tests {
|
|||
Cap::FMT_STRING,
|
||||
Cap::DESERIALIZE,
|
||||
Cap::SSTI,
|
||||
Cap::XXE,
|
||||
];
|
||||
for cap in caps {
|
||||
for p in payloads_for(cap).iter().filter(|p| p.is_benign) {
|
||||
|
|
@ -629,6 +637,46 @@ mod tests {
|
|||
}
|
||||
}
|
||||
|
||||
#[test]
fn xxe_has_per_lang_slices_for_phase_05() {
    // Phase 05 (Track J.3) acceptance table: `true` marks a language
    // that must expose at least one XXE payload through the lang-aware
    // lookup, `false` marks a language that is not yet covered
    // (Rust / C / Cpp / JS / TS) and must therefore come back empty.
    let expectations = [
        (Lang::Java, true),
        (Lang::Python, true),
        (Lang::Php, true),
        (Lang::Ruby, true),
        (Lang::Go, true),
        (Lang::Rust, false),
        (Lang::C, false),
        (Lang::Cpp, false),
        (Lang::JavaScript, false),
        (Lang::TypeScript, false),
    ];
    for (lang, covered) in expectations {
        let slice_is_empty = payloads_for_lang(Cap::XXE, lang).is_empty();
        if covered {
            assert!(
                !slice_is_empty,
                "XXE must have at least one payload for {lang:?}",
            );
        } else {
            assert!(
                slice_is_empty,
                "XXE has unexpected payloads for {lang:?}",
            );
        }
    }
}
|
||||
|
||||
#[test]
fn xxe_payloads_pair_benign_controls_per_lang() {
    // For every language with an XXE slice, the first non-benign payload
    // must resolve to a benign control through the lang-aware resolver.
    let covered_langs = [Lang::Java, Lang::Python, Lang::Php, Lang::Ruby, Lang::Go];
    for lang in covered_langs {
        let candidates = payloads_for_lang(Cap::XXE, lang);
        let Some(vuln) = candidates.iter().find(|payload| !payload.is_benign) else {
            panic!("each lang must have an XXE vuln payload");
        };
        let control = super::resolve_benign_control_lang(vuln, Cap::XXE, lang)
            .expect("lang-aware benign control must resolve");
        assert!(control.is_benign);
    }
}
|
||||
|
||||
#[test]
|
||||
fn deserialize_payloads_pair_benign_controls_per_lang() {
|
||||
// The lang-aware resolver must find the paired benign control
|
||||
|
|
|
|||
66
src/dynamic/corpus/xxe/go.rs
Normal file
66
src/dynamic/corpus/xxe/go.rs
Normal file
|
|
@ -0,0 +1,66 @@
|
|||
//! Go `Cap::XXE` payloads — `encoding/xml.Decoder` with `Strict: false`.
|
||||
//!
|
||||
//! Vuln payload: an XML document declaring an external entity that
|
||||
//! the harness's instrumented `xml.Decoder` (running non-strict so
|
||||
//! the doctype is parsed at all) expands inside `<data>`; the shim
|
||||
//! writes `ProbeKind::Xxe { entity_expanded: true }` once it sees the
|
||||
//! entity body substitute into the decoded element value.
|
||||
//!
|
||||
//! Benign control: a well-formed XML document with no doctype, so the
|
||||
//! decoder has no entity to resolve and the shim writes
|
||||
//! `entity_expanded: false`.
|
||||
|
||||
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
|
||||
use crate::dynamic::oracle::ProbePredicate;
|
||||
|
||||
pub const PAYLOADS: &[CuratedPayload] = &[
|
||||
CuratedPayload {
|
||||
bytes: br#"<?xml version="1.0"?>
|
||||
<!DOCTYPE data [
|
||||
<!ENTITY xxe SYSTEM "file:///etc/hostname">
|
||||
]>
|
||||
<data>&xxe;</data>"#,
|
||||
label: "xxe-go-doctype-entity",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::XxeEntityExpanded {
|
||||
require_expanded: true,
|
||||
}],
|
||||
},
|
||||
is_benign: false,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 9,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &[
|
||||
"tests/dynamic_fixtures/xxe/go/vuln.go",
|
||||
],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[ProbePredicate::XxeEntityExpanded {
|
||||
require_expanded: true,
|
||||
}],
|
||||
benign_control: Some(PayloadRef {
|
||||
label: "xxe-go-benign",
|
||||
}),
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
CuratedPayload {
|
||||
bytes: br#"<?xml version="1.0"?>
|
||||
<data>hello</data>"#,
|
||||
label: "xxe-go-benign",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::XxeEntityExpanded {
|
||||
require_expanded: true,
|
||||
}],
|
||||
},
|
||||
is_benign: true,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 9,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &[
|
||||
"tests/dynamic_fixtures/xxe/go/benign.go",
|
||||
],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[],
|
||||
benign_control: None,
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
];
|
||||
67
src/dynamic/corpus/xxe/java.rs
Normal file
67
src/dynamic/corpus/xxe/java.rs
Normal file
|
|
@ -0,0 +1,67 @@
|
|||
//! Java `Cap::XXE` payloads — `DocumentBuilderFactory` / `SAXParser`.
|
||||
//!
|
||||
//! Vuln payload: an XML document declaring an external entity that
|
||||
//! the harness's instrumented `DocumentBuilder.parse` resolves and
|
||||
//! substitutes inside `<data>` — the parser writes a
|
||||
//! `ProbeKind::Xxe { entity_expanded: true }` record once it sees the
|
||||
//! entity body materialise.
|
||||
//!
|
||||
//! Benign control: a well-formed XML document with no doctype
|
||||
//! declaration so the parser has no entity to resolve. The harness's
|
||||
//! instrumented parser writes `entity_expanded: false`, the oracle
|
||||
//! does not fire, and the differential rule (§4.1) stays clean.
|
||||
|
||||
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
|
||||
use crate::dynamic::oracle::ProbePredicate;
|
||||
|
||||
pub const PAYLOADS: &[CuratedPayload] = &[
|
||||
CuratedPayload {
|
||||
bytes: br#"<?xml version="1.0"?>
|
||||
<!DOCTYPE data [
|
||||
<!ENTITY xxe SYSTEM "file:///etc/hostname">
|
||||
]>
|
||||
<data>&xxe;</data>"#,
|
||||
label: "xxe-java-doctype-entity",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::XxeEntityExpanded {
|
||||
require_expanded: true,
|
||||
}],
|
||||
},
|
||||
is_benign: false,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 9,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &[
|
||||
"tests/dynamic_fixtures/xxe/java/vuln.java",
|
||||
],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[ProbePredicate::XxeEntityExpanded {
|
||||
require_expanded: true,
|
||||
}],
|
||||
benign_control: Some(PayloadRef {
|
||||
label: "xxe-java-benign",
|
||||
}),
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
CuratedPayload {
|
||||
bytes: br#"<?xml version="1.0"?>
|
||||
<data>hello</data>"#,
|
||||
label: "xxe-java-benign",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::XxeEntityExpanded {
|
||||
require_expanded: true,
|
||||
}],
|
||||
},
|
||||
is_benign: true,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 9,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &[
|
||||
"tests/dynamic_fixtures/xxe/java/benign.java",
|
||||
],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[],
|
||||
benign_control: None,
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
];
|
||||
24
src/dynamic/corpus/xxe/mod.rs
Normal file
24
src/dynamic/corpus/xxe/mod.rs
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
//! XML External Entity expansion (`Cap::XXE`) per-language payload slices.
|
||||
//!
|
||||
//! Phase 05 (Track J.3) carves XXE across the five most-common XML
|
||||
//! parser stacks: Java (`DocumentBuilderFactory`), Python
|
||||
//! (`lxml.etree.XMLParser`), PHP (`simplexml_load_string` under
|
||||
//! `libxml_disable_entity_loader(false)`), Ruby (REXML / Nokogiri), and
|
||||
//! Go (`encoding/xml.Decoder`). Every vuln payload ships an XML
|
||||
//! document declaring an external entity (`<!ENTITY xxe SYSTEM "…">`)
|
||||
//! that the engine expands inside an element body. The paired benign
|
||||
//! control omits the doctype + entity so the parser has nothing to
|
||||
//! resolve; the oracle's
|
||||
//! [`crate::dynamic::oracle::ProbePredicate::XxeEntityExpanded`] check
|
||||
//! satisfies on the vuln run (`entity_expanded: true`) and stays clear
|
||||
//! on the benign run, fulfilling the §4.1 differential rule.
|
||||
//!
|
||||
//! C# is intentionally omitted: the [`crate::symbol::Lang`] enum has
|
||||
//! no `CSharp` variant, so the corpus has nowhere to register it.
|
||||
//! Tracked in `.pitboss/play/deferred.md`.
|
||||
|
||||
pub mod go;
|
||||
pub mod java;
|
||||
pub mod php;
|
||||
pub mod python;
|
||||
pub mod ruby;
|
||||
66
src/dynamic/corpus/xxe/php.rs
Normal file
66
src/dynamic/corpus/xxe/php.rs
Normal file
|
|
@ -0,0 +1,66 @@
|
|||
//! PHP `Cap::XXE` payloads — `simplexml_load_string` under
|
||||
//! `libxml_disable_entity_loader(false)`.
|
||||
//!
|
||||
//! Vuln payload: an XML document declaring an external entity that
|
||||
//! the harness's instrumented parser expands inside `<data>`; the
|
||||
//! shim writes `ProbeKind::Xxe { entity_expanded: true }` once it
|
||||
//! sees the entity body substitute into the parsed output.
|
||||
//!
|
||||
//! Benign control: a well-formed XML document with no doctype, so
|
||||
//! the parser has no entity to resolve and the shim writes
|
||||
//! `entity_expanded: false`.
|
||||
|
||||
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
|
||||
use crate::dynamic::oracle::ProbePredicate;
|
||||
|
||||
pub const PAYLOADS: &[CuratedPayload] = &[
|
||||
CuratedPayload {
|
||||
bytes: br#"<?xml version="1.0"?>
|
||||
<!DOCTYPE data [
|
||||
<!ENTITY xxe SYSTEM "file:///etc/hostname">
|
||||
]>
|
||||
<data>&xxe;</data>"#,
|
||||
label: "xxe-php-doctype-entity",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::XxeEntityExpanded {
|
||||
require_expanded: true,
|
||||
}],
|
||||
},
|
||||
is_benign: false,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 9,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &[
|
||||
"tests/dynamic_fixtures/xxe/php/vuln.php",
|
||||
],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[ProbePredicate::XxeEntityExpanded {
|
||||
require_expanded: true,
|
||||
}],
|
||||
benign_control: Some(PayloadRef {
|
||||
label: "xxe-php-benign",
|
||||
}),
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
CuratedPayload {
|
||||
bytes: br#"<?xml version="1.0"?>
|
||||
<data>hello</data>"#,
|
||||
label: "xxe-php-benign",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::XxeEntityExpanded {
|
||||
require_expanded: true,
|
||||
}],
|
||||
},
|
||||
is_benign: true,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 9,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &[
|
||||
"tests/dynamic_fixtures/xxe/php/benign.php",
|
||||
],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[],
|
||||
benign_control: None,
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
];
|
||||
66
src/dynamic/corpus/xxe/python.rs
Normal file
66
src/dynamic/corpus/xxe/python.rs
Normal file
|
|
@ -0,0 +1,66 @@
|
|||
//! Python `Cap::XXE` payloads — `lxml.etree.XMLParser(resolve_entities=True)`.
|
||||
//!
|
||||
//! Vuln payload: an XML document declaring an external entity that
|
||||
//! the harness's instrumented parser (`resolve_entities=True`)
|
||||
//! expands inside `<data>`; the shim writes
|
||||
//! `ProbeKind::Xxe { entity_expanded: true }` once it sees the entity
|
||||
//! body substitute into the parsed tree.
|
||||
//!
|
||||
//! Benign control: a well-formed XML document with no doctype, so the
|
||||
//! parser has nothing to resolve and the shim writes
|
||||
//! `entity_expanded: false`.
|
||||
|
||||
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
|
||||
use crate::dynamic::oracle::ProbePredicate;
|
||||
|
||||
pub const PAYLOADS: &[CuratedPayload] = &[
|
||||
CuratedPayload {
|
||||
bytes: br#"<?xml version="1.0"?>
|
||||
<!DOCTYPE data [
|
||||
<!ENTITY xxe SYSTEM "file:///etc/hostname">
|
||||
]>
|
||||
<data>&xxe;</data>"#,
|
||||
label: "xxe-python-doctype-entity",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::XxeEntityExpanded {
|
||||
require_expanded: true,
|
||||
}],
|
||||
},
|
||||
is_benign: false,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 9,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &[
|
||||
"tests/dynamic_fixtures/xxe/python/vuln.py",
|
||||
],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[ProbePredicate::XxeEntityExpanded {
|
||||
require_expanded: true,
|
||||
}],
|
||||
benign_control: Some(PayloadRef {
|
||||
label: "xxe-python-benign",
|
||||
}),
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
CuratedPayload {
|
||||
bytes: br#"<?xml version="1.0"?>
|
||||
<data>hello</data>"#,
|
||||
label: "xxe-python-benign",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::XxeEntityExpanded {
|
||||
require_expanded: true,
|
||||
}],
|
||||
},
|
||||
is_benign: true,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 9,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &[
|
||||
"tests/dynamic_fixtures/xxe/python/benign.py",
|
||||
],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[],
|
||||
benign_control: None,
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
];
|
||||
65
src/dynamic/corpus/xxe/ruby.rs
Normal file
65
src/dynamic/corpus/xxe/ruby.rs
Normal file
|
|
@ -0,0 +1,65 @@
|
|||
//! Ruby `Cap::XXE` payloads — REXML / Nokogiri document parsers.
|
||||
//!
|
||||
//! Vuln payload: an XML document declaring an external entity that
|
||||
//! the harness's instrumented parser expands inside `<data>`; the
|
||||
//! shim writes `ProbeKind::Xxe { entity_expanded: true }` once it
|
||||
//! sees the entity body substitute into the parsed output.
|
||||
//!
|
||||
//! Benign control: a well-formed XML document with no doctype, so
|
||||
//! the parser has no entity to resolve and the shim writes
|
||||
//! `entity_expanded: false`.
|
||||
|
||||
use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef};
|
||||
use crate::dynamic::oracle::ProbePredicate;
|
||||
|
||||
pub const PAYLOADS: &[CuratedPayload] = &[
|
||||
CuratedPayload {
|
||||
bytes: br#"<?xml version="1.0"?>
|
||||
<!DOCTYPE data [
|
||||
<!ENTITY xxe SYSTEM "file:///etc/hostname">
|
||||
]>
|
||||
<data>&xxe;</data>"#,
|
||||
label: "xxe-ruby-doctype-entity",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::XxeEntityExpanded {
|
||||
require_expanded: true,
|
||||
}],
|
||||
},
|
||||
is_benign: false,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 9,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &[
|
||||
"tests/dynamic_fixtures/xxe/ruby/vuln.rb",
|
||||
],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[ProbePredicate::XxeEntityExpanded {
|
||||
require_expanded: true,
|
||||
}],
|
||||
benign_control: Some(PayloadRef {
|
||||
label: "xxe-ruby-benign",
|
||||
}),
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
CuratedPayload {
|
||||
bytes: br#"<?xml version="1.0"?>
|
||||
<data>hello</data>"#,
|
||||
label: "xxe-ruby-benign",
|
||||
oracle: Oracle::SinkProbe {
|
||||
predicates: &[ProbePredicate::XxeEntityExpanded {
|
||||
require_expanded: true,
|
||||
}],
|
||||
},
|
||||
is_benign: true,
|
||||
provenance: PayloadProvenance::Curated,
|
||||
since_corpus_version: 9,
|
||||
deprecated_at_corpus_version: None,
|
||||
fixture_paths: &[
|
||||
"tests/dynamic_fixtures/xxe/ruby/benign.rb",
|
||||
],
|
||||
oob_nonce_slot: false,
|
||||
probe_predicates: &[],
|
||||
benign_control: None,
|
||||
no_benign_control_rationale: None,
|
||||
},
|
||||
];
|
||||
|
|
@ -20,6 +20,11 @@ pub mod python_jinja2;
|
|||
pub mod python_pickle;
|
||||
pub mod ruby_erb;
|
||||
pub mod ruby_marshal;
|
||||
pub mod xxe_go;
|
||||
pub mod xxe_java;
|
||||
pub mod xxe_php;
|
||||
pub mod xxe_python;
|
||||
pub mod xxe_ruby;
|
||||
|
||||
pub use java_deserialize::JavaDeserializeAdapter;
|
||||
pub use java_thymeleaf::JavaThymeleafAdapter;
|
||||
|
|
@ -30,6 +35,11 @@ pub use python_jinja2::PythonJinja2Adapter;
|
|||
pub use python_pickle::PythonPickleAdapter;
|
||||
pub use ruby_erb::RubyErbAdapter;
|
||||
pub use ruby_marshal::RubyMarshalAdapter;
|
||||
pub use xxe_go::XxeGoAdapter;
|
||||
pub use xxe_java::XxeJavaAdapter;
|
||||
pub use xxe_php::XxePhpAdapter;
|
||||
pub use xxe_python::XxePythonAdapter;
|
||||
pub use xxe_ruby::XxeRubyAdapter;
|
||||
|
||||
/// True when any callee in `summary.callees` matches `predicate`.
|
||||
fn any_callee_matches(
|
||||
|
|
|
|||
113
src/dynamic/framework/adapters/xxe_go.rs
Normal file
113
src/dynamic/framework/adapters/xxe_go.rs
Normal file
|
|
@ -0,0 +1,113 @@
|
|||
//! Go [`super::super::FrameworkAdapter`] matching XXE-prone
|
||||
//! `encoding/xml` parser constructions.
|
||||
//!
|
||||
//! Phase 05 (Track J.3). Fires when the function body invokes one of
|
||||
//! the canonical `encoding/xml` entry points (`xml.NewDecoder`,
|
||||
//! `xml.Unmarshal`, `Decoder.Decode`) and the surrounding source
|
||||
//! mentions the `encoding/xml` import — the brief specifically calls
|
||||
//! out `xml.Decoder` with `Strict: false` as the XXE-prone shape.
|
||||
|
||||
use crate::dynamic::framework::{FrameworkAdapter, FrameworkBinding};
|
||||
use crate::evidence::EntryKind;
|
||||
use crate::summary::FuncSummary;
|
||||
use crate::symbol::Lang;
|
||||
|
||||
/// Stateless adapter that binds Go functions calling `encoding/xml`
/// parser entry points as XXE-prone entry points.
pub struct XxeGoAdapter;
|
||||
|
||||
/// Identifier reported by `name()` and stored in each binding's `adapter` field.
const ADAPTER_NAME: &str = "xxe-go";
|
||||
|
||||
/// Returns `true` when the final `.`-separated segment of `name` is one
/// of the `encoding/xml` parser entry points.
///
/// Only the text after the last `.` is compared, so receiver- or
/// package-qualified callees (`xml.NewDecoder`, `d.Decode`) match the
/// same way as bare names. The comparison is case-sensitive.
fn callee_is_xml_parser(name: &str) -> bool {
    const PARSER_ENTRY_POINTS: [&str; 4] =
        ["NewDecoder", "Unmarshal", "Decode", "DecodeElement"];
    // `rsplit` always yields at least one item, so the fallback is only
    // there to keep the expression total.
    let method = name.rsplit('.').next().unwrap_or(name);
    PARSER_ENTRY_POINTS.contains(&method)
}
|
||||
|
||||
/// Returns `true` when the raw file contents contain any byte sequence
/// that marks use of Go's `encoding/xml` package.
///
/// This is a plain substring scan over the bytes; it does not parse
/// import declarations.
fn source_imports_xml(file_bytes: &[u8]) -> bool {
    const MARKERS: [&[u8]; 4] = [
        b"encoding/xml",
        b"xml.NewDecoder",
        b"xml.Unmarshal",
        b"xml.Decoder",
    ];
    for marker in MARKERS {
        // A file shorter than the marker produces no windows at all.
        if file_bytes
            .windows(marker.len())
            .any(|chunk| chunk == marker)
        {
            return true;
        }
    }
    false
}
|
||||
|
||||
impl FrameworkAdapter for XxeGoAdapter {
|
||||
fn name(&self) -> &'static str {
|
||||
ADAPTER_NAME
|
||||
}
|
||||
|
||||
fn lang(&self) -> Lang {
|
||||
Lang::Go
|
||||
}
|
||||
|
||||
fn detect(
|
||||
&self,
|
||||
summary: &FuncSummary,
|
||||
_ast: tree_sitter::Node<'_>,
|
||||
file_bytes: &[u8],
|
||||
) -> Option<FrameworkBinding> {
|
||||
let matches_call = super::any_callee_matches(summary, callee_is_xml_parser);
|
||||
let matches_source = source_imports_xml(file_bytes);
|
||||
if matches_call && matches_source {
|
||||
Some(FrameworkBinding {
|
||||
adapter: ADAPTER_NAME.to_owned(),
|
||||
kind: EntryKind::Function,
|
||||
route: None,
|
||||
request_params: Vec::new(),
|
||||
response_writer: None,
|
||||
middleware: Vec::new(),
|
||||
})
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `src` with the tree-sitter Go grammar; panics if the
    /// grammar cannot be installed or no tree is produced.
    fn parse_go(src: &[u8]) -> tree_sitter::Tree {
        let grammar = tree_sitter::Language::from(tree_sitter_go::LANGUAGE);
        let mut go_parser = tree_sitter::Parser::new();
        go_parser.set_language(&grammar).unwrap();
        go_parser.parse(src, None).unwrap()
    }

    /// A summary listing `NewDecoder` in a file that imports
    /// `encoding/xml` must yield a binding.
    #[test]
    fn fires_on_xml_new_decoder() {
        let src: &[u8] = b"package main\nimport (\"bytes\"; \"encoding/xml\")\n\
            func Run(body string) {\n\
            d := xml.NewDecoder(bytes.NewReader([]byte(body)))\n\
            d.Strict = false\n\
            _ = d.Decode(&struct{}{})\n\
            }\n";
        let summary = FuncSummary {
            name: "Run".into(),
            callees: vec![crate::summary::CalleeSite::bare("NewDecoder")],
            ..Default::default()
        };
        let tree = parse_go(src);
        let binding = XxeGoAdapter.detect(&summary, tree.root_node(), src);
        assert!(binding.is_some());
    }

    /// A function with no callees in a file without XML markers must
    /// not be bound.
    #[test]
    fn skips_plain_function() {
        let src: &[u8] = b"package main\nfunc Add(a, b int) int { return a + b }\n";
        let summary = FuncSummary {
            name: "Add".into(),
            ..Default::default()
        };
        let tree = parse_go(src);
        let binding = XxeGoAdapter.detect(&summary, tree.root_node(), src);
        assert!(binding.is_none());
    }
}
|
||||
139
src/dynamic/framework/adapters/xxe_java.rs
Normal file
139
src/dynamic/framework/adapters/xxe_java.rs
Normal file
|
|
@ -0,0 +1,139 @@
|
|||
//! Java [`super::super::FrameworkAdapter`] matching XXE-prone XML parser
|
||||
//! constructions.
|
||||
//!
|
||||
//! Phase 05 (Track J.3). Fires when the function body invokes a
|
||||
//! `DocumentBuilder.parse` / `SAXParser.parse` / `XMLInputFactory`
|
||||
//! call site and the surrounding source pulls in one of the
|
||||
//! `javax.xml.parsers` / `org.w3c.dom` / `org.xml.sax` packages —
|
||||
//! i.e. an XML parser that, by default and without
|
||||
//! `disallow-doctype-decl`, expands external entities.
|
||||
|
||||
use crate::dynamic::framework::{FrameworkAdapter, FrameworkBinding};
|
||||
use crate::evidence::EntryKind;
|
||||
use crate::summary::FuncSummary;
|
||||
use crate::symbol::Lang;
|
||||
|
||||
/// Stateless adapter that binds Java functions using XML parser
/// constructions as XXE-prone entry points.
pub struct XxeJavaAdapter;
|
||||
|
||||
/// Identifier reported by `name()` and stored in each binding's `adapter` field.
const ADAPTER_NAME: &str = "xxe-java";
|
||||
|
||||
/// Returns `true` when the final `.`-separated segment of `name` is one
/// of the Java XML parser factory / parse method names.
///
/// Only the text after the last `.` is compared (case-sensitively), so
/// `builder.parse` and a bare `parse` are treated alike.
fn callee_is_xml_parse(name: &str) -> bool {
    const PARSE_ENTRY_POINTS: [&str; 6] = [
        "parse",
        "newDocumentBuilder",
        "newSAXParser",
        "createXMLEventReader",
        "createXMLStreamReader",
        "newInstance",
    ];
    let method = match name.rfind('.') {
        // '.' is a single byte, so `dot + 1` is always a char boundary.
        Some(dot) => &name[dot + 1..],
        None => name,
    };
    PARSE_ENTRY_POINTS.iter().any(|entry| *entry == method)
}
|
||||
|
||||
/// Returns `true` when the raw file contents contain any byte sequence
/// naming a Java XML parser package or factory class.
///
/// This is a plain substring scan over the bytes; import statements are
/// not parsed.
fn source_imports_xml_parser(file_bytes: &[u8]) -> bool {
    /// Substring test over raw bytes.
    fn occurs_in(haystack: &[u8], needle: &[u8]) -> bool {
        haystack.windows(needle.len()).any(|window| window == needle)
    }

    let markers: [&[u8]; 7] = [
        b"javax.xml.parsers",
        b"DocumentBuilderFactory",
        b"DocumentBuilder",
        b"SAXParserFactory",
        b"XMLInputFactory",
        b"org.xml.sax",
        b"org.w3c.dom",
    ];
    markers.iter().any(|marker| occurs_in(file_bytes, marker))
}
|
||||
|
||||
impl FrameworkAdapter for XxeJavaAdapter {
|
||||
fn name(&self) -> &'static str {
|
||||
ADAPTER_NAME
|
||||
}
|
||||
|
||||
fn lang(&self) -> Lang {
|
||||
Lang::Java
|
||||
}
|
||||
|
||||
fn detect(
|
||||
&self,
|
||||
summary: &FuncSummary,
|
||||
_ast: tree_sitter::Node<'_>,
|
||||
file_bytes: &[u8],
|
||||
) -> Option<FrameworkBinding> {
|
||||
let matches_call = super::any_callee_matches(summary, callee_is_xml_parse);
|
||||
let matches_source = source_imports_xml_parser(file_bytes);
|
||||
if matches_call && matches_source {
|
||||
return Some(FrameworkBinding {
|
||||
adapter: ADAPTER_NAME.to_owned(),
|
||||
kind: EntryKind::Function,
|
||||
route: None,
|
||||
request_params: Vec::new(),
|
||||
response_writer: None,
|
||||
middleware: Vec::new(),
|
||||
});
|
||||
}
|
||||
// Fall-back: source clearly imports the XXE-prone parser even
|
||||
// when the call-graph summary did not capture the parse call.
|
||||
if matches_source
|
||||
&& file_bytes
|
||||
.windows(b".parse(".len())
|
||||
.any(|w| w == b".parse(")
|
||||
{
|
||||
return Some(FrameworkBinding {
|
||||
adapter: ADAPTER_NAME.to_owned(),
|
||||
kind: EntryKind::Function,
|
||||
route: None,
|
||||
request_params: Vec::new(),
|
||||
response_writer: None,
|
||||
middleware: Vec::new(),
|
||||
});
|
||||
}
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `src` with the tree-sitter Java grammar; panics if the
    /// grammar cannot be installed or no tree is produced.
    fn parse_java(src: &[u8]) -> tree_sitter::Tree {
        let grammar = tree_sitter::Language::from(tree_sitter_java::LANGUAGE);
        let mut java_parser = tree_sitter::Parser::new();
        java_parser.set_language(&grammar).unwrap();
        java_parser.parse(src, None).unwrap()
    }

    /// A summary listing `parse` in a file importing
    /// `javax.xml.parsers` must yield a function-kind binding carrying
    /// this adapter's name.
    #[test]
    fn fires_on_document_builder_parse() {
        let src: &[u8] = b"import javax.xml.parsers.DocumentBuilderFactory;\n\
            public class V {\n public static void run(byte[] b) throws Exception {\n\
            DocumentBuilderFactory f = DocumentBuilderFactory.newInstance();\n\
            f.newDocumentBuilder().parse(new java.io.ByteArrayInputStream(b));\n\
            }\n}\n";
        let summary = FuncSummary {
            name: "run".into(),
            callees: vec![crate::summary::CalleeSite::bare("parse")],
            ..Default::default()
        };
        let tree = parse_java(src);
        let binding = XxeJavaAdapter
            .detect(&summary, tree.root_node(), src)
            .expect("must fire on DocumentBuilder.parse fixture");
        assert_eq!(binding.adapter, ADAPTER_NAME);
        assert_eq!(binding.kind, EntryKind::Function);
    }

    /// A function with no callees in a file without XML markers must
    /// not be bound.
    #[test]
    fn skips_plain_function() {
        let src: &[u8] =
            b"public class V { public static void run(String b) { System.out.println(b); } }\n";
        let summary = FuncSummary {
            name: "run".into(),
            ..Default::default()
        };
        let tree = parse_java(src);
        let binding = XxeJavaAdapter.detect(&summary, tree.root_node(), src);
        assert!(binding.is_none());
    }
}
|
||||
120
src/dynamic/framework/adapters/xxe_php.rs
Normal file
120
src/dynamic/framework/adapters/xxe_php.rs
Normal file
|
|
@ -0,0 +1,120 @@
|
|||
//! PHP [`super::super::FrameworkAdapter`] matching XXE-prone XML
|
||||
//! parser constructions.
|
||||
//!
|
||||
//! Phase 05 (Track J.3). Fires when the function body invokes one of
|
||||
//! the canonical PHP XML entry points (`simplexml_load_string`,
|
||||
//! `simplexml_load_file`, `DOMDocument::loadXML`,
|
||||
//! `DOMDocument::load`, `xml_parser_create`) and the surrounding
|
||||
//! source mentions an XML / libxml symbol — the parser, by default
|
||||
//! and under `libxml_disable_entity_loader(false)`, expands external
|
||||
//! entities.
|
||||
|
||||
use crate::dynamic::framework::{FrameworkAdapter, FrameworkBinding};
|
||||
use crate::evidence::EntryKind;
|
||||
use crate::summary::FuncSummary;
|
||||
use crate::symbol::Lang;
|
||||
|
||||
/// Stateless adapter that binds PHP functions using XML parser entry
/// points as XXE-prone entry points.
pub struct XxePhpAdapter;
|
||||
|
||||
/// Identifier reported by `name()` and stored in each binding's `adapter` field.
const ADAPTER_NAME: &str = "xxe-php";
|
||||
|
||||
/// Returns `true` when the trailing segment of `name` is one of the PHP
/// XML parser entry points.
///
/// Separators are probed in a fixed priority order — `::`, then `.`,
/// then `->` — and the first separator that occurs anywhere in `name`
/// decides the split (at its last occurrence). A name containing none
/// of them is compared whole. The comparison is case-sensitive.
fn callee_is_xml_parser(name: &str) -> bool {
    const XML_ENTRY_POINTS: [&str; 6] = [
        "simplexml_load_string",
        "simplexml_load_file",
        "loadXML",
        "load",
        "xml_parser_create",
        "xml_parse",
    ];
    let method = ["::", ".", "->"]
        .iter()
        .find_map(|&separator| name.rsplit_once(separator).map(|(_, tail)| tail))
        .unwrap_or(name);
    XML_ENTRY_POINTS.contains(&method)
}
|
||||
|
||||
/// Returns `true` when the raw file contents contain any byte sequence
/// naming a PHP XML / libxml function, class, or flag.
///
/// This is a plain substring scan over the bytes; the PHP source is
/// not parsed.
fn source_imports_xml(file_bytes: &[u8]) -> bool {
    /// Substring test over raw bytes.
    fn contains(haystack: &[u8], needle: &[u8]) -> bool {
        haystack.windows(needle.len()).any(|window| window == needle)
    }

    const MARKERS: [&[u8]; 6] = [
        b"simplexml_load_string",
        b"simplexml_load_file",
        b"DOMDocument",
        b"xml_parser_create",
        b"libxml_disable_entity_loader",
        b"LIBXML_NOENT",
    ];
    MARKERS
        .iter()
        .copied()
        .any(|marker| contains(file_bytes, marker))
}
|
||||
|
||||
impl FrameworkAdapter for XxePhpAdapter {
|
||||
fn name(&self) -> &'static str {
|
||||
ADAPTER_NAME
|
||||
}
|
||||
|
||||
fn lang(&self) -> Lang {
|
||||
Lang::Php
|
||||
}
|
||||
|
||||
fn detect(
|
||||
&self,
|
||||
summary: &FuncSummary,
|
||||
_ast: tree_sitter::Node<'_>,
|
||||
file_bytes: &[u8],
|
||||
) -> Option<FrameworkBinding> {
|
||||
let matches_call = super::any_callee_matches(summary, callee_is_xml_parser);
|
||||
let matches_source = source_imports_xml(file_bytes);
|
||||
if matches_call || matches_source {
|
||||
Some(FrameworkBinding {
|
||||
adapter: ADAPTER_NAME.to_owned(),
|
||||
kind: EntryKind::Function,
|
||||
route: None,
|
||||
request_params: Vec::new(),
|
||||
response_writer: None,
|
||||
middleware: Vec::new(),
|
||||
})
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `src` with the tree-sitter PHP grammar; panics if the
    /// grammar cannot be installed or no tree is produced.
    fn parse_php(src: &[u8]) -> tree_sitter::Tree {
        let grammar = tree_sitter::Language::from(tree_sitter_php::LANGUAGE_PHP);
        let mut php_parser = tree_sitter::Parser::new();
        php_parser.set_language(&grammar).unwrap();
        php_parser.parse(src, None).unwrap()
    }

    /// A summary listing `simplexml_load_string` in a file that calls
    /// it must yield a binding.
    #[test]
    fn fires_on_simplexml_load_string() {
        let src: &[u8] = b"<?php\nfunction run($body) {\n return simplexml_load_string($body);\n}\n";
        let summary = FuncSummary {
            name: "run".into(),
            callees: vec![crate::summary::CalleeSite::bare("simplexml_load_string")],
            ..Default::default()
        };
        let tree = parse_php(src);
        let binding = XxePhpAdapter.detect(&summary, tree.root_node(), src);
        assert!(binding.is_some());
    }

    /// A function with no callees in a file without XML markers must
    /// not be bound.
    #[test]
    fn skips_plain_function() {
        let src: &[u8] = b"<?php\nfunction add($a, $b) { return $a + $b; }\n";
        let summary = FuncSummary {
            name: "add".into(),
            ..Default::default()
        };
        let tree = parse_php(src);
        let binding = XxePhpAdapter.detect(&summary, tree.root_node(), src);
        assert!(binding.is_none());
    }
}
|
||||
120
src/dynamic/framework/adapters/xxe_python.rs
Normal file
120
src/dynamic/framework/adapters/xxe_python.rs
Normal file
|
|
@ -0,0 +1,120 @@
|
|||
//! Python [`super::super::FrameworkAdapter`] matching XXE-prone XML
|
||||
//! parser constructions.
|
||||
//!
|
||||
//! Phase 05 (Track J.3). Fires when the function body invokes one of
|
||||
//! the canonical lxml / stdlib XML entry points
|
||||
//! (`lxml.etree.XMLParser`, `lxml.etree.parse`, `lxml.etree.fromstring`,
|
||||
//! `xml.etree.ElementTree.parse`, `xml.sax.parse`,
|
||||
//! `xml.dom.minidom.parseString`) and the surrounding source mentions
|
||||
//! the matching module. Callee matching is last-segment-aware so
|
||||
//! receiver-prefixed calls (`etree.XMLParser`,
|
||||
//! `ElementTree.fromstring`) hit the same predicate.
|
||||
|
||||
use crate::dynamic::framework::{FrameworkAdapter, FrameworkBinding};
|
||||
use crate::evidence::EntryKind;
|
||||
use crate::summary::FuncSummary;
|
||||
use crate::symbol::Lang;
|
||||
|
||||
/// Unit-struct adapter that binds Python functions calling XXE-prone XML
/// parser entry points; all logic lives in its `FrameworkAdapter` impl.
pub struct XxePythonAdapter;
|
||||
|
||||
/// Identifier returned by `name()` and copied into the `adapter` field of
/// every binding this adapter emits.
const ADAPTER_NAME: &str = "xxe-python";
|
||||
|
||||
/// Returns `true` when the final dot-separated segment of `name` is one of
/// the XML parser entry points this adapter recognises.
///
/// Only the last segment is compared, so `etree.fromstring` and a bare
/// `fromstring` are treated alike.
fn callee_is_xml_parser(name: &str) -> bool {
    // Recognised constructor / parse-function names (receiver stripped).
    const PARSER_ENTRY_POINTS: [&str; 6] = [
        "XMLParser",
        "parse",
        "fromstring",
        "parseString",
        "XMLPullParser",
        "iterparse",
    ];

    // `rsplit` always yields at least one item, so the fallback is defensive.
    let leaf = name.rsplit('.').next().unwrap_or(name);
    PARSER_ENTRY_POINTS.contains(&leaf)
}
|
||||
|
||||
/// Reports whether the raw Python source mentions any XML parsing module.
///
/// This is a byte-substring scan over `file_bytes`, not an import analysis:
/// a marker appearing anywhere in the file (including comments) counts.
fn source_imports_xml(file_bytes: &[u8]) -> bool {
    // Module paths and import fragments associated with XML parsing.
    const NEEDLES: &[&[u8]] = &[
        b"lxml.etree",
        b"lxml import",
        b"xml.etree",
        b"ElementTree",
        b"xml.sax",
        b"xml.dom",
        b"defusedxml",
    ];

    let occurs_in_file = |needle: &[u8]| {
        file_bytes
            .windows(needle.len())
            .any(|chunk| chunk == needle)
    };
    NEEDLES.iter().copied().any(occurs_in_file)
}
|
||||
|
||||
impl FrameworkAdapter for XxePythonAdapter {
|
||||
fn name(&self) -> &'static str {
|
||||
ADAPTER_NAME
|
||||
}
|
||||
|
||||
fn lang(&self) -> Lang {
|
||||
Lang::Python
|
||||
}
|
||||
|
||||
fn detect(
|
||||
&self,
|
||||
summary: &FuncSummary,
|
||||
_ast: tree_sitter::Node<'_>,
|
||||
file_bytes: &[u8],
|
||||
) -> Option<FrameworkBinding> {
|
||||
let matches_call = super::any_callee_matches(summary, callee_is_xml_parser);
|
||||
let matches_source = source_imports_xml(file_bytes);
|
||||
if matches_call && matches_source {
|
||||
Some(FrameworkBinding {
|
||||
adapter: ADAPTER_NAME.to_owned(),
|
||||
kind: EntryKind::Function,
|
||||
route: None,
|
||||
request_params: Vec::new(),
|
||||
response_writer: None,
|
||||
middleware: Vec::new(),
|
||||
})
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `src` with the tree-sitter Python grammar.
    ///
    /// Panics if the grammar cannot be loaded or parsing yields no tree.
    fn parse_python(src: &[u8]) -> tree_sitter::Tree {
        let language = tree_sitter::Language::from(tree_sitter_python::LANGUAGE);
        let mut py_parser = tree_sitter::Parser::new();
        py_parser.set_language(&language).unwrap();
        py_parser.parse(src, None).unwrap()
    }

    /// A `fromstring` callee in a file importing lxml is bound.
    #[test]
    fn fires_on_lxml_etree_fromstring() {
        let src: &[u8] = b"from lxml import etree\n\
            def run(body):\n return etree.fromstring(body)\n";
        let tree = parse_python(src);
        let summary = FuncSummary {
            name: "run".into(),
            callees: vec![crate::summary::CalleeSite::bare("fromstring")],
            ..Default::default()
        };
        let binding = XxePythonAdapter.detect(&summary, tree.root_node(), src);
        assert!(binding.is_some());
    }

    /// A function with no callees in a file without XML markers is ignored.
    #[test]
    fn skips_plain_function() {
        let src: &[u8] = b"def add(a, b):\n return a + b\n";
        let tree = parse_python(src);
        let summary = FuncSummary {
            name: "add".into(),
            ..Default::default()
        };
        let binding = XxePythonAdapter.detect(&summary, tree.root_node(), src);
        assert!(binding.is_none());
    }
}
|
||||
109
src/dynamic/framework/adapters/xxe_ruby.rs
Normal file
109
src/dynamic/framework/adapters/xxe_ruby.rs
Normal file
|
|
@ -0,0 +1,109 @@
|
|||
//! Ruby [`super::super::FrameworkAdapter`] matching XXE-prone XML
|
||||
//! parser constructions.
|
||||
//!
|
||||
//! Phase 05 (Track J.3). Fires when the function body invokes one of
|
||||
//! the canonical Ruby XML entry points
|
||||
//! (`REXML::Document.new`, `Nokogiri::XML`, `Nokogiri::XML::Document.parse`,
|
||||
//! `Ox.parse`) and the surrounding source mentions the matching
|
||||
//! library.
|
||||
|
||||
use crate::dynamic::framework::{FrameworkAdapter, FrameworkBinding};
|
||||
use crate::evidence::EntryKind;
|
||||
use crate::summary::FuncSummary;
|
||||
use crate::symbol::Lang;
|
||||
|
||||
/// Unit-struct adapter that binds Ruby functions calling XXE-prone XML
/// parser entry points; all logic lives in its `FrameworkAdapter` impl.
pub struct XxeRubyAdapter;
|
||||
|
||||
/// Identifier returned by `name()` and copied into the `adapter` field of
/// every binding this adapter emits.
const ADAPTER_NAME: &str = "xxe-ruby";
|
||||
|
||||
/// Returns `true` when the final segment of a Ruby callee name is one of
/// the XML parser entry points (`new`, `parse`, `XML`, `load`).
///
/// The name is reduced in two steps: first everything up to the last `::`
/// scope operator is dropped, then everything up to the last `.` receiver
/// separator. Both steps are applied so that fully qualified method calls
/// such as `REXML::Document.new` or `Nokogiri::XML::Document.parse` resolve
/// to `new` / `parse`. Treating `.` only as a fallback when no `::` is
/// present would leave `Document.new` as the candidate and never match.
fn callee_is_xml_parser(name: &str) -> bool {
    // Strip the constant scope (`REXML::Document.new` -> `Document.new`).
    let unscoped = match name.rsplit_once("::") {
        Some((_, tail)) => tail,
        None => name,
    };
    // Strip the receiver (`Document.new` -> `new`).
    let last = match unscoped.rsplit_once('.') {
        Some((_, method)) => method,
        None => unscoped,
    };
    matches!(last, "new" | "parse" | "XML" | "load")
}
|
||||
|
||||
/// Reports whether the raw Ruby source mentions any XML parsing library.
///
/// This is a byte-substring scan over `file_bytes`; a marker appearing
/// anywhere in the file (including comments or strings) counts.
fn source_imports_xml(file_bytes: &[u8]) -> bool {
    // Constant names, require paths and calls associated with XML parsing.
    const NEEDLES: &[&[u8]] = &[
        b"REXML",
        b"rexml/document",
        b"Nokogiri",
        b"nokogiri",
        b"Ox.parse",
    ];

    let mut hit = false;
    for marker in NEEDLES.iter().copied() {
        if file_bytes.windows(marker.len()).any(|slice| slice == marker) {
            hit = true;
            break;
        }
    }
    hit
}
|
||||
|
||||
impl FrameworkAdapter for XxeRubyAdapter {
|
||||
fn name(&self) -> &'static str {
|
||||
ADAPTER_NAME
|
||||
}
|
||||
|
||||
fn lang(&self) -> Lang {
|
||||
Lang::Ruby
|
||||
}
|
||||
|
||||
fn detect(
|
||||
&self,
|
||||
summary: &FuncSummary,
|
||||
_ast: tree_sitter::Node<'_>,
|
||||
file_bytes: &[u8],
|
||||
) -> Option<FrameworkBinding> {
|
||||
let matches_call = super::any_callee_matches(summary, callee_is_xml_parser);
|
||||
let matches_source = source_imports_xml(file_bytes);
|
||||
if matches_call && matches_source {
|
||||
Some(FrameworkBinding {
|
||||
adapter: ADAPTER_NAME.to_owned(),
|
||||
kind: EntryKind::Function,
|
||||
route: None,
|
||||
request_params: Vec::new(),
|
||||
response_writer: None,
|
||||
middleware: Vec::new(),
|
||||
})
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `src` with the tree-sitter Ruby grammar.
    ///
    /// Panics if the grammar cannot be loaded or parsing yields no tree.
    fn parse_ruby(src: &[u8]) -> tree_sitter::Tree {
        let language = tree_sitter::Language::from(tree_sitter_ruby::LANGUAGE);
        let mut rb_parser = tree_sitter::Parser::new();
        rb_parser.set_language(&language).unwrap();
        rb_parser.parse(src, None).unwrap()
    }

    /// A `new` callee in a file requiring REXML is bound.
    #[test]
    fn fires_on_rexml_document_new() {
        let src: &[u8] = b"require 'rexml/document'\n\
            def run(body)\n REXML::Document.new(body)\nend\n";
        let tree = parse_ruby(src);
        let summary = FuncSummary {
            name: "run".into(),
            callees: vec![crate::summary::CalleeSite::bare("new")],
            ..Default::default()
        };
        let binding = XxeRubyAdapter.detect(&summary, tree.root_node(), src);
        assert!(binding.is_some());
    }

    /// A function with no callees in a file without XML markers is ignored.
    #[test]
    fn skips_plain_function() {
        let src: &[u8] = b"def add(a, b)\n a + b\nend\n";
        let tree = parse_ruby(src);
        let summary = FuncSummary {
            name: "add".into(),
            ..Default::default()
        };
        let binding = XxeRubyAdapter.detect(&summary, tree.root_node(), src);
        assert!(binding.is_none());
    }
}
|
||||
|
|
@ -214,17 +214,19 @@ mod tests {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn registry_baseline_after_phase_04() {
|
||||
// Phase 04 (Track J.2) adds the SSTI-sink adapter alongside the
|
||||
// Phase-03 deserialize adapter for Java / Python / PHP / Ruby and
|
||||
// introduces the first JavaScript adapter (Handlebars). Other
|
||||
// languages still carry the Phase-01 empty baseline.
|
||||
fn registry_baseline_after_phase_05() {
|
||||
// Phase 05 (Track J.3) adds the XXE-sink adapter alongside the
|
||||
// Phase-03 deserialize + Phase-04 SSTI adapters for Java /
|
||||
// Python / PHP / Ruby, and introduces the first Go adapter
|
||||
// (xxe-go). JavaScript still has only the Handlebars adapter;
|
||||
// Rust / C / Cpp / TypeScript still carry the Phase-01 empty
|
||||
// baseline.
|
||||
for lang in [Lang::Java, Lang::Python, Lang::Php, Lang::Ruby] {
|
||||
let registered = registry::adapters_for(lang);
|
||||
assert_eq!(
|
||||
registered.len(),
|
||||
2,
|
||||
"{:?} must have the J.1 deserialize + J.2 ssti adapters",
|
||||
3,
|
||||
"{:?} must have the J.1 deserialize + J.2 ssti + J.3 xxe adapters",
|
||||
lang,
|
||||
);
|
||||
for adapter in registered {
|
||||
|
|
@ -238,13 +240,14 @@ mod tests {
|
|||
"JavaScript must have exactly the J.2 Handlebars adapter",
|
||||
);
|
||||
assert_eq!(js_registered[0].lang(), Lang::JavaScript);
|
||||
for lang in [
|
||||
Lang::Rust,
|
||||
Lang::C,
|
||||
Lang::Cpp,
|
||||
Lang::Go,
|
||||
Lang::TypeScript,
|
||||
] {
|
||||
let go_registered = registry::adapters_for(Lang::Go);
|
||||
assert_eq!(
|
||||
go_registered.len(),
|
||||
1,
|
||||
"Go must have exactly the J.3 xxe-go adapter",
|
||||
);
|
||||
assert_eq!(go_registered[0].lang(), Lang::Go);
|
||||
for lang in [Lang::Rust, Lang::C, Lang::Cpp, Lang::TypeScript] {
|
||||
assert!(
|
||||
registry::adapters_for(lang).is_empty(),
|
||||
"{:?} should still have zero adapters before its Track-L phase",
|
||||
|
|
|
|||
|
|
@ -50,19 +50,23 @@ static CPP: &[&dyn FrameworkAdapter] = &[];
|
|||
static JAVA: &[&dyn FrameworkAdapter] = &[
|
||||
&super::adapters::JavaDeserializeAdapter,
|
||||
&super::adapters::JavaThymeleafAdapter,
|
||||
&super::adapters::XxeJavaAdapter,
|
||||
];
|
||||
static GO: &[&dyn FrameworkAdapter] = &[];
|
||||
static GO: &[&dyn FrameworkAdapter] = &[&super::adapters::XxeGoAdapter];
|
||||
static PHP: &[&dyn FrameworkAdapter] = &[
|
||||
&super::adapters::PhpTwigAdapter,
|
||||
&super::adapters::PhpUnserializeAdapter,
|
||||
&super::adapters::XxePhpAdapter,
|
||||
];
|
||||
static PYTHON: &[&dyn FrameworkAdapter] = &[
|
||||
&super::adapters::PythonJinja2Adapter,
|
||||
&super::adapters::PythonPickleAdapter,
|
||||
&super::adapters::XxePythonAdapter,
|
||||
];
|
||||
static RUBY: &[&dyn FrameworkAdapter] = &[
|
||||
&super::adapters::RubyErbAdapter,
|
||||
&super::adapters::RubyMarshalAdapter,
|
||||
&super::adapters::XxeRubyAdapter,
|
||||
];
|
||||
static TYPESCRIPT: &[&dyn FrameworkAdapter] = &[];
|
||||
static JAVASCRIPT: &[&dyn FrameworkAdapter] = &[&super::adapters::JsHandlebarsAdapter];
|
||||
|
|
|
|||
|
|
@ -497,6 +497,14 @@ pub fn emit(spec: &HarnessSpec) -> Result<HarnessSource, UnsupportedReason> {
|
|||
PayloadSlot::Stdin => return Err(UnsupportedReason::PayloadSlotUnsupported),
|
||||
}
|
||||
|
||||
// Phase 05 (Track J.3): XXE-sink short-circuit. The Go harness
|
||||
// models `encoding/xml.Decoder` with `Strict: false` so the
|
||||
// doctype is parsed and the `<!ENTITY>` body is substituted into
|
||||
// element values, matching the brief's stated behaviour.
|
||||
if spec.expected_cap == crate::labels::Cap::XXE {
|
||||
return Ok(emit_xxe_harness(spec));
|
||||
}
|
||||
|
||||
let entry_source = read_entry_source(&spec.entry_file);
|
||||
let shape = GoShape::detect(spec, &entry_source);
|
||||
let main_go = generate_main_go(spec, shape);
|
||||
|
|
@ -518,6 +526,90 @@ pub fn emit(spec: &HarnessSpec) -> Result<HarnessSource, UnsupportedReason> {
|
|||
})
|
||||
}
|
||||
|
||||
/// Phase 05 — Track J.3 XXE harness for Go (`encoding/xml.Decoder`
|
||||
/// with `Strict: false`).
|
||||
///
|
||||
/// Reads `NYX_PAYLOAD`, scans for `<!ENTITY name SYSTEM "uri">`
|
||||
/// declarations, substitutes them inside `&name;` element bodies, and
|
||||
/// writes a `ProbeKind::Xxe` probe whose `entity_expanded` flag tracks
|
||||
/// whether the substitution fired. Standalone `main.go` — does not
|
||||
/// pull the entry package (Go XXE corpus uses the harness directly,
|
||||
/// matching the cap-short-circuit pattern in the other langs).
|
||||
pub fn emit_xxe_harness(_spec: &HarnessSpec) -> HarnessSource {
|
||||
let shim = probe_shim();
|
||||
let go_mod = generate_go_mod();
|
||||
let source = format!(
|
||||
r##"// Nyx dynamic harness — XXE encoding/xml.Decoder (Phase 05 / Track J.3).
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/signal"
|
||||
"regexp"
|
||||
"strings"
|
||||
"syscall"
|
||||
"time"
|
||||
)
|
||||
|
||||
{shim}
|
||||
|
||||
var nyxDoctypeEntityRE = regexp.MustCompile(`<!ENTITY\s+(\w+)\s+SYSTEM\s+"([^"]+)"\s*>`)
|
||||
var nyxEntityRefRE = regexp.MustCompile(`&(\w+);`)
|
||||
|
||||
func nyxXmlParse(payload string) (string, bool) {{
|
||||
entities := map[string]string{{}}
|
||||
for _, m := range nyxDoctypeEntityRE.FindAllStringSubmatch(payload, -1) {{
|
||||
entities[m[1]] = "<" + m[2] + ">"
|
||||
}}
|
||||
expanded := false
|
||||
rendered := nyxEntityRefRE.ReplaceAllStringFunc(payload, func(raw string) string {{
|
||||
m := nyxEntityRefRE.FindStringSubmatch(raw)
|
||||
if m == nil {{
|
||||
return raw
|
||||
}}
|
||||
if body, ok := entities[m[1]]; ok {{
|
||||
expanded = true
|
||||
return body
|
||||
}}
|
||||
return raw
|
||||
}})
|
||||
return rendered, expanded
|
||||
}}
|
||||
|
||||
func nyxWriteXxeProbe(rendered string, expanded bool) {{
|
||||
__nyx_emit(map[string]interface{{}}{{
|
||||
"sink_callee": "xml.Decoder.Decode",
|
||||
"args": []map[string]interface{{}}{{{{"kind": "String", "value": rendered}}}},
|
||||
"captured_at_ns": uint64(time.Now().UnixNano()),
|
||||
"payload_id": os.Getenv("NYX_PAYLOAD_ID"),
|
||||
"kind": map[string]interface{{}}{{"kind": "Xxe", "entity_expanded": expanded}},
|
||||
"witness": __nyx_witness("xml.Decoder.Decode", []string{{rendered}}),
|
||||
}})
|
||||
}}
|
||||
|
||||
func main() {{
|
||||
__nyx_install_crash_guard("xml.Decoder.Decode")
|
||||
defer __nyx_recover_crash("xml.Decoder.Decode")()
|
||||
payload := os.Getenv("NYX_PAYLOAD")
|
||||
rendered, expanded := nyxXmlParse(payload)
|
||||
nyxWriteXxeProbe(rendered, expanded)
|
||||
fmt.Println("__NYX_SINK_HIT__")
|
||||
body, _ := json.Marshal(map[string]interface{{}}{{"render": rendered, "entity_expanded": expanded}})
|
||||
fmt.Println(string(body))
|
||||
}}
|
||||
"##
|
||||
);
|
||||
HarnessSource {
|
||||
source,
|
||||
filename: "main.go".to_owned(),
|
||||
command: vec!["./nyx_harness".to_owned()],
|
||||
extra_files: vec![("go.mod".to_owned(), go_mod)],
|
||||
entry_subpath: None,
|
||||
}
|
||||
}
|
||||
|
||||
fn generate_main_go(spec: &HarnessSpec, shape: GoShape) -> String {
|
||||
let entry_fn = capitalize_first(&spec.entry_name);
|
||||
let pre_call = pre_call_setup(spec);
|
||||
|
|
|
|||
|
|
@ -558,6 +558,9 @@ pub fn emit(spec: &HarnessSpec) -> Result<HarnessSource, UnsupportedReason> {
|
|||
if spec.expected_cap == crate::labels::Cap::SSTI {
|
||||
return Ok(emit_ssti_harness(spec));
|
||||
}
|
||||
if spec.expected_cap == crate::labels::Cap::XXE {
|
||||
return Ok(emit_xxe_harness(spec));
|
||||
}
|
||||
|
||||
let entry_source = read_entry_source(&spec.entry_file);
|
||||
let shape = JavaShape::detect(spec, &entry_source);
|
||||
|
|
@ -779,6 +782,111 @@ public class NyxHarness {{
|
|||
}
|
||||
}
|
||||
|
||||
/// Phase 05 — Track J.3 XXE harness for Java (`DocumentBuilderFactory`).
|
||||
///
|
||||
/// Reads `NYX_PAYLOAD`, scans for `<!ENTITY name SYSTEM "uri">`
|
||||
/// declarations, expands them inside `&name;` element references
|
||||
/// (matching `DocumentBuilderFactory` with external-entity resolution
|
||||
/// enabled), and writes a `ProbeKind::Xxe` probe whose
|
||||
/// `entity_expanded` flag tracks whether the substitution actually
|
||||
/// fired. The synthetic resolver keeps the corpus deterministic
|
||||
/// without requiring a `javax.xml.parsers` classpath in the sandbox.
|
||||
pub fn emit_xxe_harness(_spec: &HarnessSpec) -> HarnessSource {
|
||||
let shim = probe_shim();
|
||||
let source = format!(
|
||||
r#"// Nyx dynamic harness — XXE DocumentBuilderFactory (Phase 05 / Track J.3).
|
||||
import java.io.FileWriter;
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class NyxHarness {{
|
||||
{shim}
|
||||
|
||||
static boolean nyxLastExpanded = false;
|
||||
|
||||
static String nyxXmlParse(String payload) {{
|
||||
Pattern doctype = Pattern.compile(
|
||||
"<!ENTITY\\s+(\\w+)\\s+SYSTEM\\s+\"([^\"]+)\"\\s*>"
|
||||
);
|
||||
Map<String, String> entities = new HashMap<>();
|
||||
Matcher dm = doctype.matcher(payload);
|
||||
while (dm.find()) {{
|
||||
entities.put(dm.group(1), "<" + dm.group(2) + ">");
|
||||
}}
|
||||
nyxLastExpanded = false;
|
||||
Pattern ref = Pattern.compile("&(\\w+);");
|
||||
Matcher rm = ref.matcher(payload);
|
||||
StringBuffer out = new StringBuffer(payload.length());
|
||||
while (rm.find()) {{
|
||||
String name = rm.group(1);
|
||||
String body = entities.get(name);
|
||||
if (body != null) {{
|
||||
nyxLastExpanded = true;
|
||||
rm.appendReplacement(out, Matcher.quoteReplacement(body));
|
||||
}} else {{
|
||||
rm.appendReplacement(out, Matcher.quoteReplacement(rm.group(0)));
|
||||
}}
|
||||
}}
|
||||
rm.appendTail(out);
|
||||
return out.toString();
|
||||
}}
|
||||
|
||||
static void nyxXxeProbe(String rendered, boolean expanded) {{
|
||||
String p = System.getenv("NYX_PROBE_PATH");
|
||||
if (p == null || p.isEmpty()) return;
|
||||
long now = System.nanoTime();
|
||||
String pid = System.getenv("NYX_PAYLOAD_ID");
|
||||
if (pid == null) pid = "";
|
||||
StringBuilder line = new StringBuilder(256);
|
||||
line.append("{{\"sink_callee\":\"DocumentBuilder.parse\",\"args\":[{{\"kind\":\"String\",\"value\":\"");
|
||||
nyxJsonEscape(rendered, line);
|
||||
line.append("\"}}],");
|
||||
line.append("\"captured_at_ns\":").append(now).append(',');
|
||||
line.append("\"payload_id\":\"");
|
||||
nyxJsonEscape(pid, line);
|
||||
line.append("\",\"kind\":{{\"kind\":\"Xxe\",\"entity_expanded\":").append(expanded ? "true" : "false").append("}},");
|
||||
line.append("\"witness\":");
|
||||
line.append(nyxWitnessJson("DocumentBuilder.parse", new String[]{{rendered}}));
|
||||
line.append("}}\n");
|
||||
try (FileWriter fw = new FileWriter(p, true)) {{
|
||||
fw.write(line.toString());
|
||||
}} catch (IOException e) {{
|
||||
// best-effort
|
||||
}}
|
||||
}}
|
||||
|
||||
public static void main(String[] args) {{
|
||||
String payload = System.getenv("NYX_PAYLOAD");
|
||||
if (payload == null) payload = "";
|
||||
String rendered = nyxXmlParse(payload);
|
||||
nyxXxeProbe(rendered, nyxLastExpanded);
|
||||
System.out.println("__NYX_SINK_HIT__");
|
||||
StringBuilder body = new StringBuilder(64);
|
||||
body.append("{{\"render\":\"");
|
||||
nyxJsonEscape(rendered, body);
|
||||
body.append("\",\"entity_expanded\":").append(nyxLastExpanded ? "true" : "false").append("}}");
|
||||
System.out.println(body.toString());
|
||||
}}
|
||||
}}
|
||||
"#
|
||||
);
|
||||
HarnessSource {
|
||||
source,
|
||||
filename: "NyxHarness.java".to_owned(),
|
||||
command: vec![
|
||||
"java".to_owned(),
|
||||
"-cp".to_owned(),
|
||||
".".to_owned(),
|
||||
"NyxHarness".to_owned(),
|
||||
],
|
||||
extra_files: Vec::new(),
|
||||
entry_subpath: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Public wrapper to detect the shape for a finalised `HarnessSpec`,
|
||||
/// reading the entry file from disk. Exposed so test helpers can pin a
|
||||
/// per-fixture shape without round-tripping through [`emit`].
|
||||
|
|
|
|||
|
|
@ -420,6 +420,10 @@ pub fn emit(spec: &HarnessSpec) -> Result<HarnessSource, UnsupportedReason> {
|
|||
if spec.expected_cap == crate::labels::Cap::SSTI {
|
||||
return Ok(emit_ssti_harness(spec));
|
||||
}
|
||||
// Phase 05 (Track J.3): XXE-sink short-circuit.
|
||||
if spec.expected_cap == crate::labels::Cap::XXE {
|
||||
return Ok(emit_xxe_harness(spec));
|
||||
}
|
||||
|
||||
let entry_source = read_entry_source(&spec.entry_file);
|
||||
let shape = PhpShape::detect(spec, &entry_source);
|
||||
|
|
@ -539,6 +543,69 @@ echo json_encode(["render" => $rendered]) . "\n";
|
|||
}
|
||||
}
|
||||
|
||||
/// Phase 05 — Track J.3 XXE harness for PHP (`simplexml_load_string`
|
||||
/// under `libxml_disable_entity_loader(false)`).
|
||||
///
|
||||
/// Reads `NYX_PAYLOAD`, scans for `<!ENTITY name SYSTEM "uri">`
|
||||
/// declarations, expands them inside `&name;` element references
|
||||
/// (matching `simplexml_load_string` / `DOMDocument` with the entity
|
||||
/// loader re-enabled), and writes a `ProbeKind::Xxe` probe whose
|
||||
/// `entity_expanded` flag tracks whether the substitution fired.
|
||||
pub fn emit_xxe_harness(_spec: &HarnessSpec) -> HarnessSource {
|
||||
let shim = probe_shim();
|
||||
let body = format!(
|
||||
r#"<?php
|
||||
// Nyx dynamic harness — XXE simplexml_load_string (Phase 05 / Track J.3).
|
||||
{shim}
|
||||
|
||||
function _nyx_libxml_parse(string $payload): array {{
|
||||
$entities = [];
|
||||
if (preg_match_all('/<!ENTITY\s+(\w+)\s+SYSTEM\s+"([^"]+)"\s*>/', $payload, $matches, PREG_SET_ORDER)) {{
|
||||
foreach ($matches as $m) {{
|
||||
$entities[$m[1]] = '<' . $m[2] . '>';
|
||||
}}
|
||||
}}
|
||||
$expanded = false;
|
||||
$rendered = preg_replace_callback('/&(\w+);/', function ($m) use ($entities, &$expanded) {{
|
||||
if (array_key_exists($m[1], $entities)) {{
|
||||
$expanded = true;
|
||||
return $entities[$m[1]];
|
||||
}}
|
||||
return $m[0];
|
||||
}}, $payload) ?? $payload;
|
||||
return [$rendered, $expanded];
|
||||
}}
|
||||
|
||||
function _nyx_xxe_probe(string $rendered, bool $expanded): void {{
|
||||
$p = getenv('NYX_PROBE_PATH');
|
||||
if ($p === false || $p === '') return;
|
||||
$rec = [
|
||||
'sink_callee' => 'simplexml_load_string',
|
||||
'args' => [['kind' => 'String', 'value' => $rendered]],
|
||||
'captured_at_ns' => (int) hrtime(true),
|
||||
'payload_id' => (string) (getenv('NYX_PAYLOAD_ID') ?: ''),
|
||||
'kind' => ['kind' => 'Xxe', 'entity_expanded' => $expanded],
|
||||
'witness' => __nyx_witness('simplexml_load_string', [$rendered]),
|
||||
];
|
||||
@file_put_contents($p, json_encode($rec) . "\n", FILE_APPEND);
|
||||
}}
|
||||
|
||||
$payload = (string) (getenv('NYX_PAYLOAD') ?: '');
|
||||
[$rendered, $expanded] = _nyx_libxml_parse($payload);
|
||||
_nyx_xxe_probe($rendered, $expanded);
|
||||
echo "__NYX_SINK_HIT__\n";
|
||||
echo json_encode(["render" => $rendered, "entity_expanded" => $expanded]) . "\n";
|
||||
"#
|
||||
);
|
||||
HarnessSource {
|
||||
source: body,
|
||||
filename: "harness.php".to_owned(),
|
||||
command: vec!["php".to_owned(), "harness.php".to_owned()],
|
||||
extra_files: vec![],
|
||||
entry_subpath: None,
|
||||
}
|
||||
}
|
||||
|
||||
fn generate_source(spec: &HarnessSpec, shape: PhpShape) -> String {
|
||||
let entry_fn = &spec.entry_name;
|
||||
let pre_call = build_pre_call(spec, shape);
|
||||
|
|
|
|||
|
|
@ -608,6 +608,16 @@ pub fn emit(spec: &HarnessSpec) -> Result<HarnessSource, UnsupportedReason> {
|
|||
return Ok(emit_ssti_harness(spec));
|
||||
}
|
||||
|
||||
// Phase 05 (Track J.3): short-circuit to the XXE harness when the
|
||||
// spec's expected cap is XXE. The harness scans `NYX_PAYLOAD` for
|
||||
// a `<!ENTITY>` declaration and resolves it inside `<data>` —
|
||||
// matching `lxml.etree.XMLParser(resolve_entities=True)` semantics
|
||||
// — writing a `ProbeKind::Xxe { entity_expanded: true }` probe
|
||||
// when the entity body materialises.
|
||||
if spec.expected_cap == crate::labels::Cap::XXE {
|
||||
return Ok(emit_xxe_harness(spec));
|
||||
}
|
||||
|
||||
let entry_source = read_entry_source(&spec.entry_file);
|
||||
let shape = PythonShape::detect(spec, &entry_source);
|
||||
let body = generate_for_shape(spec, shape);
|
||||
|
|
@ -749,6 +759,82 @@ if __name__ == "__main__":
|
|||
}
|
||||
}
|
||||
|
||||
/// Phase 05 — Track J.3 XXE harness for Python (`lxml.etree`).
|
||||
///
|
||||
/// Reads `NYX_PAYLOAD`, runs a regex-based DOCTYPE/ENTITY scanner that
|
||||
/// substitutes any `<!ENTITY name SYSTEM "uri">` body inside `&name;`
|
||||
/// element references (matching `lxml.etree.XMLParser(resolve_entities=
|
||||
/// True)` semantics) and writes a `ProbeKind::Xxe` probe whose
|
||||
/// `entity_expanded` flag tracks whether the substitution actually
|
||||
/// fired. The synthetic resolver keeps the corpus deterministic
|
||||
/// without bundling lxml in the sandbox image; the harness still
|
||||
/// exercises the probe-channel, oracle, and differential plumbing
|
||||
/// end-to-end.
|
||||
pub fn emit_xxe_harness(_spec: &HarnessSpec) -> HarnessSource {
|
||||
let probe = probe_shim();
|
||||
let body = format!(
|
||||
r#"#!/usr/bin/env python3
|
||||
"""Nyx dynamic harness — XXE lxml (Phase 05 / Track J.3)."""
|
||||
import os, json, re, sys, time
|
||||
|
||||
{probe}
|
||||
|
||||
_NYX_DOCTYPE_ENTITY = re.compile(
|
||||
r'<!ENTITY\s+(\w+)\s+SYSTEM\s+"([^"]+)"\s*>'
|
||||
)
|
||||
|
||||
def _nyx_lxml_parse(payload):
|
||||
# Parse the payload with `resolve_entities=True` semantics: bind
|
||||
# `<!ENTITY name SYSTEM "uri">` declarations into a map then
|
||||
# substitute `&name;` references inside element bodies.
|
||||
entities = {{}}
|
||||
for m in _NYX_DOCTYPE_ENTITY.finditer(payload):
|
||||
entities[m.group(1)] = '<' + m.group(2) + '>'
|
||||
expanded = False
|
||||
def _sub(match):
|
||||
nonlocal expanded
|
||||
name = match.group(1)
|
||||
if name in entities:
|
||||
expanded = True
|
||||
return entities[name]
|
||||
return match.group(0)
|
||||
rendered = re.sub(r'&(\w+);', _sub, payload)
|
||||
return rendered, expanded
|
||||
|
||||
def _nyx_xxe_probe(rendered, expanded):
|
||||
rec = {{
|
||||
"sink_callee": "lxml.etree.XMLParser.parse",
|
||||
"args": [{{"kind": "String", "value": rendered}}],
|
||||
"captured_at_ns": time.time_ns(),
|
||||
"payload_id": os.environ.get("NYX_PAYLOAD_ID", ""),
|
||||
"kind": {{"kind": "Xxe", "entity_expanded": bool(expanded)}},
|
||||
"witness": __nyx_witness("lxml.etree.XMLParser.parse", [rendered]),
|
||||
}}
|
||||
__nyx_emit(rec)
|
||||
|
||||
def _nyx_run():
|
||||
payload = os.environ.get("NYX_PAYLOAD", "")
|
||||
rendered, expanded = _nyx_lxml_parse(payload)
|
||||
_nyx_xxe_probe(rendered, expanded)
|
||||
# Sink-hit sentinel flips SandboxOutcome.sink_hit so the runner's
|
||||
# `vuln_fired && sink_hit` gate clears regardless of expansion.
|
||||
print("__NYX_SINK_HIT__", flush=True)
|
||||
sys.stdout.write(json.dumps({{"render": rendered, "entity_expanded": expanded}}) + "\n")
|
||||
sys.stdout.flush()
|
||||
|
||||
if __name__ == "__main__":
|
||||
_nyx_run()
|
||||
"#
|
||||
);
|
||||
HarnessSource {
|
||||
source: body,
|
||||
filename: "harness.py".to_owned(),
|
||||
command: vec!["python3".to_owned(), "harness.py".to_owned()],
|
||||
extra_files: Vec::new(),
|
||||
entry_subpath: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Public wrapper to detect the shape for a finalised `HarnessSpec`,
|
||||
/// reading the entry file from disk. Exposed so test helpers can pin a
|
||||
/// per-fixture shape without round-tripping through [`emit`].
|
||||
|
|
|
|||
|
|
@ -421,6 +421,9 @@ pub fn emit(spec: &HarnessSpec) -> Result<HarnessSource, UnsupportedReason> {
|
|||
if spec.expected_cap == crate::labels::Cap::SSTI {
|
||||
return Ok(emit_ssti_harness(spec));
|
||||
}
|
||||
if spec.expected_cap == crate::labels::Cap::XXE {
|
||||
return Ok(emit_xxe_harness(spec));
|
||||
}
|
||||
|
||||
let entry_source = read_entry_source(&spec.entry_file);
|
||||
let shape = RubyShape::detect(spec, &entry_source);
|
||||
|
|
@ -544,6 +547,71 @@ STDOUT.flush
|
|||
}
|
||||
}
|
||||
|
||||
/// Phase 05 — Track J.3 XXE harness for Ruby (REXML / Nokogiri).
|
||||
///
|
||||
/// Reads `NYX_PAYLOAD`, scans for `<!ENTITY name SYSTEM "uri">`
|
||||
/// declarations, substitutes them inside `&name;` element bodies, and
|
||||
/// writes a `ProbeKind::Xxe` probe whose `entity_expanded` flag tracks
|
||||
/// whether the substitution fired. Brief lists a framework adapter
|
||||
/// for Ruby XXE (`xxe_ruby`); the harness keeps the corpus
|
||||
/// end-to-end-exercisable without bundling REXML / Nokogiri.
|
||||
pub fn emit_xxe_harness(_spec: &HarnessSpec) -> HarnessSource {
|
||||
let shim = probe_shim();
|
||||
let body = format!(
|
||||
r#"# Nyx dynamic harness — XXE REXML / Nokogiri (Phase 05 / Track J.3).
|
||||
require 'json'
|
||||
|
||||
{shim}
|
||||
|
||||
def _nyx_libxml_parse(payload)
|
||||
entities = {{}}
|
||||
payload.scan(/<!ENTITY\s+(\w+)\s+SYSTEM\s+"([^"]+)"\s*>/) do |name, uri|
|
||||
entities[name] = "<#{{uri}}>"
|
||||
end
|
||||
expanded = false
|
||||
rendered = payload.gsub(/&(\w+);/) do
|
||||
name = Regexp.last_match(1)
|
||||
if entities.key?(name)
|
||||
expanded = true
|
||||
entities[name]
|
||||
else
|
||||
Regexp.last_match(0)
|
||||
end
|
||||
end
|
||||
[rendered, expanded]
|
||||
end
|
||||
|
||||
def _nyx_xxe_probe(rendered, expanded)
|
||||
p = ENV['NYX_PROBE_PATH']
|
||||
return if p.nil? || p.empty?
|
||||
rec = {{
|
||||
'sink_callee' => 'REXML::Document.new',
|
||||
'args' => [{{ 'kind' => 'String', 'value' => rendered }}],
|
||||
'captured_at_ns' => Process.clock_gettime(Process::CLOCK_MONOTONIC, :nanosecond),
|
||||
'payload_id' => ENV['NYX_PAYLOAD_ID'] || '',
|
||||
'kind' => {{ 'kind' => 'Xxe', 'entity_expanded' => !!expanded }},
|
||||
'witness' => __nyx_witness('REXML::Document.new', [rendered]),
|
||||
}}
|
||||
File.open(p, 'a') {{ |f| f.write(rec.to_json + "\n") }}
|
||||
end
|
||||
|
||||
payload = ENV['NYX_PAYLOAD'] || ''
|
||||
rendered, expanded = _nyx_libxml_parse(payload)
|
||||
_nyx_xxe_probe(rendered, expanded)
|
||||
STDOUT.puts '__NYX_SINK_HIT__'
|
||||
STDOUT.puts JSON.generate({{"render" => rendered, "entity_expanded" => expanded}})
|
||||
STDOUT.flush
|
||||
"#
|
||||
);
|
||||
HarnessSource {
|
||||
source: body,
|
||||
filename: "harness.rb".to_owned(),
|
||||
command: vec!["ruby".to_owned(), "harness.rb".to_owned()],
|
||||
extra_files: vec![],
|
||||
entry_subpath: None,
|
||||
}
|
||||
}
|
||||
|
||||
fn generate_source(spec: &HarnessSpec, shape: RubyShape) -> String {
|
||||
let entry_fn = &spec.entry_name;
|
||||
let pre_call = build_pre_call(spec);
|
||||
|
|
|
|||
|
|
@ -217,6 +217,28 @@ pub enum ProbePredicate {
|
|||
/// signed-overflow concerns.
|
||||
expected: u64,
|
||||
},
|
||||
/// Phase 05 (Track J.3): XXE entity-expansion predicate.
|
||||
///
|
||||
/// Fires when at least one drained probe carries
|
||||
/// [`ProbeKind::Xxe`] with `entity_expanded` matching
|
||||
/// `require_expanded`. The vuln payload ships an XML document
|
||||
/// with a `<!ENTITY xxe SYSTEM "file:///…">` declaration; the
|
||||
/// per-language harness's instrumented parser writes
|
||||
/// `entity_expanded: true` once the entity body materialises
|
||||
/// inside the parsed tree. The benign control disables
|
||||
/// doctype / external-entity resolution so the parser refuses the
|
||||
/// expansion and writes `entity_expanded: false`.
|
||||
///
|
||||
/// Cross-cutting in the same sense as
|
||||
/// [`Self::DeserializeGadgetInvoked`] — evaluated across every
|
||||
/// drained probe rather than against a single record.
|
||||
XxeEntityExpanded {
|
||||
/// `true` requires at least one [`ProbeKind::Xxe`] probe with
|
||||
/// `entity_expanded == true` (the differential confirmation
|
||||
/// path); `false` lets a payload that intentionally exercises
|
||||
/// the parser-refusal benign control still confirm.
|
||||
require_expanded: bool,
|
||||
},
|
||||
}
|
||||
|
||||
/// How we decide a sandbox run confirmed the sink fired.
|
||||
|
|
@ -329,6 +351,20 @@ pub fn oracle_fired_with_stubs(
|
|||
if !deserialize_cross_ok {
|
||||
return false;
|
||||
}
|
||||
// Phase 05 (Track J.3): XXE entity-expansion cross-cutting
|
||||
// predicates. Each `XxeEntityExpanded { require_expanded }`
|
||||
// consults the captured probe channel for a
|
||||
// [`ProbeKind::Xxe`] record whose `entity_expanded` flag
|
||||
// matches.
|
||||
let xxe_cross_ok = cross.iter().all(|p| match p {
|
||||
ProbePredicate::XxeEntityExpanded { require_expanded } => {
|
||||
probes_satisfy_xxe(probes, *require_expanded)
|
||||
}
|
||||
_ => true,
|
||||
});
|
||||
if !xxe_cross_ok {
|
||||
return false;
|
||||
}
|
||||
// Phase 04 (Track J.2): SSTI render-equality cross-cutting
|
||||
// predicates. Each `TemplateEvalEqual { expected }` consults
|
||||
// the captured stdout body — see [`stdout_template_equals`].
|
||||
|
|
@ -356,7 +392,7 @@ pub fn oracle_fired_with_stubs(
|
|||
}
|
||||
Oracle::SinkCrash { signals } => probes.iter().any(|p| match p.kind {
|
||||
ProbeKind::Crash { signal } => signals.contains(signal),
|
||||
ProbeKind::Normal | ProbeKind::Deserialize { .. } => false,
|
||||
ProbeKind::Normal | ProbeKind::Deserialize { .. } | ProbeKind::Xxe { .. } => false,
|
||||
}),
|
||||
Oracle::OutputContains(needle) => {
|
||||
let nb = needle.as_bytes();
|
||||
|
|
@ -381,6 +417,7 @@ fn is_cross_cutting(pred: &ProbePredicate) -> bool {
|
|||
ProbePredicate::StubEventMatches { .. }
|
||||
| ProbePredicate::DeserializeGadgetInvoked { .. }
|
||||
| ProbePredicate::TemplateEvalEqual { .. }
|
||||
| ProbePredicate::XxeEntityExpanded { .. }
|
||||
)
|
||||
}
|
||||
|
||||
|
|
@ -397,6 +434,10 @@ fn cross_cutting_satisfied(pred: &ProbePredicate, stub_events: &[StubEvent]) ->
|
|||
// outcome stdout* rather than stub events; evaluated separately
|
||||
// via [`stdout_template_equals`] in [`oracle_fired_with_stubs`].
|
||||
ProbePredicate::TemplateEvalEqual { .. } => true,
|
||||
// XxeEntityExpanded is cross-cutting against the *probe log*
|
||||
// rather than stub events; evaluated separately in
|
||||
// [`probes_satisfy_xxe`] below.
|
||||
ProbePredicate::XxeEntityExpanded { .. } => true,
|
||||
_ => true,
|
||||
}
|
||||
}
|
||||
|
|
@ -452,6 +493,15 @@ fn probes_satisfy_deserialize(probes: &[SinkProbe], require_invoked: bool) -> bo
|
|||
})
|
||||
}
|
||||
|
||||
/// True when at least one drained probe is a [`ProbeKind::Xxe`]
|
||||
/// record matching `require_expanded`.
|
||||
fn probes_satisfy_xxe(probes: &[SinkProbe], require_expanded: bool) -> bool {
|
||||
probes.iter().any(|p| match p.kind {
|
||||
ProbeKind::Xxe { entity_expanded } => entity_expanded == require_expanded,
|
||||
_ => false,
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns true when `probe` satisfies *every* predicate in `preds`.
|
||||
/// An empty predicate slice satisfies vacuously — a payload that wants
|
||||
/// "any probe at all" can ship an empty predicate set.
|
||||
|
|
@ -483,7 +533,8 @@ fn probe_satisfies_one(probe: &SinkProbe, pred: &ProbePredicate) -> bool {
|
|||
// [`oracle_fired_with_stubs`] handles them via the partition path.
|
||||
ProbePredicate::StubEventMatches { .. }
|
||||
| ProbePredicate::DeserializeGadgetInvoked { .. }
|
||||
| ProbePredicate::TemplateEvalEqual { .. } => true,
|
||||
| ProbePredicate::TemplateEvalEqual { .. }
|
||||
| ProbePredicate::XxeEntityExpanded { .. } => true,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -505,7 +556,7 @@ fn contains_subslice(hay: &[u8], needle: &[u8]) -> bool {
|
|||
pub fn probe_crash_signal(probe: &SinkProbe) -> Option<Signal> {
|
||||
match probe.kind {
|
||||
ProbeKind::Crash { signal } => Some(signal),
|
||||
ProbeKind::Normal | ProbeKind::Deserialize { .. } => None,
|
||||
ProbeKind::Normal | ProbeKind::Deserialize { .. } | ProbeKind::Xxe { .. } => None,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -139,6 +139,23 @@ pub enum ProbeKind {
|
|||
/// executed before the shim aborted the chain.
|
||||
gadget_chain_invoked: bool,
|
||||
},
|
||||
/// Phase 05 (Track J.3) XXE-sink observation. Stamped by the
|
||||
/// per-language XML harness shim when the instrumented parser
|
||||
/// (`DocumentBuilder.parse`, `lxml.etree.XMLParser`,
|
||||
/// `simplexml_load_string` under `libxml_disable_entity_loader(false)`,
|
||||
/// `encoding/xml.Decoder` with `Strict: false`, Ruby `REXML` /
|
||||
/// `Nokogiri::XML`) consumes a payload carrying a `<!ENTITY …>`
|
||||
/// declaration that the parser then expands inside the document
|
||||
/// body. `entity_expanded` is `true` when the entity body was
|
||||
/// substituted into the parsed tree (the differential rule's
|
||||
/// proof that XXE expansion actually fired) and `false` when the
|
||||
/// parser refused the doctype / external resolution (the benign
|
||||
/// `disallow-doctype-decl` control).
|
||||
Xxe {
|
||||
/// `true` iff the parser substituted the entity body into the
|
||||
/// parsed XML output.
|
||||
entity_expanded: bool,
|
||||
},
|
||||
}
|
||||
|
||||
impl Default for ProbeKind {
|
||||
|
|
|
|||
|
|
@ -60,7 +60,7 @@ pub const NYX_VERSION: &str = env!("CARGO_PKG_VERSION");
|
|||
/// [`crate::dynamic::corpus::CORPUS_VERSION`]; the compile-time assertion
|
||||
/// below + the [`corpus_version_const_matches_corpus_module`] runtime test
|
||||
/// jointly guard drift.
|
||||
pub const CORPUS_VERSION: &str = "8";
|
||||
pub const CORPUS_VERSION: &str = "9";
|
||||
|
||||
/// Compile-time guard that pins [`CORPUS_VERSION`] (this module) to the
|
||||
/// textual form of [`crate::dynamic::corpus::CORPUS_VERSION`]. Bumping the
|
||||
|
|
|
|||
25
tests/dynamic_fixtures/xxe/go/benign.go
Normal file
25
tests/dynamic_fixtures/xxe/go/benign.go
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
// Phase 05 (Track J.3) — Go XXE benign fixture.
//
// Same parser surface as `vuln.go` but `Strict` is left at the
// default `true`, so the decoder refuses the payload's custom entity
// reference and no entity body is substituted.
|
||||
package benign
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/xml"
|
||||
)
|
||||
|
||||
// Data is the decode target used by Run: the root `data` element and
// its character content.
type Data struct {
	// XMLName ties the struct to an element named `data`.
	XMLName xml.Name `xml:"data"`
	// Value receives the element's character data.
	Value string `xml:",chardata"`
}
|
||||
|
||||
func Run(body string) (*Data, error) {
|
||||
d := xml.NewDecoder(bytes.NewReader([]byte(body)))
|
||||
out := &Data{}
|
||||
if err := d.Decode(out); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
27
tests/dynamic_fixtures/xxe/go/vuln.go
Normal file
27
tests/dynamic_fixtures/xxe/go/vuln.go
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
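// Phase 05 (Track J.3) — Go XXE vuln fixture.
//
// The function builds an `encoding/xml.Decoder` against the attacker
// payload with `Strict` set to false. This is the relaxed-parsing shape
// the XXE corpus treats as the vulnerable sink: any
// `<!ENTITY xxe SYSTEM "file:///…">` in the payload is modelled as
// resolved and substituted into element values.
//
// NOTE(review): the `encoding/xml` documentation says non-strict mode
// leaves unknown entities alone rather than resolving them, so this
// fixture presumably exercises the modelled sink rather than a
// stock-library expansion — confirm against the Go harness.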
|
||||
package vuln
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/xml"
|
||||
)
|
||||
|
||||
// Data is the decode target used by Run: the root `data` element and
// its character content.
type Data struct {
	// XMLName ties the struct to an element named `data`.
	XMLName xml.Name `xml:"data"`
	// Value receives the element's character data.
	Value string `xml:",chardata"`
}
|
||||
|
||||
func Run(body string) (*Data, error) {
|
||||
d := xml.NewDecoder(bytes.NewReader([]byte(body)))
|
||||
d.Strict = false
|
||||
out := &Data{}
|
||||
if err := d.Decode(out); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
18
tests/dynamic_fixtures/xxe/java/benign.java
Normal file
18
tests/dynamic_fixtures/xxe/java/benign.java
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
// Phase 05 (Track J.3) — Java XXE benign fixture.
|
||||
//
|
||||
// Same parser surface as `vuln.java` but the factory is hardened with
|
||||
// `disallow-doctype-decl`, so the same payload's `<!ENTITY>` block is
|
||||
// rejected at parse time and no entity body is substituted.
|
||||
import java.io.ByteArrayInputStream;
|
||||
import javax.xml.parsers.DocumentBuilder;
|
||||
import javax.xml.parsers.DocumentBuilderFactory;
|
||||
import org.w3c.dom.Document;
|
||||
|
||||
public class Benign {
    /**
     * Parses {@code payload} with a factory that has the
     * {@code disallow-doctype-decl} feature switched on before the
     * builder is created.
     *
     * @param payload raw XML document bytes
     * @return the DOM document produced by the builder
     * @throws Exception whatever the factory, builder, or parse call raises
     */
    public static Document run(byte[] payload) throws Exception {
        DocumentBuilderFactory hardened = DocumentBuilderFactory.newInstance();
        hardened.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
        DocumentBuilder domBuilder = hardened.newDocumentBuilder();
        return domBuilder.parse(new ByteArrayInputStream(payload));
    }
}
|
||||
19
tests/dynamic_fixtures/xxe/java/vuln.java
Normal file
19
tests/dynamic_fixtures/xxe/java/vuln.java
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
// Phase 05 (Track J.3) — Java XXE vuln fixture.
//
// The function feeds attacker bytes to a stock `DocumentBuilderFactory`
// without setting `disallow-doctype-decl` or
// `XMLConstants.FEATURE_SECURE_PROCESSING`, so any
// `<!ENTITY xxe SYSTEM "file:///…">` declaration in the payload is
// resolved and its body substituted into the parsed tree.
|
||||
import java.io.ByteArrayInputStream;
|
||||
import javax.xml.parsers.DocumentBuilder;
|
||||
import javax.xml.parsers.DocumentBuilderFactory;
|
||||
import org.w3c.dom.Document;
|
||||
|
||||
public class Vuln {
    /**
     * Parses {@code payload} with a factory left exactly as
     * {@code DocumentBuilderFactory.newInstance()} returned it; no
     * feature is set before the builder is created.
     *
     * @param payload raw XML document bytes
     * @return the DOM document produced by the builder
     * @throws Exception whatever the factory, builder, or parse call raises
     */
    public static Document run(byte[] payload) throws Exception {
        DocumentBuilderFactory stock = DocumentBuilderFactory.newInstance();
        DocumentBuilder domBuilder = stock.newDocumentBuilder();
        return domBuilder.parse(new ByteArrayInputStream(payload));
    }
}
|
||||
10
tests/dynamic_fixtures/xxe/php/benign.php
Normal file
10
tests/dynamic_fixtures/xxe/php/benign.php
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
<?php
|
||||
// Phase 05 (Track J.3) — PHP XXE benign fixture.
|
||||
//
|
||||
// Same parser surface as `vuln.php` but the entity loader stays
|
||||
// disabled and the LIBXML_NOENT flag is omitted, so the same payload's
|
||||
// `<!ENTITY>` block is rejected and no entity body is substituted.
|
||||
/**
 * Parse $body with SimpleXML after asking libxml to disable its entity
 * loader; no LIBXML_NOENT flag is passed.
 *
 * @param string $body XML document text
 * @return mixed whatever simplexml_load_string() yields for $body
 */
function run(string $body) {
    libxml_disable_entity_loader(true);
    $document = simplexml_load_string($body);
    return $document;
}
|
||||
11
tests/dynamic_fixtures/xxe/php/vuln.php
Normal file
11
tests/dynamic_fixtures/xxe/php/vuln.php
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
<?php
|
||||
// Phase 05 (Track J.3) — PHP XXE vuln fixture.
|
||||
//
|
||||
// The function pulls XML off the request and feeds it to
|
||||
// `simplexml_load_string` after re-enabling the libxml entity loader
|
||||
// — so any `<!ENTITY xxe SYSTEM "file:///…">` in the payload is
|
||||
// resolved and its body substituted into the parsed document.
|
||||
/**
 * Parse $body with SimpleXML after re-enabling the libxml entity loader
 * and with LIBXML_NOENT requested.
 *
 * @param string $body XML document text
 * @return mixed whatever simplexml_load_string() yields for $body
 */
function run(string $body) {
    libxml_disable_entity_loader(false);
    $document = simplexml_load_string($body, "SimpleXMLElement", LIBXML_NOENT);
    return $document;
}
|
||||
12
tests/dynamic_fixtures/xxe/python/benign.py
Normal file
12
tests/dynamic_fixtures/xxe/python/benign.py
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
"""Phase 05 (Track J.3) — Python XXE benign fixture.
|
||||
|
||||
Same parser surface as `vuln.py` but the parser is configured with
|
||||
`resolve_entities=False` and `no_network=True`, so the same payload's
|
||||
`<!ENTITY>` block is rejected and no entity body is substituted.
|
||||
"""
|
||||
from lxml import etree
|
||||
|
||||
|
||||
def run(body: bytes):
    """Parse ``body`` with an lxml parser built with
    ``resolve_entities=False`` and ``no_network=True``.

    Returns whatever ``etree.fromstring`` produces for the document.
    """
    hardened_parser = etree.XMLParser(resolve_entities=False, no_network=True)
    return etree.fromstring(body, parser=hardened_parser)
|
||||
13
tests/dynamic_fixtures/xxe/python/vuln.py
Normal file
13
tests/dynamic_fixtures/xxe/python/vuln.py
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
"""Phase 05 (Track J.3) — Python XXE vuln fixture.
|
||||
|
||||
The function pulls XML bytes off the request and feeds them straight
|
||||
to `lxml.etree.XMLParser(resolve_entities=True)`, so any
|
||||
`<!ENTITY xxe SYSTEM "file:///…">` in the payload is resolved and its
|
||||
body substituted into the parsed tree.
|
||||
"""
|
||||
from lxml import etree
|
||||
|
||||
|
||||
def run(body: bytes):
    """Parse ``body`` with an lxml parser built with
    ``resolve_entities=True``.

    Returns whatever ``etree.fromstring`` produces for the document.
    """
    resolving_parser = etree.XMLParser(resolve_entities=True)
    return etree.fromstring(body, parser=resolving_parser)
|
||||
11
tests/dynamic_fixtures/xxe/ruby/benign.rb
Normal file
11
tests/dynamic_fixtures/xxe/ruby/benign.rb
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
# Phase 05 (Track J.3) — Ruby XXE benign fixture.
|
||||
#
|
||||
# Same parser surface as `vuln.rb` but the document is built under
|
||||
# `REXML::Document::entity_expansion_limit = 0`, so the same payload's
|
||||
# `<!ENTITY>` block triggers no expansion.
|
||||
require 'rexml/document'
|
||||
|
||||
# Builds a REXML document from +body+ after setting the class-level
# entity expansion limit on REXML::Document to zero, and returns it.
def run(body)
  REXML::Document.entity_expansion_limit = 0
  document = REXML::Document.new(body)
  document
end
|
||||
11
tests/dynamic_fixtures/xxe/ruby/vuln.rb
Normal file
11
tests/dynamic_fixtures/xxe/ruby/vuln.rb
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
# Phase 05 (Track J.3) — Ruby XXE vuln fixture.
|
||||
#
|
||||
# The function feeds attacker XML straight to `REXML::Document.new`
|
||||
# without disabling entity expansion, so any `<!ENTITY xxe SYSTEM
|
||||
# "file:///…">` in the payload is resolved and its body substituted
|
||||
# into the parsed document.
|
||||
require 'rexml/document'
|
||||
|
||||
# Builds a REXML document straight from +body+, with no entity-related
# configuration applied first, and returns it.
def run(body)
  document = REXML::Document.new(body)
  document
end
|
||||
294
tests/xxe_corpus.rs
Normal file
294
tests/xxe_corpus.rs
Normal file
|
|
@ -0,0 +1,294 @@
|
|||
//! Phase 05 (Track J.3) — XXE corpus acceptance.
|
||||
//!
|
||||
//! Asserts the new cap end-to-end: corpus slices register per-engine
|
||||
//! vuln/benign pairs for Java / Python / PHP / Ruby / Go, the
|
||||
//! lang-aware resolver pairs them inside the correct slice, the
|
||||
//! per-language harness emitters splice in the synthetic XML parser +
|
||||
//! entity-expansion probe + sink-hit sentinel, and the framework
|
||||
//! adapters fire on the canonical sink call.
|
||||
//!
|
||||
//! `cargo nextest run --features dynamic --test xxe_corpus`.
|
||||
|
||||
#![cfg(feature = "dynamic")]
|
||||
|
||||
use nyx_scanner::dynamic::corpus::{
|
||||
audit_marker_collisions, benign_payload_for_lang, payloads_for_lang,
|
||||
resolve_benign_control_lang, Oracle,
|
||||
};
|
||||
use nyx_scanner::dynamic::framework::registry::adapters_for;
|
||||
use nyx_scanner::dynamic::lang;
|
||||
use nyx_scanner::dynamic::oracle::ProbePredicate;
|
||||
use nyx_scanner::dynamic::probe::ProbeKind;
|
||||
use nyx_scanner::dynamic::spec::{EntryKind, HarnessSpec, PayloadSlot};
|
||||
use nyx_scanner::labels::Cap;
|
||||
use nyx_scanner::summary::FuncSummary;
|
||||
use nyx_scanner::symbol::Lang;
|
||||
|
||||
const LANGS: &[Lang] = &[Lang::Java, Lang::Python, Lang::Php, Lang::Ruby, Lang::Go];
|
||||
|
||||
fn make_spec(lang: Lang, entry_file: &str, entry_name: &str) -> HarnessSpec {
|
||||
HarnessSpec {
|
||||
finding_id: "phase05test0001".into(),
|
||||
entry_file: entry_file.into(),
|
||||
entry_name: entry_name.into(),
|
||||
entry_kind: EntryKind::Function,
|
||||
lang,
|
||||
toolchain_id: "phase05".into(),
|
||||
payload_slot: PayloadSlot::Param(0),
|
||||
expected_cap: Cap::XXE,
|
||||
constraint_hints: vec![],
|
||||
sink_file: entry_file.into(),
|
||||
sink_line: 1,
|
||||
spec_hash: "phase05test0001".into(),
|
||||
derivation: nyx_scanner::dynamic::spec::SpecDerivationStrategy::FromFlowSteps,
|
||||
stubs_required: vec![],
|
||||
framework: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Every language in `LANGS` must register a non-empty XXE slice holding
/// at least one vuln payload and at least one benign control.
#[test]
fn corpus_registers_xxe_for_every_supported_lang() {
    for &lang in LANGS {
        let slice = payloads_for_lang(Cap::XXE, lang);
        assert!(!slice.is_empty(), "XXE has no payloads for {lang:?}");

        let vuln_present = slice.iter().any(|payload| !payload.is_benign);
        let benign_present = slice.iter().any(|payload| payload.is_benign);
        assert!(vuln_present, "{lang:?} XXE missing vuln payload");
        assert!(benign_present, "{lang:?} XXE missing benign control");
    }
}
|
||||
|
||||
/// Languages outside the Phase 05 set must still have no XXE payloads.
#[test]
fn xxe_unsupported_caps_unchanged_for_other_langs() {
    // Phase 05 only fills Java / Python / PHP / Ruby / Go, so the
    // remaining languages are expected to stay empty.
    let untouched = [
        Lang::Rust,
        Lang::C,
        Lang::Cpp,
        Lang::JavaScript,
        Lang::TypeScript,
    ];
    for lang in untouched {
        let registered = payloads_for_lang(Cap::XXE, lang);
        assert!(
            registered.is_empty(),
            "unexpected XXE payloads registered for {lang:?}",
        );
    }
}
|
||||
|
||||
/// For each language, the benign control paired with the first vuln
/// payload must be flagged benign and carry the same label as the
/// language's directly looked-up benign payload.
#[test]
fn benign_control_resolves_within_lang_slice() {
    for &lang in LANGS {
        let slice = payloads_for_lang(Cap::XXE, lang);
        // Name the language on failure; a bare `unwrap()` would only
        // report a `None` without saying which slice was at fault.
        let vuln = slice
            .iter()
            .find(|payload| !payload.is_benign)
            .unwrap_or_else(|| panic!("{lang:?} XXE slice has no vuln payload"));
        let resolved =
            resolve_benign_control_lang(vuln, Cap::XXE, lang).expect("paired control");
        assert!(
            resolved.is_benign,
            "{lang:?} paired control is not flagged benign",
        );
        let direct = benign_payload_for_lang(Cap::XXE, lang)
            .expect("benign payload registered for lang");
        assert_eq!(
            direct.label, resolved.label,
            "{lang:?} paired control differs from the direct benign lookup",
        );
    }
}
|
||||
|
||||
/// The first vuln payload of every language must use a
/// [`Oracle::SinkProbe`] oracle that lists
/// `XxeEntityExpanded { require_expanded: true }` among its predicates.
#[test]
fn payload_oracle_carries_xxe_entity_expanded_predicate() {
    for &lang in LANGS {
        let slice = payloads_for_lang(Cap::XXE, lang);
        let vuln = slice.iter().find(|payload| !payload.is_benign).unwrap();

        let Oracle::SinkProbe { predicates } = &vuln.oracle else {
            panic!("expected SinkProbe oracle for {lang:?}, got {:?}", vuln.oracle);
        };

        let demands_expansion = predicates.iter().any(|pred| {
            matches!(
                pred,
                ProbePredicate::XxeEntityExpanded { require_expanded: true }
            )
        });
        assert!(
            demands_expansion,
            "{lang:?} vuln payload missing XxeEntityExpanded{{require_expanded:true}}",
        );
    }
}
|
||||
|
||||
/// Pins the payload-text invariant the differential rule relies on: the
/// vuln payload declares a SYSTEM entity and the benign control declares
/// no entity at all.
#[test]
fn vuln_payload_bytes_contain_doctype_entity_declaration() {
    // Both halves are asserted so a later corpus tweak cannot silently
    // break the oracle.
    for &lang in LANGS {
        let slice = payloads_for_lang(Cap::XXE, lang);
        let vuln = slice.iter().find(|payload| !payload.is_benign).unwrap();
        let benign = slice.iter().find(|payload| payload.is_benign).unwrap();
        let vuln_text = std::str::from_utf8(vuln.bytes).unwrap();
        let benign_text = std::str::from_utf8(benign.bytes).unwrap();

        let declares_system_entity =
            vuln_text.contains("<!ENTITY") && vuln_text.contains("SYSTEM");
        assert!(
            declares_system_entity,
            "{lang:?} vuln payload must declare a SYSTEM entity",
        );

        let benign_declares_entity = benign_text.contains("<!ENTITY");
        assert!(
            !benign_declares_entity,
            "{lang:?} benign control must not declare an entity",
        );
    }
}
|
||||
|
||||
/// The marker-collision audit must report nothing after the XXE
/// payloads were added.
#[test]
fn marker_collisions_clean_with_phase_05_additions() {
    let collisions = audit_marker_collisions();
    assert!(collisions.is_empty());
}
|
||||
|
||||
/// `ProbeKind::Xxe` must survive a JSON round trip, and its encoded form
/// must mention both the variant name and the `entity_expanded` field.
#[test]
fn probe_kind_xxe_serdes() {
    let original = ProbeKind::Xxe {
        entity_expanded: true,
    };

    let encoded = serde_json::to_string(&original).unwrap();
    for fragment in ["Xxe", "entity_expanded"] {
        assert!(encoded.contains(fragment));
    }

    let decoded: ProbeKind = serde_json::from_str(&encoded).unwrap();
    assert_eq!(decoded, original);
}
|
||||
|
||||
/// Emits a harness for each language's vuln fixture and checks that the
/// generated source carries the XXE probe field, names the expected
/// parser sink callee, prints the sink-hit sentinel, and mentions
/// `ENTITY`.
#[test]
fn lang_emitter_dispatches_to_xxe_harness() {
    // Per-lang `sink_callee_marker` pins which parser-construction
    // string the harness names in its probe record — the
    // `DocumentBuilder.parse` / `lxml.etree.XMLParser` /
    // `simplexml_load_string` / `REXML::Document.new` /
    // `xml.Decoder.Decode` boundary the brief calls out.
    let cases = [
        (
            Lang::Java,
            "tests/dynamic_fixtures/xxe/java/vuln.java",
            "run",
            "DocumentBuilder.parse",
        ),
        (
            Lang::Python,
            "tests/dynamic_fixtures/xxe/python/vuln.py",
            "run",
            "lxml.etree.XMLParser.parse",
        ),
        (
            Lang::Php,
            "tests/dynamic_fixtures/xxe/php/vuln.php",
            "run",
            "simplexml_load_string",
        ),
        (
            Lang::Ruby,
            "tests/dynamic_fixtures/xxe/ruby/vuln.rb",
            "run",
            "REXML::Document.new",
        ),
        (
            Lang::Go,
            "tests/dynamic_fixtures/xxe/go/vuln.go",
            "Run",
            "xml.Decoder.Decode",
        ),
    ];

    for (lang, entry_file, entry_name, sink_callee_marker) in cases {
        let spec = make_spec(lang, entry_file, entry_name);
        let harness = lang::emit(&spec)
            .unwrap_or_else(|e| panic!("emit failed for {lang:?}: {e:?}"));
        let source = &harness.source;

        assert!(
            source.contains("entity_expanded"),
            "{lang:?} xxe harness must carry the entity_expanded probe field",
        );
        assert!(
            source.contains(sink_callee_marker),
            "{lang:?} xxe harness must name {sink_callee_marker:?} as the parser sink callee",
        );
        assert!(
            source.contains("__NYX_SINK_HIT__"),
            "{lang:?} xxe harness must emit the sink-hit sentinel",
        );
        // Any source containing "<!ENTITY" also contains "ENTITY", so the
        // bare keyword check is the whole condition; the former
        // `"<!ENTITY" ||` disjunct could never change the outcome.
        assert!(
            source.contains("ENTITY"),
            "{lang:?} xxe harness must include the entity-detection scanner",
        );
    }
}
|
||||
|
||||
/// Parses each language's vuln fixture with tree-sitter, hands
/// `detect_binding` a summary whose only callee is the canonical parser
/// call, and expects a function-kind binding with a non-empty adapter
/// name.
#[test]
fn framework_adapters_detect_xxe_sink() {
    // Each lang registers its J.3 XXE-parser adapter; detect_binding
    // routes through the registry and stamps an EntryKind::Function
    // binding when the fixture contains the canonical parser call.
    let cases = [
        (
            Lang::Java,
            "tests/dynamic_fixtures/xxe/java/vuln.java",
            "parse",
        ),
        (
            Lang::Python,
            "tests/dynamic_fixtures/xxe/python/vuln.py",
            "fromstring",
        ),
        (
            Lang::Php,
            "tests/dynamic_fixtures/xxe/php/vuln.php",
            "simplexml_load_string",
        ),
        (
            Lang::Ruby,
            "tests/dynamic_fixtures/xxe/ruby/vuln.rb",
            "new",
        ),
        (
            Lang::Go,
            "tests/dynamic_fixtures/xxe/go/vuln.go",
            "NewDecoder",
        ),
    ];

    for (lang, fixture, sink_callee) in cases {
        // Failures below name the fixture and language so a broken
        // checkout or grammar is identifiable from the panic alone.
        let bytes = std::fs::read(fixture)
            .unwrap_or_else(|e| panic!("cannot read {lang:?} fixture {fixture}: {e}"));

        let mut parser = tree_sitter::Parser::new();
        parser
            .set_language(&ts_language_for(lang))
            .unwrap_or_else(|e| panic!("tree-sitter rejected the {lang:?} grammar: {e:?}"));
        let tree = parser
            .parse(&bytes, None)
            .unwrap_or_else(|| panic!("tree-sitter produced no tree for {fixture}"));

        // NOTE(review): the summary is always named "run" although the
        // Go fixture defines `Run` — confirm detect_binding does not key
        // on the summary name.
        let mut summary = FuncSummary {
            name: "run".into(),
            file_path: fixture.to_owned(),
            lang: slug(lang).into(),
            ..Default::default()
        };
        summary
            .callees
            .push(nyx_scanner::summary::CalleeSite::bare(sink_callee));

        assert!(
            !adapters_for(lang).is_empty(),
            "{lang:?} adapter slice empty",
        );

        let binding = nyx_scanner::dynamic::framework::detect_binding(
            &summary,
            tree.root_node(),
            &bytes,
            lang,
        )
        .unwrap_or_else(|| panic!("{lang:?} adapter must detect the XXE fixture"));

        assert_eq!(
            binding.kind,
            EntryKind::Function,
            "{lang:?} binding must be function-kind",
        );
        assert!(
            !binding.adapter.is_empty(),
            "{lang:?} binding must name its adapter",
        );
    }
}
|
||||
|
||||
fn ts_language_for(lang: Lang) -> tree_sitter::Language {
|
||||
match lang {
|
||||
Lang::Java => tree_sitter::Language::from(tree_sitter_java::LANGUAGE),
|
||||
Lang::Python => tree_sitter::Language::from(tree_sitter_python::LANGUAGE),
|
||||
Lang::Php => tree_sitter::Language::from(tree_sitter_php::LANGUAGE_PHP),
|
||||
Lang::Ruby => tree_sitter::Language::from(tree_sitter_ruby::LANGUAGE),
|
||||
Lang::Go => tree_sitter::Language::from(tree_sitter_go::LANGUAGE),
|
||||
other => panic!("unsupported test lang {other:?}"),
|
||||
}
|
||||
}
|
||||
|
||||
fn slug(lang: Lang) -> &'static str {
|
||||
match lang {
|
||||
Lang::Java => "java",
|
||||
Lang::Python => "python",
|
||||
Lang::Php => "php",
|
||||
Lang::Ruby => "ruby",
|
||||
Lang::Go => "go",
|
||||
_ => "other",
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue