diff --git a/src/dynamic/corpus.rs b/src/dynamic/corpus.rs index b4d6664a..0edd5003 100644 --- a/src/dynamic/corpus.rs +++ b/src/dynamic/corpus.rs @@ -55,6 +55,7 @@ mod path_trav; mod sqli; mod ssrf; mod ssti; +mod xpath; mod xss; mod xxe; @@ -90,7 +91,8 @@ pub use crate::dynamic::oracle::Oracle; /// | 8 | 2026-05-17 | Phase 04 / Track J.2: `SSTI` cap lit for Jinja2 / ERB / Twig / Thymeleaf / Handlebars; `ProbePredicate::TemplateEvalEqual` | /// | 9 | 2026-05-17 | Phase 05 / Track J.3: `XXE` cap lit for Java / Python / PHP / Ruby / Go; `ProbeKind::Xxe` + `ProbePredicate::XxeEntityExpanded` | /// | 10 | 2026-05-17 | Phase 06 / Track J.4: `LDAP_INJECTION` cap lit for Java / Python / PHP; `ProbeKind::Ldap` + `ProbePredicate::LdapResultCountGreaterThan`; `StubKind::Ldap` + in-sandbox LDAP server stub | -pub const CORPUS_VERSION: u32 = 10; +/// | 11 | 2026-05-17 | Phase 07 / Track J.5: `XPATH_INJECTION` cap lit for Java / Python / PHP / JS; `ProbeKind::Xpath`; `LdapResultCountGreaterThan` renamed to `QueryResultCountGreaterThan` (shared by LDAP + XPath); `xpath_corpus.xml` staged in workdir | +pub const CORPUS_VERSION: u32 = 11; /// Where a payload originated. 
#[derive(Debug, Clone, Copy, PartialEq, Eq)] diff --git a/src/dynamic/corpus/ldap/java.rs b/src/dynamic/corpus/ldap/java.rs index e73364ed..9e5e613f 100644 --- a/src/dynamic/corpus/ldap/java.rs +++ b/src/dynamic/corpus/ldap/java.rs @@ -20,7 +20,7 @@ pub const PAYLOADS: &[CuratedPayload] = &[ bytes: b"alice*)(uid=*", label: "ldap-java-filter-wildcard", oracle: Oracle::SinkProbe { - predicates: &[ProbePredicate::LdapResultCountGreaterThan { n: 1 }], + predicates: &[ProbePredicate::QueryResultCountGreaterThan { n: 1 }], }, is_benign: false, provenance: PayloadProvenance::Curated, @@ -28,7 +28,7 @@ pub const PAYLOADS: &[CuratedPayload] = &[ deprecated_at_corpus_version: None, fixture_paths: &["tests/dynamic_fixtures/ldap_injection/java/Vuln.java"], oob_nonce_slot: false, - probe_predicates: &[ProbePredicate::LdapResultCountGreaterThan { n: 1 }], + probe_predicates: &[ProbePredicate::QueryResultCountGreaterThan { n: 1 }], benign_control: Some(PayloadRef { label: "ldap-java-benign", }), @@ -38,7 +38,7 @@ pub const PAYLOADS: &[CuratedPayload] = &[ bytes: b"alice", label: "ldap-java-benign", oracle: Oracle::SinkProbe { - predicates: &[ProbePredicate::LdapResultCountGreaterThan { n: 1 }], + predicates: &[ProbePredicate::QueryResultCountGreaterThan { n: 1 }], }, is_benign: true, provenance: PayloadProvenance::Curated, diff --git a/src/dynamic/corpus/ldap/mod.rs b/src/dynamic/corpus/ldap/mod.rs index a1b971a4..bf7d02e8 100644 --- a/src/dynamic/corpus/ldap/mod.rs +++ b/src/dynamic/corpus/ldap/mod.rs @@ -15,7 +15,7 @@ //! intended single user. //! //! The oracle's -//! [`crate::dynamic::oracle::ProbePredicate::LdapResultCountGreaterThan`] +//! [`crate::dynamic::oracle::ProbePredicate::QueryResultCountGreaterThan`] //! checks the per-payload `ProbeKind::Ldap.entries_returned` against //! `n = 1` — vuln passes (3 entries), benign clears (1 entry), //! fulfilling the §4.1 differential rule. 
diff --git a/src/dynamic/corpus/ldap/php.rs b/src/dynamic/corpus/ldap/php.rs index ed5e54b6..7f45ad3a 100644 --- a/src/dynamic/corpus/ldap/php.rs +++ b/src/dynamic/corpus/ldap/php.rs @@ -18,7 +18,7 @@ pub const PAYLOADS: &[CuratedPayload] = &[ bytes: b"alice*)(uid=*", label: "ldap-php-filter-wildcard", oracle: Oracle::SinkProbe { - predicates: &[ProbePredicate::LdapResultCountGreaterThan { n: 1 }], + predicates: &[ProbePredicate::QueryResultCountGreaterThan { n: 1 }], }, is_benign: false, provenance: PayloadProvenance::Curated, @@ -26,7 +26,7 @@ pub const PAYLOADS: &[CuratedPayload] = &[ deprecated_at_corpus_version: None, fixture_paths: &["tests/dynamic_fixtures/ldap_injection/php/vuln.php"], oob_nonce_slot: false, - probe_predicates: &[ProbePredicate::LdapResultCountGreaterThan { n: 1 }], + probe_predicates: &[ProbePredicate::QueryResultCountGreaterThan { n: 1 }], benign_control: Some(PayloadRef { label: "ldap-php-benign", }), @@ -36,7 +36,7 @@ pub const PAYLOADS: &[CuratedPayload] = &[ bytes: b"alice", label: "ldap-php-benign", oracle: Oracle::SinkProbe { - predicates: &[ProbePredicate::LdapResultCountGreaterThan { n: 1 }], + predicates: &[ProbePredicate::QueryResultCountGreaterThan { n: 1 }], }, is_benign: true, provenance: PayloadProvenance::Curated, diff --git a/src/dynamic/corpus/ldap/python.rs b/src/dynamic/corpus/ldap/python.rs index 429c9ac7..c4c5300a 100644 --- a/src/dynamic/corpus/ldap/python.rs +++ b/src/dynamic/corpus/ldap/python.rs @@ -19,7 +19,7 @@ pub const PAYLOADS: &[CuratedPayload] = &[ bytes: b"alice*)(uid=*", label: "ldap-python-filter-wildcard", oracle: Oracle::SinkProbe { - predicates: &[ProbePredicate::LdapResultCountGreaterThan { n: 1 }], + predicates: &[ProbePredicate::QueryResultCountGreaterThan { n: 1 }], }, is_benign: false, provenance: PayloadProvenance::Curated, @@ -27,7 +27,7 @@ pub const PAYLOADS: &[CuratedPayload] = &[ deprecated_at_corpus_version: None, fixture_paths: &["tests/dynamic_fixtures/ldap_injection/python/vuln.py"], 
oob_nonce_slot: false, - probe_predicates: &[ProbePredicate::LdapResultCountGreaterThan { n: 1 }], + probe_predicates: &[ProbePredicate::QueryResultCountGreaterThan { n: 1 }], benign_control: Some(PayloadRef { label: "ldap-python-benign", }), @@ -37,7 +37,7 @@ pub const PAYLOADS: &[CuratedPayload] = &[ bytes: b"alice", label: "ldap-python-benign", oracle: Oracle::SinkProbe { - predicates: &[ProbePredicate::LdapResultCountGreaterThan { n: 1 }], + predicates: &[ProbePredicate::QueryResultCountGreaterThan { n: 1 }], }, is_benign: true, provenance: PayloadProvenance::Curated, diff --git a/src/dynamic/corpus/registry.rs b/src/dynamic/corpus/registry.rs index 5b71f308..73d1eeeb 100644 --- a/src/dynamic/corpus/registry.rs +++ b/src/dynamic/corpus/registry.rs @@ -23,7 +23,7 @@ use std::collections::HashMap; use std::sync::OnceLock; -use super::{cmdi, deserialize, fmt_string, ldap, path_trav, sqli, ssrf, ssti, xss, xxe}; +use super::{cmdi, deserialize, fmt_string, ldap, path_trav, sqli, ssrf, ssti, xpath, xss, xxe}; use super::{CapCorpus, CuratedPayload, Oracle}; use crate::dynamic::oracle::ProbePredicate; use crate::labels::Cap; @@ -40,7 +40,6 @@ pub const CORPUS_UNSUPPORTED_LANG_NEUTRAL: u32 = Cap::ENV_VAR.bits() | Cap::CRYPTO.bits() | Cap::UNAUTHORIZED_ID.bits() | Cap::DATA_EXFIL.bits() - | Cap::XPATH_INJECTION.bits() | Cap::HEADER_INJECTION.bits() | Cap::OPEN_REDIRECT.bits() | Cap::PROTOTYPE_POLLUTION.bits(); @@ -71,6 +70,10 @@ const ENTRIES: &[(Cap, Lang, &[CuratedPayload])] = &[ (Cap::LDAP_INJECTION, Lang::Java, ldap::java::PAYLOADS), (Cap::LDAP_INJECTION, Lang::Python, ldap::python::PAYLOADS), (Cap::LDAP_INJECTION, Lang::Php, ldap::php::PAYLOADS), + (Cap::XPATH_INJECTION, Lang::Java, xpath::java::PAYLOADS), + (Cap::XPATH_INJECTION, Lang::Python, xpath::python::PAYLOADS), + (Cap::XPATH_INJECTION, Lang::Php, xpath::php::PAYLOADS), + (Cap::XPATH_INJECTION, Lang::JavaScript, xpath::js::PAYLOADS), ]; /// Reserved for per-cap oracle defaults. 
Empty in Phase 02; populated by @@ -281,6 +284,7 @@ mod tests { assert!(!payloads_for(Cap::SSTI).is_empty()); assert!(!payloads_for(Cap::XXE).is_empty()); assert!(!payloads_for(Cap::LDAP_INJECTION).is_empty()); + assert!(!payloads_for(Cap::XPATH_INJECTION).is_empty()); } #[test] @@ -293,7 +297,6 @@ mod tests { Cap::CRYPTO, Cap::UNAUTHORIZED_ID, Cap::DATA_EXFIL, - Cap::XPATH_INJECTION, Cap::HEADER_INJECTION, Cap::OPEN_REDIRECT, Cap::PROTOTYPE_POLLUTION, @@ -328,6 +331,7 @@ mod tests { Cap::SSTI, Cap::XXE, Cap::LDAP_INJECTION, + Cap::XPATH_INJECTION, ] { let has_vuln = payloads_for(cap).iter().any(|p| !p.is_benign); assert!(has_vuln, "{cap:?} must have at least one vuln payload"); @@ -378,6 +382,7 @@ mod tests { Cap::SSTI, Cap::XXE, Cap::LDAP_INJECTION, + Cap::XPATH_INJECTION, ]; for cap in caps { for p in payloads_for(cap) { @@ -403,6 +408,7 @@ mod tests { Cap::SSTI, Cap::XXE, Cap::LDAP_INJECTION, + Cap::XPATH_INJECTION, ]; for cap in caps { for p in payloads_for(cap) { @@ -515,6 +521,7 @@ mod tests { Cap::SSTI, Cap::XXE, Cap::LDAP_INJECTION, + Cap::XPATH_INJECTION, ]; for cap in caps { for p in payloads_for(cap).iter().filter(|p| p.is_benign) { @@ -726,6 +733,48 @@ mod tests { } } + #[test] + fn xpath_has_per_lang_slices_for_phase_07() { + // Phase 07 (Track J.5) acceptance: XPATH_INJECTION registers + // payloads in Java / Python / PHP / JavaScript and the + // lang-aware lookup never returns empty for any of them. + for lang in [Lang::Java, Lang::Python, Lang::Php, Lang::JavaScript] { + assert!( + !payloads_for_lang(Cap::XPATH_INJECTION, lang).is_empty(), + "XPATH_INJECTION must have at least one payload for {lang:?}", + ); + } + // Rust / C / Cpp / Ruby / Go / TS not yet covered. 
+ for lang in [ + Lang::Rust, + Lang::C, + Lang::Cpp, + Lang::Ruby, + Lang::Go, + Lang::TypeScript, + ] { + assert!( + payloads_for_lang(Cap::XPATH_INJECTION, lang).is_empty(), + "XPATH_INJECTION has unexpected payloads for {lang:?}", + ); + } + } + + #[test] + fn xpath_payloads_pair_benign_controls_per_lang() { + for lang in [Lang::Java, Lang::Python, Lang::Php, Lang::JavaScript] { + let slice = payloads_for_lang(Cap::XPATH_INJECTION, lang); + let vuln = slice + .iter() + .find(|p| !p.is_benign) + .expect("each lang must have an XPath vuln payload"); + let resolved = + super::resolve_benign_control_lang(vuln, Cap::XPATH_INJECTION, lang) + .expect("lang-aware benign control must resolve"); + assert!(resolved.is_benign); + } + } + + #[test] fn deserialize_payloads_pair_benign_controls_per_lang() { // The lang-aware resolver must find the paired benign control diff --git a/src/dynamic/corpus/xpath/java.rs b/src/dynamic/corpus/xpath/java.rs new file mode 100644 index 00000000..1be6faf8 --- /dev/null +++ b/src/dynamic/corpus/xpath/java.rs @@ -0,0 +1,53 @@ +//! Java `Cap::XPATH_INJECTION` payloads — `javax.xml.xpath.XPath.evaluate` +//! expression injection. +//! +//! Vuln payload: an XPath fragment whose `' or '1'='1` tail breaks +//! out of the host template's `[@name='…']` predicate and rewraps +//! the selector as `//user[@name='alice' or '1'='1']`, matching every +//! node the staged document carries. The harness's instrumented +//! `XPath.evaluate` records +//! `ProbeKind::Xpath { nodes_returned: 3 }`. +//! +//! Benign control: the same intended username quoted via the +//! harness's XPath-escape helper, leaving the expression pinned to a +//! single node — `nodes_returned: 1`, oracle clear. 
+ +use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef}; +use crate::dynamic::oracle::ProbePredicate; + +pub const PAYLOADS: &[CuratedPayload] = &[ + CuratedPayload { + bytes: b"alice' or '1'='1", + label: "xpath-java-expression-wildcard", + oracle: Oracle::SinkProbe { + predicates: &[ProbePredicate::QueryResultCountGreaterThan { n: 1 }], + }, + is_benign: false, + provenance: PayloadProvenance::Curated, + since_corpus_version: 11, + deprecated_at_corpus_version: None, + fixture_paths: &["tests/dynamic_fixtures/xpath_injection/java/Vuln.java"], + oob_nonce_slot: false, + probe_predicates: &[ProbePredicate::QueryResultCountGreaterThan { n: 1 }], + benign_control: Some(PayloadRef { + label: "xpath-java-benign", + }), + no_benign_control_rationale: None, + }, + CuratedPayload { + bytes: b"alice", + label: "xpath-java-benign", + oracle: Oracle::SinkProbe { + predicates: &[ProbePredicate::QueryResultCountGreaterThan { n: 1 }], + }, + is_benign: true, + provenance: PayloadProvenance::Curated, + since_corpus_version: 11, + deprecated_at_corpus_version: None, + fixture_paths: &["tests/dynamic_fixtures/xpath_injection/java/Benign.java"], + oob_nonce_slot: false, + probe_predicates: &[], + benign_control: None, + no_benign_control_rationale: None, + }, +]; diff --git a/src/dynamic/corpus/xpath/js.rs b/src/dynamic/corpus/xpath/js.rs new file mode 100644 index 00000000..74633a38 --- /dev/null +++ b/src/dynamic/corpus/xpath/js.rs @@ -0,0 +1,53 @@ +//! JavaScript `Cap::XPATH_INJECTION` payloads — `xpath` npm package's +//! `select` expression injection. +//! +//! Vuln payload: an XPath fragment whose `' or '1'='1` tail breaks +//! out of the host template's `[@name='…']` predicate; the +//! synthesized expression becomes `//user[@name='alice' or '1'='1']` and +//! matches every node in the staged document. The harness's +//! instrumented `xpath.select` records +//! `ProbeKind::Xpath { nodes_returned: 3 }`. +//! +//! 
Benign control: the same intended username quoted via the +//! harness's XPath-escape helper, leaving the expression pinned to a +//! single node — `nodes_returned: 1`, oracle clear. + +use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef}; +use crate::dynamic::oracle::ProbePredicate; + +pub const PAYLOADS: &[CuratedPayload] = &[ + CuratedPayload { + bytes: b"alice' or '1'='1", + label: "xpath-js-expression-wildcard", + oracle: Oracle::SinkProbe { + predicates: &[ProbePredicate::QueryResultCountGreaterThan { n: 1 }], + }, + is_benign: false, + provenance: PayloadProvenance::Curated, + since_corpus_version: 11, + deprecated_at_corpus_version: None, + fixture_paths: &["tests/dynamic_fixtures/xpath_injection/js/vuln.js"], + oob_nonce_slot: false, + probe_predicates: &[ProbePredicate::QueryResultCountGreaterThan { n: 1 }], + benign_control: Some(PayloadRef { + label: "xpath-js-benign", + }), + no_benign_control_rationale: None, + }, + CuratedPayload { + bytes: b"alice", + label: "xpath-js-benign", + oracle: Oracle::SinkProbe { + predicates: &[ProbePredicate::QueryResultCountGreaterThan { n: 1 }], + }, + is_benign: true, + provenance: PayloadProvenance::Curated, + since_corpus_version: 11, + deprecated_at_corpus_version: None, + fixture_paths: &["tests/dynamic_fixtures/xpath_injection/js/benign.js"], + oob_nonce_slot: false, + probe_predicates: &[], + benign_control: None, + no_benign_control_rationale: None, + }, +]; diff --git a/src/dynamic/corpus/xpath/mod.rs b/src/dynamic/corpus/xpath/mod.rs new file mode 100644 index 00000000..e8a4b398 --- /dev/null +++ b/src/dynamic/corpus/xpath/mod.rs @@ -0,0 +1,29 @@ +//! XPath expression injection (`Cap::XPATH_INJECTION`) per-language +//! payload slices. +//! +//! Phase 07 (Track J.5) carves XPath injection across the four +//! most-common XPath evaluator stacks: Java +//! (`javax.xml.xpath.XPath.evaluate`), Python (`lxml.etree.xpath`), +//! PHP (`DOMXPath::query`), and Node.js (`xpath` npm package's +//! 
`select`). Every vuln payload appends the canonical +//! `' or '1'='1` quote-escape break — once the host code substitutes +//! the attacker bytes into its XPath template the synthesized +//! expression selects every node the in-workdir +//! [`crate::dynamic::stubs::xpath_document`] XML carries (three +//! users). The paired benign control quotes the same bytes through +//! the per-language escape helper, leaving the expression pinned to +//! the originally-intended single node. +//! +//! The oracle's +//! [`crate::dynamic::oracle::ProbePredicate::QueryResultCountGreaterThan`] +//! checks the per-payload `ProbeKind::Xpath.nodes_returned` against +//! `n = 1` — vuln passes (3 nodes), benign clears (1 node), +//! fulfilling the §4.1 differential rule. The same predicate also +//! satisfies LDAP probes (`ProbeKind::Ldap.entries_returned`); the +//! Phase 06 → Phase 07 rename from `LdapResultCountGreaterThan` to +//! `QueryResultCountGreaterThan` captures the shared shape. + +pub mod java; +pub mod js; +pub mod php; +pub mod python; diff --git a/src/dynamic/corpus/xpath/php.rs b/src/dynamic/corpus/xpath/php.rs new file mode 100644 index 00000000..203f1703 --- /dev/null +++ b/src/dynamic/corpus/xpath/php.rs @@ -0,0 +1,53 @@ +//! PHP `Cap::XPATH_INJECTION` payloads — `DOMXPath::query` expression +//! injection. +//! +//! Vuln payload: an XPath fragment whose `' or '1'='1` tail breaks +//! out of the host template's `[@name='…']` predicate; the +//! synthesized expression becomes `//user[@name='alice' or '1'='1']` and +//! matches every node in the staged document. The harness's +//! instrumented `DOMXPath::query` records +//! `ProbeKind::Xpath { nodes_returned: 3 }`. +//! +//! Benign control: the same intended username quoted via the +//! harness's XPath-escape helper, leaving the expression pinned to a +//! single node — `nodes_returned: 1`, oracle clear. 
+ +use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef}; +use crate::dynamic::oracle::ProbePredicate; + +pub const PAYLOADS: &[CuratedPayload] = &[ + CuratedPayload { + bytes: b"alice' or '1'='1", + label: "xpath-php-expression-wildcard", + oracle: Oracle::SinkProbe { + predicates: &[ProbePredicate::QueryResultCountGreaterThan { n: 1 }], + }, + is_benign: false, + provenance: PayloadProvenance::Curated, + since_corpus_version: 11, + deprecated_at_corpus_version: None, + fixture_paths: &["tests/dynamic_fixtures/xpath_injection/php/vuln.php"], + oob_nonce_slot: false, + probe_predicates: &[ProbePredicate::QueryResultCountGreaterThan { n: 1 }], + benign_control: Some(PayloadRef { + label: "xpath-php-benign", + }), + no_benign_control_rationale: None, + }, + CuratedPayload { + bytes: b"alice", + label: "xpath-php-benign", + oracle: Oracle::SinkProbe { + predicates: &[ProbePredicate::QueryResultCountGreaterThan { n: 1 }], + }, + is_benign: true, + provenance: PayloadProvenance::Curated, + since_corpus_version: 11, + deprecated_at_corpus_version: None, + fixture_paths: &["tests/dynamic_fixtures/xpath_injection/php/benign.php"], + oob_nonce_slot: false, + probe_predicates: &[], + benign_control: None, + no_benign_control_rationale: None, + }, +]; diff --git a/src/dynamic/corpus/xpath/python.rs b/src/dynamic/corpus/xpath/python.rs new file mode 100644 index 00000000..acfadf08 --- /dev/null +++ b/src/dynamic/corpus/xpath/python.rs @@ -0,0 +1,53 @@ +//! Python `Cap::XPATH_INJECTION` payloads — `lxml.etree.xpath` +//! expression injection. +//! +//! Vuln payload: an XPath fragment whose `' or '1'='1` tail breaks +//! out of the host template's `[@name='…']` predicate; the +//! synthesized expression becomes `//user[@name='alice' or '1'='1']` and +//! matches every node in the staged document. The harness's +//! instrumented `xpath` evaluator records +//! `ProbeKind::Xpath { nodes_returned: 3 }`. +//! +//! 
Benign control: the same intended username quoted via the +//! harness's XPath-escape helper, leaving the expression pinned to a +//! single node — `nodes_returned: 1`, oracle clear. + +use super::super::{CuratedPayload, Oracle, PayloadProvenance, PayloadRef}; +use crate::dynamic::oracle::ProbePredicate; + +pub const PAYLOADS: &[CuratedPayload] = &[ + CuratedPayload { + bytes: b"alice' or '1'='1", + label: "xpath-python-expression-wildcard", + oracle: Oracle::SinkProbe { + predicates: &[ProbePredicate::QueryResultCountGreaterThan { n: 1 }], + }, + is_benign: false, + provenance: PayloadProvenance::Curated, + since_corpus_version: 11, + deprecated_at_corpus_version: None, + fixture_paths: &["tests/dynamic_fixtures/xpath_injection/python/vuln.py"], + oob_nonce_slot: false, + probe_predicates: &[ProbePredicate::QueryResultCountGreaterThan { n: 1 }], + benign_control: Some(PayloadRef { + label: "xpath-python-benign", + }), + no_benign_control_rationale: None, + }, + CuratedPayload { + bytes: b"alice", + label: "xpath-python-benign", + oracle: Oracle::SinkProbe { + predicates: &[ProbePredicate::QueryResultCountGreaterThan { n: 1 }], + }, + is_benign: true, + provenance: PayloadProvenance::Curated, + since_corpus_version: 11, + deprecated_at_corpus_version: None, + fixture_paths: &["tests/dynamic_fixtures/xpath_injection/python/benign.py"], + oob_nonce_slot: false, + probe_predicates: &[], + benign_control: None, + no_benign_control_rationale: None, + }, +]; diff --git a/src/dynamic/framework/adapters/mod.rs b/src/dynamic/framework/adapters/mod.rs index dd20cdda..292a64ed 100644 --- a/src/dynamic/framework/adapters/mod.rs +++ b/src/dynamic/framework/adapters/mod.rs @@ -23,6 +23,10 @@ pub mod python_jinja2; pub mod python_pickle; pub mod ruby_erb; pub mod ruby_marshal; +pub mod xpath_java; +pub mod xpath_js; +pub mod xpath_php; +pub mod xpath_python; pub mod xxe_go; pub mod xxe_java; pub mod xxe_php; @@ -41,6 +45,10 @@ pub use python_jinja2::PythonJinja2Adapter; pub use 
python_pickle::PythonPickleAdapter; pub use ruby_erb::RubyErbAdapter; pub use ruby_marshal::RubyMarshalAdapter; +pub use xpath_java::XpathJavaAdapter; +pub use xpath_js::XpathJsAdapter; +pub use xpath_php::XpathPhpAdapter; +pub use xpath_python::XpathPythonAdapter; pub use xxe_go::XxeGoAdapter; pub use xxe_java::XxeJavaAdapter; pub use xxe_php::XxePhpAdapter; diff --git a/src/dynamic/framework/adapters/xpath_java.rs b/src/dynamic/framework/adapters/xpath_java.rs new file mode 100644 index 00000000..5e2e24c4 --- /dev/null +++ b/src/dynamic/framework/adapters/xpath_java.rs @@ -0,0 +1,127 @@ +//! Java [`super::super::FrameworkAdapter`] matching XPath expression- +//! injection sink constructions. +//! +//! Phase 07 (Track J.5). Fires when a callee's last `.` segment is +//! `evaluate` / `compile` / `selectNodes` / `selectSingleNode` and the +//! file mentions `javax.xml.xpath`, `XPathFactory`, `XPathExpression`, +//! `XPathConstants` or `net.sf.saxon.s9api`. Fallback: a file with one +//! of those markers plus a literal `.evaluate(` anywhere also fires, +//! regardless of the callees recorded for the function being inspected. 
+ +use crate::dynamic::framework::{FrameworkAdapter, FrameworkBinding}; +use crate::evidence::EntryKind; +use crate::summary::FuncSummary; +use crate::symbol::Lang; + +pub struct XpathJavaAdapter; + +const ADAPTER_NAME: &str = "xpath-java"; + +fn callee_is_xpath_eval(name: &str) -> bool { + let last = name.rsplit_once('.').map(|(_, s)| s).unwrap_or(name); + matches!(last, "evaluate" | "compile" | "selectNodes" | "selectSingleNode") +} + +fn source_imports_xpath(file_bytes: &[u8]) -> bool { + const NEEDLES: &[&[u8]] = &[ + b"javax.xml.xpath", + b"XPathFactory", + b"XPathExpression", + b"XPathConstants", + b"net.sf.saxon.s9api", + ]; + NEEDLES + .iter() + .any(|n| file_bytes.windows(n.len()).any(|w| w == *n)) +} + +impl FrameworkAdapter for XpathJavaAdapter { + fn name(&self) -> &'static str { + ADAPTER_NAME + } + + fn lang(&self) -> Lang { + Lang::Java + } + + fn detect( + &self, + summary: &FuncSummary, + _ast: tree_sitter::Node<'_>, + file_bytes: &[u8], + ) -> Option<FrameworkBinding> { + let matches_call = super::any_callee_matches(summary, callee_is_xpath_eval); + let matches_source = source_imports_xpath(file_bytes); + if matches_call && matches_source { + return Some(FrameworkBinding { + adapter: ADAPTER_NAME.to_owned(), + kind: EntryKind::Function, + route: None, + request_params: Vec::new(), + response_writer: None, + middleware: Vec::new(), + }); + } + if matches_source + && file_bytes + .windows(b".evaluate(".len()) + .any(|w| w == b".evaluate(") + { + return Some(FrameworkBinding { + adapter: ADAPTER_NAME.to_owned(), + kind: EntryKind::Function, + route: None, + request_params: Vec::new(), + response_writer: None, + middleware: Vec::new(), + }); + } + None + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn parse_java(src: &[u8]) -> tree_sitter::Tree { + let mut parser = tree_sitter::Parser::new(); + let lang = tree_sitter::Language::from(tree_sitter_java::LANGUAGE); + parser.set_language(&lang).unwrap(); + parser.parse(src, None).unwrap() + } + + #[test] + fn 
fires_on_xpath_evaluate() { + let src: &[u8] = b"import javax.xml.xpath.XPathFactory;\n\ + public class V {\n public Object run(String name) throws Exception {\n\ + javax.xml.xpath.XPath xp = XPathFactory.newInstance().newXPath();\n\ + return xp.evaluate(\"//user[@name='\" + name + \"']\", null);\n\ + }\n}\n"; + let tree = parse_java(src); + let summary = FuncSummary { + name: "run".into(), + callees: vec![crate::summary::CalleeSite::bare("evaluate")], + ..Default::default() + }; + let binding = XpathJavaAdapter + .detect(&summary, tree.root_node(), src) + .expect("must fire on XPath.evaluate"); + assert_eq!(binding.adapter, ADAPTER_NAME); + assert_eq!(binding.kind, EntryKind::Function); + } + + #[test] + fn skips_plain_function() { + let src: &[u8] = + b"public class V { public static int add(int a, int b) { return a + b; } }\n"; + let tree = parse_java(src); + let summary = FuncSummary { + name: "add".into(), + ..Default::default() + }; + assert!(XpathJavaAdapter + .detect(&summary, tree.root_node(), src) + .is_none()); + } +} diff --git a/src/dynamic/framework/adapters/xpath_js.rs b/src/dynamic/framework/adapters/xpath_js.rs new file mode 100644 index 00000000..f83088f1 --- /dev/null +++ b/src/dynamic/framework/adapters/xpath_js.rs @@ -0,0 +1,112 @@ +//! JavaScript [`super::super::FrameworkAdapter`] matching XPath +//! expression-injection sink constructions. +//! +//! Phase 07 (Track J.5). Fires when a callee's last `.` segment is +//! `select` / `select1` / `evaluate` / `parse` (so any `*.parse` call +//! counts, not only XPath ones) and the surrounding source +//! imports / requires the `xpath` module or references +//! `xpath.select` / `xpath.evaluate` / `XPathResult` / `document.evaluate`. 
+ +use crate::dynamic::framework::{FrameworkAdapter, FrameworkBinding}; +use crate::evidence::EntryKind; +use crate::summary::FuncSummary; +use crate::symbol::Lang; + +pub struct XpathJsAdapter; + +const ADAPTER_NAME: &str = "xpath-js"; + +fn callee_is_xpath_eval(name: &str) -> bool { + let last = name.rsplit_once('.').map(|(_, s)| s).unwrap_or(name); + matches!(last, "select" | "select1" | "evaluate" | "parse") +} + +fn source_imports_xpath(file_bytes: &[u8]) -> bool { + const NEEDLES: &[&[u8]] = &[ + b"require('xpath')", + b"require(\"xpath\")", + b"from 'xpath'", + b"from \"xpath\"", + b"xpath.select", + b"xpath.evaluate", + b"XPathResult", + b"document.evaluate", + ]; + NEEDLES + .iter() + .any(|n| file_bytes.windows(n.len()).any(|w| w == *n)) +} + +impl FrameworkAdapter for XpathJsAdapter { + fn name(&self) -> &'static str { + ADAPTER_NAME + } + + fn lang(&self) -> Lang { + Lang::JavaScript + } + + fn detect( + &self, + summary: &FuncSummary, + _ast: tree_sitter::Node<'_>, + file_bytes: &[u8], + ) -> Option<FrameworkBinding> { + let matches_call = super::any_callee_matches(summary, callee_is_xpath_eval); + let matches_source = source_imports_xpath(file_bytes); + if matches_call && matches_source { + Some(FrameworkBinding { + adapter: ADAPTER_NAME.to_owned(), + kind: EntryKind::Function, + route: None, + request_params: Vec::new(), + response_writer: None, + middleware: Vec::new(), + }) + } else { + None + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn parse_js(src: &[u8]) -> tree_sitter::Tree { + let mut parser = tree_sitter::Parser::new(); + let lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE); + parser.set_language(&lang).unwrap(); + parser.parse(src, None).unwrap() + } + + #[test] + fn fires_on_xpath_select() { + let src: &[u8] = b"const xpath = require('xpath');\n\ + function run(name) {\n\ + return xpath.select(\"//user[@name='\" + name + \"']\", doc);\n\ + }\nmodule.exports = { run };\n"; + let tree = parse_js(src); + let summary = 
FuncSummary { + name: "run".into(), + callees: vec![crate::summary::CalleeSite::bare("select")], + ..Default::default() + }; + assert!(XpathJsAdapter + .detect(&summary, tree.root_node(), src) + .is_some()); + } + + #[test] + fn skips_plain_function() { + let src: &[u8] = b"function add(a, b) { return a + b; }\nmodule.exports = { add };\n"; + let tree = parse_js(src); + let summary = FuncSummary { + name: "add".into(), + ..Default::default() + }; + assert!(XpathJsAdapter + .detect(&summary, tree.root_node(), src) + .is_none()); + } +} diff --git a/src/dynamic/framework/adapters/xpath_php.rs b/src/dynamic/framework/adapters/xpath_php.rs new file mode 100644 index 00000000..0a99ae3e --- /dev/null +++ b/src/dynamic/framework/adapters/xpath_php.rs @@ -0,0 +1,111 @@ +//! PHP [`super::super::FrameworkAdapter`] matching XPath expression- +//! injection sink constructions. +//! +//! Phase 07 (Track J.5). Fires when a callee's last `::` / `.` segment +//! is `query` / `evaluate` / `xpath` and the source mentions `DOMXPath`, +//! `DOMDocument`, `SimpleXMLElement`, `simplexml_load_string` or `->xpath(`. 
+ +use crate::dynamic::framework::{FrameworkAdapter, FrameworkBinding}; +use crate::evidence::EntryKind; +use crate::summary::FuncSummary; +use crate::symbol::Lang; + +pub struct XpathPhpAdapter; + +const ADAPTER_NAME: &str = "xpath-php"; + +fn callee_is_xpath_eval(name: &str) -> bool { + let last = name.rsplit_once("::").map(|(_, s)| s).unwrap_or(name); + let last = last.rsplit_once('.').map(|(_, s)| s).unwrap_or(last); + matches!(last, "query" | "evaluate" | "xpath") +} + +fn source_uses_domxpath(file_bytes: &[u8]) -> bool { + const NEEDLES: &[&[u8]] = &[ + b"DOMXPath", + b"DOMDocument", + b"SimpleXMLElement", + b"simplexml_load_string", + b"->xpath(", + ]; + NEEDLES + .iter() + .any(|n| file_bytes.windows(n.len()).any(|w| w == *n)) +} + +impl FrameworkAdapter for XpathPhpAdapter { + fn name(&self) -> &'static str { + ADAPTER_NAME + } + + fn lang(&self) -> Lang { + Lang::Php + } + + fn detect( + &self, + summary: &FuncSummary, + _ast: tree_sitter::Node<'_>, + file_bytes: &[u8], + ) -> Option<FrameworkBinding> { + let matches_call = super::any_callee_matches(summary, callee_is_xpath_eval); + let matches_source = source_uses_domxpath(file_bytes); + if matches_call && matches_source { + Some(FrameworkBinding { + adapter: ADAPTER_NAME.to_owned(), + kind: EntryKind::Function, + route: None, + request_params: Vec::new(), + response_writer: None, + middleware: Vec::new(), + }) + } else { + None + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn parse_php(src: &[u8]) -> tree_sitter::Tree { + let mut parser = tree_sitter::Parser::new(); + let lang = tree_sitter::Language::from(tree_sitter_php::LANGUAGE_PHP); + parser.set_language(&lang).unwrap(); + parser.parse(src, None).unwrap() + } + + #[test] + fn fires_on_domxpath_query() { + let src: &[u8] = b"load('xpath_corpus.xml');\n\ + $xp = new DOMXPath($doc);\n\ + return $xp->query(\"//user[@name='\" . $name . 
\"']\");\n\ + }\n"; + let tree = parse_php(src); + let summary = FuncSummary { + name: "run".into(), + callees: vec![crate::summary::CalleeSite::bare("query")], + ..Default::default() + }; + assert!(XpathPhpAdapter + .detect(&summary, tree.root_node(), src) + .is_some()); + } + + #[test] + fn skips_plain_function() { + let src: &[u8] = b" bool { + let last = name.rsplit_once('.').map(|(_, s)| s).unwrap_or(name); + matches!(last, "xpath" | "evaluate" | "find" | "findall" | "iterfind") +} + +fn source_imports_lxml(file_bytes: &[u8]) -> bool { + const NEEDLES: &[&[u8]] = &[ + b"from lxml", + b"import lxml", + b"lxml.etree", + b"etree.XPath", + b"etree.ElementTree", + b"xml.etree.ElementTree", + b"ElementTree.fromstring", + ]; + NEEDLES + .iter() + .any(|n| file_bytes.windows(n.len()).any(|w| w == *n)) +} + +impl FrameworkAdapter for XpathPythonAdapter { + fn name(&self) -> &'static str { + ADAPTER_NAME + } + + fn lang(&self) -> Lang { + Lang::Python + } + + fn detect( + &self, + summary: &FuncSummary, + _ast: tree_sitter::Node<'_>, + file_bytes: &[u8], + ) -> Option { + let matches_call = super::any_callee_matches(summary, callee_is_xpath_eval); + let matches_source = source_imports_lxml(file_bytes); + if matches_call && matches_source { + Some(FrameworkBinding { + adapter: ADAPTER_NAME.to_owned(), + kind: EntryKind::Function, + route: None, + request_params: Vec::new(), + response_writer: None, + middleware: Vec::new(), + }) + } else { + None + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn parse_python(src: &[u8]) -> tree_sitter::Tree { + let mut parser = tree_sitter::Parser::new(); + let lang = tree_sitter::Language::from(tree_sitter_python::LANGUAGE); + parser.set_language(&lang).unwrap(); + parser.parse(src, None).unwrap() + } + + #[test] + fn fires_on_lxml_xpath() { + let src: &[u8] = b"from lxml import etree\n\ + def run(name):\n\ + tree = etree.fromstring(open('xpath_corpus.xml').read())\n\ + return tree.xpath(\"//user[@name='\" + name + 
\"']\")\n"; + let tree = parse_python(src); + let summary = FuncSummary { + name: "run".into(), + callees: vec![crate::summary::CalleeSite::bare("xpath")], + ..Default::default() + }; + assert!(XpathPythonAdapter + .detect(&summary, tree.root_node(), src) + .is_some()); + } + + #[test] + fn skips_plain_function() { + let src: &[u8] = b"def add(a, b):\n return a + b\n"; + let tree = parse_python(src); + let summary = FuncSummary { + name: "add".into(), + ..Default::default() + }; + assert!(XpathPythonAdapter + .detect(&summary, tree.root_node(), src) + .is_none()); + } +} diff --git a/src/dynamic/framework/mod.rs b/src/dynamic/framework/mod.rs index 5dff71a1..354e5803 100644 --- a/src/dynamic/framework/mod.rs +++ b/src/dynamic/framework/mod.rs @@ -214,20 +214,21 @@ mod tests { } #[test] - fn registry_baseline_after_phase_06() { - // Phase 06 (Track J.4) adds the LDAP-sink adapter for Java / - // Python / PHP, layered on top of the Phase 03 deserialize + - // Phase 04 SSTI + Phase 05 XXE adapters. Ruby still carries - // exactly the 03+04+05 trio (no Ruby LDAP adapter this - // phase); Go still has only the XXE adapter; JavaScript still - // has only the Handlebars adapter; Rust / C / Cpp / - // TypeScript still carry the Phase-01 empty baseline. + fn registry_baseline_after_phase_07() { + // Phase 07 (Track J.5) adds the XPath-sink adapter for Java / + // Python / PHP / JavaScript, layered on top of the Phase 03 + // deserialize + Phase 04 SSTI + Phase 05 XXE + Phase 06 LDAP + // adapters. Java / Python / PHP each grow from 4 → 5; the + // JavaScript slice grows from 1 (Handlebars only) → 2. Ruby + // still carries the 03+04+05 trio (no Ruby LDAP adapter); Go + // still has only the XXE adapter; Rust / C / Cpp / TypeScript + // still carry the Phase-01 empty baseline. 
for lang in [Lang::Java, Lang::Python, Lang::Php] { let registered = registry::adapters_for(lang); assert_eq!( registered.len(), - 4, - "{:?} must have the J.1 deserialize + J.2 ssti + J.3 xxe + J.4 ldap adapters", + 5, + "{:?} must have the J.1 deserialize + J.2 ssti + J.3 xxe + J.4 ldap + J.5 xpath adapters", lang, ); for adapter in registered { @@ -246,10 +247,12 @@ mod tests { let js_registered = registry::adapters_for(Lang::JavaScript); assert_eq!( js_registered.len(), - 1, - "JavaScript must have exactly the J.2 Handlebars adapter", + 2, + "JavaScript must have the J.2 Handlebars + J.5 xpath-js adapters", ); - assert_eq!(js_registered[0].lang(), Lang::JavaScript); + for adapter in js_registered { + assert_eq!(adapter.lang(), Lang::JavaScript); + } let go_registered = registry::adapters_for(Lang::Go); assert_eq!( go_registered.len(), diff --git a/src/dynamic/framework/registry.rs b/src/dynamic/framework/registry.rs index 23f6e67f..ce951e6d 100644 --- a/src/dynamic/framework/registry.rs +++ b/src/dynamic/framework/registry.rs @@ -51,6 +51,7 @@ static JAVA: &[&dyn FrameworkAdapter] = &[ &super::adapters::JavaDeserializeAdapter, &super::adapters::JavaThymeleafAdapter, &super::adapters::LdapSpringAdapter, + &super::adapters::XpathJavaAdapter, &super::adapters::XxeJavaAdapter, ]; static GO: &[&dyn FrameworkAdapter] = &[&super::adapters::XxeGoAdapter]; @@ -58,12 +59,14 @@ static PHP: &[&dyn FrameworkAdapter] = &[ &super::adapters::LdapPhpAdapter, &super::adapters::PhpTwigAdapter, &super::adapters::PhpUnserializeAdapter, + &super::adapters::XpathPhpAdapter, &super::adapters::XxePhpAdapter, ]; static PYTHON: &[&dyn FrameworkAdapter] = &[ &super::adapters::LdapPythonAdapter, &super::adapters::PythonJinja2Adapter, &super::adapters::PythonPickleAdapter, + &super::adapters::XpathPythonAdapter, &super::adapters::XxePythonAdapter, ]; static RUBY: &[&dyn FrameworkAdapter] = &[ @@ -72,4 +75,7 @@ static RUBY: &[&dyn FrameworkAdapter] = &[ &super::adapters::XxeRubyAdapter, ]; 
static TYPESCRIPT: &[&dyn FrameworkAdapter] = &[]; -static JAVASCRIPT: &[&dyn FrameworkAdapter] = &[&super::adapters::JsHandlebarsAdapter]; +static JAVASCRIPT: &[&dyn FrameworkAdapter] = &[ + &super::adapters::JsHandlebarsAdapter, + &super::adapters::XpathJsAdapter, +]; diff --git a/src/dynamic/lang/java.rs b/src/dynamic/lang/java.rs index d23eee43..4e12e6e0 100644 --- a/src/dynamic/lang/java.rs +++ b/src/dynamic/lang/java.rs @@ -564,6 +564,9 @@ pub fn emit(spec: &HarnessSpec) -> Result { if spec.expected_cap == crate::labels::Cap::LDAP_INJECTION { return Ok(emit_ldap_harness(spec)); } + if spec.expected_cap == crate::labels::Cap::XPATH_INJECTION { + return Ok(emit_xpath_harness(spec)); + } let entry_source = read_entry_source(&spec.entry_file); let shape = JavaShape::detect(spec, &entry_source); @@ -1080,6 +1083,132 @@ public class NyxHarness {{ } } +/// Phase 07 — Track J.5 XPath-injection harness for Java +/// (`javax.xml.xpath.XPath.evaluate`). +/// +/// Reads `NYX_PAYLOAD`, splices it into a `//user[@name='']` +/// expression, counts matching `` nodes against the canonical +/// staged document, and writes a `ProbeKind::Xpath { nodes_returned }` +/// probe whose `n` is the count returned. Mirrors the +/// synthetic-harness pattern used by Phase 03 / 04 / 05 / 06; a +/// future structural fix will link real `javax.xml.xpath` via the +/// staged document. +pub fn emit_xpath_harness(_spec: &HarnessSpec) -> HarnessSource { + let shim = probe_shim(); + let corpus_filename = crate::dynamic::stubs::xpath_document::XPATH_CORPUS_FILENAME; + let corpus_xml = crate::dynamic::stubs::xpath_document::XPATH_CORPUS_XML; + let source = format!( + r#"// Nyx dynamic harness — XPATH_INJECTION javax.xml.xpath.XPath.evaluate (Phase 07 / Track J.5). 
+import java.io.FileWriter; +import java.io.IOException; +import java.util.Arrays; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +public class NyxHarness {{ +{shim} + + static final String[] NYX_XPATH_USERS = new String[] {{ "alice", "bob", "carol" }}; + + static int nyxXpathSelect(String expr) {{ + String needle = "//user[@name="; + if (!expr.startsWith(needle)) return 0; + String rest = expr.substring(needle.length()); + if (!rest.endsWith("]")) return 0; + String predicate = rest.substring(0, rest.length() - 1); + + Matcher single = Pattern.compile("^'([^']*)'(.*)$").matcher(predicate); + if (single.find()) {{ + String literal = single.group(1); + String tail = single.group(2).trim(); + if (tail.isEmpty() || tail.equals("]")) {{ + int count = 0; + for (String u : NYX_XPATH_USERS) if (u.equals(literal)) count++; + return count; + }} + if (Pattern.compile("^or\\s+", Pattern.CASE_INSENSITIVE).matcher(tail).find()) {{ + return NYX_XPATH_USERS.length; + }} + }} + Matcher dbl = Pattern.compile("^\"([^\"]*)\"\\s*$").matcher(predicate); + if (dbl.find()) {{ + String literal = dbl.group(1); + int count = 0; + for (String u : NYX_XPATH_USERS) if (u.equals(literal)) count++; + return count; + }} + if (Pattern.compile("^concat\\(", Pattern.CASE_INSENSITIVE).matcher(predicate).find()) {{ + Matcher parts = Pattern.compile("'([^']*)'").matcher(predicate); + StringBuilder joined = new StringBuilder(); + while (parts.find()) {{ + String p = parts.group(1); + if (p.equals(",\"")) continue; + joined.append(p); + }} + String result = joined.toString().replace(",\"'\",", "'"); + int count = 0; + for (String u : NYX_XPATH_USERS) if (u.equals(result)) count++; + return count; + }} + return NYX_XPATH_USERS.length; + }} + + static void nyxXpathProbe(String expr, int nodesReturned) {{ + String p = System.getenv("NYX_PROBE_PATH"); + if (p == null || p.isEmpty()) return; + long now = System.nanoTime(); + String pid = 
System.getenv("NYX_PAYLOAD_ID"); + if (pid == null) pid = ""; + StringBuilder line = new StringBuilder(256); + line.append("{{\"sink_callee\":\"javax.xml.xpath.XPath.evaluate\",\"args\":[{{\"kind\":\"String\",\"value\":\""); + nyxJsonEscape(expr, line); + line.append("\"}}],"); + line.append("\"captured_at_ns\":").append(now).append(','); + line.append("\"payload_id\":\""); + nyxJsonEscape(pid, line); + line.append("\",\"kind\":{{\"kind\":\"Xpath\",\"nodes_returned\":").append(nodesReturned).append("}},"); + line.append("\"witness\":"); + line.append(nyxWitnessJson("javax.xml.xpath.XPath.evaluate", new String[]{{expr}})); + line.append("}}\n"); + try (FileWriter fw = new FileWriter(p, true)) {{ + fw.write(line.toString()); + }} catch (IOException e) {{ + // best-effort + }} + }} + + public static void main(String[] args) {{ + String payload = System.getenv("NYX_PAYLOAD"); + if (payload == null) payload = ""; + String expr = "//user[@name='" + payload + "']"; + int count = nyxXpathSelect(expr); + nyxXpathProbe(expr, count); + System.out.println("__NYX_SINK_HIT__"); + StringBuilder body = new StringBuilder(64); + body.append("{{\"expr\":\""); + nyxJsonEscape(expr, body); + body.append("\",\"nodes_returned\":").append(count).append("}}"); + System.out.println(body.toString()); + }} +}} +"# + ); + let extra_files = vec![(corpus_filename.to_owned(), corpus_xml.to_owned())]; + HarnessSource { + source, + filename: "NyxHarness.java".to_owned(), + command: vec![ + "java".to_owned(), + "-cp".to_owned(), + ".".to_owned(), + "NyxHarness".to_owned(), + ], + extra_files, + entry_subpath: None, + } +} + /// Public wrapper to detect the shape for a finalised `HarnessSpec`, /// reading the entry file from disk. Exposed so test helpers can pin a /// per-fixture shape without round-tripping through [`emit`]. 
diff --git a/src/dynamic/lang/js_shared.rs b/src/dynamic/lang/js_shared.rs index f2e95877..ab080c07 100644 --- a/src/dynamic/lang/js_shared.rs +++ b/src/dynamic/lang/js_shared.rs @@ -442,6 +442,13 @@ pub fn emit(spec: &HarnessSpec, is_typescript: bool) -> Result` nodes against the canonical +/// staged document, and writes a `ProbeKind::Xpath { nodes_returned }` +/// probe whose `n` is the count returned. Mirrors the synthetic- +/// harness pattern used by Phase 03 / 04 / 05 / 06. +pub fn emit_xpath_harness(_spec: &HarnessSpec) -> HarnessSource { + let shim = probe_shim(); + let corpus_filename = crate::dynamic::stubs::xpath_document::XPATH_CORPUS_FILENAME; + let corpus_xml = crate::dynamic::stubs::xpath_document::XPATH_CORPUS_XML; + let body = format!( + r#"// Nyx dynamic harness — XPATH_INJECTION xpath.select (Phase 07 / Track J.5). +{shim} + +const NYX_XPATH_USERS = ['alice', 'bob', 'carol']; + +function nyxXpathSelect(expr) {{ + const needle = "//user[@name="; + if (!expr.startsWith(needle)) return 0; + const rest = expr.slice(needle.length); + if (!rest.endsWith("]")) return 0; + const predicate = rest.slice(0, -1); + + let m = predicate.match(/^'([^']*)'(.*)$/); + if (m) {{ + const literal = m[1]; + const tail = m[2].trim(); + if (tail === '' || tail === ']') {{ + return NYX_XPATH_USERS.filter((u) => u === literal).length; + }} + if (/^or\s+/i.test(tail)) {{ + return NYX_XPATH_USERS.length; + }} + }} + m = predicate.match(/^"([^"]*)"\s*$/); + if (m) {{ + const literal = m[1]; + return NYX_XPATH_USERS.filter((u) => u === literal).length; + }} + if (/^concat\(/i.test(predicate)) {{ + const parts = [...predicate.matchAll(/'([^']*)'/g)].map((x) => x[1]); + let joined = parts.filter((p) => p !== ',"').join(''); + joined = joined.split(",\"'\",").join("'"); + return NYX_XPATH_USERS.filter((u) => u === joined).length; + }} + return NYX_XPATH_USERS.length; +}} + +function nyxXpathProbe(expr, nodesReturned) {{ + const p = process.env.NYX_PROBE_PATH; + if (!p) return; 
+ const rec = {{ + sink_callee: 'xpath.select', + args: [{{ kind: 'String', value: expr }}], + captured_at_ns: Number(process.hrtime.bigint()), + payload_id: process.env.NYX_PAYLOAD_ID || '', + kind: {{ kind: 'Xpath', nodes_returned: nodesReturned }}, + witness: __nyx_witness('xpath.select', [expr]), + }}; + try {{ + require('fs').appendFileSync(p, JSON.stringify(rec) + '\n'); + }} catch (e) {{ + // best-effort + }} +}} + +const payload = process.env.NYX_PAYLOAD || ''; +const expr = "//user[@name='" + payload + "']"; +const nodes = nyxXpathSelect(expr); +nyxXpathProbe(expr, nodes); +console.log('__NYX_SINK_HIT__'); +console.log(JSON.stringify({{ expr: expr, nodes_returned: nodes }})); +"# + ); + let extra_files = vec![(corpus_filename.to_owned(), corpus_xml.to_owned())]; + HarnessSource { + source: body, + filename: "harness.js".to_owned(), + command: vec!["node".to_owned(), "harness.js".to_owned()], + extra_files, + entry_subpath: None, + } +} + /// Phase 26 — Node chain-step harness (shared between JS + TS emitters). /// /// Splices the Node probe shim ([`probe_shim`]) in front of a minimal diff --git a/src/dynamic/lang/php.rs b/src/dynamic/lang/php.rs index 190debf6..c48aac79 100644 --- a/src/dynamic/lang/php.rs +++ b/src/dynamic/lang/php.rs @@ -428,6 +428,10 @@ pub fn emit(spec: &HarnessSpec) -> Result { if spec.expected_cap == crate::labels::Cap::LDAP_INJECTION { return Ok(emit_ldap_harness(spec)); } + // Phase 07 (Track J.5): XPATH_INJECTION-sink short-circuit. + if spec.expected_cap == crate::labels::Cap::XPATH_INJECTION { + return Ok(emit_xpath_harness(spec)); + } let entry_source = read_entry_source(&spec.entry_file); let shape = PhpShape::detect(spec, &entry_source); @@ -741,6 +745,130 @@ echo json_encode(['filter' => $filt, 'entries_returned' => $count]) . "\n"; } } +/// Phase 07 — Track J.5 XPath-injection harness for PHP +/// (`DOMXPath::query`). 
+/// +/// Reads `NYX_PAYLOAD`, splices it into a `//user[@name='']` +/// expression, evaluates the resulting expression against the +/// canonical XML staged in the workdir via +/// [`crate::dynamic::stubs::xpath_document`] (three `` +/// records), and writes a `ProbeKind::Xpath { nodes_returned }` +/// probe whose `n` is the count the evaluator returned. Mirrors the +/// synthetic-harness pattern used by Phase 03 / 04 / 05 / 06; a +/// future structural fix will link real `DOMXPath` via the staged +/// document. +pub fn emit_xpath_harness(_spec: &HarnessSpec) -> HarnessSource { + let shim = probe_shim(); + let corpus_filename = crate::dynamic::stubs::xpath_document::XPATH_CORPUS_FILENAME; + let corpus_xml = crate::dynamic::stubs::xpath_document::XPATH_CORPUS_XML; + let body = format!( + r#" nodes that satisfy the `[@name='…']` +// predicate the host code synthesised from the payload. Real +// `DOMXPath::query` is not invoked (the harness ignores `_spec` and +// inlines the evaluator); the differential rule still holds because +// the vuln payload's `' or '1'='1` tail rewraps the selector into a +// match-everything shape. +$NYX_XPATH_USERS = ['alice', 'bob', 'carol']; + +function _nyx_xpath_select($expr, array $users): int {{ + // Recognise the canonical `//user[@name='']` shape the + // synthetic harness emits. Anything else falls through to "no + // match" so a malformed expression cannot accidentally confirm. + $needle = "//user[@name="; + if (strncmp($expr, $needle, strlen($needle)) !== 0) {{ + return 0; + }} + $rest = substr($expr, strlen($needle)); + if (!str_ends_with($rest, ']')) {{ + return 0; + }} + $predicate = substr($rest, 0, strlen($rest) - 1); + if (preg_match("/^'([^']*)'(.*)\$/", $predicate, $m)) {{ + // `name='alice'` → exact-match against the literal + // `name='alice' or '1'='1'` → OR-tail breakouts; presence of + // ` or ` after the closing quote means the selector is now + // tautological → every user matches. 
+ $literal = $m[1]; + $tail = trim($m[2]); + if ($tail === '' || $tail === ']') {{ + $count = 0; + foreach ($users as $u) {{ + if ($u === $literal) $count++; + }} + return $count; + }} + if (preg_match("/^or\\s+/i", $tail)) {{ + return count($users); + }} + }} + if (preg_match('/^"([^"]*)"\\s*$/', $predicate, $m)) {{ + $literal = $m[1]; + $count = 0; + foreach ($users as $u) {{ + if ($u === $literal) $count++; + }} + return $count; + }} + if (preg_match("/^concat\\(/i", $predicate)) {{ + // `concat('a',\"'\",'b')` benign-escape path: extract the + // joined literal and match exactly once. + if (preg_match_all("/'([^']*)'/", $predicate, $parts)) {{ + $joined = ''; + foreach ($parts[1] as $p) {{ + if ($p === ',"') continue; + $joined .= $p; + }} + // Normalise embedded single-quote literals back to the + // raw character so a `concat`-quoted username collapses + // to the same literal the user typed. + $joined = str_replace(",\"'\",", "'", $joined); + $count = 0; + foreach ($users as $u) {{ + if ($u === $joined) $count++; + }} + return $count; + }} + }} + return count($users); +}} + +function _nyx_xpath_probe(string $expr, int $nodes_returned): void {{ + $p = getenv('NYX_PROBE_PATH'); + if ($p === false || $p === '') return; + $rec = [ + 'sink_callee' => 'DOMXPath::query', + 'args' => [['kind' => 'String', 'value' => $expr]], + 'captured_at_ns' => (int) hrtime(true), + 'payload_id' => (string) (getenv('NYX_PAYLOAD_ID') ?: ''), + 'kind' => ['kind' => 'Xpath', 'nodes_returned' => $nodes_returned], + 'witness' => __nyx_witness('DOMXPath::query', [$expr]), + ]; + @file_put_contents($p, json_encode($rec) . "\n", FILE_APPEND); +}} + +$payload = (string) (getenv('NYX_PAYLOAD') ?: ''); +$expr = "//user[@name='" . $payload . "']"; +$nodes = _nyx_xpath_select($expr, $NYX_XPATH_USERS); +_nyx_xpath_probe($expr, $nodes); +echo "__NYX_SINK_HIT__\n"; +echo json_encode(['expr' => $expr, 'nodes_returned' => $nodes]) . 
"\n"; +"# + ); + let extra_files = vec![(corpus_filename.to_owned(), corpus_xml.to_owned())]; + HarnessSource { + source: body, + filename: "harness.php".to_owned(), + command: vec!["php".to_owned(), "harness.php".to_owned()], + extra_files, + entry_subpath: None, + } +} + fn generate_source(spec: &HarnessSpec, shape: PhpShape) -> String { let entry_fn = &spec.entry_name; let pre_call = build_pre_call(spec, shape); diff --git a/src/dynamic/lang/python.rs b/src/dynamic/lang/python.rs index 0445b7ff..742f347f 100644 --- a/src/dynamic/lang/python.rs +++ b/src/dynamic/lang/python.rs @@ -624,11 +624,22 @@ pub fn emit(spec: &HarnessSpec) -> Result { // [`crate::dynamic::stubs::ldap_server`] RFC-4515 subset against // the same three provisioned users; the resulting count drives a // `ProbeKind::Ldap` probe consumed by the - // `LdapResultCountGreaterThan` oracle. + // `QueryResultCountGreaterThan` oracle. if spec.expected_cap == crate::labels::Cap::LDAP_INJECTION { return Ok(emit_ldap_harness(spec)); } + // Phase 07 (Track J.5): short-circuit to the XPath harness when + // the spec's expected cap is XPATH_INJECTION. The harness + // splices the payload into a `//user[@name='']` + // expression and counts matching nodes against the canonical + // staged document; the resulting count drives a + // `ProbeKind::Xpath` probe consumed by the + // `QueryResultCountGreaterThan` oracle. + if spec.expected_cap == crate::labels::Cap::XPATH_INJECTION { + return Ok(emit_xpath_harness(spec)); + } + let entry_source = read_entry_source(&spec.entry_file); let shape = PythonShape::detect(spec, &entry_source); let body = generate_for_shape(spec, shape); @@ -984,6 +995,96 @@ if __name__ == "__main__": } } +/// Phase 07 — Track J.5 XPath-injection harness for Python +/// (`lxml.etree.xpath`). 
+/// +/// Reads `NYX_PAYLOAD`, splices it into a `//user[@name='']` +/// expression, counts matching `` nodes against the canonical +/// staged document, and writes a `ProbeKind::Xpath { nodes_returned }` +/// probe whose `n` is the count returned. Mirrors the +/// synthetic-harness pattern used by Phase 03 / 04 / 05 / 06. +pub fn emit_xpath_harness(_spec: &HarnessSpec) -> HarnessSource { + let probe = probe_shim(); + let corpus_filename = crate::dynamic::stubs::xpath_document::XPATH_CORPUS_FILENAME; + let corpus_xml = crate::dynamic::stubs::xpath_document::XPATH_CORPUS_XML; + let body = format!( + r#"#!/usr/bin/env python3 +"""Nyx dynamic harness — XPATH_INJECTION lxml.etree.xpath (Phase 07 / Track J.5).""" +import json +import os +import re +import sys +import time + +{probe} + +_NYX_XPATH_USERS = ["alice", "bob", "carol"] + + +def _nyx_xpath_select(expr): + needle = "//user[@name=" + if not expr.startswith(needle): + return 0 + rest = expr[len(needle):] + if not rest.endswith("]"): + return 0 + predicate = rest[:-1] + m = re.match(r"^'([^']*)'(.*)$", predicate) + if m is not None: + literal = m.group(1) + tail = m.group(2).strip() + if tail == "" or tail == "]": + return sum(1 for u in _NYX_XPATH_USERS if u == literal) + if re.match(r"^or\s+", tail, re.IGNORECASE): + return len(_NYX_XPATH_USERS) + m = re.match(r'^"([^"]*)"\s*$', predicate) + if m is not None: + literal = m.group(1) + return sum(1 for u in _NYX_XPATH_USERS if u == literal) + if re.match(r"^concat\(", predicate, re.IGNORECASE): + parts = re.findall(r"'([^']*)'", predicate) + joined = "".join(p for p in parts if p not in (',"',)) + joined = joined.replace(",\"'\",", "'") + return sum(1 for u in _NYX_XPATH_USERS if u == joined) + return len(_NYX_XPATH_USERS) + + +def _nyx_xpath_probe(expr, nodes_returned): + rec = {{ + "sink_callee": "lxml.etree.xpath", + "args": [{{"kind": "String", "value": expr}}], + "captured_at_ns": time.time_ns(), + "payload_id": os.environ.get("NYX_PAYLOAD_ID", ""), + "kind": 
{{"kind": "Xpath", "nodes_returned": int(nodes_returned)}}, + "witness": __nyx_witness("lxml.etree.xpath", [expr]), + }} + __nyx_emit(rec) + + +def _nyx_run(): + payload = os.environ.get("NYX_PAYLOAD", "") + expr = "//user[@name='" + payload + "']" + nodes = _nyx_xpath_select(expr) + _nyx_xpath_probe(expr, nodes) + print("__NYX_SINK_HIT__", flush=True) + sys.stdout.write(json.dumps({{"expr": expr, "nodes_returned": nodes}}) + "\n") + sys.stdout.flush() + + +if __name__ == "__main__": + _nyx_run() +"# + ); + let extra_files = vec![(corpus_filename.to_owned(), corpus_xml.to_owned())]; + HarnessSource { + source: body, + filename: "harness.py".to_owned(), + command: vec!["python3".to_owned(), "harness.py".to_owned()], + extra_files, + entry_subpath: None, + } +} + /// Public wrapper to detect the shape for a finalised `HarnessSpec`, /// reading the entry file from disk. Exposed so test helpers can pin a /// per-fixture shape without round-tripping through [`emit`]. diff --git a/src/dynamic/oracle.rs b/src/dynamic/oracle.rs index a2af6c46..0036ffe0 100644 --- a/src/dynamic/oracle.rs +++ b/src/dynamic/oracle.rs @@ -239,26 +239,28 @@ pub enum ProbePredicate { /// the parser-refusal benign control still confirm. require_expanded: bool, }, - /// Phase 06 (Track J.4): LDAP-filter-injection count predicate. + /// Phase 06 (Track J.4) / Phase 07 (Track J.5): result-count + /// predicate shared by LDAP-filter and XPath-expression injection. /// - /// Fires when at least one drained probe carries - /// [`ProbeKind::Ldap`] with `entries_returned > n`. The malicious - /// payload (`*)(uid=*`) inflates the filter so the in-sandbox - /// [`crate::dynamic::stubs::ldap_server`] stub matches every - /// provisioned user (>1 entry). The benign control quotes the - /// filter with `EscapeDN` / `ldap.dn.escape_filter_chars` / - /// `ldap_escape` so the stub returns exactly one entry, leaving - /// the predicate clear. 
+ /// Fires when at least one drained probe carries a count-bearing + /// kind — [`ProbeKind::Ldap`] with `entries_returned > n` or + /// [`ProbeKind::Xpath`] with `nodes_returned > n`. The malicious + /// payload inflates the host expression (`*)(uid=*` for LDAP, `' + /// or '1'='1` for XPath) so the in-sandbox directory / staged XML + /// document matches every provisioned record (> 1 entry / node). + /// The benign control quotes the filter / expression so the sink + /// returns exactly one record, leaving the predicate clear. /// /// Cross-cutting in the same sense as /// [`Self::DeserializeGadgetInvoked`] / /// [`Self::XxeEntityExpanded`] — evaluated across every drained /// probe rather than against a single record. - LdapResultCountGreaterThan { - /// Threshold the captured `entries_returned` count must exceed - /// to fire the predicate. Typically `1`: the originally- - /// intended user is one entry, any additional entries prove - /// the filter expanded into an over-broad match. + QueryResultCountGreaterThan { + /// Threshold the captured `entries_returned` / + /// `nodes_returned` count must exceed to fire the predicate. + /// Typically `1`: the originally-intended record is one + /// match, any additional matches prove the filter / + /// expression expanded into an over-broad selector. n: u32, }, } @@ -387,18 +389,19 @@ pub fn oracle_fired_with_stubs( if !xxe_cross_ok { return false; } - // Phase 06 (Track J.4): LDAP filter-injection cross- - // cutting predicates. Each - // `LdapResultCountGreaterThan { n }` consults the captured + // Phase 06 (Track J.4) + Phase 07 (Track J.5): result- + // count cross-cutting predicates. Each + // `QueryResultCountGreaterThan { n }` consults the captured // probe channel for a [`ProbeKind::Ldap`] record whose - // `entries_returned` exceeds `n`. 
- let ldap_cross_ok = cross.iter().all(|p| match p { - ProbePredicate::LdapResultCountGreaterThan { n } => { - probes_satisfy_ldap_gt(probes, *n) + // `entries_returned` exceeds `n` *or* a [`ProbeKind::Xpath`] + // record whose `nodes_returned` exceeds `n`. + let query_count_cross_ok = cross.iter().all(|p| match p { + ProbePredicate::QueryResultCountGreaterThan { n } => { + probes_satisfy_count_gt(probes, *n) } _ => true, }); - if !ldap_cross_ok { + if !query_count_cross_ok { return false; } // Phase 04 (Track J.2): SSTI render-equality cross-cutting @@ -431,7 +434,8 @@ pub fn oracle_fired_with_stubs( ProbeKind::Normal | ProbeKind::Deserialize { .. } | ProbeKind::Xxe { .. } - | ProbeKind::Ldap { .. } => false, + | ProbeKind::Ldap { .. } + | ProbeKind::Xpath { .. } => false, }), Oracle::OutputContains(needle) => { let nb = needle.as_bytes(); @@ -457,7 +461,7 @@ fn is_cross_cutting(pred: &ProbePredicate) -> bool { | ProbePredicate::DeserializeGadgetInvoked { .. } | ProbePredicate::TemplateEvalEqual { .. } | ProbePredicate::XxeEntityExpanded { .. } - | ProbePredicate::LdapResultCountGreaterThan { .. } + | ProbePredicate::QueryResultCountGreaterThan { .. } ) } @@ -478,10 +482,10 @@ fn cross_cutting_satisfied(pred: &ProbePredicate, stub_events: &[StubEvent]) -> // rather than stub events; evaluated separately in // [`probes_satisfy_xxe`] below. ProbePredicate::XxeEntityExpanded { .. } => true, - // LdapResultCountGreaterThan is cross-cutting against the + // QueryResultCountGreaterThan is cross-cutting against the // *probe log* rather than stub events; evaluated separately - // in [`probes_satisfy_ldap_gt`] below. - ProbePredicate::LdapResultCountGreaterThan { .. } => true, + // in [`probes_satisfy_count_gt`] below. + ProbePredicate::QueryResultCountGreaterThan { .. 
} => true, _ => true, } } @@ -546,11 +550,14 @@ fn probes_satisfy_xxe(probes: &[SinkProbe], require_expanded: bool) -> bool { }) } -/// True when at least one drained probe is a [`ProbeKind::Ldap`] -/// record whose `entries_returned` exceeds `n`. -fn probes_satisfy_ldap_gt(probes: &[SinkProbe], n: u32) -> bool { +/// True when at least one drained probe carries a query-count kind +/// whose count exceeds `n`. Matches both [`ProbeKind::Ldap`] +/// (`entries_returned > n`) and [`ProbeKind::Xpath`] +/// (`nodes_returned > n`). +fn probes_satisfy_count_gt(probes: &[SinkProbe], n: u32) -> bool { probes.iter().any(|p| match p.kind { ProbeKind::Ldap { entries_returned } => entries_returned > n, + ProbeKind::Xpath { nodes_returned } => nodes_returned > n, _ => false, }) } @@ -588,7 +595,7 @@ fn probe_satisfies_one(probe: &SinkProbe, pred: &ProbePredicate) -> bool { | ProbePredicate::DeserializeGadgetInvoked { .. } | ProbePredicate::TemplateEvalEqual { .. } | ProbePredicate::XxeEntityExpanded { .. } - | ProbePredicate::LdapResultCountGreaterThan { .. } => true, + | ProbePredicate::QueryResultCountGreaterThan { .. } => true, } } @@ -613,7 +620,8 @@ pub fn probe_crash_signal(probe: &SinkProbe) -> Option { ProbeKind::Normal | ProbeKind::Deserialize { .. } | ProbeKind::Xxe { .. } - | ProbeKind::Ldap { .. } => None, + | ProbeKind::Ldap { .. } + | ProbeKind::Xpath { .. } => None, } } diff --git a/src/dynamic/probe.rs b/src/dynamic/probe.rs index 9370801d..5d321abc 100644 --- a/src/dynamic/probe.rs +++ b/src/dynamic/probe.rs @@ -163,7 +163,7 @@ pub enum ProbeKind { /// [`ldap_server`](crate::dynamic::stubs::ldap_server) stub. The /// shim records the number of directory entries the stub returned /// for the supplied filter — the differential oracle's - /// [`crate::dynamic::oracle::ProbePredicate::LdapResultCountGreaterThan`] + /// [`crate::dynamic::oracle::ProbePredicate::QueryResultCountGreaterThan`] /// fires when `entries_returned > n`, catching a malicious filter /// (e.g. 
`*)(uid=*`) that matched more than the originally-intended /// user. Benign filter-quoted controls produce @@ -173,6 +173,23 @@ pub enum ProbeKind { /// for the payload's filter. entries_returned: u32, }, + /// Phase 07 (Track J.5) XPath-sink observation. Stamped by the + /// per-language XPath harness shim when the instrumented evaluator + /// (`javax.xml.xpath.XPath.evaluate`, `lxml.etree.xpath`, + /// `DOMXPath::query`, the npm `xpath` package's `select`) issues + /// an XPath expression against the canonical XML document staged + /// in the workdir (`xpath_corpus.xml`). The shim records the + /// number of nodes the evaluator returned — the differential + /// oracle's + /// [`crate::dynamic::oracle::ProbePredicate::QueryResultCountGreaterThan`] + /// fires when `nodes_returned > n`, catching a malicious + /// expression (e.g. `' or '1'='1`) that selected every node. + /// Benign quoted controls produce `nodes_returned == 1`. + Xpath { + /// Count of XML nodes the staged document returned for the + /// payload's XPath expression. + nodes_returned: u32, + }, } impl Default for ProbeKind { diff --git a/src/dynamic/stubs/ldap_server.rs b/src/dynamic/stubs/ldap_server.rs index 4ade8ebe..3c70103a 100644 --- a/src/dynamic/stubs/ldap_server.rs +++ b/src/dynamic/stubs/ldap_server.rs @@ -31,7 +31,7 @@ //! //! Every served search appends a [`StubEvent`] keyed on `summary = //! "SEARCH "` and `detail["entries_returned"]` so the oracle's -//! [`crate::dynamic::oracle::ProbePredicate::LdapResultCountGreaterThan`] +//! [`crate::dynamic::oracle::ProbePredicate::QueryResultCountGreaterThan`] //! can satisfy without depending on a `ProbeKind::Ldap` write — the //! probe path is the primary signal, the stub-event log is the //! belt-and-braces side channel. @@ -56,7 +56,7 @@ pub const LDAP_ENDPOINT_ENV_VAR: &str = "NYX_LDAP_ENDPOINT"; /// Three canonical users the stub provisions on start. 
Tests pin the /// count so a corpus change cannot silently shift the differential -/// threshold below `LdapResultCountGreaterThan { n: 1 }`. +/// threshold below `QueryResultCountGreaterThan { n: 1 }`. pub const STUB_USERS: &[&str] = &["alice", "bob", "carol"]; /// LDAP-cap stub. Endpoint is `127.0.0.1:{port}`. diff --git a/src/dynamic/stubs/mod.rs b/src/dynamic/stubs/mod.rs index d82f3c25..f0e4f41c 100644 --- a/src/dynamic/stubs/mod.rs +++ b/src/dynamic/stubs/mod.rs @@ -56,6 +56,7 @@ pub mod http; pub mod ldap_server; pub mod redis; pub mod sql; +pub mod xpath_document; pub use filesystem::FilesystemStub; pub use http::HttpStub; diff --git a/src/dynamic/stubs/xpath_document.rs b/src/dynamic/stubs/xpath_document.rs new file mode 100644 index 00000000..9669de00 --- /dev/null +++ b/src/dynamic/stubs/xpath_document.rs @@ -0,0 +1,79 @@ +//! Canonical XML document staged in the harness workdir for +//! `Cap::XPATH_INJECTION` runs (Phase 07 — Track J.5). +//! +//! The brief lists this file under `src/dynamic/sandbox/stubs/`; the +//! existing stub layer landed at `src/dynamic/stubs/` (matching the +//! SQL / HTTP / Redis / Filesystem / LDAP stubs already shipped under +//! [`crate::dynamic::stubs`]). The path discrepancy is tracked in +//! `.pitboss/play/deferred.md` alongside the Phase 06 LDAP-server +//! stub relocation note. If Track P later moves the stub layer +//! under `sandbox/`, this module moves with the rest of the pack. +//! +//! Unlike the LDAP server stub (a real loopback service) this XPath +//! stub is purely a staged file: the per-language harness emitter +//! adds the [`XPATH_CORPUS_FILENAME`] entry to its `HarnessSource. +//! extra_files`, but the synthetic XPath evaluator inside the harness +//! matches an inlined copy of its usernames, not the file. No network +//! socket is bound; no [`super::StubKind`] variant is registered. +//! +//! # Document shape +//! +//! The staged XML carries three `` records (mirroring the
three LDAP server users) so the differential rule sees the same +//! 1-vs-3 split: the originally-intended username matches exactly +//! one node, the canonical `' or '1'='1` payload matches all three. + +/// Workdir-relative filename the per-language harnesses look up. +/// +/// Stable: a future change requires a coordinated update across every +/// XPath harness emitter (`src/dynamic/lang/{java,python,php,js_shared}.rs`). +pub const XPATH_CORPUS_FILENAME: &str = "xpath_corpus.xml"; + +/// Bytes of the canonical XML document staged in every XPath harness +/// workdir. Three records carry stable string attributes the +/// differential rule pins. +pub const XPATH_CORPUS_XML: &str = "\n\ +\n\ + \n\ + \n\ + \n\ +\n"; + +/// Number of `` nodes the staged document carries. Pinned so a +/// corpus change cannot silently shift the differential threshold +/// below `QueryResultCountGreaterThan { n: 1 }`. +pub const XPATH_CORPUS_NODE_COUNT: u32 = 3; + +/// `(filename, bytes)` pair the harness emitter folds into its +/// [`crate::dynamic::lang::HarnessSource::extra_files`]. +pub fn extra_file_pair() -> (String, String) { + (XPATH_CORPUS_FILENAME.to_owned(), XPATH_CORPUS_XML.to_owned()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn corpus_xml_carries_exactly_three_users() { + let n = XPATH_CORPUS_XML.matches(" node in the staged document. 
+import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.xpath.XPath; +import javax.xml.xpath.XPathConstants; +import javax.xml.xpath.XPathFactory; +import org.w3c.dom.Document; +import org.w3c.dom.NodeList; + +public class Vuln { + public static Object run(String name) throws Exception { + Document doc = DocumentBuilderFactory.newInstance() + .newDocumentBuilder() + .parse("xpath_corpus.xml"); + XPath xp = XPathFactory.newInstance().newXPath(); + String expr = "//user[@name='" + name + "']"; + return xp.evaluate(expr, doc, XPathConstants.NODESET); + } +} diff --git a/tests/dynamic_fixtures/xpath_injection/js/benign.js b/tests/dynamic_fixtures/xpath_injection/js/benign.js new file mode 100644 index 00000000..65d80c81 --- /dev/null +++ b/tests/dynamic_fixtures/xpath_injection/js/benign.js @@ -0,0 +1,28 @@ +// Phase 07 (Track J.5) — JavaScript XPATH_INJECTION benign control fixture. +// +// Same shape as `vuln.js` but routes the attacker-controlled `name` +// through a small XPath-string-literal escape helper before splicing +// it into the expression, so the selector stays pinned to a single +// node. 
+const fs = require('fs');
+const xpath = require('xpath');
+const { DOMParser } = require('@xmldom/xmldom');
+
+function escapeXpathString(s) {
+    if (s.indexOf("'") < 0) {
+        return "'" + s + "'";
+    }
+    if (s.indexOf('"') < 0) {
+        return '"' + s + '"';
+    }
+    return "concat('" + s.replace(/'/g, "',\"'\",'") + "')";
+}
+
+function run(name) {
+    const xml = fs.readFileSync('xpath_corpus.xml', 'utf8');
+    const doc = new DOMParser().parseFromString(xml, 'text/xml');
+    const expr = "//user[@name=" + escapeXpathString(name) + "]";
+    return xpath.select(expr, doc);
+}
+
+module.exports = { run };
diff --git a/tests/dynamic_fixtures/xpath_injection/js/vuln.js b/tests/dynamic_fixtures/xpath_injection/js/vuln.js
new file mode 100644
index 00000000..8ba86a25
--- /dev/null
+++ b/tests/dynamic_fixtures/xpath_injection/js/vuln.js
@@ -0,0 +1,19 @@
+// Phase 07 (Track J.5) — JavaScript XPATH_INJECTION vuln fixture.
+//
+// The function string-concatenates the attacker-controlled `name`
+// directly into an XPath expression evaluated by the npm `xpath`
+// package's `select`. A payload like `alice' or '1'='1` rewraps the
+// selector as `//user[@name='alice' or '1'='1']`, matching every
+// `<user>` node in the staged `xpath_corpus.xml`.
+const fs = require('fs');
+const xpath = require('xpath');
+const { DOMParser } = require('@xmldom/xmldom');
+
+function run(name) {
+    const xml = fs.readFileSync('xpath_corpus.xml', 'utf8');
+    const doc = new DOMParser().parseFromString(xml, 'text/xml');
+    const expr = "//user[@name='" + name + "']";
+    return xpath.select(expr, doc);
+}
+
+module.exports = { run };
diff --git a/tests/dynamic_fixtures/xpath_injection/php/benign.php b/tests/dynamic_fixtures/xpath_injection/php/benign.php
new file mode 100644
index 00000000..a1ae38e7
--- /dev/null
+++ b/tests/dynamic_fixtures/xpath_injection/php/benign.php
@@ -0,0 +1,24 @@
+<?php
+// Phase 07 (Track J.5) — PHP XPATH_INJECTION benign control fixture.
+//
+// Same shape as `vuln.php` but routes the attacker-controlled `$name`
+// through a small XPath-string-literal escape helper before splicing
+// it into the expression, so the selector stays pinned to a single
+// node. NOTE(review): header + helper reconstructed — confirm upstream.
+function nyx_xpath_escape($s) {
+    if (strpos($s, "'") === false) {
+        return "'" . $s . "'";
+    }
+    if (strpos($s, '"') === false) {
+        return '"' . $s . '"';
+    }
+    return "concat('" . str_replace("'", "',\"'\",'", $s) . "')";
+}
+
+function run($name) {
+    $doc = new DOMDocument();
+    $doc->load('xpath_corpus.xml');
+    $xp = new DOMXPath($doc);
+    $expr = "//user[@name=" . nyx_xpath_escape($name) . "]";
+    return $xp->query($expr);
+}
diff --git a/tests/dynamic_fixtures/xpath_injection/php/vuln.php b/tests/dynamic_fixtures/xpath_injection/php/vuln.php
new file mode 100644
index 00000000..51b0faa3
--- /dev/null
+++ b/tests/dynamic_fixtures/xpath_injection/php/vuln.php
@@ -0,0 +1,15 @@
+<?php
+// Phase 07 (Track J.5) — PHP XPATH_INJECTION vuln fixture.
+//
+// The function string-concatenates the attacker-controlled `$name`
+// directly into an XPath expression evaluated by `DOMXPath::query`.
+// A payload like `alice' or '1'='1` rewraps the selector as
+// `//user[@name='alice' or '1'='1']`, matching every `<user>` node in
+// the staged `xpath_corpus.xml`.
+function run($name) {
+    $doc = new DOMDocument();
+    $doc->load('xpath_corpus.xml');
+    $xp = new DOMXPath($doc);
+    $expr = "//user[@name='" . $name . "']";
+    return $xp->query($expr);
+}
diff --git a/tests/dynamic_fixtures/xpath_injection/python/benign.py b/tests/dynamic_fixtures/xpath_injection/python/benign.py
new file mode 100644
index 00000000..e8882fe1
--- /dev/null
+++ b/tests/dynamic_fixtures/xpath_injection/python/benign.py
@@ -0,0 +1,13 @@
+# Phase 07 (Track J.5) — Python XPATH_INJECTION benign control fixture.
+#
+# Same shape as `vuln.py` but parameterises the XPath via a variable
+# binding (the recommended `lxml` defence), so the directory keeps
+# returning at most one node.
+from lxml import etree
+
+
+def run(name):
+    with open("xpath_corpus.xml", "rb") as f:
+        tree = etree.fromstring(f.read())
+    finder = etree.XPath("//user[@name=$name]")
+    return finder(tree, name=name)
diff --git a/tests/dynamic_fixtures/xpath_injection/python/vuln.py b/tests/dynamic_fixtures/xpath_injection/python/vuln.py
new file mode 100644
index 00000000..d6ac87b6
--- /dev/null
+++ b/tests/dynamic_fixtures/xpath_injection/python/vuln.py
@@ -0,0 +1,15 @@
+# Phase 07 (Track J.5) — Python XPATH_INJECTION vuln fixture.
+#
+# The function string-concatenates the attacker-controlled `name`
+# directly into an XPath expression evaluated by `lxml.etree`'s
+# `xpath` method. A payload like `alice' or '1'='1` rewraps the
+# selector as `//user[@name='alice' or '1'='1']`, matching every
+# `<user>` node in the staged `xpath_corpus.xml`.
+from lxml import etree
+
+
+def run(name):
+    with open("xpath_corpus.xml", "rb") as f:
+        tree = etree.fromstring(f.read())
+    expr = "//user[@name='" + name + "']"
+    return tree.xpath(expr)
diff --git a/tests/ldap_corpus.rs b/tests/ldap_corpus.rs
index 0dfd53a7..67fef970 100644
--- a/tests/ldap_corpus.rs
+++ b/tests/ldap_corpus.rs
@@ -105,9 +105,9 @@ fn payload_oracle_carries_ldap_result_count_predicate() {
                 assert!(
                     predicates.iter().any(|p| matches!(
                         p,
-                        ProbePredicate::LdapResultCountGreaterThan { n: 1 }
+                        ProbePredicate::QueryResultCountGreaterThan { n: 1 }
                     )),
-                    "{lang:?} vuln payload missing LdapResultCountGreaterThan {{ n: 1 }}",
+                    "{lang:?} vuln payload missing QueryResultCountGreaterThan {{ n: 1 }}",
                 );
             }
             other => panic!("expected SinkProbe oracle for {lang:?}, got {other:?}"),
diff --git a/tests/xpath_corpus.rs b/tests/xpath_corpus.rs
new file mode 100644
index 00000000..242647ec
--- /dev/null
+++ b/tests/xpath_corpus.rs
@@ -0,0 +1,550 @@
+//! Phase 07 (Track J.5) — XPATH_INJECTION corpus acceptance.
+//!
+//! Asserts the new cap end-to-end: corpus slices register per-language
+//! vuln/benign pairs for Java / Python / PHP / JavaScript, the
+//! lang-aware resolver pairs them inside the correct slice, the
+//! per-language harness emitters splice in the synthetic XPath
+//! evaluator + nodes-returned probe + sink-hit sentinel, the
+//! framework adapters fire on the canonical sink call, the renamed
+//! `QueryResultCountGreaterThan` predicate evaluates both `Xpath`
+//! and `Ldap` probe kinds, and the in-workdir `xpath_corpus.xml`
+//! carries the three canonical `<user>` records.
+//!
+//! `cargo nextest run --features dynamic --test xpath_corpus`.
+
+#![cfg(feature = "dynamic")]
+
+mod common;
+
+use nyx_scanner::dynamic::corpus::{
+    audit_marker_collisions, benign_payload_for_lang, payloads_for_lang,
+    resolve_benign_control_lang, Oracle,
+};
+use nyx_scanner::dynamic::framework::registry::adapters_for;
+use nyx_scanner::dynamic::lang;
+use nyx_scanner::dynamic::oracle::{
+    oracle_fired, ProbePredicate, SignalSet,
+};
+use nyx_scanner::dynamic::probe::{ProbeKind, ProbeWitness, SinkProbe};
+use nyx_scanner::dynamic::sandbox::SandboxOutcome;
+use nyx_scanner::dynamic::spec::{EntryKind, HarnessSpec, PayloadSlot};
+use nyx_scanner::dynamic::stubs::xpath_document::{
+    XPATH_CORPUS_FILENAME, XPATH_CORPUS_NODE_COUNT, XPATH_CORPUS_XML,
+};
+use nyx_scanner::labels::Cap;
+use nyx_scanner::summary::FuncSummary;
+use nyx_scanner::symbol::Lang;
+use std::time::Duration;
+
+const LANGS: &[Lang] = &[Lang::Java, Lang::Python, Lang::Php, Lang::JavaScript];
+
+fn make_spec(lang: Lang, entry_file: &str, entry_name: &str) -> HarnessSpec {
+    HarnessSpec {
+        finding_id: "phase07test0001".into(),
+        entry_file: entry_file.into(),
+        entry_name: entry_name.into(),
+        entry_kind: EntryKind::Function,
+        lang,
+        toolchain_id: "phase07".into(),
+        payload_slot: PayloadSlot::Param(0),
+        expected_cap: Cap::XPATH_INJECTION,
+        constraint_hints: vec![],
+        sink_file: entry_file.into(),
+        sink_line: 1,
+        spec_hash: "phase07test0001".into(),
+        derivation: nyx_scanner::dynamic::spec::SpecDerivationStrategy::FromFlowSteps,
+        stubs_required: vec![],
+        framework: None,
+    }
+}
+
+#[test]
+fn corpus_registers_xpath_for_every_supported_lang() {
+    for lang in LANGS {
+        let slice = payloads_for_lang(Cap::XPATH_INJECTION, *lang);
+        assert!(!slice.is_empty(), "XPATH_INJECTION has no payloads for {lang:?}");
+        let has_vuln = slice.iter().any(|p| !p.is_benign);
+        let has_benign = slice.iter().any(|p| p.is_benign);
+        assert!(has_vuln, "{lang:?} XPath missing vuln payload");
+        assert!(has_benign, "{lang:?} XPath missing benign control");
+    }
+}
+
+#[test]
+fn xpath_unsupported_caps_unchanged_for_other_langs() {
+    for lang in [
+        Lang::Rust,
+        Lang::C,
+        Lang::Cpp,
+        Lang::Ruby,
+        Lang::Go,
+        Lang::TypeScript,
+    ] {
+        assert!(
+            payloads_for_lang(Cap::XPATH_INJECTION, lang).is_empty(),
+            "unexpected XPATH_INJECTION payloads for {lang:?}",
+        );
+    }
+}
+
+#[test]
+fn benign_control_resolves_within_lang_slice() {
+    for lang in LANGS {
+        let slice = payloads_for_lang(Cap::XPATH_INJECTION, *lang);
+        let vuln = slice.iter().find(|p| !p.is_benign).unwrap();
+        let resolved =
+            resolve_benign_control_lang(vuln, Cap::XPATH_INJECTION, *lang).expect("paired control");
+        assert!(resolved.is_benign);
+        let direct = benign_payload_for_lang(Cap::XPATH_INJECTION, *lang).unwrap();
+        assert_eq!(direct.label, resolved.label);
+    }
+}
+
+#[test]
+fn payload_oracle_carries_query_result_count_predicate() {
+    for lang in LANGS {
+        let slice = payloads_for_lang(Cap::XPATH_INJECTION, *lang);
+        let vuln = slice.iter().find(|p| !p.is_benign).unwrap();
+        match &vuln.oracle {
+            Oracle::SinkProbe { predicates } => {
+                assert!(
+                    predicates.iter().any(|p| matches!(
+                        p,
+                        ProbePredicate::QueryResultCountGreaterThan { n: 1 }
+                    )),
+                    "{lang:?} vuln payload missing QueryResultCountGreaterThan {{ n: 1 }}",
+                );
+            }
+            other => panic!("expected SinkProbe oracle for {lang:?}, got {other:?}"),
+        }
+    }
+}
+
+#[test]
+fn vuln_payload_bytes_contain_quote_or_breakout() {
+    // Vuln payload carries `' or '1'='1` quote-breakout; benign
+    // control carries no XPath metacharacters.
+    for lang in LANGS {
+        let slice = payloads_for_lang(Cap::XPATH_INJECTION, *lang);
+        let vuln = slice.iter().find(|p| !p.is_benign).unwrap();
+        let benign = slice.iter().find(|p| p.is_benign).unwrap();
+        let vuln_text = std::str::from_utf8(vuln.bytes).unwrap();
+        let benign_text = std::str::from_utf8(benign.bytes).unwrap();
+        assert!(
+            vuln_text.contains("' or '") && vuln_text.contains("'='"),
+            "{lang:?} vuln payload must carry a `' or '` tautology breakout",
+        );
+        assert!(
+            !benign_text.contains("'") && !benign_text.contains("="),
+            "{lang:?} benign control must not carry XPath metacharacters",
+        );
+    }
+}
+
+#[test]
+fn marker_collisions_clean_with_phase_07_additions() {
+    assert!(audit_marker_collisions().is_empty());
+}
+
+#[test]
+fn probe_kind_xpath_serdes() {
+    let original = ProbeKind::Xpath { nodes_returned: 3 };
+    let json = serde_json::to_string(&original).unwrap();
+    assert!(json.contains("Xpath"));
+    assert!(json.contains("nodes_returned"));
+    let parsed: ProbeKind = serde_json::from_str(&json).unwrap();
+    assert_eq!(parsed, original);
+}
+
+#[test]
+fn query_result_count_predicate_fires_on_xpath_probe() {
+    let oracle = Oracle::SinkProbe {
+        predicates: &[ProbePredicate::QueryResultCountGreaterThan { n: 1 }],
+    };
+    let probes = vec![SinkProbe {
+        sink_callee: "xpath.select".into(),
+        args: vec![],
+        captured_at_ns: 1,
+        payload_id: "phase07".into(),
+        kind: ProbeKind::Xpath { nodes_returned: 3 },
+        witness: ProbeWitness::empty(),
+    }];
+    let outcome = SandboxOutcome {
+        exit_code: Some(0),
+        stdout: vec![],
+        stderr: vec![],
+        timed_out: false,
+        oob_callback_seen: false,
+        sink_hit: true,
+        duration: Duration::from_millis(1),
+        hardening_outcome: None,
+    };
+    assert!(oracle_fired(&oracle, &outcome, &probes));
+}
+
+#[test]
+fn query_result_count_predicate_clear_when_count_is_one() {
+    let oracle = Oracle::SinkProbe {
+        predicates: &[ProbePredicate::QueryResultCountGreaterThan { n: 1 }],
+    };
+    let probes = vec![SinkProbe {
+        sink_callee: "xpath.select".into(),
+        args: vec![],
+        captured_at_ns: 1,
+        payload_id: "phase07".into(),
+        kind: ProbeKind::Xpath { nodes_returned: 1 },
+        witness: ProbeWitness::empty(),
+    }];
+    let outcome = SandboxOutcome {
+        exit_code: Some(0),
+        stdout: vec![],
+        stderr: vec![],
+        timed_out: false,
+        oob_callback_seen: false,
+        sink_hit: true,
+        duration: Duration::from_millis(1),
+        hardening_outcome: None,
+    };
+    assert!(!oracle_fired(&oracle, &outcome, &probes));
+}
+
+#[test]
+fn query_result_count_predicate_also_matches_ldap_probe() {
+    // Phase 06 → Phase 07 rename: the renamed predicate must still
+    // satisfy LDAP probes (`ProbeKind::Ldap.entries_returned > n`).
+    let oracle = Oracle::SinkProbe {
+        predicates: &[ProbePredicate::QueryResultCountGreaterThan { n: 1 }],
+    };
+    let probes = vec![SinkProbe {
+        sink_callee: "ldap.search_s".into(),
+        args: vec![],
+        captured_at_ns: 1,
+        payload_id: "phase07".into(),
+        kind: ProbeKind::Ldap { entries_returned: 3 },
+        witness: ProbeWitness::empty(),
+    }];
+    let outcome = SandboxOutcome {
+        exit_code: Some(0),
+        stdout: vec![],
+        stderr: vec![],
+        timed_out: false,
+        oob_callback_seen: false,
+        sink_hit: true,
+        duration: Duration::from_millis(1),
+        hardening_outcome: None,
+    };
+    assert!(oracle_fired(&oracle, &outcome, &probes));
+    let _ = SignalSet::empty(); // presumably only here to keep the `SignalSet` import used
+}
+
+#[test]
+fn lang_emitter_dispatches_to_xpath_harness() {
+    // Per-lang `sink_callee_marker` pins which evaluator-construction
+    // string the harness names in its probe record.
+    for (lang, entry_file, entry_name, sink_callee_marker) in [
+        (
+            Lang::Java,
+            "tests/dynamic_fixtures/xpath_injection/java/Vuln.java",
+            "run",
+            "javax.xml.xpath.XPath.evaluate",
+        ),
+        (
+            Lang::Python,
+            "tests/dynamic_fixtures/xpath_injection/python/vuln.py",
+            "run",
+            "lxml.etree.xpath",
+        ),
+        (
+            Lang::Php,
+            "tests/dynamic_fixtures/xpath_injection/php/vuln.php",
+            "run",
+            "DOMXPath::query",
+        ),
+        (
+            Lang::JavaScript,
+            "tests/dynamic_fixtures/xpath_injection/js/vuln.js",
+            "run",
+            "xpath.select",
+        ),
+    ] {
+        let spec = make_spec(lang, entry_file, entry_name);
+        let harness = lang::emit(&spec)
+            .unwrap_or_else(|e| panic!("emit failed for {lang:?}: {e:?}"));
+        assert!(
+            harness.source.contains("nodes_returned"),
+            "{lang:?} xpath harness must carry the nodes_returned probe field",
+        );
+        assert!(
+            harness.source.contains(sink_callee_marker),
+            "{lang:?} xpath harness must name {sink_callee_marker:?} as the sink callee",
+        );
+        assert!(
+            harness.source.contains("__NYX_SINK_HIT__"),
+            "{lang:?} xpath harness must emit the sink-hit sentinel",
+        );
+        assert!(
+            harness.source.contains("//user[@name="),
+            "{lang:?} xpath harness must build a `//user[@name=…]` selector from NYX_PAYLOAD",
+        );
+        assert!(
+            harness
+                .extra_files
+                .iter()
+                .any(|(p, c)| p == XPATH_CORPUS_FILENAME && c == XPATH_CORPUS_XML),
+            "{lang:?} xpath harness must stage the canonical xpath_corpus.xml",
+        );
+    }
+}
+
+#[test]
+fn framework_adapters_detect_xpath_sink() {
+    // Each lang registers its J.5 XPath-evaluator adapter; detect_binding
+    // routes through the registry and stamps an EntryKind::Function
+    // binding when the fixture contains the canonical sink call.
+    for (lang, fixture, sink_callee) in [
+        (
+            Lang::Java,
+            "tests/dynamic_fixtures/xpath_injection/java/Vuln.java",
+            "evaluate",
+        ),
+        (
+            Lang::Python,
+            "tests/dynamic_fixtures/xpath_injection/python/vuln.py",
+            "xpath",
+        ),
+        (
+            Lang::Php,
+            "tests/dynamic_fixtures/xpath_injection/php/vuln.php",
+            "query",
+        ),
+        (
+            Lang::JavaScript,
+            "tests/dynamic_fixtures/xpath_injection/js/vuln.js",
+            "select",
+        ),
+    ] {
+        let bytes = std::fs::read(fixture).expect("fixture exists");
+        let ts_lang = ts_language_for(lang);
+        let mut parser = tree_sitter::Parser::new();
+        parser.set_language(&ts_lang).unwrap();
+        let tree = parser.parse(&bytes, None).unwrap();
+        let mut summary = FuncSummary {
+            name: "run".into(),
+            file_path: fixture.to_owned(),
+            lang: slug(lang).into(),
+            ..Default::default()
+        };
+        summary
+            .callees
+            .push(nyx_scanner::summary::CalleeSite::bare(sink_callee));
+        let registry_slice = adapters_for(lang);
+        assert!(!registry_slice.is_empty(), "{lang:?} adapter slice empty");
+        let binding = nyx_scanner::dynamic::framework::detect_binding(
+            &summary,
+            tree.root_node(),
+            &bytes,
+            lang,
+        );
+        let b = binding
+            .unwrap_or_else(|| panic!("{lang:?} adapter must detect the XPath fixture"));
+        assert_eq!(b.kind, EntryKind::Function);
+        assert!(!b.adapter.is_empty());
+    }
+}
+
+fn ts_language_for(lang: Lang) -> tree_sitter::Language {
+    match lang {
+        Lang::Java => tree_sitter::Language::from(tree_sitter_java::LANGUAGE),
+        Lang::Python => tree_sitter::Language::from(tree_sitter_python::LANGUAGE),
+        Lang::Php => tree_sitter::Language::from(tree_sitter_php::LANGUAGE_PHP),
+        Lang::JavaScript => tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE),
+        other => panic!("unsupported test lang {other:?}"),
+    }
+}
+
+fn slug(lang: Lang) -> &'static str {
+    match lang {
+        Lang::Java => "java",
+        Lang::Python => "python",
+        Lang::Php => "php",
+        Lang::JavaScript => "javascript",
+        _ => "other",
+    }
+}
+
+#[test]
+fn staged_corpus_carries_three_users() {
+    assert_eq!(XPATH_CORPUS_NODE_COUNT, 3);
+    for needle in ["alice", "bob", "carol"] {
+        assert!(
+            XPATH_CORPUS_XML.contains(needle),
+            "staged xpath_corpus.xml must include canonical user {needle}",
+        );
+    }
+}
+
+// ── End-to-end Phase 07 acceptance via run_spec ───────────────────────────────
+//
+// Mirrors the `e2e_phase_06` block in `ldap_corpus.rs`. Drives
+// `run_spec` directly on a `Cap::XPATH_INJECTION` spec per language
+// and asserts the polarity via the `ProbeKind::Xpath { nodes_returned > 1 }`
+// probe and the `__NYX_SINK_HIT__` sentinel. The synthetic harness
+// inlines the XPath evaluator over the staged document, so the
+// verdict path is deterministic without spawning a real XPath
+// engine (`stubs_required: vec![]`).
+//
+// JavaScript is skipped: the synthetic harness's `require('xpath')`
+// import resolves only when the workdir has the package installed.
+
+mod e2e_phase_07 {
+    use crate::common::fixture_harness::FIXTURE_LOCK;
+    use nyx_scanner::dynamic::runner::{run_spec, RunError, RunOutcome};
+    use nyx_scanner::dynamic::sandbox::{SandboxBackend, SandboxOptions};
+    use nyx_scanner::dynamic::spec::{
+        default_toolchain_id, EntryKind, HarnessSpec, PayloadSlot, SpecDerivationStrategy,
+    };
+    use nyx_scanner::evidence::DifferentialVerdict;
+    use nyx_scanner::labels::Cap;
+    use nyx_scanner::symbol::Lang;
+    use std::path::PathBuf;
+    use std::process::Command;
+    use tempfile::TempDir;
+
+    fn command_available(bin: &str) -> bool {
+        Command::new(bin)
+            .arg("--version")
+            .output()
+            .map(|o| o.status.success())
+            .unwrap_or(false)
+    }
+
+    fn toolchain_for(lang: Lang) -> &'static str {
+        match lang {
+            Lang::Java => "java",
+            Lang::Python => "python3",
+            Lang::Php => "php",
+            _ => unreachable!("e2e_phase_07 covers Java/Python/PHP"),
+        }
+    }
+
+    fn lang_subdir(lang: Lang) -> &'static str {
+        match lang {
+            Lang::Java => "java",
+            Lang::Python => "python",
+            Lang::Php => "php",
+            _ => unreachable!(),
+        }
+    }
+
+    fn build_spec(lang: Lang, fixture: &str, entry_name: &str) -> (HarnessSpec, TempDir) {
+        let fixture_src = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
+            .join("tests/dynamic_fixtures/xpath_injection")
+            .join(lang_subdir(lang))
+            .join(fixture);
+        let tmp = TempDir::new().expect("create tempdir");
+        let dst = tmp.path().join(fixture);
+        std::fs::copy(&fixture_src, &dst).expect("copy fixture into tempdir");
+
+        let entry_file = dst.to_string_lossy().into_owned();
+        let mut digest = blake3::Hasher::new();
+        digest.update(b"phase07-e2e-xpath|");
+        digest.update(lang_subdir(lang).as_bytes());
+        digest.update(b"|");
+        digest.update(fixture.as_bytes());
+        let spec_hash = format!("{:016x}", {
+            let bytes = digest.finalize();
+            u64::from_le_bytes(bytes.as_bytes()[..8].try_into().unwrap())
+        });
+
+        if matches!(lang, Lang::Java) {
+            let workdir = std::path::PathBuf::from("/tmp/nyx-harness").join(&spec_hash);
+            let _ = std::fs::remove_dir_all(&workdir); // best-effort: removal errors are ignored
+        }
+
+        let spec = HarnessSpec {
+            finding_id: spec_hash.clone(),
+            entry_file: entry_file.clone(),
+            entry_name: entry_name.to_owned(),
+            entry_kind: EntryKind::Function,
+            lang,
+            toolchain_id: default_toolchain_id(lang).into(),
+            payload_slot: PayloadSlot::Param(0),
+            expected_cap: Cap::XPATH_INJECTION,
+            constraint_hints: vec![],
+            sink_file: entry_file,
+            sink_line: 1,
+            spec_hash: spec_hash.clone(),
+            derivation: SpecDerivationStrategy::FromFlowSteps,
+            stubs_required: vec![],
+            framework: None,
+        };
+
+        (spec, tmp)
+    }
+
+    fn run(lang: Lang, fixture: &str, entry_name: &str) -> Option<RunOutcome> {
+        let bin = toolchain_for(lang);
+        if !command_available(bin) {
+            eprintln!("SKIP {lang:?} {fixture}: missing toolchain {bin}");
+            return None;
+        }
+        let _guard = FIXTURE_LOCK.lock().unwrap_or_else(|e| e.into_inner()); // tolerate a poisoned lock
+        let (spec, _tmp) = build_spec(lang, fixture, entry_name);
+        let opts = SandboxOptions {
+            backend: SandboxBackend::Process,
+            ..SandboxOptions::default()
+        };
+        match run_spec(&spec, &opts) {
+            Ok(outcome) => Some(outcome),
+            Err(RunError::BuildFailed { stderr, attempts }) => {
+                eprintln!(
+                    "SKIP {lang:?} {fixture}: harness build failed after {attempts} attempts: {stderr}",
+                );
+                None
+            }
+            Err(e) => panic!("run_spec({lang:?} {fixture}) errored: {e:?}"),
+        }
+    }
+
+    #[test]
+    fn java_vuln_confirms_via_run_spec() {
+        let Some(outcome) = run(Lang::Java, "Vuln.java", "run") else { return };
+        assert!(
+            outcome.triggered_by.is_some(),
+            "Java XPath vuln must Confirm via run_spec; got {outcome:?}",
+        );
+        let diff = outcome
+            .differential
+            .as_ref()
+            .expect("Confirmed run must carry a DifferentialOutcome");
+        assert_eq!(diff.verdict, DifferentialVerdict::Confirmed);
+    }
+
+    #[test]
+    fn python_vuln_confirms_via_run_spec() {
+        let Some(outcome) = run(Lang::Python, "vuln.py", "run") else { return };
+        assert!(
+            outcome.triggered_by.is_some(),
+            "Python XPath vuln must Confirm via run_spec; got {outcome:?}",
+        );
+        let diff = outcome
+            .differential
+            .as_ref()
+            .expect("Confirmed run must carry a DifferentialOutcome");
+        assert_eq!(diff.verdict, DifferentialVerdict::Confirmed);
+    }
+
+    #[test]
+    fn php_vuln_confirms_via_run_spec() {
+        let Some(outcome) = run(Lang::Php, "vuln.php", "run") else { return };
+        assert!(
+            outcome.triggered_by.is_some(),
+            "PHP XPath vuln must Confirm via run_spec; got {outcome:?}",
+        );
+        let diff = outcome
+            .differential
+            .as_ref()
+            .expect("Confirmed run must carry a DifferentialOutcome");
+        assert_eq!(diff.verdict, DifferentialVerdict::Confirmed);
+    }
+}