[pitboss] phase 07: Track J.5 + Track L.5 — XPATH_INJECTION corpus + XPath / DOM / lxml adapters

This commit is contained in:
pitboss 2026-05-17 23:47:12 -05:00
parent b2eeaabb09
commit a32075a756
38 changed files with 2111 additions and 67 deletions

View file

@ -23,6 +23,10 @@ pub mod python_jinja2;
pub mod python_pickle;
pub mod ruby_erb;
pub mod ruby_marshal;
pub mod xpath_java;
pub mod xpath_js;
pub mod xpath_php;
pub mod xpath_python;
pub mod xxe_go;
pub mod xxe_java;
pub mod xxe_php;
@ -41,6 +45,10 @@ pub use python_jinja2::PythonJinja2Adapter;
pub use python_pickle::PythonPickleAdapter;
pub use ruby_erb::RubyErbAdapter;
pub use ruby_marshal::RubyMarshalAdapter;
pub use xpath_java::XpathJavaAdapter;
pub use xpath_js::XpathJsAdapter;
pub use xpath_php::XpathPhpAdapter;
pub use xpath_python::XpathPythonAdapter;
pub use xxe_go::XxeGoAdapter;
pub use xxe_java::XxeJavaAdapter;
pub use xxe_php::XxePhpAdapter;

View file

@ -0,0 +1,127 @@
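//! Java [`super::super::FrameworkAdapter`] matching XPath expression-
//! injection sink constructions.
//!
//! Phase 07 (Track J.5). Fires when the function body invokes one of
//! the canonical `javax.xml.xpath` entry points
//! (`XPath.evaluate`, `XPath.compile`, `XPathExpression.evaluate`) or
//! a `selectNodes` / `selectSingleNode` call, and the surrounding
//! source references one of the matching package symbols —
//! `javax.xml.xpath`, `XPathFactory`, `XPathExpression`,
//! `XPathConstants`, or `net.sf.saxon.s9api`. As a fallback it also
//! fires when such a source contains the text `.evaluate(` anywhere,
//! even if the function summary records no matching callee.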
use crate::dynamic::framework::{FrameworkAdapter, FrameworkBinding};
use crate::evidence::EntryKind;
use crate::summary::FuncSummary;
use crate::symbol::Lang;
/// Stateless adapter that flags Java functions containing XPath
/// evaluation sinks; all behaviour lives in its `FrameworkAdapter` impl.
pub struct XpathJavaAdapter;
/// Identifier returned by `name()` and stamped into every binding this
/// adapter produces.
const ADAPTER_NAME: &str = "xpath-java";
/// Returns `true` when the final `.`-separated segment of `name` is one
/// of the XPath evaluation method names this adapter recognises
/// (`evaluate`, `compile`, `selectNodes`, `selectSingleNode`).
///
/// A name without any `.` is compared as a whole.
fn callee_is_xpath_eval(name: &str) -> bool {
    // `rsplit` always yields at least one item, so the fallback is only
    // there to avoid an `unwrap`.
    let method = name.rsplit('.').next().unwrap_or(name);
    match method {
        "evaluate" | "compile" | "selectNodes" | "selectSingleNode" => true,
        _ => false,
    }
}
/// Reports whether the raw file contents mention any of the Java XPath
/// package or type markers listed in `NEEDLES`.
///
/// This is a plain byte-substring scan over the whole file; it does not
/// distinguish imports from comments or string literals.
fn source_imports_xpath(file_bytes: &[u8]) -> bool {
    const NEEDLES: &[&[u8]] = &[
        b"javax.xml.xpath",
        b"XPathFactory",
        b"XPathExpression",
        b"XPathConstants",
        b"net.sf.saxon.s9api",
    ];
    // Every needle is non-empty, so `windows` never receives a zero size.
    let contains = |needle: &[u8]| {
        file_bytes
            .windows(needle.len())
            .any(|window| window == needle)
    };
    NEEDLES.iter().copied().any(contains)
}
impl FrameworkAdapter for XpathJavaAdapter {
    /// Returns the adapter identifier (`ADAPTER_NAME`).
    fn name(&self) -> &'static str {
        ADAPTER_NAME
    }

    /// This adapter applies to Java sources.
    fn lang(&self) -> Lang {
        Lang::Java
    }

    /// Produces a function-level binding when the file references an
    /// XPath marker (see `source_imports_xpath`) and either
    ///
    /// * `super::any_callee_matches` reports a callee accepted by
    ///   `callee_is_xpath_eval`, or
    /// * the file text contains `.evaluate(` anywhere (a file-wide
    ///   fallback that does not look at `summary`).
    ///
    /// The AST node is not consulted. Returns `None` otherwise.
    fn detect(
        &self,
        summary: &FuncSummary,
        _ast: tree_sitter::Node<'_>,
        file_bytes: &[u8],
    ) -> Option<FrameworkBinding> {
        const EVALUATE_CALL: &[u8] = b".evaluate(";

        let calls_xpath = super::any_callee_matches(summary, callee_is_xpath_eval);
        let imports_xpath = source_imports_xpath(file_bytes);

        // Both firing paths require an XPath marker somewhere in the file.
        if !imports_xpath {
            return None;
        }

        // The textual scan only runs when the callee check did not match.
        let fires = calls_xpath
            || file_bytes
                .windows(EVALUATE_CALL.len())
                .any(|window| window == EVALUATE_CALL);
        if !fires {
            return None;
        }

        Some(FrameworkBinding {
            adapter: ADAPTER_NAME.to_owned(),
            kind: EntryKind::Function,
            route: None,
            request_params: Vec::new(),
            response_writer: None,
            middleware: Vec::new(),
        })
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `src` with the tree-sitter Java grammar, panicking on failure.
    fn parse_java(src: &[u8]) -> tree_sitter::Tree {
        let grammar = tree_sitter::Language::from(tree_sitter_java::LANGUAGE);
        let mut java_parser = tree_sitter::Parser::new();
        java_parser.set_language(&grammar).unwrap();
        java_parser.parse(src, None).unwrap()
    }

    /// A function that calls `evaluate` in a file importing
    /// `javax.xml.xpath` must yield a function-level binding.
    #[test]
    fn fires_on_xpath_evaluate() {
        let src: &[u8] = b"import javax.xml.xpath.XPathFactory;\n\
            public class V {\n public Object run(String name) throws Exception {\n\
            javax.xml.xpath.XPath xp = XPathFactory.newInstance().newXPath();\n\
            return xp.evaluate(\"//user[@name='\" + name + \"']\", null);\n\
            }\n}\n";
        let parsed = parse_java(src);
        let summary = FuncSummary {
            name: "run".into(),
            callees: vec![crate::summary::CalleeSite::bare("evaluate")],
            ..Default::default()
        };

        let detected = XpathJavaAdapter.detect(&summary, parsed.root_node(), src);
        let binding = detected.expect("must fire on XPath.evaluate");

        assert_eq!(binding.adapter, ADAPTER_NAME);
        assert_eq!(binding.kind, EntryKind::Function);
    }

    /// A file with no XPath markers must not produce a binding.
    #[test]
    fn skips_plain_function() {
        let src: &[u8] =
            b"public class V { public static int add(int a, int b) { return a + b; } }\n";
        let parsed = parse_java(src);
        let summary = FuncSummary {
            name: "add".into(),
            ..Default::default()
        };

        let detected = XpathJavaAdapter.detect(&summary, parsed.root_node(), src);
        assert!(detected.is_none());
    }
}

View file

@ -0,0 +1,112 @@
//! JavaScript [`super::super::FrameworkAdapter`] matching XPath
//! expression-injection sink constructions.
//!
//! Phase 07 (Track J.5). Fires when the function body invokes the
//! npm `xpath` package's `select` / `select1` / `evaluate` / `parse`
//! entry points (or the browser DOM's `document.evaluate`) and the
//! surrounding source imports / requires the `xpath` module or
//! references `xpath.select`, `xpath.evaluate`, `XPathResult`, or
//! `document.evaluate`.
use crate::dynamic::framework::{FrameworkAdapter, FrameworkBinding};
use crate::evidence::EntryKind;
use crate::summary::FuncSummary;
use crate::symbol::Lang;
/// Stateless adapter that flags JavaScript functions containing XPath
/// evaluation sinks; all behaviour lives in its `FrameworkAdapter` impl.
pub struct XpathJsAdapter;
/// Identifier returned by `name()` and stamped into every binding this
/// adapter produces.
const ADAPTER_NAME: &str = "xpath-js";
/// Returns `true` when the final `.`-separated segment of `name` is one
/// of `select`, `select1`, `evaluate`, or `parse`.
///
/// A name without any `.` is compared as a whole.
fn callee_is_xpath_eval(name: &str) -> bool {
    // '.' is a single byte, so slicing just past it stays on a char boundary.
    let method = match name.rfind('.') {
        Some(dot) => &name[dot + 1..],
        None => name,
    };
    ["select", "select1", "evaluate", "parse"].contains(&method)
}
/// Reports whether the raw file contents contain any of the markers in
/// `NEEDLES`: a `require` / `import` of the `xpath` module, a call
/// through `xpath.select` / `xpath.evaluate`, or a reference to
/// `XPathResult` / `document.evaluate`.
///
/// This is a plain byte-substring scan over the whole file.
fn source_imports_xpath(file_bytes: &[u8]) -> bool {
    const NEEDLES: &[&[u8]] = &[
        b"require('xpath')",
        b"require(\"xpath\")",
        b"from 'xpath'",
        b"from \"xpath\"",
        b"xpath.select",
        b"xpath.evaluate",
        b"XPathResult",
        b"document.evaluate",
    ];
    for needle in NEEDLES {
        // Every needle is non-empty, so `windows` never receives a zero size.
        let found = file_bytes
            .windows(needle.len())
            .any(|window| window == *needle);
        if found {
            return true;
        }
    }
    false
}
impl FrameworkAdapter for XpathJsAdapter {
    /// Returns the adapter identifier (`ADAPTER_NAME`).
    fn name(&self) -> &'static str {
        ADAPTER_NAME
    }

    /// This adapter applies to JavaScript sources.
    fn lang(&self) -> Lang {
        Lang::JavaScript
    }

    /// Produces a function-level binding only when both signals agree:
    /// `super::any_callee_matches` reports a callee accepted by
    /// `callee_is_xpath_eval`, and the file contains one of the markers
    /// checked by `source_imports_xpath`. The AST node is not consulted.
    fn detect(
        &self,
        summary: &FuncSummary,
        _ast: tree_sitter::Node<'_>,
        file_bytes: &[u8],
    ) -> Option<FrameworkBinding> {
        let calls_xpath = super::any_callee_matches(summary, callee_is_xpath_eval);
        let imports_xpath = source_imports_xpath(file_bytes);

        match (calls_xpath, imports_xpath) {
            (true, true) => Some(FrameworkBinding {
                adapter: ADAPTER_NAME.to_owned(),
                kind: EntryKind::Function,
                route: None,
                request_params: Vec::new(),
                response_writer: None,
                middleware: Vec::new(),
            }),
            _ => None,
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `src` with the tree-sitter JavaScript grammar, panicking on failure.
    fn parse_js(src: &[u8]) -> tree_sitter::Tree {
        let grammar = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
        let mut js_parser = tree_sitter::Parser::new();
        js_parser.set_language(&grammar).unwrap();
        js_parser.parse(src, None).unwrap()
    }

    /// A function that calls `select` in a file requiring `xpath` must
    /// yield a binding.
    #[test]
    fn fires_on_xpath_select() {
        let src: &[u8] = b"const xpath = require('xpath');\n\
            function run(name) {\n\
            return xpath.select(\"//user[@name='\" + name + \"']\", doc);\n\
            }\nmodule.exports = { run };\n";
        let parsed = parse_js(src);
        let summary = FuncSummary {
            name: "run".into(),
            callees: vec![crate::summary::CalleeSite::bare("select")],
            ..Default::default()
        };

        let detected = XpathJsAdapter.detect(&summary, parsed.root_node(), src);
        assert!(detected.is_some());
    }

    /// A file with no XPath markers and no callees must not produce a binding.
    #[test]
    fn skips_plain_function() {
        let src: &[u8] = b"function add(a, b) { return a + b; }\nmodule.exports = { add };\n";
        let parsed = parse_js(src);
        let summary = FuncSummary {
            name: "add".into(),
            ..Default::default()
        };

        let detected = XpathJsAdapter.detect(&summary, parsed.root_node(), src);
        assert!(detected.is_none());
    }
}

View file

@ -0,0 +1,111 @@
//! PHP [`super::super::FrameworkAdapter`] matching XPath expression-
//! injection sink constructions.
//!
//! Phase 07 (Track J.5). Fires when the function body invokes
//! `DOMXPath::query` / `DOMXPath::evaluate` (or a method named
//! `xpath`, as on `SimpleXMLElement`) and the surrounding source
//! mentions the `DOMXPath` / `DOMDocument` / `SimpleXMLElement`
//! family, `simplexml_load_string`, or an `->xpath(` call.
use crate::dynamic::framework::{FrameworkAdapter, FrameworkBinding};
use crate::evidence::EntryKind;
use crate::summary::FuncSummary;
use crate::symbol::Lang;
/// Stateless adapter that flags PHP functions containing XPath
/// evaluation sinks; all behaviour lives in its `FrameworkAdapter` impl.
pub struct XpathPhpAdapter;
/// Identifier returned by `name()` and stamped into every binding this
/// adapter produces.
const ADAPTER_NAME: &str = "xpath-php";
/// Returns `true` when the trailing method segment of `name` is one of
/// `query`, `evaluate`, or `xpath`.
///
/// Qualifiers are stripped in turn for each separator a PHP callee name
/// may carry: the static scope operator `::`, the object operator `->`
/// (which also covers the nullsafe `?->`), and a `.`-joined path. A bare
/// name is compared as a whole.
fn callee_is_xpath_eval(name: &str) -> bool {
    let last = name.rsplit_once("::").map_or(name, |(_, tail)| tail);
    // PHP instance calls are written `$xp->query(...)`; without this step
    // a callee recorded in that spelling could never match.
    let last = last.rsplit_once("->").map_or(last, |(_, tail)| tail);
    let last = last.rsplit_once('.').map_or(last, |(_, tail)| tail);
    matches!(last, "query" | "evaluate" | "xpath")
}
/// Reports whether the raw file contents mention any of the PHP DOM /
/// SimpleXML markers listed in `NEEDLES`.
///
/// This is a plain byte-substring scan over the whole file.
fn source_uses_domxpath(file_bytes: &[u8]) -> bool {
    const NEEDLES: &[&[u8]] = &[
        b"DOMXPath",
        b"DOMDocument",
        b"SimpleXMLElement",
        b"simplexml_load_string",
        b"->xpath(",
    ];

    /// Byte-substring test; callers pass only non-empty needles.
    fn occurs_in(haystack: &[u8], needle: &[u8]) -> bool {
        haystack
            .windows(needle.len())
            .any(|window| window == needle)
    }

    NEEDLES.iter().any(|needle| occurs_in(file_bytes, needle))
}
impl FrameworkAdapter for XpathPhpAdapter {
    /// Returns the adapter identifier (`ADAPTER_NAME`).
    fn name(&self) -> &'static str {
        ADAPTER_NAME
    }

    /// This adapter applies to PHP sources.
    fn lang(&self) -> Lang {
        Lang::Php
    }

    /// Produces a function-level binding only when both signals agree:
    /// `super::any_callee_matches` reports a callee accepted by
    /// `callee_is_xpath_eval`, and the file contains one of the markers
    /// checked by `source_uses_domxpath`. The AST node is not consulted.
    fn detect(
        &self,
        summary: &FuncSummary,
        _ast: tree_sitter::Node<'_>,
        file_bytes: &[u8],
    ) -> Option<FrameworkBinding> {
        let calls_xpath = super::any_callee_matches(summary, callee_is_xpath_eval);
        let uses_dom = source_uses_domxpath(file_bytes);

        // Either signal alone is too weak to report a sink.
        if !(calls_xpath && uses_dom) {
            return None;
        }

        Some(FrameworkBinding {
            adapter: ADAPTER_NAME.to_owned(),
            kind: EntryKind::Function,
            route: None,
            request_params: Vec::new(),
            response_writer: None,
            middleware: Vec::new(),
        })
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `src` with the tree-sitter PHP grammar, panicking on failure.
    fn parse_php(src: &[u8]) -> tree_sitter::Tree {
        let grammar = tree_sitter::Language::from(tree_sitter_php::LANGUAGE_PHP);
        let mut php_parser = tree_sitter::Parser::new();
        php_parser.set_language(&grammar).unwrap();
        php_parser.parse(src, None).unwrap()
    }

    /// A function that calls `query` in a file using `DOMXPath` must
    /// yield a binding.
    #[test]
    fn fires_on_domxpath_query() {
        let src: &[u8] = b"<?php\n\
            function run($name) {\n\
            $doc = new DOMDocument();\n\
            $doc->load('xpath_corpus.xml');\n\
            $xp = new DOMXPath($doc);\n\
            return $xp->query(\"//user[@name='\" . $name . \"']\");\n\
            }\n";
        let parsed = parse_php(src);
        let summary = FuncSummary {
            name: "run".into(),
            callees: vec![crate::summary::CalleeSite::bare("query")],
            ..Default::default()
        };

        let detected = XpathPhpAdapter.detect(&summary, parsed.root_node(), src);
        assert!(detected.is_some());
    }

    /// A file with no DOM / SimpleXML markers must not produce a binding.
    #[test]
    fn skips_plain_function() {
        let src: &[u8] = b"<?php\nfunction add($a, $b) { return $a + $b; }\n";
        let parsed = parse_php(src);
        let summary = FuncSummary {
            name: "add".into(),
            ..Default::default()
        };

        let detected = XpathPhpAdapter.detect(&summary, parsed.root_node(), src);
        assert!(detected.is_none());
    }
}

View file

@ -0,0 +1,109 @@
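//! Python [`super::super::FrameworkAdapter`] matching XPath expression-
//! injection sink constructions.
//!
//! Phase 07 (Track J.5). Fires when the function body invokes
//! `lxml.etree`'s XPath entry points (`Element.xpath`, `xpath`,
//! `XPath` evaluator) or an ElementTree-style `evaluate` / `find` /
//! `findall` / `iterfind` call, and the surrounding source imports
//! `lxml` or references `etree.XPath`, `etree.ElementTree` (which
//! includes the standard library's `xml.etree.ElementTree`), or
//! `ElementTree.fromstring`.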
use crate::dynamic::framework::{FrameworkAdapter, FrameworkBinding};
use crate::evidence::EntryKind;
use crate::summary::FuncSummary;
use crate::symbol::Lang;
/// Stateless adapter that flags Python functions containing XPath
/// evaluation sinks; all behaviour lives in its `FrameworkAdapter` impl.
pub struct XpathPythonAdapter;
/// Identifier returned by `name()` and stamped into every binding this
/// adapter produces.
const ADAPTER_NAME: &str = "xpath-python";
/// Returns `true` when the final `.`-separated segment of `name` is one
/// of `xpath`, `evaluate`, `find`, `findall`, or `iterfind`.
///
/// A name without any `.` is compared as a whole.
fn callee_is_xpath_eval(name: &str) -> bool {
    const METHODS: [&str; 5] = ["xpath", "evaluate", "find", "findall", "iterfind"];
    let method = match name.rsplit_once('.') {
        Some((_, tail)) => tail,
        None => name,
    };
    METHODS.iter().any(|known| *known == method)
}
/// Reports whether the raw file contents mention any of the lxml /
/// ElementTree markers listed in `NEEDLES`.
///
/// This is a plain byte-substring scan over the whole file; it does not
/// distinguish imports from comments or string literals.
fn source_imports_lxml(file_bytes: &[u8]) -> bool {
    const NEEDLES: &[&[u8]] = &[
        b"from lxml",
        b"import lxml",
        b"lxml.etree",
        b"etree.XPath",
        b"etree.ElementTree",
        b"xml.etree.ElementTree",
        b"ElementTree.fromstring",
    ];
    NEEDLES.iter().any(|&needle| {
        // Every needle is non-empty, so `windows` never receives a zero size.
        let mut candidates = file_bytes.windows(needle.len());
        candidates.any(|window| window == needle)
    })
}
impl FrameworkAdapter for XpathPythonAdapter {
    /// Returns the adapter identifier (`ADAPTER_NAME`).
    fn name(&self) -> &'static str {
        ADAPTER_NAME
    }

    /// This adapter applies to Python sources.
    fn lang(&self) -> Lang {
        Lang::Python
    }

    /// Produces a function-level binding only when both signals agree:
    /// `super::any_callee_matches` reports a callee accepted by
    /// `callee_is_xpath_eval`, and the file contains one of the markers
    /// checked by `source_imports_lxml`. The AST node is not consulted.
    fn detect(
        &self,
        summary: &FuncSummary,
        _ast: tree_sitter::Node<'_>,
        file_bytes: &[u8],
    ) -> Option<FrameworkBinding> {
        let calls_xpath = super::any_callee_matches(summary, callee_is_xpath_eval);
        let imports_lxml = source_imports_lxml(file_bytes);

        (calls_xpath && imports_lxml).then(|| FrameworkBinding {
            adapter: ADAPTER_NAME.to_owned(),
            kind: EntryKind::Function,
            route: None,
            request_params: Vec::new(),
            response_writer: None,
            middleware: Vec::new(),
        })
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `src` with the tree-sitter Python grammar, panicking on failure.
    fn parse_python(src: &[u8]) -> tree_sitter::Tree {
        let grammar = tree_sitter::Language::from(tree_sitter_python::LANGUAGE);
        let mut py_parser = tree_sitter::Parser::new();
        py_parser.set_language(&grammar).unwrap();
        py_parser.parse(src, None).unwrap()
    }

    /// A function that calls `xpath` in a file importing `lxml` must
    /// yield a binding.
    #[test]
    fn fires_on_lxml_xpath() {
        let src: &[u8] = b"from lxml import etree\n\
            def run(name):\n\
            tree = etree.fromstring(open('xpath_corpus.xml').read())\n\
            return tree.xpath(\"//user[@name='\" + name + \"']\")\n";
        let parsed = parse_python(src);
        let summary = FuncSummary {
            name: "run".into(),
            callees: vec![crate::summary::CalleeSite::bare("xpath")],
            ..Default::default()
        };

        let detected = XpathPythonAdapter.detect(&summary, parsed.root_node(), src);
        assert!(detected.is_some());
    }

    /// A file with no lxml / ElementTree markers must not produce a binding.
    #[test]
    fn skips_plain_function() {
        let src: &[u8] = b"def add(a, b):\n return a + b\n";
        let parsed = parse_python(src);
        let summary = FuncSummary {
            name: "add".into(),
            ..Default::default()
        };

        let detected = XpathPythonAdapter.detect(&summary, parsed.root_node(), src);
        assert!(detected.is_none());
    }
}