2026-05-17 23:47:12 -05:00
|
|
|
//! Python [`super::super::FrameworkAdapter`] matching XPath expression-
|
|
|
|
|
//! injection sink constructions.
|
|
|
|
|
//!
|
|
|
|
|
//! Phase 07 (Track J.5). Fires when the function body invokes
|
|
|
|
|
//! `lxml.etree`'s XPath entry points (`Element.xpath`, `xpath`,
|
|
|
|
|
//! `XPath` evaluator) and the surrounding source imports `lxml`.
|
2026-05-20 23:44:34 -05:00
|
|
|
//!
|
|
|
|
|
//! Strengthened to walk the AST and only fire when the evaluator's
|
|
|
|
|
//! expression argument carries a tainted-param identifier in its
|
|
|
|
|
//! subtree. Pre-bound parameterised queries
|
|
|
|
|
//! (`etree.XPath("//user[@name=$name]")(tree, name=name)`) keep the
|
|
|
|
|
//! template string literal-only, so the walker sees no tainted
|
|
|
|
|
//! identifier inside the call to `XPath` / `xpath` and the binding
|
|
|
|
|
//! is skipped.
|
2026-05-17 23:47:12 -05:00
|
|
|
|
|
|
|
|
use crate::dynamic::framework::{FrameworkAdapter, FrameworkBinding};
|
|
|
|
|
use crate::evidence::EntryKind;
|
|
|
|
|
use crate::summary::FuncSummary;
|
|
|
|
|
use crate::symbol::Lang;
|
2026-05-20 23:44:34 -05:00
|
|
|
use tree_sitter::Node;
|
2026-05-17 23:47:12 -05:00
|
|
|
|
|
|
|
|
pub struct XpathPythonAdapter;
|
|
|
|
|
|
|
|
|
|
const ADAPTER_NAME: &str = "xpath-python";
|
|
|
|
|
|
|
|
|
|
fn callee_is_xpath_eval(name: &str) -> bool {
|
|
|
|
|
let last = name.rsplit_once('.').map(|(_, s)| s).unwrap_or(name);
|
2026-05-20 23:44:34 -05:00
|
|
|
matches!(last, "xpath" | "evaluate" | "find" | "findall" | "iterfind" | "XPath")
|
2026-05-17 23:47:12 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn source_imports_lxml(file_bytes: &[u8]) -> bool {
|
|
|
|
|
const NEEDLES: &[&[u8]] = &[
|
|
|
|
|
b"from lxml",
|
|
|
|
|
b"import lxml",
|
|
|
|
|
b"lxml.etree",
|
|
|
|
|
b"etree.XPath",
|
|
|
|
|
b"etree.ElementTree",
|
|
|
|
|
b"xml.etree.ElementTree",
|
|
|
|
|
b"ElementTree.fromstring",
|
|
|
|
|
];
|
|
|
|
|
NEEDLES
|
|
|
|
|
.iter()
|
|
|
|
|
.any(|n| file_bytes.windows(n.len()).any(|w| w == *n))
|
|
|
|
|
}
|
|
|
|
|
|
2026-05-20 23:44:34 -05:00
|
|
|
fn ast_confirms_tainted_xpath(root: Node<'_>, bytes: &[u8], summary: &FuncSummary) -> bool {
|
|
|
|
|
let mut found = false;
|
|
|
|
|
walk(root, bytes, summary, root, &mut found);
|
|
|
|
|
found
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn walk<'a>(
|
|
|
|
|
node: Node<'a>,
|
|
|
|
|
bytes: &[u8],
|
|
|
|
|
summary: &FuncSummary,
|
|
|
|
|
scope: Node<'a>,
|
|
|
|
|
found: &mut bool,
|
|
|
|
|
) {
|
|
|
|
|
if *found {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
if node.kind() == "call"
|
|
|
|
|
&& let Some(func) = node
|
|
|
|
|
.child_by_field_name("function")
|
|
|
|
|
.and_then(|n| n.utf8_text(bytes).ok())
|
|
|
|
|
&& callee_is_xpath_eval(func)
|
|
|
|
|
&& let Some(args) = node.child_by_field_name("arguments")
|
|
|
|
|
&& super::subtree_contains_tainted_param(args, bytes, summary, Some(scope))
|
|
|
|
|
{
|
|
|
|
|
*found = true;
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
let mut cur = node.walk();
|
|
|
|
|
for child in node.children(&mut cur) {
|
|
|
|
|
walk(child, bytes, summary, scope, found);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2026-05-17 23:47:12 -05:00
|
|
|
impl FrameworkAdapter for XpathPythonAdapter {
|
|
|
|
|
fn name(&self) -> &'static str {
|
|
|
|
|
ADAPTER_NAME
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn lang(&self) -> Lang {
|
|
|
|
|
Lang::Python
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn detect(
|
|
|
|
|
&self,
|
|
|
|
|
summary: &FuncSummary,
|
2026-05-20 23:44:34 -05:00
|
|
|
ast: tree_sitter::Node<'_>,
|
2026-05-17 23:47:12 -05:00
|
|
|
file_bytes: &[u8],
|
|
|
|
|
) -> Option<FrameworkBinding> {
|
2026-05-20 23:44:34 -05:00
|
|
|
if !source_imports_lxml(file_bytes) {
|
|
|
|
|
return None;
|
2026-05-17 23:47:12 -05:00
|
|
|
}
|
2026-05-20 23:44:34 -05:00
|
|
|
if !super::any_callee_matches(summary, callee_is_xpath_eval) {
|
|
|
|
|
return None;
|
|
|
|
|
}
|
|
|
|
|
if !ast_confirms_tainted_xpath(ast, file_bytes, summary) {
|
|
|
|
|
return None;
|
|
|
|
|
}
|
|
|
|
|
Some(FrameworkBinding {
|
|
|
|
|
adapter: ADAPTER_NAME.to_owned(),
|
|
|
|
|
kind: EntryKind::Function,
|
|
|
|
|
route: None,
|
|
|
|
|
request_params: Vec::new(),
|
|
|
|
|
response_writer: None,
|
|
|
|
|
middleware: Vec::new(),
|
|
|
|
|
})
|
2026-05-17 23:47:12 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[cfg(test)]
|
|
|
|
|
mod tests {
|
|
|
|
|
use super::*;
|
|
|
|
|
|
|
|
|
|
fn parse_python(src: &[u8]) -> tree_sitter::Tree {
|
|
|
|
|
let mut parser = tree_sitter::Parser::new();
|
|
|
|
|
let lang = tree_sitter::Language::from(tree_sitter_python::LANGUAGE);
|
|
|
|
|
parser.set_language(&lang).unwrap();
|
|
|
|
|
parser.parse(src, None).unwrap()
|
|
|
|
|
}
|
|
|
|
|
|
2026-05-20 23:44:34 -05:00
|
|
|
fn summary_for(name: &str, params: &[&str], tainted: &[usize]) -> FuncSummary {
|
|
|
|
|
FuncSummary {
|
|
|
|
|
name: name.into(),
|
|
|
|
|
param_count: params.len(),
|
|
|
|
|
param_names: params.iter().map(|s| (*s).to_owned()).collect(),
|
|
|
|
|
tainted_sink_params: tainted.to_vec(),
|
|
|
|
|
callees: vec![crate::summary::CalleeSite::bare("xpath")],
|
|
|
|
|
..Default::default()
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2026-05-17 23:47:12 -05:00
|
|
|
#[test]
|
|
|
|
|
fn fires_on_lxml_xpath() {
|
|
|
|
|
let src: &[u8] = b"from lxml import etree\n\
|
|
|
|
|
def run(name):\n\
|
|
|
|
|
tree = etree.fromstring(open('xpath_corpus.xml').read())\n\
|
|
|
|
|
return tree.xpath(\"//user[@name='\" + name + \"']\")\n";
|
|
|
|
|
let tree = parse_python(src);
|
2026-05-20 23:44:34 -05:00
|
|
|
let summary = summary_for("run", &["name"], &[0]);
|
2026-05-17 23:47:12 -05:00
|
|
|
assert!(XpathPythonAdapter
|
|
|
|
|
.detect(&summary, tree.root_node(), src)
|
|
|
|
|
.is_some());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
|
fn skips_plain_function() {
|
|
|
|
|
let src: &[u8] = b"def add(a, b):\n return a + b\n";
|
|
|
|
|
let tree = parse_python(src);
|
|
|
|
|
let summary = FuncSummary {
|
|
|
|
|
name: "add".into(),
|
|
|
|
|
..Default::default()
|
|
|
|
|
};
|
|
|
|
|
assert!(XpathPythonAdapter
|
|
|
|
|
.detect(&summary, tree.root_node(), src)
|
|
|
|
|
.is_none());
|
|
|
|
|
}
|
2026-05-20 23:44:34 -05:00
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
|
fn skips_when_expression_uses_bound_variable() {
|
|
|
|
|
let src: &[u8] = b"from lxml import etree\n\
|
|
|
|
|
def run(name):\n\
|
|
|
|
|
tree = etree.fromstring(open('xpath_corpus.xml').read())\n\
|
|
|
|
|
q = etree.XPath(\"//user[@name=$name]\")\n\
|
|
|
|
|
return q(tree, name=name)\n";
|
|
|
|
|
let tree = parse_python(src);
|
|
|
|
|
let summary = summary_for("run", &["name"], &[0]);
|
|
|
|
|
assert!(XpathPythonAdapter
|
|
|
|
|
.detect(&summary, tree.root_node(), src)
|
|
|
|
|
.is_none());
|
|
|
|
|
}
|
2026-05-17 23:47:12 -05:00
|
|
|
}
|