mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-12 19:55:14 +02:00
181 lines
5.5 KiB
Rust
181 lines
5.5 KiB
Rust
//! Python [`super::super::FrameworkAdapter`] matching XXE-prone XML
|
|
//! parser constructions.
|
|
//!
|
|
//! Phase 05 (Track J.3). Fires when the function body invokes one of
|
|
//! the canonical lxml / stdlib XML entry points
|
|
//! (`lxml.etree.XMLParser`, `lxml.etree.parse`, `lxml.etree.fromstring`,
|
|
//! `xml.etree.ElementTree.parse`, `xml.sax.parse`,
|
|
//! `xml.dom.minidom.parseString`) and the surrounding source mentions
|
|
//! the matching module. Callee matching is last-segment-aware so
|
|
//! receiver-prefixed calls (`etree.XMLParser`,
|
|
//! `ElementTree.fromstring`) hit the same predicate.
|
|
|
|
use crate::dynamic::framework::{FrameworkAdapter, FrameworkBinding};
|
|
use crate::evidence::EntryKind;
|
|
use crate::summary::FuncSummary;
|
|
use crate::symbol::Lang;
|
|
|
|
/// Stateless adapter type; its [`FrameworkAdapter`] impl in this file
/// flags Python functions that call XXE-prone XML parser entry points.
pub struct XxePythonAdapter;
|
|
|
|
/// Identifier returned by `XxePythonAdapter::name` and recorded in the
/// `adapter` field of every binding this adapter emits.
const ADAPTER_NAME: &str = "xxe-python";
|
|
|
|
/// Reports whether `name` refers to one of the XML parser entry points
/// this adapter tracks.
///
/// Only the segment after the final `.` is compared (case-sensitively),
/// so `etree.XMLParser` and a bare `XMLParser` are treated alike.
fn callee_is_xml_parser(name: &str) -> bool {
    const PARSER_ENTRY_POINTS: [&str; 6] = [
        "XMLParser",
        "parse",
        "fromstring",
        "parseString",
        "XMLPullParser",
        "iterparse",
    ];

    // `.` is a single byte in UTF-8, so `dot + 1` is always a char boundary.
    let final_segment = match name.rfind('.') {
        Some(dot) => &name[dot + 1..],
        None => name,
    };
    PARSER_ENTRY_POINTS.contains(&final_segment)
}
|
|
|
|
/// Reports whether `file_bytes` mentions any XML module this adapter
/// cares about.
///
/// This is a raw byte-substring search: a marker counts wherever it
/// appears in the file, not only inside an import statement.
fn source_imports_xml(file_bytes: &[u8]) -> bool {
    const XML_MODULE_MARKERS: &[&[u8]] = &[
        b"lxml.etree",
        b"lxml import",
        b"xml.etree",
        b"ElementTree",
        b"xml.sax",
        b"xml.dom",
        b"defusedxml",
    ];

    for marker in XML_MODULE_MARKERS.iter() {
        let found = file_bytes
            .windows(marker.len())
            .any(|window| window == *marker);
        if found {
            return true;
        }
    }
    false
}
|
|
|
|
/// Returns `bytes` with any leading ASCII whitespace removed.
fn skip_ascii_whitespace(bytes: &[u8]) -> &[u8] {
    let skipped = bytes
        .iter()
        .take_while(|b| b.is_ascii_whitespace())
        .count();
    &bytes[skipped..]
}

/// Returns `true` when `haystack` contains `key`, optional ASCII
/// whitespace, a single `=`, optional ASCII whitespace, and then
/// `value`. A `==` comparison does not match because the byte after
/// the first `=` is neither whitespace nor the start of `value`.
fn contains_keyword_assignment(haystack: &[u8], key: &[u8], value: &[u8]) -> bool {
    if key.is_empty() || haystack.len() < key.len() {
        return false;
    }
    (0..=haystack.len() - key.len())
        .filter(|&start| haystack[start..].starts_with(key))
        .any(|start| {
            let after_key = skip_ascii_whitespace(&haystack[start + key.len()..]);
            match after_key.split_first() {
                Some((&b'=', after_eq)) => skip_ascii_whitespace(after_eq).starts_with(value),
                _ => false,
            }
        })
}

/// Returns `true` when the surrounding source visibly hardens the
/// XML parser against external-entity expansion. Conservative: only
/// recognises canonical lxml `resolve_entities=False` /
/// `no_network=True` parser flags and the `defusedxml` package
/// (whose parsers are safe-by-default).
///
/// The flags are matched with any amount of ASCII whitespace around
/// the `=` (spaces, tabs, or a line break inside the call), so
/// `resolve_entities  =  False` counts the same as
/// `resolve_entities=False`. The check is file-wide: one occurrence
/// anywhere in `file_bytes` marks the file as hardened.
fn parser_is_hardened(file_bytes: &[u8]) -> bool {
    // NOTE(review): lxml documents `no_network=True` as the default and
    // it presumably does not stop `file://` entity resolution; confirm
    // whether this flag alone should suppress the finding.
    const DEFUSEDXML_IMPORTS: &[&[u8]] = &[b"from defusedxml", b"import defusedxml"];

    contains_keyword_assignment(file_bytes, b"resolve_entities", b"False")
        || contains_keyword_assignment(file_bytes, b"no_network", b"True")
        || DEFUSEDXML_IMPORTS
            .iter()
            .any(|needle| file_bytes.windows(needle.len()).any(|w| w == *needle))
}
|
|
|
|
impl FrameworkAdapter for XxePythonAdapter {
|
|
fn name(&self) -> &'static str {
|
|
ADAPTER_NAME
|
|
}
|
|
|
|
fn lang(&self) -> Lang {
|
|
Lang::Python
|
|
}
|
|
|
|
fn detect(
|
|
&self,
|
|
summary: &FuncSummary,
|
|
_ast: tree_sitter::Node<'_>,
|
|
file_bytes: &[u8],
|
|
) -> Option<FrameworkBinding> {
|
|
if parser_is_hardened(file_bytes) {
|
|
return None;
|
|
}
|
|
let matches_call = super::any_callee_matches(summary, callee_is_xml_parser);
|
|
let matches_source = source_imports_xml(file_bytes);
|
|
if matches_call && matches_source {
|
|
Some(FrameworkBinding {
|
|
adapter: ADAPTER_NAME.to_owned(),
|
|
kind: EntryKind::Function,
|
|
route: None,
|
|
request_params: Vec::new(),
|
|
response_writer: None,
|
|
middleware: Vec::new(),
|
|
})
|
|
} else {
|
|
None
|
|
}
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `src` with the tree-sitter Python grammar. Panics if the
    /// grammar cannot be set or no tree is produced.
    fn parse_python(src: &[u8]) -> tree_sitter::Tree {
        let language = tree_sitter::Language::from(tree_sitter_python::LANGUAGE);
        let mut parser = tree_sitter::Parser::new();
        parser.set_language(&language).unwrap();
        parser.parse(src, None).unwrap()
    }

    /// Summary for a function named `run` whose only callee is a bare
    /// `fromstring`.
    fn run_summary_calling_fromstring() -> FuncSummary {
        FuncSummary {
            name: "run".into(),
            callees: vec![crate::summary::CalleeSite::bare("fromstring")],
            ..Default::default()
        }
    }

    /// Runs the adapter over `src` and reports whether it produced a
    /// binding.
    fn adapter_fires(summary: &FuncSummary, src: &[u8]) -> bool {
        let tree = parse_python(src);
        XxePythonAdapter
            .detect(summary, tree.root_node(), src)
            .is_some()
    }

    #[test]
    fn fires_on_lxml_etree_fromstring() {
        let src: &[u8] = b"from lxml import etree\n\
def run(body):\n return etree.fromstring(body)\n";
        assert!(adapter_fires(&run_summary_calling_fromstring(), src));
    }

    #[test]
    fn skips_plain_function() {
        let src: &[u8] = b"def add(a, b):\n return a + b\n";
        let summary = FuncSummary {
            name: "add".into(),
            ..Default::default()
        };
        assert!(!adapter_fires(&summary, src));
    }

    #[test]
    fn skips_when_resolve_entities_false() {
        let src: &[u8] = b"from lxml import etree\n\
def run(body):\n\
parser = etree.XMLParser(resolve_entities=False, no_network=True)\n\
return etree.fromstring(body, parser)\n";
        assert!(!adapter_fires(&run_summary_calling_fromstring(), src));
    }

    #[test]
    fn skips_when_defusedxml_imported() {
        let src: &[u8] = b"from defusedxml import ElementTree\n\
def run(body):\n return ElementTree.fromstring(body)\n";
        assert!(!adapter_fires(&run_summary_calling_fromstring(), src));
    }
}
|