2026-05-17 20:39:12 -05:00
|
|
|
//! Java [`super::super::FrameworkAdapter`] matching XXE-prone XML parser
//! constructions.
//!
//! Phase 05 (Track J.3). Fires when the function body invokes a
//! `DocumentBuilder.parse` / `SAXParser.parse` / `XMLInputFactory`
//! call site and the surrounding source pulls in one of the
//! `javax.xml.parsers` / `org.w3c.dom` / `org.xml.sax` packages —
//! i.e. an XML parser that, by default and without
//! `disallow-doctype-decl`, expands external entities.
//!
//! The adapter stays silent when the file visibly hardens the parser
//! (for example by setting `disallow-doctype-decl`).
|
|
|
|
|
|
|
|
|
|
use crate::dynamic::framework::{FrameworkAdapter, FrameworkBinding};
|
|
|
|
|
use crate::evidence::EntryKind;
|
|
|
|
|
use crate::summary::FuncSummary;
|
|
|
|
|
use crate::symbol::Lang;
|
|
|
|
|
|
|
|
|
|
/// Stateless `FrameworkAdapter` implementation for Java that flags
/// functions using XML parsers left open to external-entity (XXE)
/// expansion.
pub struct XxeJavaAdapter;
|
|
|
|
|
|
|
|
|
|
/// Identifier returned by the adapter's `name()` and recorded in the
/// `adapter` field of every binding it emits.
const ADAPTER_NAME: &str = "xxe-java";
|
|
|
|
|
|
|
|
|
|
/// Reports whether `name` refers to one of the XML parser entry points
/// this adapter watches for.
///
/// Only the segment after the final `.` is compared, so both bare
/// (`parse`) and qualified (`builder.parse`) callee names are accepted.
/// The comparison is exact and case-sensitive.
fn callee_is_xml_parse(name: &str) -> bool {
    // Method names that construct or drive an XML parser.
    const XML_ENTRY_POINTS: [&str; 6] = [
        "parse",
        "newDocumentBuilder",
        "newSAXParser",
        "createXMLEventReader",
        "createXMLStreamReader",
        "newInstance",
    ];

    // `rsplit` always yields at least one item, so the fallback is only a
    // formality; a name without any `.` is returned whole.
    let method = name.rsplit('.').next().unwrap_or(name);
    XML_ENTRY_POINTS.contains(&method)
}
|
|
|
|
|
|
|
|
|
|
/// Reports whether the raw file contents mention any of the XML parser
/// packages or factory/builder type names this adapter cares about.
///
/// This is a plain byte-substring scan over the whole file: it does not
/// parse import statements, so a mention anywhere in the file counts.
fn source_imports_xml_parser(file_bytes: &[u8]) -> bool {
    const NEEDLES: &[&[u8]] = &[
        b"javax.xml.parsers",
        b"DocumentBuilderFactory",
        b"DocumentBuilder",
        b"SAXParserFactory",
        b"XMLInputFactory",
        b"org.xml.sax",
        b"org.w3c.dom",
    ];

    for needle in NEEDLES.iter() {
        let found = file_bytes
            .windows(needle.len())
            .any(|chunk| chunk == *needle);
        if found {
            return true;
        }
    }
    false
}
|
|
|
|
|
|
2026-05-21 01:36:46 -05:00
|
|
|
/// Reports whether the file contains a recognised XML-parser hardening
/// call against external-entity / DTD expansion.
///
/// The check is a byte-substring scan for hardening invocations in their
/// canonical syntactic form — quoted feature URIs or call expressions —
/// so that a casual prose mention of a feature name in Javadoc or a line
/// comment is not mistaken for hardening.
///
/// The matcher is deliberately conservative. Missing a hardened parser
/// merely lets the adapter fire, and the rest of the pipeline still
/// double-checks that; wrongly treating a parser as hardened would
/// silently drop a real finding.
fn parser_is_hardened(file_bytes: &[u8]) -> bool {
    const HARDENING_NEEDLES: &[&[u8]] = &[
        b"\"http://apache.org/xml/features/disallow-doctype-decl\"",
        b"setFeature(XMLConstants.FEATURE_SECURE_PROCESSING",
        b"setFeature( XMLConstants.FEATURE_SECURE_PROCESSING",
        b"setExpandEntityReferences(false)",
        b"setExpandEntityReferences (false)",
        b"\"http://xml.org/sax/features/external-general-entities\"",
        b"\"http://xml.org/sax/features/external-parameter-entities\"",
        b"XMLConstants.ACCESS_EXTERNAL_DTD,",
        b"XMLConstants.ACCESS_EXTERNAL_SCHEMA,",
        b"setXIncludeAware(false)",
    ];

    // True when `needle` occurs as a contiguous byte run in the file.
    let appears = |needle: &[u8]| {
        file_bytes
            .windows(needle.len())
            .any(|chunk| chunk == needle)
    };

    HARDENING_NEEDLES.iter().any(|needle| appears(*needle))
}
|
|
|
|
|
|
2026-05-17 20:39:12 -05:00
|
|
|
impl FrameworkAdapter for XxeJavaAdapter {
|
|
|
|
|
fn name(&self) -> &'static str {
|
|
|
|
|
ADAPTER_NAME
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn lang(&self) -> Lang {
|
|
|
|
|
Lang::Java
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
fn detect(
|
|
|
|
|
&self,
|
|
|
|
|
summary: &FuncSummary,
|
|
|
|
|
_ast: tree_sitter::Node<'_>,
|
|
|
|
|
file_bytes: &[u8],
|
|
|
|
|
) -> Option<FrameworkBinding> {
|
2026-05-21 01:36:46 -05:00
|
|
|
if parser_is_hardened(file_bytes) {
|
|
|
|
|
return None;
|
|
|
|
|
}
|
2026-05-17 20:39:12 -05:00
|
|
|
let matches_call = super::any_callee_matches(summary, callee_is_xml_parse);
|
|
|
|
|
let matches_source = source_imports_xml_parser(file_bytes);
|
|
|
|
|
if matches_call && matches_source {
|
|
|
|
|
return Some(FrameworkBinding {
|
|
|
|
|
adapter: ADAPTER_NAME.to_owned(),
|
|
|
|
|
kind: EntryKind::Function,
|
|
|
|
|
route: None,
|
|
|
|
|
request_params: Vec::new(),
|
|
|
|
|
response_writer: None,
|
|
|
|
|
middleware: Vec::new(),
|
|
|
|
|
});
|
|
|
|
|
}
|
|
|
|
|
// Fall-back: source clearly imports the XXE-prone parser even
|
|
|
|
|
// when the call-graph summary did not capture the parse call.
|
|
|
|
|
if matches_source
|
|
|
|
|
&& file_bytes
|
|
|
|
|
.windows(b".parse(".len())
|
|
|
|
|
.any(|w| w == b".parse(")
|
|
|
|
|
{
|
|
|
|
|
return Some(FrameworkBinding {
|
|
|
|
|
adapter: ADAPTER_NAME.to_owned(),
|
|
|
|
|
kind: EntryKind::Function,
|
|
|
|
|
route: None,
|
|
|
|
|
request_params: Vec::new(),
|
|
|
|
|
response_writer: None,
|
|
|
|
|
middleware: Vec::new(),
|
|
|
|
|
});
|
|
|
|
|
}
|
|
|
|
|
None
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `src` with the tree-sitter Java grammar.
    fn parse_java(src: &[u8]) -> tree_sitter::Tree {
        let language = tree_sitter::Language::from(tree_sitter_java::LANGUAGE);
        let mut parser = tree_sitter::Parser::new();
        parser.set_language(&language).unwrap();
        parser.parse(src, None).unwrap()
    }

    /// Summary for a function `run` whose only recorded callee is `parse`.
    fn summary_calling_parse() -> FuncSummary {
        FuncSummary {
            name: "run".into(),
            callees: vec![crate::summary::CalleeSite::bare("parse")],
            ..Default::default()
        }
    }

    /// Runs the adapter over `src` using `summary` and the parsed Java tree.
    fn detect_in(src: &[u8], summary: &FuncSummary) -> Option<FrameworkBinding> {
        let tree = parse_java(src);
        XxeJavaAdapter.detect(summary, tree.root_node(), src)
    }

    #[test]
    fn fires_on_document_builder_parse() {
        let src: &[u8] = b"import javax.xml.parsers.DocumentBuilderFactory;\n\
            public class V {\n public static void run(byte[] b) throws Exception {\n\
            DocumentBuilderFactory f = DocumentBuilderFactory.newInstance();\n\
            f.newDocumentBuilder().parse(new java.io.ByteArrayInputStream(b));\n\
            }\n}\n";

        let binding = detect_in(src, &summary_calling_parse())
            .expect("must fire on DocumentBuilder.parse fixture");

        assert_eq!(binding.adapter, ADAPTER_NAME);
        assert_eq!(binding.kind, EntryKind::Function);
    }

    #[test]
    fn skips_plain_function() {
        let src: &[u8] =
            b"public class V { public static void run(String b) { System.out.println(b); } }\n";
        let summary = FuncSummary {
            name: "run".into(),
            ..Default::default()
        };

        assert!(detect_in(src, &summary).is_none());
    }

    #[test]
    fn skips_when_disallow_doctype_decl_set() {
        let src: &[u8] = b"import javax.xml.parsers.DocumentBuilderFactory;\n\
            public class V {\n public static void run(byte[] b) throws Exception {\n\
            DocumentBuilderFactory f = DocumentBuilderFactory.newInstance();\n\
            f.setFeature(\"http://apache.org/xml/features/disallow-doctype-decl\", true);\n\
            f.newDocumentBuilder().parse(new java.io.ByteArrayInputStream(b));\n\
            }\n}\n";

        assert!(detect_in(src, &summary_calling_parse()).is_none());
    }

    #[test]
    fn skips_when_feature_secure_processing_set() {
        let src: &[u8] = b"import javax.xml.parsers.DocumentBuilderFactory;\n\
            import javax.xml.XMLConstants;\n\
            public class V {\n public static void run(byte[] b) throws Exception {\n\
            DocumentBuilderFactory f = DocumentBuilderFactory.newInstance();\n\
            f.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);\n\
            f.newDocumentBuilder().parse(new java.io.ByteArrayInputStream(b));\n\
            }\n}\n";

        assert!(detect_in(src, &summary_calling_parse()).is_none());
    }
}
|