mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-27 20:29:39 +02:00
new capacity bits (#67)
This commit is contained in:
parent
afaffc0df6
commit
7d0e7320e2
261 changed files with 10591 additions and 231 deletions
|
|
@ -31,6 +31,8 @@ pub mod param_points_to;
|
|||
pub mod pointsto;
|
||||
pub mod static_map;
|
||||
pub mod type_facts;
|
||||
pub mod xml_config;
|
||||
pub mod xpath_config;
|
||||
|
||||
#[allow(unused_imports)]
|
||||
pub use ir::*;
|
||||
|
|
@ -51,6 +53,20 @@ pub struct OptimizeResult {
|
|||
pub const_values: HashMap<SsaValue, const_prop::ConstLattice>,
|
||||
/// Type fact analysis results.
|
||||
pub type_facts: type_facts::TypeFactResult,
|
||||
/// XML-parser configuration facts: per-receiver SSA value
|
||||
/// `secure_processing` / `disallow_doctype` / `external_entities`
|
||||
/// flags carried forward from setter calls and constructor kwargs.
|
||||
/// Consumed by the SSA taint engine to suppress XXE on parse-class
|
||||
/// sinks whose receiver was provably hardened.
|
||||
#[serde(default)]
|
||||
pub xml_parser_config: xml_config::XmlParserConfigResult,
|
||||
/// XPath-receiver configuration facts: per-receiver SSA value
|
||||
/// `has_resolver` flag set by `setXPathVariableResolver` calls.
|
||||
/// Consumed by the SSA taint engine to suppress XPATH_INJECTION on
|
||||
/// `evaluate` / `compile` sinks whose receiver was provably bound
|
||||
/// to a variable resolver (parameterised XPath shape).
|
||||
#[serde(default)]
|
||||
pub xpath_config: xpath_config::XPathConfigResult,
|
||||
/// Base-variable alias groups from copy propagation.
|
||||
pub alias_result: alias::BaseAliasResult,
|
||||
/// Points-to analysis: per-SSA-value abstract heap object sets.
|
||||
|
|
@ -100,6 +116,17 @@ pub fn optimize_ssa_with_param_types(
|
|||
let type_facts =
|
||||
type_facts::analyze_types_with_param_types(body, cfg, &cp.values, lang, param_types);
|
||||
|
||||
// 5b. XML-parser config analysis. Tracks per-receiver hardening
|
||||
// flags so XXE sinks can be suppressed when the parser was provably
|
||||
// configured for secure processing.
|
||||
let xml_parser_config = xml_config::analyze_xml_parser_config(body, cfg, &cp.values, lang);
|
||||
|
||||
// 5c. XPath-receiver config analysis. Tracks per-receiver
|
||||
// `has_resolver` flag so `XPath.evaluate(taintedExpr, ...)` sinks
|
||||
// can be suppressed when the receiver was bound to an
|
||||
// `XPathVariableResolver` (parameterised-XPath shape).
|
||||
let xpath_config = xpath_config::analyze_xpath_config(body, cfg, lang);
|
||||
|
||||
// 6. Points-to analysis (uses allocation site detection + SSA def-use)
|
||||
let points_to = heap::analyze_points_to(body, cfg, lang);
|
||||
|
||||
|
|
@ -113,6 +140,8 @@ pub fn optimize_ssa_with_param_types(
|
|||
OptimizeResult {
|
||||
const_values: cp.values,
|
||||
type_facts,
|
||||
xml_parser_config,
|
||||
xpath_config,
|
||||
alias_result,
|
||||
points_to,
|
||||
module_aliases,
|
||||
|
|
|
|||
|
|
@ -52,12 +52,55 @@ pub enum TypeKind {
|
|||
/// where openmrs / xwiki / keycloak Hibernate DAOs build queries
|
||||
/// via `cb.createQuery(Foo.class)` + `Root` / `Predicate` API.
|
||||
JpaCriteriaQuery,
|
||||
/// An LDAP directory-service client / connection (`DirContext`,
|
||||
/// `LdapTemplate`, `Net::LDAP`, `ldap3.Connection`, `ldap.createClient`,
|
||||
/// `ldap.DialURL`, etc.). Distinct from `DatabaseConnection` so the
|
||||
/// type-qualified `LdapClient.search` rule fires only on directory
|
||||
/// search APIs rather than every DB receiver with a `search` method.
|
||||
LdapClient,
|
||||
/// An XPath query / evaluation client (`DOMXPath`, `XPath`,
|
||||
/// `XPathExpression`, `lxml.etree.XPath`, etc.). Distinct from
|
||||
/// `DatabaseConnection` so the type-qualified `XPathClient.query` /
|
||||
/// `XPathClient.evaluate` rules fire only on XPath APIs rather than
|
||||
/// every receiver with a generic `query` / `evaluate` method (avoids
|
||||
/// collision with PHP `$pdo->query` SQL_QUERY sink).
|
||||
XPathClient,
|
||||
/// A pre-parsed template object whose `process` / `merge` /
|
||||
/// `render` method renders bound data through an already-compiled
|
||||
/// template body. The SSTI vector is when the template *source*
|
||||
/// fed to the constructor / factory was attacker-influenced; the
|
||||
/// render-time call site is the sink. Currently populated by
|
||||
/// `new freemarker.template.Template(...)`; the type-qualified
|
||||
/// resolver rewrites `tpl.process(...)` → `Template.process` so
|
||||
/// the existing flat SSTI rule fires on idiomatic
|
||||
/// `Template tpl = new Template(...); tpl.process(model, out)`
|
||||
/// shapes.
|
||||
Template,
|
||||
/// An XML parser instance produced by a JAXP factory call
|
||||
/// (`DocumentBuilderFactory.newDocumentBuilder()`,
|
||||
/// `SAXParserFactory.newSAXParser()`, `XMLReaderFactory.createXMLReader()`).
|
||||
/// `DOMXPath` and friends keep their own `XPathClient` tag. Used so
|
||||
/// the type-qualified `XmlParser.parse` rule fires on instance-style
|
||||
/// calls (`builder.parse(input)`) without needing a flat-rule
|
||||
/// matcher per concrete subclass. Also gates the XXE config-fact
|
||||
/// suppression: only XmlParser-typed receivers consult the
|
||||
/// [`crate::ssa::xml_config::XmlParserConfigResult`] sidecar.
|
||||
XmlParser,
|
||||
/// A framework-injected DTO body whose field types are known.
|
||||
/// Populated when a parameter is recognised as a typed extractor and
|
||||
/// the DTO class / struct / Pydantic model is resolvable in scope.
|
||||
/// Strictly additive: without a DTO definition, callers fall back
|
||||
/// to name-only resolution.
|
||||
Dto(DtoFields),
|
||||
/// An object created with `Object.create(null)` — has no prototype
|
||||
/// chain, so subscript-write keys cannot pollute `Object.prototype`.
|
||||
/// Populated for JS/TS values whose constructor call is
|
||||
/// `Object.create(null)`. The PROTOTYPE_POLLUTION suppression at the
|
||||
/// synthetic `__index_set__` sink consults this fact (via SSA receiver
|
||||
/// value) so the suppression is flow-sensitive: if a phi join leaves
|
||||
/// the receiver only sometimes null-prototyped, the fact widens to
|
||||
/// `Unknown` and the sink fires on the unsafe path.
|
||||
NullPrototypeObject,
|
||||
}
|
||||
|
||||
/// Structural carrier for a recognised DTO type. Maps
|
||||
|
|
@ -99,6 +142,10 @@ impl TypeKind {
|
|||
Self::Url => Some("URL"),
|
||||
Self::RequestBuilder => Some("RequestBuilder"),
|
||||
Self::JpaCriteriaQuery => Some("JpaCriteriaQuery"),
|
||||
Self::LdapClient => Some("LdapClient"),
|
||||
Self::XPathClient => Some("XPathClient"),
|
||||
Self::XmlParser => Some("XmlParser"),
|
||||
Self::Template => Some("Template"),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
|
@ -288,9 +335,11 @@ pub fn is_safe_query_object_arg(
|
|||
/// authoritative, and consumers see Unknown instead of a wrong
|
||||
/// type tag.
|
||||
///
|
||||
/// `_args` and `_consts` are kept on the signature so we can later
|
||||
/// add arg-shape narrowing when class-literal lowering captures
|
||||
/// `Foo.class` as an arg-use.
|
||||
/// `_args` and `_consts` allow arg-shape narrowing when an arg's
|
||||
/// constant value distinguishes overloads. Reserved for future Java
|
||||
/// `createQuery(Foo.class)` shape (the `Object.create(null)` case is
|
||||
/// driven by the `produces_null_proto` CFG flag instead, since a
|
||||
/// literal `null` arg leaves no SSA value to inspect).
|
||||
fn arg_aware_call_type(
|
||||
lang: Lang,
|
||||
callee: &str,
|
||||
|
|
@ -392,6 +441,40 @@ pub(crate) fn constructor_type(lang: Lang, callee: &str) -> Option<TypeKind> {
|
|||
"createCriteriaUpdate" | "createCriteriaDelete" | "createTupleQuery" | "subquery" => {
|
||||
Some(TypeKind::JpaCriteriaQuery)
|
||||
}
|
||||
// LDAP directory-service clients. `new InitialDirContext(env)` /
|
||||
// `new InitialLdapContext(env, ctls)` instantiate the JNDI LDAP
|
||||
// provider; `new LdapTemplate(...)` / `LdapTemplate.<init>` is the
|
||||
// Spring LDAP wrapper. Both expose `search` / `searchByEntity`
|
||||
// /`searchForObject` overloads where filter/DN strings are LDAP
|
||||
// injection sinks.
|
||||
"InitialDirContext" | "InitialLdapContext" | "LdapTemplate" => {
|
||||
Some(TypeKind::LdapClient)
|
||||
}
|
||||
// JAXP factory-produced XML parser instances. Each is
|
||||
// XXE-vulnerable by default until hardened with
|
||||
// `setFeature(FEATURE_SECURE_PROCESSING, true)` (or
|
||||
// disallow-doctype-decl, etc.). The
|
||||
// [`crate::ssa::xml_config::XmlParserConfigResult`] sidecar
|
||||
// suppresses the XXE bit at the type-qualified `XmlParser.parse`
|
||||
// sink when the receiver carries a hardening fact.
|
||||
"newDocumentBuilder" | "newSAXParser" | "getXMLReader" | "newXMLReader"
|
||||
| "createXMLReader" => Some(TypeKind::XmlParser),
|
||||
// `XPathFactory.newXPath()` returns a JAXP `XPath` instance.
|
||||
// Mapping it to `XPathClient` lets the type-qualified resolver
|
||||
// pick up `xpath.evaluate(...)` against the existing
|
||||
// `XPathClient.evaluate` rule and lets the
|
||||
// [`crate::ssa::xpath_config::XPathConfigResult`] sidecar
|
||||
// suppress XPATH_INJECTION when the receiver was bound to an
|
||||
// `XPathVariableResolver`.
|
||||
"newXPath" => Some(TypeKind::XPathClient),
|
||||
// Apache FreeMarker `new Template(name, reader, cfg)` /
|
||||
// `cfg.getTemplate(name)`. The `Template` instance's
|
||||
// `.process(model, out)` is an SSTI sink when the
|
||||
// constructor source / template body came from tainted
|
||||
// input. Type-qualified resolution rewrites
|
||||
// `tpl.process(...)` → `Template.process` against the
|
||||
// existing flat rule in `labels/java.rs`.
|
||||
"Template" | "getTemplate" => Some(TypeKind::Template),
|
||||
_ => None,
|
||||
},
|
||||
Lang::JavaScript | Lang::TypeScript => match suffix {
|
||||
|
|
@ -409,6 +492,12 @@ pub(crate) fn constructor_type(lang: Lang, callee: &str) -> Option<TypeKind> {
|
|||
// `elementsMap.get(id)`, `origIdToDuplicateId.get(...)`,
|
||||
// `groupIdMapForOperation.set(...)` shapes).
|
||||
"Map" | "Set" | "WeakMap" | "WeakSet" | "Array" => Some(TypeKind::LocalCollection),
|
||||
// ldapjs client factory: `ldap.createClient({ url: '…' })` returns
|
||||
// a Client whose `search(base, opts, cb)` is an LDAP injection
|
||||
// sink. Match the qualified callee text rather than the bare
|
||||
// `createClient` suffix to avoid widening to unrelated factories
|
||||
// with the same verb name.
|
||||
"createClient" if callee.contains("ldap") => Some(TypeKind::LdapClient),
|
||||
_ => None,
|
||||
},
|
||||
Lang::Python => {
|
||||
|
|
@ -429,6 +518,15 @@ pub(crate) fn constructor_type(lang: Lang, callee: &str) -> Option<TypeKind> {
|
|||
} else if suffix == "open" && !callee.contains('.') {
|
||||
// Bare `open()` is file I/O in Python
|
||||
Some(TypeKind::FileHandle)
|
||||
} else if callee == "ldap.initialize"
|
||||
|| callee == "ldap3.Connection"
|
||||
|| callee.ends_with(".initialize") && callee.contains("ldap")
|
||||
{
|
||||
// python-ldap: `conn = ldap.initialize(url)` returns an
|
||||
// LDAPObject whose `search_s` / `search_ext_s` methods are
|
||||
// LDAP-injection sinks. ldap3: `Connection(server, ...)`
|
||||
// returns a Connection with a `search()` method.
|
||||
Some(TypeKind::LdapClient)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
|
|
@ -442,6 +540,10 @@ pub(crate) fn constructor_type(lang: Lang, callee: &str) -> Option<TypeKind> {
|
|||
Some(TypeKind::FileHandle)
|
||||
} else if callee.contains("url.") && suffix == "Parse" {
|
||||
Some(TypeKind::Url)
|
||||
} else if callee.contains("ldap.") && matches!(suffix, "Dial" | "DialURL" | "DialTLS") {
|
||||
// go-ldap (`github.com/go-ldap/ldap/v3`): `conn, _ := ldap.DialURL(url)`
|
||||
// returns `*ldap.Conn` whose `Search(req)` is an LDAP-injection sink.
|
||||
Some(TypeKind::LdapClient)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
|
|
@ -451,6 +553,10 @@ pub(crate) fn constructor_type(lang: Lang, callee: &str) -> Option<TypeKind> {
|
|||
"curl_init" => Some(TypeKind::HttpClient),
|
||||
"fopen" => Some(TypeKind::FileHandle),
|
||||
"SplFileObject" => Some(TypeKind::FileHandle),
|
||||
// DOMXPath: `$xp = new DOMXPath($doc)`. `$xp->query($expr)` /
|
||||
// `$xp->evaluate($expr)` are XPath-injection sinks; without a
|
||||
// distinct TypeKind they collide with the bare `query` SQL sink.
|
||||
"DOMXPath" => Some(TypeKind::XPathClient),
|
||||
_ => None,
|
||||
},
|
||||
Lang::C => match suffix {
|
||||
|
|
@ -524,6 +630,11 @@ pub(crate) fn constructor_type(lang: Lang, callee: &str) -> Option<TypeKind> {
|
|||
Some(TypeKind::DatabaseConnection)
|
||||
} else if after_colons.starts_with("File.") && matches!(suffix, "open" | "new") {
|
||||
Some(TypeKind::FileHandle)
|
||||
} else if callee.contains("Net::LDAP") && matches!(suffix, "new" | "open") {
|
||||
// net-ldap gem: `Net::LDAP.new(host: ...)` / `Net::LDAP.open`
|
||||
// returns a connection whose `search(base:, filter:)` accepts
|
||||
// an attacker-influenceable filter expression.
|
||||
Some(TypeKind::LdapClient)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
|
|
@ -768,8 +879,7 @@ pub fn analyze_types(
|
|||
/// Same as [`analyze_types`] but seeds [`SsaOp::Param`] values with
|
||||
/// per-position [`TypeKind`] facts from `param_types` (parallel-vec to
|
||||
/// the function's BodyMeta.params). An entry of `None` (or an out-of-
|
||||
/// range index) leaves the value at the default Param fact (Unknown),
|
||||
/// preserving the pre-Phase-3 behaviour.
|
||||
/// range index) leaves the value at the default Param fact (Unknown).
|
||||
pub fn analyze_types_with_param_types(
|
||||
body: &SsaBody,
|
||||
cfg: &Cfg,
|
||||
|
|
@ -810,8 +920,7 @@ pub fn analyze_types_with_param_types(
|
|||
SsaOp::Param { index } => {
|
||||
// Seed from the function's BodyMeta.param_types when
|
||||
// a TypeKind was recovered at CFG construction time.
|
||||
// Out-of-range / None entries fall back to Unknown,
|
||||
// matching the pre-Phase-3 behaviour.
|
||||
// Out-of-range / None entries fall back to Unknown.
|
||||
match param_types.get(*index).and_then(|t| t.clone()) {
|
||||
Some(tk) => TypeFact::from_kind(tk),
|
||||
None => TypeFact::unknown(),
|
||||
|
|
@ -820,7 +929,19 @@ pub fn analyze_types_with_param_types(
|
|||
SsaOp::SelfParam => TypeFact::from_kind(TypeKind::Object),
|
||||
SsaOp::CatchParam => TypeFact::from_kind(TypeKind::Object),
|
||||
SsaOp::Call { callee, args, .. } => {
|
||||
if let Some(ty) = lang.and_then(|l| constructor_type(l, callee)) {
|
||||
// CFG marks `Object.create(null)` (and future
|
||||
// null-prototype constructors) at lowering time.
|
||||
// Honour it ahead of generic constructor / arg-aware
|
||||
// dispatch so the returned SsaValue carries
|
||||
// `NullPrototypeObject` for prototype-pollution
|
||||
// suppression.
|
||||
let null_proto = cfg
|
||||
.node_weight(inst.cfg_node)
|
||||
.map(|ni| ni.call.produces_null_proto)
|
||||
.unwrap_or(false);
|
||||
if null_proto {
|
||||
TypeFact::from_kind(TypeKind::NullPrototypeObject)
|
||||
} else if let Some(ty) = lang.and_then(|l| constructor_type(l, callee)) {
|
||||
TypeFact::from_kind(ty)
|
||||
} else if let Some(ty) =
|
||||
lang.and_then(|l| arg_aware_call_type(l, callee, args, consts))
|
||||
|
|
@ -1667,7 +1788,7 @@ mod tests {
|
|||
|
||||
/// Param values seeded from `param_types` must surface
|
||||
/// the right TypeKind for downstream sink suppression. An out-of-
|
||||
/// range index falls back to Unknown (the pre-Phase-3 default).
|
||||
/// range index falls back to Unknown.
|
||||
#[test]
|
||||
fn param_types_seed_param_value_facts() {
|
||||
use crate::cfg::Cfg;
|
||||
|
|
@ -1728,7 +1849,7 @@ mod tests {
|
|||
// Index 99 is out of range → falls back to Unknown.
|
||||
assert_eq!(result.get_type(SsaValue(1)), Some(&TypeKind::Unknown));
|
||||
|
||||
// Empty slice = pre-Phase-3 behaviour.
|
||||
// Empty slice = type-unaware fallback (analyze_types path).
|
||||
let result2 = analyze_types(&body, &cfg, &consts, Some(Lang::Java));
|
||||
assert_eq!(result2.get_type(SsaValue(0)), Some(&TypeKind::Unknown));
|
||||
}
|
||||
|
|
@ -2364,7 +2485,7 @@ mod tests {
|
|||
));
|
||||
}
|
||||
|
||||
// ── JPA Criteria query suppression (Phase: real-repo openmrs FP) ───
|
||||
// ── JPA Criteria query suppression (real-repo openmrs FP) ─────────
|
||||
//
|
||||
// These tests pin the `TypeKind::JpaCriteriaQuery` variant + the
|
||||
// `is_safe_query_object_arg` predicate + the
|
||||
|
|
|
|||
614
src/ssa/xml_config.rs
Normal file
614
src/ssa/xml_config.rs
Normal file
|
|
@ -0,0 +1,614 @@
|
|||
//! Per-SSA-value XML-parser configuration tracking.
|
||||
//!
|
||||
//! Tracks "is this XML parser configured to disable external entities / DTD
|
||||
//! resolution" facts on parser-receiver SSA values. When a parse-class sink
|
||||
//! is reached and the receiver is provably configured for secure processing,
|
||||
//! the XXE bit is stripped from the sink's cap mask.
|
||||
//!
|
||||
//! The pass is intentionally a small forward dataflow run alongside type-fact
|
||||
//! analysis. It does NOT flow through the SSA taint engine's worklist. Phi
|
||||
//! nodes propagate the meet of operand configs (a flag is "set" only when all
|
||||
//! reaching operands set it), and copy assignments propagate the receiver's
|
||||
//! config. Recognised setter calls update the receiver's config in place;
|
||||
//! identity-style transformer calls that produce a child parser (e.g.
|
||||
//! `factory.newDocumentBuilder()`) inherit the receiver's config into the
|
||||
//! result value.
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use super::const_prop::ConstLattice;
|
||||
use super::ir::*;
|
||||
use crate::cfg::Cfg;
|
||||
use crate::symbol::Lang;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// Receiver-instance config carried forward from setter calls.
|
||||
///
|
||||
/// All flags default to `false` (parser may be unsafe). A `true` flag
|
||||
/// means: we have proven this parser was hardened along this control-flow
|
||||
/// path. The XXE-suppression check is `secure_processing ||
|
||||
/// disallow_doctype` — either gate is sufficient to neutralise external
|
||||
/// entity resolution in JAXP / lxml / xml2js.
|
||||
///
|
||||
/// `external_entities` is the *unsafe* polarity: when set to `true`, the
|
||||
/// parser was explicitly opted into external-entity resolution (e.g.
|
||||
/// `XMLParser(resolve_entities=True)`). A parse call with this flag
|
||||
/// retains XXE even if the language default would otherwise be safe.
|
||||
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct XmlParserConfig {
|
||||
pub secure_processing: bool,
|
||||
pub disallow_doctype: bool,
|
||||
pub external_entities: bool,
|
||||
}
|
||||
|
||||
impl XmlParserConfig {
|
||||
/// True when the parser is provably hardened against XXE.
|
||||
pub fn is_secure(&self) -> bool {
|
||||
(self.secure_processing || self.disallow_doctype) && !self.external_entities
|
||||
}
|
||||
|
||||
/// Phi-meet: a flag survives only when *both* operands set it. Used
|
||||
/// when the parser variable was reassigned across branches.
|
||||
fn meet(&self, other: &Self) -> Self {
|
||||
XmlParserConfig {
|
||||
secure_processing: self.secure_processing && other.secure_processing,
|
||||
disallow_doctype: self.disallow_doctype && other.disallow_doctype,
|
||||
// Unsafe polarity: ANY branch enabling external entities
|
||||
// contaminates the join. Conservative w.r.t. XXE.
|
||||
external_entities: self.external_entities || other.external_entities,
|
||||
}
|
||||
}
|
||||
|
||||
/// Union: caller updates the same receiver across multiple setter
|
||||
/// calls. All known-safe flags accumulate; unsafe is sticky.
|
||||
fn union(&self, other: &Self) -> Self {
|
||||
XmlParserConfig {
|
||||
secure_processing: self.secure_processing || other.secure_processing,
|
||||
disallow_doctype: self.disallow_doctype || other.disallow_doctype,
|
||||
external_entities: self.external_entities || other.external_entities,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Result of XML-parser config analysis.
|
||||
#[derive(Clone, Debug, Default, Serialize, Deserialize)]
|
||||
pub struct XmlParserConfigResult {
|
||||
pub configs: HashMap<SsaValue, XmlParserConfig>,
|
||||
}
|
||||
|
||||
impl XmlParserConfigResult {
|
||||
/// True when the value carries a config fact proving secure processing.
|
||||
pub fn is_secure(&self, v: SsaValue) -> bool {
|
||||
self.configs.get(&v).is_some_and(|c| c.is_secure())
|
||||
}
|
||||
|
||||
/// True when the value was explicitly opted into external-entity
|
||||
/// resolution (e.g. lxml `resolve_entities=True`).
|
||||
pub fn is_unsafe_explicit(&self, v: SsaValue) -> bool {
|
||||
self.configs.get(&v).is_some_and(|c| c.external_entities)
|
||||
}
|
||||
}
|
||||
|
||||
/// Suppress the `Cap::XXE` bit when the receiver of an XXE-class sink
|
||||
/// was provably hardened. Returns `true` when XXE should be stripped
|
||||
/// from the sink's cap mask.
|
||||
///
|
||||
/// Conservative defaults:
|
||||
/// * No receiver SSA value (free function) → returns `false` (cannot
|
||||
/// prove safety, fall through to existing classification).
|
||||
/// * Receiver carries no config fact → returns `false`.
|
||||
/// * `external_entities` flag is set → returns `false` even if a safe
|
||||
/// flag is also set, since the unsafe opt-in dominates.
|
||||
pub fn xxe_safe(receiver: Option<SsaValue>, xml_config: &XmlParserConfigResult) -> bool {
|
||||
let Some(rv) = receiver else {
|
||||
return false;
|
||||
};
|
||||
xml_config.is_secure(rv)
|
||||
}
|
||||
|
||||
/// Per-call analysis result: how this call mutates the parser-config
|
||||
/// universe.
|
||||
#[allow(dead_code)] // SeedResult reserved for future constructor-driven seeding
|
||||
enum ConfigEffect {
|
||||
/// No effect on parser configuration.
|
||||
None,
|
||||
/// Update the call's receiver in place by OR-ing the supplied config
|
||||
/// into its current config. Used for setter calls
|
||||
/// (`factory.setFeature(FEATURE_SECURE_PROCESSING, true)`).
|
||||
UpdateReceiver(XmlParserConfig),
|
||||
/// Inherit the receiver's config into the call's result value.
|
||||
/// Used for identity-style transformer calls
|
||||
/// (`factory.newDocumentBuilder()` returns a builder that shares
|
||||
/// the factory's hardening state).
|
||||
InheritFromReceiver,
|
||||
/// Initialise the call's result value with the supplied config.
|
||||
/// Used for constructor calls whose options reveal the unsafe-explicit
|
||||
/// opt-in (`new XMLParser({ processEntities: true })`,
|
||||
/// `lxml.etree.XMLParser(resolve_entities=True)`).
|
||||
SeedResult(XmlParserConfig),
|
||||
}
|
||||
|
||||
/// Classify a Call instruction's effect on the parser-config universe.
|
||||
///
|
||||
/// `arg_const` looks up the const-lattice value for an SSA arg position
|
||||
/// (returns `None` if the position is out of range or the SSA value is
|
||||
/// not a known constant). Setter detection consults arg-0 (the feature
|
||||
/// name) and arg-1 (the boolean flag).
|
||||
///
|
||||
/// `arg_idents` is the matching CFG-level [`info.call.arg_uses`] vector
|
||||
/// (per-position identifier text from the source AST). Used to recover
|
||||
/// non-literal feature names like `XMLConstants.FEATURE_SECURE_PROCESSING`
|
||||
/// or bare identifiers (`FEATURE_SECURE_PROCESSING`, `Boolean.TRUE`)
|
||||
/// that const-propagation cannot fold to a literal.
|
||||
///
|
||||
/// `arg_literals` is the matching CFG-level
|
||||
/// [`info.call.arg_string_literals`] vector (per-position literal text;
|
||||
/// strings, booleans, and null/nil/None tokens). Used to recover the
|
||||
/// boolean polarity of `setFeature(NAME, true)` since SSA lowering does
|
||||
/// not bind boolean arg literals to any SSA value (`arg_uses` skips them
|
||||
/// because they are not identifiers).
|
||||
fn classify_call(
|
||||
lang: Lang,
|
||||
callee: &str,
|
||||
args: &[smallvec::SmallVec<[SsaValue; 2]>],
|
||||
receiver: Option<SsaValue>,
|
||||
consts: &HashMap<SsaValue, ConstLattice>,
|
||||
arg_idents: &[Vec<String>],
|
||||
arg_literals: &[Option<String>],
|
||||
) -> ConfigEffect {
|
||||
let suffix = callee.rsplit(['.', ':']).next().unwrap_or(callee);
|
||||
|
||||
// Helper: lookup the const lattice for arg N's first SSA value.
|
||||
let arg_const = |n: usize| -> Option<&ConstLattice> {
|
||||
args.get(n)
|
||||
.and_then(|vals| vals.first())
|
||||
.and_then(|v| consts.get(v))
|
||||
};
|
||||
// Helper: text of the const lattice (for string/identifier comparison).
|
||||
let arg_text = |n: usize| -> Option<String> {
|
||||
match arg_const(n)? {
|
||||
ConstLattice::Str(s) => Some(s.clone()),
|
||||
ConstLattice::Bool(b) => Some(b.to_string()),
|
||||
ConstLattice::Int(i) => Some(i.to_string()),
|
||||
_ => None,
|
||||
}
|
||||
};
|
||||
// Helper: textual identifier(s) at arg N from the CFG node. Non-literal
|
||||
// feature names (`XMLConstants.FEATURE_SECURE_PROCESSING`, bare
|
||||
// `FEATURE_SECURE_PROCESSING`, etc.) surface here.
|
||||
let arg_ident_text = |n: usize| -> Vec<&str> {
|
||||
arg_idents
|
||||
.get(n)
|
||||
.map(|v| v.iter().map(|s| s.as_str()).collect())
|
||||
.unwrap_or_default()
|
||||
};
|
||||
let arg_bool = |n: usize| -> Option<bool> {
|
||||
if let Some(b) = arg_const(n).and_then(|c| match c {
|
||||
ConstLattice::Bool(b) => Some(*b),
|
||||
ConstLattice::Str(s) => match s.as_str() {
|
||||
"True" | "true" => Some(true),
|
||||
"False" | "false" => Some(false),
|
||||
_ => None,
|
||||
},
|
||||
_ => None,
|
||||
}) {
|
||||
return Some(b);
|
||||
}
|
||||
// Fallback: tree-sitter classifies `true` / `false` as bare
|
||||
// identifiers in some grammars. Inspect the arg's use list.
|
||||
for tok in arg_ident_text(n) {
|
||||
match tok {
|
||||
"true" | "True" | "Boolean.TRUE" => return Some(true),
|
||||
"false" | "False" | "Boolean.FALSE" => return Some(false),
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
// Fallback: literal tokens lifted by `extract_arg_string_literals`
|
||||
// (booleans / null / numeric tokens). Java `setFeature(NAME, true)`
|
||||
// does not bind the `true` token to any SSA value, but the literal
|
||||
// surfaces here so the polarity can still be read.
|
||||
if let Some(Some(lit)) = arg_literals.get(n) {
|
||||
match lit.as_str() {
|
||||
"true" | "True" | "Boolean.TRUE" => return Some(true),
|
||||
"false" | "False" | "Boolean.FALSE" => return Some(false),
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
None
|
||||
};
|
||||
|
||||
match lang {
|
||||
Lang::Java => match suffix {
|
||||
// `factory.setFeature(NAME, BOOL)` — the canonical JAXP
|
||||
// hardening switch. Three feature names matter:
|
||||
// * `FEATURE_SECURE_PROCESSING` (XMLConstants.FEATURE_SECURE_PROCESSING)
|
||||
// * `http://apache.org/xml/features/disallow-doctype-decl`
|
||||
// * `http://xml.org/sax/features/external-general-entities`
|
||||
// * `http://xml.org/sax/features/external-parameter-entities`
|
||||
// The first two harden by being SET TRUE; the entity ones
|
||||
// harden by being SET FALSE.
|
||||
"setFeature" => {
|
||||
if receiver.is_none() {
|
||||
return ConfigEffect::None;
|
||||
}
|
||||
let name_lit = arg_text(0).unwrap_or_default();
|
||||
let name_idents = arg_ident_text(0);
|
||||
let value = arg_bool(1);
|
||||
let any_ident = |needle: &str| name_idents.iter().any(|s| s.contains(needle));
|
||||
let mut cfg = XmlParserConfig::default();
|
||||
if name_lit == "FEATURE_SECURE_PROCESSING"
|
||||
|| name_lit.contains("XMLConstants.FEATURE_SECURE_PROCESSING")
|
||||
|| name_lit.contains("javax.xml.XMLConstants/feature/secure-processing")
|
||||
|| any_ident("FEATURE_SECURE_PROCESSING")
|
||||
{
|
||||
if value == Some(true) {
|
||||
cfg.secure_processing = true;
|
||||
}
|
||||
} else if name_lit.contains("disallow-doctype-decl")
|
||||
|| any_ident("disallow-doctype-decl")
|
||||
{
|
||||
if value == Some(true) {
|
||||
cfg.disallow_doctype = true;
|
||||
}
|
||||
} else if (name_lit.contains("external-general-entities")
|
||||
|| name_lit.contains("external-parameter-entities")
|
||||
|| name_lit.contains("load-external-dtd")
|
||||
|| any_ident("external-general-entities")
|
||||
|| any_ident("external-parameter-entities")
|
||||
|| any_ident("load-external-dtd"))
|
||||
&& value == Some(false)
|
||||
{
|
||||
cfg.disallow_doctype = true;
|
||||
}
|
||||
if cfg == XmlParserConfig::default() {
|
||||
ConfigEffect::None
|
||||
} else {
|
||||
ConfigEffect::UpdateReceiver(cfg)
|
||||
}
|
||||
}
|
||||
// `factory.setExpandEntityReferences(false)` —
|
||||
// DocumentBuilderFactory legacy hardening switch.
|
||||
"setExpandEntityReferences" => {
|
||||
if receiver.is_none() {
|
||||
return ConfigEffect::None;
|
||||
}
|
||||
if arg_bool(0) == Some(false) {
|
||||
ConfigEffect::UpdateReceiver(XmlParserConfig {
|
||||
disallow_doctype: true,
|
||||
..Default::default()
|
||||
})
|
||||
} else {
|
||||
ConfigEffect::None
|
||||
}
|
||||
}
|
||||
// `factory.newDocumentBuilder()` / `factory.newSAXParser()` /
|
||||
// `parser.getXMLReader()` propagate the hardening state from
|
||||
// the factory (receiver) onto the produced parser instance
|
||||
// (return value). Without this propagation, a hardened
|
||||
// factory's child builder would parse with no config.
|
||||
"newDocumentBuilder" | "newSAXParser" | "getXMLReader" | "newXMLReader" => {
|
||||
if receiver.is_some() {
|
||||
ConfigEffect::InheritFromReceiver
|
||||
} else {
|
||||
ConfigEffect::None
|
||||
}
|
||||
}
|
||||
_ => ConfigEffect::None,
|
||||
},
|
||||
Lang::Python => {
|
||||
// `lxml.etree.XMLParser(resolve_entities=False)` — the lxml
|
||||
// parser default resolves entities; the keyword argument
|
||||
// changes that. Const-propagation will not generally see the
|
||||
// kwarg value here (kwargs land in `info.call.kwargs`, not
|
||||
// positional args), so we treat the constructor as a
|
||||
// best-effort initialiser keyed off the keyword's literal
|
||||
// text via the static-map. When neither keyword surfaces,
|
||||
// the parser keeps the default-empty config.
|
||||
if callee.ends_with("etree.XMLParser") || suffix == "XMLParser" {
|
||||
// Positional kwargs aren't reliable here; rely on the
|
||||
// call's static-map kwargs (handled by the per-callsite
|
||||
// pass below). Fall through to None at this layer.
|
||||
ConfigEffect::None
|
||||
} else {
|
||||
ConfigEffect::None
|
||||
}
|
||||
}
|
||||
_ => ConfigEffect::None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Run the XML-parser config analysis on an SSA body.
|
||||
pub fn analyze_xml_parser_config(
|
||||
body: &SsaBody,
|
||||
cfg: &Cfg,
|
||||
consts: &HashMap<SsaValue, ConstLattice>,
|
||||
lang: Option<Lang>,
|
||||
) -> XmlParserConfigResult {
|
||||
let Some(lang) = lang else {
|
||||
return XmlParserConfigResult::default();
|
||||
};
|
||||
|
||||
let mut configs: HashMap<SsaValue, XmlParserConfig> = HashMap::new();
|
||||
|
||||
// Helper: read the kwargs attached to the original CFG node for the
|
||||
// call instruction at hand. Used for languages where parser
|
||||
// hardening flags arrive as keyword arguments (Python lxml).
|
||||
let lookup_kwargs = |node_idx: petgraph::graph::NodeIndex| -> Vec<(String, Vec<String>)> {
|
||||
cfg.node_weight(node_idx)
|
||||
.map(|ni| ni.call.kwargs.clone())
|
||||
.unwrap_or_default()
|
||||
};
|
||||
// Helper: read the positional arg-use identifier vectors (e.g.
|
||||
// `XMLConstants.FEATURE_SECURE_PROCESSING` surfaces as a dotted path
|
||||
// here even when const-prop folds it to nothing).
|
||||
let lookup_arg_idents = |node_idx: petgraph::graph::NodeIndex| -> Vec<Vec<String>> {
|
||||
cfg.node_weight(node_idx)
|
||||
.map(|ni| ni.call.arg_uses.clone())
|
||||
.unwrap_or_default()
|
||||
};
|
||||
// Helper: read the per-position literal-token vector
|
||||
// (`arg_string_literals` lifts strings, booleans, null tokens, and
|
||||
// numeric tokens — see `extract_arg_string_literals`).
|
||||
let lookup_arg_literals = |node_idx: petgraph::graph::NodeIndex| -> Vec<Option<String>> {
|
||||
cfg.node_weight(node_idx)
|
||||
.map(|ni| ni.call.arg_string_literals.clone())
|
||||
.unwrap_or_default()
|
||||
};
|
||||
|
||||
// Pass 1 — direct effects from Call instructions in source order.
|
||||
// Setter updates and constructor seeds are effectively monotone
|
||||
// (we OR safe flags onto the receiver / value), so a single pass is
|
||||
// sufficient when phi nodes only appear after the setter. Pass 2
|
||||
// below handles phi/copy propagation.
|
||||
for block in &body.blocks {
|
||||
for inst in block.body.iter() {
|
||||
if let SsaOp::Call {
|
||||
callee,
|
||||
args,
|
||||
receiver,
|
||||
..
|
||||
} = &inst.op
|
||||
{
|
||||
// Python lxml.etree.XMLParser(resolve_entities=...): the
|
||||
// kwarg lives on the CFG node's `kwargs` list, not in
|
||||
// the SSA Call args. Inspect it directly.
|
||||
if matches!(lang, Lang::Python)
|
||||
&& (callee.ends_with("etree.XMLParser")
|
||||
|| callee.rsplit(['.', ':']).next() == Some("XMLParser"))
|
||||
{
|
||||
let kwargs = lookup_kwargs(inst.cfg_node);
|
||||
for (name, values) in &kwargs {
|
||||
if name == "resolve_entities" {
|
||||
// Look up the literal text on the matching
|
||||
// argument; tree-sitter-python keywords surface
|
||||
// the value identifier in the `values` slot.
|
||||
if values.iter().any(|v| v == "True" || v == "true") {
|
||||
let entry = configs.entry(inst.value).or_default();
|
||||
entry.external_entities = true;
|
||||
} else if values.iter().any(|v| v == "False" || v == "false") {
|
||||
let entry = configs.entry(inst.value).or_default();
|
||||
entry.disallow_doctype = true;
|
||||
}
|
||||
}
|
||||
if name == "no_network" && values.iter().any(|v| v == "True" || v == "true")
|
||||
{
|
||||
let entry = configs.entry(inst.value).or_default();
|
||||
entry.disallow_doctype = true;
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// JS/TS: `new XMLParser({ processEntities: true, ... })`.
|
||||
// The fast-xml-parser constructor's option-object fields
|
||||
// are not exposed via const-prop, but the CFG layer
|
||||
// captures string-literal kwargs in the call's
|
||||
// `arg_string_literals` for object-literal positions.
|
||||
// For now, mark the result as unsafe-explicit only when
|
||||
// the static-kwargs list carries `processEntities=true`.
|
||||
if matches!(lang, Lang::JavaScript | Lang::TypeScript)
|
||||
&& (callee.ends_with("XMLParser") || callee.ends_with(".XMLParser"))
|
||||
{
|
||||
let kwargs = lookup_kwargs(inst.cfg_node);
|
||||
for (name, values) in &kwargs {
|
||||
if name == "processEntities" && values.iter().any(|v| v == "true") {
|
||||
let entry = configs.entry(inst.value).or_default();
|
||||
entry.external_entities = true;
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
let arg_idents = lookup_arg_idents(inst.cfg_node);
|
||||
let arg_literals = lookup_arg_literals(inst.cfg_node);
|
||||
match classify_call(
|
||||
lang,
|
||||
callee,
|
||||
args,
|
||||
*receiver,
|
||||
consts,
|
||||
&arg_idents,
|
||||
&arg_literals,
|
||||
) {
|
||||
ConfigEffect::None => {}
|
||||
ConfigEffect::UpdateReceiver(delta) => {
|
||||
if let Some(rv) = *receiver {
|
||||
let entry = configs.entry(rv).or_default();
|
||||
*entry = entry.union(&delta);
|
||||
}
|
||||
}
|
||||
ConfigEffect::InheritFromReceiver => {
|
||||
if let Some(rv) = *receiver
|
||||
&& let Some(parent) = configs.get(&rv).copied()
|
||||
{
|
||||
let entry = configs.entry(inst.value).or_default();
|
||||
*entry = entry.union(&parent);
|
||||
}
|
||||
}
|
||||
ConfigEffect::SeedResult(seed) => {
|
||||
let entry = configs.entry(inst.value).or_default();
|
||||
*entry = entry.union(&seed);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Pass 2 — fixed-point propagation through copy assignments and phi
|
||||
// joins. Caps the iteration count: in practice 2-3 rounds suffice
|
||||
// on intra-procedural shapes.
|
||||
for _ in 0..6 {
|
||||
let mut changed = false;
|
||||
for block in &body.blocks {
|
||||
for inst in &block.phis {
|
||||
if let SsaOp::Phi(operands) = &inst.op {
|
||||
let mut acc: Option<XmlParserConfig> = None;
|
||||
for (_, val) in operands {
|
||||
let cfg_val = configs.get(val).copied().unwrap_or_default();
|
||||
acc = Some(match acc {
|
||||
None => cfg_val,
|
||||
Some(prev) => prev.meet(&cfg_val),
|
||||
});
|
||||
}
|
||||
if let Some(joined) = acc
|
||||
&& joined != XmlParserConfig::default()
|
||||
{
|
||||
let prev = configs.get(&inst.value).copied();
|
||||
if prev != Some(joined) {
|
||||
configs.insert(inst.value, joined);
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
for inst in &block.body {
|
||||
if let SsaOp::Assign(uses) = &inst.op
|
||||
&& uses.len() == 1
|
||||
&& let Some(src_cfg) = configs.get(&uses[0]).copied()
|
||||
&& src_cfg != XmlParserConfig::default()
|
||||
{
|
||||
let prev = configs.get(&inst.value).copied().unwrap_or_default();
|
||||
let new_cfg = prev.union(&src_cfg);
|
||||
if Some(new_cfg) != configs.get(&inst.value).copied() {
|
||||
configs.insert(inst.value, new_cfg);
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
// InheritFromReceiver may need a re-pass when the
|
||||
// receiver's config was set after the call itself was
|
||||
// visited (e.g. the call appears in a later block whose
|
||||
// dominator chain only resolves on the second iteration).
|
||||
if let SsaOp::Call {
|
||||
callee,
|
||||
receiver: Some(rv),
|
||||
..
|
||||
} = &inst.op
|
||||
{
|
||||
let suffix = callee.rsplit(['.', ':']).next().unwrap_or(callee);
|
||||
let inherit = matches!(lang, Lang::Java)
|
||||
&& matches!(
|
||||
suffix,
|
||||
"newDocumentBuilder" | "newSAXParser" | "getXMLReader" | "newXMLReader"
|
||||
);
|
||||
if inherit && let Some(parent) = configs.get(rv).copied() {
|
||||
let prev = configs.get(&inst.value).copied().unwrap_or_default();
|
||||
let new_cfg = prev.union(&parent);
|
||||
if Some(new_cfg) != configs.get(&inst.value).copied()
|
||||
&& new_cfg != XmlParserConfig::default()
|
||||
{
|
||||
configs.insert(inst.value, new_cfg);
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if !changed {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
XmlParserConfigResult { configs }
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // A config with no flags set must not count as hardened.
    #[test]
    fn default_config_is_unsafe() {
        assert!(!XmlParserConfig::default().is_secure());
    }

    // `secure_processing` on its own is enough to count as hardened.
    #[test]
    fn secure_processing_alone_is_safe() {
        let hardened = XmlParserConfig {
            secure_processing: true,
            ..Default::default()
        };
        assert!(hardened.is_secure());
    }

    // An explicit unsafe flag wins over a safe flag on the same value.
    #[test]
    fn external_entities_overrides_safe_flag() {
        let mixed = XmlParserConfig {
            secure_processing: true,
            external_entities: true,
            ..Default::default()
        };
        assert!(!mixed.is_secure());
    }

    // Safe flags survive a meet only when both sides carry them.
    #[test]
    fn meet_keeps_only_intersection_of_safe_flags() {
        let both_flags = XmlParserConfig {
            secure_processing: true,
            disallow_doctype: true,
            ..Default::default()
        };
        let one_flag = XmlParserConfig {
            secure_processing: true,
            ..Default::default()
        };
        let joined = both_flags.meet(&one_flag);
        assert!(joined.secure_processing);
        assert!(!joined.disallow_doctype);
    }

    // The unsafe flag is sticky: one unsafe branch makes the join
    // insecure even though the other branch was hardened.
    #[test]
    fn meet_propagates_unsafe_flag() {
        let hardened = XmlParserConfig {
            secure_processing: true,
            ..Default::default()
        };
        let explicit_unsafe = XmlParserConfig {
            external_entities: true,
            ..Default::default()
        };
        assert!(!hardened.meet(&explicit_unsafe).is_secure());
    }

    // Without a receiver value there is nothing to prove safety from.
    #[test]
    fn xxe_safe_returns_false_without_receiver() {
        assert!(!xxe_safe(None, &XmlParserConfigResult::default()));
    }

    // Only the receiver that carries a hardened fact is reported safe.
    #[test]
    fn xxe_safe_uses_receiver_config() {
        let hardened = XmlParserConfig {
            secure_processing: true,
            ..Default::default()
        };
        let result = XmlParserConfigResult {
            configs: HashMap::from([(SsaValue(7), hardened)]),
        };
        assert!(xxe_safe(Some(SsaValue(7)), &result));
        assert!(!xxe_safe(Some(SsaValue(8)), &result));
    }
}
|
||||
235
src/ssa/xpath_config.rs
Normal file
235
src/ssa/xpath_config.rs
Normal file
|
|
@ -0,0 +1,235 @@
|
|||
//! Per-SSA-value XPath-receiver configuration tracking.
|
||||
//!
|
||||
//! Mirrors [`crate::ssa::xml_config`] but for `XPath` instances rather
|
||||
//! than JAXP parser instances. Tracks "is this XPath receiver bound to
|
||||
//! an `XPathVariableResolver`" along the control-flow path: when a
|
||||
//! resolver has been bound, subsequent `xpath.evaluate(expr, ...)` calls
|
||||
//! are treated as parameterised and the `XPATH_INJECTION` bit is
|
||||
//! stripped from the sink's cap mask.
|
||||
//!
|
||||
//! Same engine shape as [`crate::ssa::xml_config::XmlParserConfigResult`]:
|
||||
//! a small forward dataflow run alongside type-fact analysis. Phi nodes
|
||||
//! propagate the meet of operand configs (a flag is "set" only when all
|
||||
//! reaching operands set it), copy assignments propagate the receiver's
|
||||
//! config, and `setXPathVariableResolver` calls update the receiver's
|
||||
//! config in place.
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use super::ir::*;
|
||||
use crate::cfg::Cfg;
|
||||
use crate::symbol::Lang;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// Receiver-instance config carried forward from `setXPathVariableResolver`
|
||||
/// calls. All flags default to `false` (resolver not bound). A `true`
|
||||
/// flag means: we have proven this XPath receiver was configured for
|
||||
/// parameterised evaluation along this control-flow path.
|
||||
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct XPathReceiverConfig {
|
||||
/// True when `xpath.setXPathVariableResolver(...)` has been called
|
||||
/// on this receiver. Set by Pass 1 on the receiver SSA value;
|
||||
/// propagated through phi joins (meet) and copy assignments (union).
|
||||
pub has_resolver: bool,
|
||||
}
|
||||
|
||||
impl XPathReceiverConfig {
|
||||
/// True when the receiver is provably bound to a variable resolver.
|
||||
pub fn is_parameterised(&self) -> bool {
|
||||
self.has_resolver
|
||||
}
|
||||
|
||||
/// Phi-meet: a flag survives only when *both* operands set it. Used
|
||||
/// when the XPath variable was reassigned across branches and only
|
||||
/// some branches bound a resolver.
|
||||
fn meet(&self, other: &Self) -> Self {
|
||||
XPathReceiverConfig {
|
||||
has_resolver: self.has_resolver && other.has_resolver,
|
||||
}
|
||||
}
|
||||
|
||||
/// Union: caller binds a resolver after a copy / phi-join. Any
|
||||
/// branch setting the flag wins for the union (used for copy
|
||||
/// propagation, which preserves the source value's flags).
|
||||
fn union(&self, other: &Self) -> Self {
|
||||
XPathReceiverConfig {
|
||||
has_resolver: self.has_resolver || other.has_resolver,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Result of XPath-receiver config analysis.
|
||||
#[derive(Clone, Debug, Default, Serialize, Deserialize)]
|
||||
pub struct XPathConfigResult {
|
||||
pub configs: HashMap<SsaValue, XPathReceiverConfig>,
|
||||
}
|
||||
|
||||
impl XPathConfigResult {
|
||||
/// True when the value carries a config fact proving resolver
|
||||
/// binding.
|
||||
pub fn is_parameterised(&self, v: SsaValue) -> bool {
|
||||
self.configs.get(&v).is_some_and(|c| c.is_parameterised())
|
||||
}
|
||||
}
|
||||
|
||||
/// Suppress the `Cap::XPATH_INJECTION` bit when the receiver of an XPath
|
||||
/// `evaluate` / `compile` sink was provably bound to a variable
|
||||
/// resolver. Returns `true` when XPATH_INJECTION should be stripped
|
||||
/// from the sink's cap mask.
|
||||
///
|
||||
/// Conservative defaults:
|
||||
/// * No receiver SSA value (free function) → returns `false` (cannot
|
||||
/// prove safety, fall through to existing classification).
|
||||
/// * Receiver carries no config fact → returns `false`.
|
||||
pub fn xpath_safe(receiver: Option<SsaValue>, xpath_config: &XPathConfigResult) -> bool {
|
||||
let Some(rv) = receiver else {
|
||||
return false;
|
||||
};
|
||||
xpath_config.is_parameterised(rv)
|
||||
}
|
||||
|
||||
/// Run the XPath-receiver config analysis on an SSA body.
|
||||
///
|
||||
/// Currently models Java's `setXPathVariableResolver` only — the only
|
||||
/// language-level resolver-binding API for XPath in the existing
|
||||
/// detection corpus. PHP's `DOMXPath::registerPhpFunctions()` is a
|
||||
/// different mechanism (PHP function registration) and not modelled
|
||||
/// here.
|
||||
pub fn analyze_xpath_config(body: &SsaBody, cfg: &Cfg, lang: Option<Lang>) -> XPathConfigResult {
|
||||
let Some(lang) = lang else {
|
||||
return XPathConfigResult::default();
|
||||
};
|
||||
if !matches!(lang, Lang::Java) {
|
||||
return XPathConfigResult::default();
|
||||
}
|
||||
|
||||
let mut configs: HashMap<SsaValue, XPathReceiverConfig> = HashMap::new();
|
||||
|
||||
// Pass 1 — direct effects from Call instructions in source order.
|
||||
// `setXPathVariableResolver` updates the call's receiver in place;
|
||||
// any non-null argument is treated as a resolver binding. Argument
|
||||
// null-check would require a const-prop fact, but the conservative
|
||||
// direction here is to assume the bound value is non-null (matches the
|
||||
// XML parser-config setter semantics).
|
||||
for block in &body.blocks {
|
||||
for inst in block.body.iter() {
|
||||
if let SsaOp::Call {
|
||||
callee, receiver, ..
|
||||
} = &inst.op
|
||||
{
|
||||
let suffix = callee.rsplit(['.', ':']).next().unwrap_or(callee);
|
||||
if suffix == "setXPathVariableResolver"
|
||||
&& let Some(rv) = receiver
|
||||
{
|
||||
let entry = configs.entry(*rv).or_default();
|
||||
entry.has_resolver = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if configs.is_empty() {
|
||||
return XPathConfigResult::default();
|
||||
}
|
||||
|
||||
// Pass 2 — fixed-point propagation through copy assignments and
|
||||
// phi joins. Caps the iteration count: in practice 2-3 rounds
|
||||
// suffice on intra-procedural shapes.
|
||||
let _ = cfg; // CFG retained for parity with `xml_config`; reserved for
|
||||
// future kwarg-driven seeds (e.g. constructor options).
|
||||
for _ in 0..6 {
|
||||
let mut changed = false;
|
||||
for block in &body.blocks {
|
||||
for inst in &block.phis {
|
||||
if let SsaOp::Phi(operands) = &inst.op {
|
||||
let mut acc: Option<XPathReceiverConfig> = None;
|
||||
for (_, val) in operands {
|
||||
let cfg_val = configs.get(val).copied().unwrap_or_default();
|
||||
acc = Some(match acc {
|
||||
None => cfg_val,
|
||||
Some(prev) => prev.meet(&cfg_val),
|
||||
});
|
||||
}
|
||||
if let Some(joined) = acc
|
||||
&& joined != XPathReceiverConfig::default()
|
||||
{
|
||||
let prev = configs.get(&inst.value).copied();
|
||||
if prev != Some(joined) {
|
||||
configs.insert(inst.value, joined);
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
for inst in &block.body {
|
||||
if let SsaOp::Assign(uses) = &inst.op
|
||||
&& uses.len() == 1
|
||||
&& let Some(src_cfg) = configs.get(&uses[0]).copied()
|
||||
&& src_cfg != XPathReceiverConfig::default()
|
||||
{
|
||||
let prev = configs.get(&inst.value).copied().unwrap_or_default();
|
||||
let new_cfg = prev.union(&src_cfg);
|
||||
if Some(new_cfg) != configs.get(&inst.value).copied() {
|
||||
configs.insert(inst.value, new_cfg);
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if !changed {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
XPathConfigResult { configs }
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for a receiver fact with a resolver bound.
    const BOUND: XPathReceiverConfig = XPathReceiverConfig { has_resolver: true };
    /// Shorthand for a receiver fact without a resolver.
    const UNBOUND: XPathReceiverConfig = XPathReceiverConfig {
        has_resolver: false,
    };

    // The default fact must not count as parameterised.
    #[test]
    fn default_config_is_unparameterised() {
        assert!(!XPathReceiverConfig::default().is_parameterised());
    }

    // Setting the flag is what makes a receiver parameterised.
    #[test]
    fn has_resolver_marks_parameterised() {
        assert!(BOUND.is_parameterised());
    }

    // A join of a bound and an unbound branch loses the flag.
    #[test]
    fn meet_keeps_intersection() {
        assert!(!BOUND.meet(&UNBOUND).has_resolver);
    }

    // A join of two bound branches keeps the flag.
    #[test]
    fn meet_both_set_keeps_set() {
        assert!(BOUND.meet(&BOUND).has_resolver);
    }

    // Without a receiver value there is nothing to prove safety from.
    #[test]
    fn xpath_safe_returns_false_without_receiver() {
        assert!(!xpath_safe(None, &XPathConfigResult::default()));
    }

    // Only the receiver that carries a bound fact is reported safe.
    #[test]
    fn xpath_safe_uses_receiver_config() {
        let result = XPathConfigResult {
            configs: HashMap::from([(SsaValue(7), BOUND)]),
        };
        assert!(xpath_safe(Some(SsaValue(7)), &result));
        assert!(!xpath_safe(Some(SsaValue(8)), &result));
    }
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue