Performance and precision pass (#64)

This commit is contained in:
Eli Peter 2026-05-04 19:58:04 -04:00 committed by GitHub
parent c7c5e0f3a1
commit fb698d2c27
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
97 changed files with 9932 additions and 517 deletions

View file

@ -1,6 +1,7 @@
use std::collections::{HashMap, HashSet, VecDeque};
use serde::{Deserialize, Serialize};
use smallvec::SmallVec;
use super::ir::*;
@ -96,40 +97,56 @@ pub struct ConstPropResult {
}
/// Run Sparse Conditional Constant Propagation on an SSA body.
///
/// Internal storage is dense `Vec`-indexed by [`SsaValue`] / [`BlockId`] to
/// avoid the per-lookup `SipHash` cost of `HashMap<SsaValue, _>` /
/// `HashSet<(BlockId, BlockId)>` that previously dominated the inner
/// fixed-point loop. The public [`ConstPropResult`] still exposes the
/// `HashMap`-shaped contract; the conversion at the end of the function is
/// O(num_values) and runs once.
pub fn const_propagate(body: &SsaBody) -> ConstPropResult {
let num_blocks = body.blocks.len();
let num_values = body.value_defs.len();
// Per-value lattice: starts at Top
let mut values: HashMap<SsaValue, ConstLattice> = HashMap::new();
// Dense per-value lattice (`Vec` indexed by `SsaValue.0`). All values
// are defined by exactly one inst (phi or body), so initialising the
// entire range to Top is equivalent to the previous per-inst insert
// pass at strictly lower cost (no hashing).
let mut values: Vec<ConstLattice> = vec![ConstLattice::Top; num_values];
// Executable flags per CFG edge (from_block, to_block)
let mut executable_edges: HashSet<(BlockId, BlockId)> = HashSet::new();
// Executable blocks
let mut executable_blocks: HashSet<BlockId> = HashSet::new();
// Per-block executability and per-(dest, pred) executable-edge bitmap.
// Edges are stored as a per-destination list of executable predecessors
// — phi evaluation only ever asks "is `(pred, this_block)` executable?",
// so a tiny SmallVec scan over the dest's predecessors beats a
// `HashSet<(BlockId, BlockId)>::contains` (which hashes a 64-bit pair
// for every operand of every phi).
let mut executable_blocks: Vec<bool> = vec![false; num_blocks];
let mut executable_preds: Vec<SmallVec<[BlockId; 2]>> = vec![SmallVec::new(); num_blocks];
// Two worklists
// Worklists
let mut cfg_worklist: VecDeque<BlockId> = VecDeque::new();
let mut ssa_worklist: VecDeque<SsaValue> = VecDeque::new();
// Mark entry executable
executable_blocks.insert(body.entry);
executable_blocks[body.entry.0 as usize] = true;
cfg_worklist.push_back(body.entry);
// Build use-map: SsaValue → list of (BlockId, instruction index in block)
// so we can propagate SSA value changes efficiently.
let mut use_sites: HashMap<SsaValue, Vec<BlockId>> = HashMap::new();
// Use-map: dense `Vec` indexed by `SsaValue.0`. Populated in a single
// pass via the closure-based [`inst_uses_each`] helper, which avoids
// the heap allocation of the prior `inst_uses() -> Vec<SsaValue>`
// factory.
let mut use_sites: Vec<SmallVec<[BlockId; 2]>> = vec![SmallVec::new(); num_values];
for block in &body.blocks {
for inst in block.phis.iter().chain(block.body.iter()) {
for used_val in inst_uses(inst) {
use_sites.entry(used_val).or_default().push(block.id);
}
}
}
// Initialize all values to Top
for block in &body.blocks {
for inst in block.phis.iter().chain(block.body.iter()) {
values.insert(inst.value, ConstLattice::Top);
inst_uses_each(inst, |used_val| {
let idx = used_val.0 as usize;
if idx < use_sites.len() {
let bucket = &mut use_sites[idx];
if bucket.last() != Some(&block.id) {
bucket.push(block.id);
}
}
});
}
}
@ -144,10 +161,10 @@ pub fn const_propagate(body: &SsaBody) -> ConstPropResult {
// Evaluate phis
for phi in &block.phis {
if let SsaOp::Phi(operands) = &phi.op {
let old = values.get(&phi.value).cloned().unwrap_or(ConstLattice::Top);
let new_val = eval_phi(operands, &values, &executable_edges, block_id);
let old = lookup(&values, phi.value);
let new_val = eval_phi(operands, &values, &executable_preds, block_id);
if new_val != old {
values.insert(phi.value, new_val);
store(&mut values, phi.value, new_val);
ssa_worklist.push_back(phi.value);
changed = true;
}
@ -156,13 +173,10 @@ pub fn const_propagate(body: &SsaBody) -> ConstPropResult {
// Evaluate body instructions
for inst in &block.body {
let old = values
.get(&inst.value)
.cloned()
.unwrap_or(ConstLattice::Top);
let old = lookup(&values, inst.value);
let new_val = eval_inst(inst, &values);
if new_val != old {
values.insert(inst.value, new_val);
store(&mut values, inst.value, new_val);
ssa_worklist.push_back(inst.value);
changed = true;
}
@ -173,7 +187,7 @@ pub fn const_propagate(body: &SsaBody) -> ConstPropResult {
block,
body,
&values,
&mut executable_edges,
&mut executable_preds,
&mut executable_blocks,
&mut cfg_worklist,
);
@ -181,54 +195,57 @@ pub fn const_propagate(body: &SsaBody) -> ConstPropResult {
// Process SSA worklist
while let Some(val) = ssa_worklist.pop_front() {
if let Some(blocks) = use_sites.get(&val) {
for &block_id in blocks {
if !executable_blocks.contains(&block_id) {
continue;
}
let block = body.block(block_id);
// Re-evaluate phis using this value
for phi in &block.phis {
if let SsaOp::Phi(operands) = &phi.op
&& operands.iter().any(|(_, v)| *v == val)
{
let old = values.get(&phi.value).cloned().unwrap_or(ConstLattice::Top);
let new_val = eval_phi(operands, &values, &executable_edges, block_id);
if new_val != old {
values.insert(phi.value, new_val);
ssa_worklist.push_back(phi.value);
changed = true;
}
}
}
// Re-evaluate body instructions using this value
for inst in &block.body {
if inst_uses(inst).contains(&val) {
let old = values
.get(&inst.value)
.cloned()
.unwrap_or(ConstLattice::Top);
let new_val = eval_inst(inst, &values);
if new_val != old {
values.insert(inst.value, new_val);
ssa_worklist.push_back(inst.value);
changed = true;
}
}
}
// Re-evaluate terminator if condition changed
process_terminator(
block,
body,
&values,
&mut executable_edges,
&mut executable_blocks,
&mut cfg_worklist,
);
let val_idx = val.0 as usize;
if val_idx >= use_sites.len() {
continue;
}
// Snapshot the use-list so we can borrow `values` mutably
// while iterating block ids. The list is short (typically
// 1–3 blocks) so the clone is cheap.
let use_blocks = use_sites[val_idx].clone();
for block_id in use_blocks {
if !executable_blocks[block_id.0 as usize] {
continue;
}
let block = body.block(block_id);
// Re-evaluate phis using this value
for phi in &block.phis {
if let SsaOp::Phi(operands) = &phi.op
&& operands.iter().any(|(_, v)| *v == val)
{
let old = lookup(&values, phi.value);
let new_val = eval_phi(operands, &values, &executable_preds, block_id);
if new_val != old {
store(&mut values, phi.value, new_val);
ssa_worklist.push_back(phi.value);
changed = true;
}
}
}
// Re-evaluate body instructions using this value
for inst in &block.body {
if inst_has_use(inst, val) {
let old = lookup(&values, inst.value);
let new_val = eval_inst(inst, &values);
if new_val != old {
store(&mut values, inst.value, new_val);
ssa_worklist.push_back(inst.value);
changed = true;
}
}
}
// Re-evaluate terminator if condition changed
process_terminator(
block,
body,
&values,
&mut executable_preds,
&mut executable_blocks,
&mut cfg_worklist,
);
}
}
@ -237,44 +254,79 @@ pub fn const_propagate(body: &SsaBody) -> ConstPropResult {
}
}
// Compute unreachable blocks
let unreachable_blocks: HashSet<BlockId> = (0..num_blocks)
.map(|i| BlockId(i as u32))
.filter(|bid| !executable_blocks.contains(bid))
.collect();
// Convert dense storage to the public `HashMap`-shaped result. Walks
// the value vector exactly once. The unreachable-blocks set is small
// (often empty), so building it from a linear scan is fine.
let mut out_values: HashMap<SsaValue, ConstLattice> = HashMap::with_capacity(num_values);
for (i, v) in values.into_iter().enumerate() {
out_values.insert(SsaValue(i as u32), v);
}
let mut unreachable_blocks: HashSet<BlockId> = HashSet::new();
for (i, exec) in executable_blocks.iter().enumerate() {
if !exec {
unreachable_blocks.insert(BlockId(i as u32));
}
}
ConstPropResult {
values,
values: out_values,
unreachable_blocks,
}
}
/// Reads the lattice cell for `v` out of the dense, `SsaValue`-indexed table.
///
/// An index past the end of `values` yields [`ConstLattice::Top`], mirroring
/// the earlier `HashMap::get(&v).cloned().unwrap_or(Top)` behaviour.
#[inline]
fn lookup(values: &[ConstLattice], v: SsaValue) -> ConstLattice {
    match values.get(v.0 as usize) {
        Some(cell) => cell.clone(),
        None => ConstLattice::Top,
    }
}
/// Writes `val` into the lattice cell for `v` in the dense table.
///
/// A write whose index falls outside `values` is ignored instead of
/// panicking, which keeps the pass robust against malformed SSA input.
/// The old `HashMap` path would have inserted a stray entry in that case;
/// here the cell simply does not exist and reads of it fall back to Top.
#[inline]
fn store(values: &mut [ConstLattice], v: SsaValue, val: ConstLattice) {
    if let Some(slot) = values.get_mut(v.0 as usize) {
        *slot = val;
    }
}
/// Evaluate a phi: meet of operands from executable predecessors.
fn eval_phi(
operands: &[(BlockId, SsaValue)],
values: &HashMap<SsaValue, ConstLattice>,
executable_edges: &HashSet<(BlockId, BlockId)>,
values: &[ConstLattice],
executable_preds: &[SmallVec<[BlockId; 2]>],
this_block: BlockId,
) -> ConstLattice {
let preds = executable_preds
.get(this_block.0 as usize)
.map(|p| p.as_slice())
.unwrap_or(&[]);
let mut result = ConstLattice::Top;
for (pred_block, val) in operands {
if !executable_edges.contains(&(*pred_block, this_block)) {
if !preds.contains(pred_block) {
continue; // skip non-executable predecessors
}
let operand_val = values.get(val).cloned().unwrap_or(ConstLattice::Top);
let operand_val = lookup(values, *val);
result = result.meet(&operand_val);
}
result
}
/// Evaluate a single instruction.
fn eval_inst(inst: &SsaInst, values: &HashMap<SsaValue, ConstLattice>) -> ConstLattice {
fn eval_inst(inst: &SsaInst, values: &[ConstLattice]) -> ConstLattice {
match &inst.op {
SsaOp::Const(Some(text)) => ConstLattice::parse(text),
SsaOp::Const(None) => ConstLattice::Varying, // unknown constant
SsaOp::Assign(uses) if uses.len() == 1 => {
// Copy: propagate the source's value
values.get(&uses[0]).cloned().unwrap_or(ConstLattice::Top)
lookup(values, uses[0])
}
SsaOp::Assign(_) => ConstLattice::Varying, // expression with multiple uses
SsaOp::Call { .. }
@ -297,29 +349,69 @@ fn eval_inst(inst: &SsaInst, values: &HashMap<SsaValue, ConstLattice>) -> ConstL
}
}
/// Collect SSA values used by an instruction (for use-map building).
fn inst_uses(inst: &SsaInst) -> Vec<SsaValue> {
/// Apply a closure to every SSA value used by an instruction. Avoids the
/// `Vec<SsaValue>` heap allocation that the previous `inst_uses(inst)`
/// helper paid on every call (use-map build is O(num_insts), the prior
/// path bottle-necked there).
#[inline]
fn inst_uses_each<F: FnMut(SsaValue)>(inst: &SsaInst, mut f: F) {
match &inst.op {
SsaOp::Phi(operands) => operands.iter().map(|(_, v)| *v).collect(),
SsaOp::Assign(uses) => uses.to_vec(),
SsaOp::Phi(operands) => {
for (_, v) in operands {
f(*v);
}
}
SsaOp::Assign(uses) => {
for v in uses {
f(*v);
}
}
SsaOp::Call { args, receiver, .. } => {
let mut vals = Vec::new();
if let Some(rv) = receiver {
vals.push(*rv);
f(*rv);
}
for arg in args {
vals.extend(arg.iter());
for v in arg {
f(*v);
}
}
vals
}
SsaOp::FieldProj { receiver, .. } => vec![*receiver],
SsaOp::FieldProj { receiver, .. } => f(*receiver),
SsaOp::Source
| SsaOp::Const(_)
| SsaOp::Param { .. }
| SsaOp::SelfParam
| SsaOp::CatchParam
| SsaOp::Nop
| SsaOp::Undef => Vec::new(),
| SsaOp::Undef => {}
}
}
/// Reports whether `target` appears among the operands of `inst`.
///
/// Walks the operands in place instead of materialising them, so no
/// `Vec<SsaValue>` is allocated per check (unlike the older
/// `inst_uses(inst).contains(&target)` form used inside the SCCP
/// re-evaluation worklist).
#[inline]
fn inst_has_use(inst: &SsaInst, target: SsaValue) -> bool {
    match &inst.op {
        SsaOp::Phi(operands) => operands.iter().any(|&(_, v)| v == target),
        SsaOp::Assign(uses) => uses.iter().any(|u| *u == target),
        SsaOp::Call { args, receiver, .. } => {
            // The receiver is checked first, then every argument group.
            matches!(receiver, Some(rv) if *rv == target)
                || args.iter().any(|group| group.iter().any(|v| *v == target))
        }
        SsaOp::FieldProj { receiver, .. } => *receiver == target,
        // These ops carry no SSA operands.
        SsaOp::Source
        | SsaOp::Const(_)
        | SsaOp::Param { .. }
        | SsaOp::SelfParam
        | SsaOp::CatchParam
        | SsaOp::Nop
        | SsaOp::Undef => false,
    }
}
@ -327,9 +419,9 @@ fn inst_uses(inst: &SsaInst) -> Vec<SsaValue> {
fn process_terminator(
block: &SsaBlock,
body: &SsaBody,
values: &HashMap<SsaValue, ConstLattice>,
executable_edges: &mut HashSet<(BlockId, BlockId)>,
executable_blocks: &mut HashSet<BlockId>,
values: &[ConstLattice],
executable_preds: &mut [SmallVec<[BlockId; 2]>],
executable_blocks: &mut [bool],
cfg_worklist: &mut VecDeque<BlockId>,
) {
match &block.terminator {
@ -343,7 +435,7 @@ fn process_terminator(
mark_edge_executable(
block.id,
target,
executable_edges,
executable_preds,
executable_blocks,
cfg_worklist,
);
@ -359,7 +451,7 @@ fn process_terminator(
let cond_val = body
.cfg_node_map
.get(cond)
.and_then(|v| values.get(v))
.map(|v| lookup(values, *v))
.and_then(|c| c.as_bool());
match cond_val {
@ -367,7 +459,7 @@ fn process_terminator(
mark_edge_executable(
block.id,
*true_blk,
executable_edges,
executable_preds,
executable_blocks,
cfg_worklist,
);
@ -376,7 +468,7 @@ fn process_terminator(
mark_edge_executable(
block.id,
*false_blk,
executable_edges,
executable_preds,
executable_blocks,
cfg_worklist,
);
@ -386,14 +478,14 @@ fn process_terminator(
mark_edge_executable(
block.id,
*true_blk,
executable_edges,
executable_preds,
executable_blocks,
cfg_worklist,
);
mark_edge_executable(
block.id,
*false_blk,
executable_edges,
executable_preds,
executable_blocks,
cfg_worklist,
);
@ -417,7 +509,7 @@ fn process_terminator(
mark_edge_executable(
block.id,
target,
executable_edges,
executable_preds,
executable_blocks,
cfg_worklist,
);
@ -432,7 +524,7 @@ fn process_terminator(
mark_edge_executable(
block.id,
target,
executable_edges,
executable_preds,
executable_blocks,
cfg_worklist,
);
@ -444,18 +536,27 @@ fn process_terminator(
fn mark_edge_executable(
from: BlockId,
to: BlockId,
executable_edges: &mut HashSet<(BlockId, BlockId)>,
executable_blocks: &mut HashSet<BlockId>,
executable_preds: &mut [SmallVec<[BlockId; 2]>],
executable_blocks: &mut [bool],
cfg_worklist: &mut VecDeque<BlockId>,
) {
if executable_edges.insert((from, to)) {
if executable_blocks.insert(to) {
cfg_worklist.push_back(to);
} else {
// Block already executable but new edge, re-evaluate phis
cfg_worklist.push_back(to);
}
let to_idx = to.0 as usize;
if to_idx >= executable_preds.len() {
return;
}
let preds = &mut executable_preds[to_idx];
if preds.contains(&from) {
return;
}
preds.push(from);
let was_already_exec = executable_blocks[to_idx];
if !was_already_exec {
executable_blocks[to_idx] = true;
}
// Always re-enqueue: either the block became newly reachable, or it
// already was but a new predecessor edge means phi operands need
// re-meeting against the now-executable predecessor.
cfg_worklist.push_back(to);
}
/// Apply constant propagation results: prune branches where condition is known constant.

View file

@ -7,6 +7,7 @@ use super::ir::*;
use crate::cfg::{BinOp, Cfg};
use crate::symbol::Lang;
use serde::{Deserialize, Serialize};
use smallvec::SmallVec;
/// Inferred type kind for an SSA value.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
@ -40,6 +41,17 @@ pub enum TypeKind {
/// `label_prefix`, never participates in label-based callee
/// resolution.
LocalCollection,
/// A JPA / Hibernate Criteria API query object (`CriteriaQuery<T>`,
/// `CriteriaUpdate<T>`, `CriteriaDelete<T>`, `Subquery<T>`,
/// `TypedQuery<T>`). These objects are produced by the
/// `CriteriaBuilder` and emit parameterized SQL when handed to
/// `Session.createQuery(cq)` / `EntityManager.createQuery(cq)`. The
/// argument is structural (predicate AST), not a string, so SQL
/// injection cannot flow through it. Used to suppress the
/// `cfg-unguarded-sink` finding on `session.createQuery(cq)` shapes
/// where openmrs / xwiki / keycloak Hibernate DAOs build queries
/// via `cb.createQuery(Foo.class)` + `Root` / `Predicate` API.
JpaCriteriaQuery,
/// A framework-injected DTO body whose field types are known.
/// Populated when a parameter is recognised as a typed extractor and
/// the DTO class / struct / Pydantic model is resolvable in scope.
@ -86,6 +98,7 @@ impl TypeKind {
Self::FileHandle => Some("FileHandle"),
Self::Url => Some("URL"),
Self::RequestBuilder => Some("RequestBuilder"),
Self::JpaCriteriaQuery => Some("JpaCriteriaQuery"),
_ => None,
}
}
@ -222,6 +235,111 @@ pub fn is_type_safe_for_sink(
})
}
/// Returns `true` when a SQL sink receives at least one argument typed as
/// a structural query object ([`TypeKind::JpaCriteriaQuery`], i.e. the
/// JPA / Hibernate Criteria API objects `CriteriaQuery`, `CriteriaUpdate`,
/// `CriteriaDelete`, `Subquery`, `TypedQuery`).
///
/// Such objects are built through `CriteriaBuilder` rather than from a
/// string, so callers use this predicate to suppress the SQL-injection
/// finding on `session.createQuery(cq)` / `em.createQuery(cq)` /
/// `executeUpdate` shapes.
///
/// The result is `false` when:
/// - `sink_caps` does not intersect `Cap::SQL_QUERY`;
/// - `values` is empty;
/// - no value in `values` carries the `JpaCriteriaQuery` tag.
///
/// Callers should leave receiver values out of `values`: the receiver of
/// a JPA query method is the `Session` / `EntityManager` channel, not the
/// payload.
pub fn is_safe_query_object_arg(
    values: &[SsaValue],
    sink_caps: crate::labels::Cap,
    type_facts: &TypeFactResult,
) -> bool {
    use crate::labels::Cap;

    // `any` over an empty slice is already `false`, so no separate
    // emptiness guard is needed.
    sink_caps.intersects(Cap::SQL_QUERY)
        && values
            .iter()
            .any(|value| type_facts.is_type(*value, &TypeKind::JpaCriteriaQuery))
}
/// Infers a call's return type from the receiver text, for methods whose
/// suffix alone is ambiguous.
///
/// Only the Java `createQuery` suffix is handled. It is overloaded between
/// `CriteriaBuilder.createQuery(Class)` (returns a `CriteriaQuery`, a
/// structural query object) and `Session.createQuery(String|Query)` (whose
/// string overload is a SQL sink). Class-literal arguments such as
/// `Foo.class` do not surface in `arg_uses` at the CFG layer, so the
/// decision falls back to a text hint: the call is treated as the
/// criteria-builder overload when the text before `.createQuery` contains
/// `CriteriaBuilder` or is recognised by [`receiver_is_criteria_builder`].
///
/// Every other shape yields `None`, so [`constructor_type`] /
/// `is_int_producing_callee` remain authoritative and consumers see
/// Unknown rather than a wrong type tag.
///
/// `_args` and `_consts` are currently unused; they stay on the signature
/// so arg-shape narrowing can be added once class-literal lowering records
/// `Foo.class` as an arg-use.
fn arg_aware_call_type(
    lang: Lang,
    callee: &str,
    _args: &[SmallVec<[SsaValue; 2]>],
    _consts: &HashMap<SsaValue, ConstLattice>,
) -> Option<TypeKind> {
    match lang {
        Lang::Java => {}
        _ => return None,
    }

    // Method name: the text after the last `::`, then after the last `.`.
    let path_tail = callee.rsplit_once("::").map_or(callee, |(_, tail)| tail);
    let method = path_tail.rsplit_once('.').map_or(path_tail, |(_, name)| name);
    if method != "createQuery" {
        return None;
    }

    // Everything before the final `.` is the receiver text; with no `.`
    // the whole callee is inspected. This is a conservative text-level
    // match because receiver-type facts are not available here yet.
    let receiver_text = match callee.rfind('.') {
        Some(dot) => &callee[..dot],
        None => callee,
    };
    let is_builder =
        receiver_text.contains("CriteriaBuilder") || receiver_is_criteria_builder(receiver_text);
    is_builder.then_some(TypeKind::JpaCriteriaQuery)
}
/// Decides whether `receiver_text` names a CriteriaBuilder by convention.
///
/// The text matches when either:
/// - it contains `getCriteriaBuilder()` or `.cb.` anywhere, or
/// - its trailing identifier is one of `cb`, `criteriaBuilder`,
///   `criteria_builder`, `builder`, `getCriteriaBuilder`.
///
/// The trailing identifier is what remains after discarding everything up
/// to the last `)`, trimming whitespace and leading dots, taking the piece
/// after the last `.`, `:` or space, and stripping surrounding parentheses.
fn receiver_is_criteria_builder(receiver_text: &str) -> bool {
    const BUILDER_NAMES: [&str; 5] = [
        "cb",
        "criteriaBuilder",
        "criteria_builder",
        "builder",
        "getCriteriaBuilder",
    ];

    // Whole-text hints need no segmentation.
    if receiver_text.contains("getCriteriaBuilder()") || receiver_text.contains(".cb.") {
        return true;
    }

    // Drop everything through the last closing parenthesis (casts, calls).
    let after_paren = match receiver_text.rsplit_once(')') {
        Some((_, rest)) => rest,
        None => receiver_text,
    };
    let trimmed = after_paren.trim().trim_start_matches('.');

    // Keep only the final `.` / `:` / space separated piece. The separators
    // are single-byte, so `pos + 1` is always a valid char boundary.
    let ident = match trimmed.rfind(|c: char| matches!(c, '.' | ':' | ' ')) {
        Some(pos) => &trimmed[pos + 1..],
        None => trimmed,
    };
    let ident = ident.trim_matches(|c: char| matches!(c, '(' | ')'));

    BUILDER_NAMES.contains(&ident)
}
/// Infer a type from a constructor, factory, or allocator call.
///
/// Maps known constructor/factory/allocator patterns to security-relevant
@ -260,6 +378,20 @@ pub(crate) fn constructor_type(lang: Lang, callee: &str) -> Option<TypeKind> {
"FileInputStream" | "FileOutputStream" | "FileReader" | "FileWriter"
| "BufferedReader" | "BufferedWriter" => Some(TypeKind::FileHandle),
"getWriter" | "getOutputStream" => Some(TypeKind::HttpResponse),
// JPA / Hibernate Criteria API factory methods. These are
// unambiguous: `createCriteriaUpdate` / `createCriteriaDelete`
// / `createTupleQuery` / `subquery` exist only on
// `CriteriaBuilder` / `CriteriaQuery` and always return a
// structural query object. `createQuery` is overloaded
// (`CriteriaBuilder.createQuery(Class)` returns
// `CriteriaQuery`; `Session.createQuery(String)` returns
// `Query`), so it is not mapped here; it is handled by
// [`arg_aware_call_type`], which gates on the receiver text
// (a `CriteriaBuilder` hint) so we don't conflate the
// executable-query overload with the criteria builder.
"createCriteriaUpdate" | "createCriteriaDelete" | "createTupleQuery" | "subquery" => {
Some(TypeKind::JpaCriteriaQuery)
}
_ => None,
},
Lang::JavaScript | Lang::TypeScript => match suffix {
@ -687,9 +819,13 @@ pub fn analyze_types_with_param_types(
}
SsaOp::SelfParam => TypeFact::from_kind(TypeKind::Object),
SsaOp::CatchParam => TypeFact::from_kind(TypeKind::Object),
SsaOp::Call { callee, .. } => {
SsaOp::Call { callee, args, .. } => {
if let Some(ty) = lang.and_then(|l| constructor_type(l, callee)) {
TypeFact::from_kind(ty)
} else if let Some(ty) =
lang.and_then(|l| arg_aware_call_type(l, callee, args, consts))
{
TypeFact::from_kind(ty)
} else if is_int_producing_callee(callee) {
TypeFact::from_kind(TypeKind::Int)
} else {
@ -2227,4 +2363,171 @@ mod tests {
&result
));
}
// ── JPA Criteria query suppression (Phase: real-repo openmrs FP) ───
//
// These tests pin the `TypeKind::JpaCriteriaQuery` variant + the
// `is_safe_query_object_arg` predicate + the
// `arg_aware_call_type` receiver-text recogniser. Together they
// close the openmrs HibernateDAO `session.createQuery(cq)` FP
// cluster (216 → 24 cfg-unguarded-sink in openmrs).
/// The `JpaCriteriaQuery` variant exposes a label prefix, so
/// type-qualified callee resolution has something to attach rules to.
#[test]
fn jpa_criteria_query_label_prefix() {
    let prefix = TypeKind::JpaCriteriaQuery.label_prefix();
    assert_eq!(prefix, Some("JpaCriteriaQuery"));
}
/// `is_safe_query_object_arg` reports `true` for a `SQL_QUERY` sink when
/// a supplied value is typed `JpaCriteriaQuery`, and `false` for other
/// caps, for Unknown-typed values and for an empty slice. Excluding the
/// receiver is the caller's job; only the predicate is checked here.
#[test]
fn safe_query_object_arg_suppresses_sql_query() {
    use crate::labels::Cap;

    // Builds a fact table in which value 0 has the given kind.
    let facts_for = |kind: TypeKind| TypeFactResult {
        facts: [(SsaValue(0), TypeFact::from_kind(kind))]
            .into_iter()
            .collect(),
    };
    let criteria = facts_for(TypeKind::JpaCriteriaQuery);
    let unknown = facts_for(TypeKind::Unknown);
    let arg = [SsaValue(0)];

    // A criteria-typed argument suppresses the SQL_QUERY cap ...
    assert!(is_safe_query_object_arg(&arg, Cap::SQL_QUERY, &criteria));
    // ... but leaves other caps untouched.
    assert!(!is_safe_query_object_arg(&arg, Cap::CODE_EXEC, &criteria));
    // Unknown-typed values do not trigger.
    assert!(!is_safe_query_object_arg(&arg, Cap::SQL_QUERY, &unknown));
    // An empty slice never suppresses.
    assert!(!is_safe_query_object_arg(&[], Cap::SQL_QUERY, &criteria));
}
/// One Criteria-typed value among differently typed values is enough:
/// the predicate is `any`, not `all`, because the criteria-object
/// argument is the only injection-bearing slot of a `createQuery(cq)`
/// sink.
#[test]
fn safe_query_object_arg_fires_with_mixed_args() {
    use crate::labels::Cap;

    let typed = [
        (SsaValue(0), TypeKind::JpaCriteriaQuery),
        (SsaValue(1), TypeKind::String),
        (SsaValue(2), TypeKind::Unknown),
    ];
    let result = TypeFactResult {
        facts: typed
            .into_iter()
            .map(|(value, kind)| (value, TypeFact::from_kind(kind)))
            .collect(),
    };

    let args = [SsaValue(0), SsaValue(1), SsaValue(2)];
    assert!(is_safe_query_object_arg(&args, Cap::SQL_QUERY, &result));
}
/// `arg_aware_call_type` tags `cb.createQuery(...)`,
/// `criteriaBuilder.createQuery(...)`, cast receivers and
/// `getCriteriaBuilder().createQuery(...)` as `JpaCriteriaQuery`. The
/// overloaded `session.createQuery(...)` / `em.createQuery(...)`
/// (the executable-query overload), non-Java languages and other method
/// suffixes all yield `None`.
#[test]
fn arg_aware_call_type_jpa_criteria_builder_recogniser() {
    let no_args: Vec<SmallVec<[SsaValue; 2]>> = Vec::new();
    let consts: HashMap<SsaValue, ConstLattice> = HashMap::new();

    // Receivers that carry a CriteriaBuilder hint: bare `cb`, bare
    // `criteriaBuilder`, a cast, and a chained accessor.
    let builder_callees = [
        "cb.createQuery",
        "criteriaBuilder.createQuery",
        "((CriteriaBuilder) cb).createQuery",
        "session.getCriteriaBuilder().createQuery",
    ];
    for callee in builder_callees {
        assert_eq!(
            arg_aware_call_type(Lang::Java, callee, &no_args, &consts),
            Some(TypeKind::JpaCriteriaQuery),
            "`{callee}` should be recognised as the criteria-builder overload"
        );
    }

    // The executable-query overload has no CriteriaBuilder hint in its
    // receiver text, so the type stays Unknown and the suppression is
    // decided from the arg-0 type fact instead.
    for callee in ["session.createQuery", "em.createQuery"] {
        assert_eq!(
            arg_aware_call_type(Lang::Java, callee, &no_args, &consts),
            None,
            "`{callee}` must not be tagged"
        );
    }

    // Languages other than Java are never tagged.
    let python = arg_aware_call_type(Lang::Python, "cb.createQuery", &no_args, &consts);
    assert_eq!(python, None);

    // Suffixes other than `createQuery` are never tagged here.
    let other_suffix =
        arg_aware_call_type(Lang::Java, "cb.createCriteriaUpdate", &no_args, &consts);
    assert_eq!(other_suffix, None);
}
/// The Criteria API methods with unambiguous names
/// (`createCriteriaUpdate`, `createCriteriaDelete`, `createTupleQuery`,
/// `subquery`) resolve to `TypeKind::JpaCriteriaQuery` straight from
/// [`constructor_type`], with or without a receiver prefix. The
/// overloaded `createQuery` must stay unmapped at this level.
#[test]
fn constructor_type_unique_jpa_criteria_methods() {
    let unique_suffixes = [
        "createCriteriaUpdate",
        "createCriteriaDelete",
        "createTupleQuery",
        "subquery",
    ];
    for suffix in unique_suffixes {
        let bare = constructor_type(Lang::Java, suffix);
        assert_eq!(
            bare,
            Some(TypeKind::JpaCriteriaQuery),
            "suffix `{suffix}` must map to JpaCriteriaQuery"
        );

        // An arbitrary receiver in front of the suffix changes nothing.
        let qualified = format!("cb.{suffix}");
        assert_eq!(
            constructor_type(Lang::Java, &qualified),
            Some(TypeKind::JpaCriteriaQuery)
        );
    }

    // Non-criteria methods are unaffected.
    assert_eq!(
        constructor_type(Lang::Java, "session.createQuery"),
        None,
        "createQuery is overloaded — must not map at constructor_type level"
    );
}
}