mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-24 20:28:06 +02:00
Performance and precision pass (#64)
This commit is contained in:
parent
c7c5e0f3a1
commit
fb698d2c27
97 changed files with 9932 additions and 517 deletions
|
|
@ -1,6 +1,7 @@
|
|||
use std::collections::{HashMap, HashSet, VecDeque};
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
use smallvec::SmallVec;
|
||||
|
||||
use super::ir::*;
|
||||
|
||||
|
|
@ -96,40 +97,56 @@ pub struct ConstPropResult {
|
|||
}
|
||||
|
||||
/// Run Sparse Conditional Constant Propagation on an SSA body.
|
||||
///
|
||||
/// Internal storage is dense `Vec`-indexed by [`SsaValue`] / [`BlockId`] to
|
||||
/// avoid the per-lookup `SipHash` cost of `HashMap<SsaValue, _>` /
|
||||
/// `HashSet<(BlockId, BlockId)>` that previously dominated the inner
|
||||
/// fixed-point loop. The public [`ConstPropResult`] still exposes the
|
||||
/// `HashMap`-shaped contract; the conversion at the end of the function is
|
||||
/// O(num_values) and runs once.
|
||||
pub fn const_propagate(body: &SsaBody) -> ConstPropResult {
|
||||
let num_blocks = body.blocks.len();
|
||||
let num_values = body.value_defs.len();
|
||||
|
||||
// Per-value lattice: starts at Top
|
||||
let mut values: HashMap<SsaValue, ConstLattice> = HashMap::new();
|
||||
// Dense per-value lattice (`Vec` indexed by `SsaValue.0`). All values
|
||||
// are defined by exactly one inst (phi or body), so initialising the
|
||||
// entire range to Top is equivalent to the previous per-inst insert
|
||||
// pass at strictly lower cost (no hashing).
|
||||
let mut values: Vec<ConstLattice> = vec![ConstLattice::Top; num_values];
|
||||
|
||||
// Executable flags per CFG edge (from_block, to_block)
|
||||
let mut executable_edges: HashSet<(BlockId, BlockId)> = HashSet::new();
|
||||
// Executable blocks
|
||||
let mut executable_blocks: HashSet<BlockId> = HashSet::new();
|
||||
// Per-block executability and per-(dest, pred) executable-edge bitmap.
|
||||
// Edges are stored as a per-destination list of executable predecessors
|
||||
// — phi evaluation only ever asks "is `(pred, this_block)` executable?",
|
||||
// so a tiny SmallVec scan over the dest's predecessors beats a
|
||||
// `HashSet<(BlockId, BlockId)>::contains` (which hashes a 64-bit pair
|
||||
// for every operand of every phi).
|
||||
let mut executable_blocks: Vec<bool> = vec![false; num_blocks];
|
||||
let mut executable_preds: Vec<SmallVec<[BlockId; 2]>> = vec![SmallVec::new(); num_blocks];
|
||||
|
||||
// Two worklists
|
||||
// Worklists
|
||||
let mut cfg_worklist: VecDeque<BlockId> = VecDeque::new();
|
||||
let mut ssa_worklist: VecDeque<SsaValue> = VecDeque::new();
|
||||
|
||||
// Mark entry executable
|
||||
executable_blocks.insert(body.entry);
|
||||
executable_blocks[body.entry.0 as usize] = true;
|
||||
cfg_worklist.push_back(body.entry);
|
||||
|
||||
// Build use-map: SsaValue → list of (BlockId, instruction index in block)
|
||||
// so we can propagate SSA value changes efficiently.
|
||||
let mut use_sites: HashMap<SsaValue, Vec<BlockId>> = HashMap::new();
|
||||
// Use-map: dense `Vec` indexed by `SsaValue.0`. Populated in a single
|
||||
// pass via the closure-based [`inst_uses_each`] helper, which avoids
|
||||
// the heap allocation of the prior `inst_uses() -> Vec<SsaValue>`
|
||||
// factory.
|
||||
let mut use_sites: Vec<SmallVec<[BlockId; 2]>> = vec![SmallVec::new(); num_values];
|
||||
for block in &body.blocks {
|
||||
for inst in block.phis.iter().chain(block.body.iter()) {
|
||||
for used_val in inst_uses(inst) {
|
||||
use_sites.entry(used_val).or_default().push(block.id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize all values to Top
|
||||
for block in &body.blocks {
|
||||
for inst in block.phis.iter().chain(block.body.iter()) {
|
||||
values.insert(inst.value, ConstLattice::Top);
|
||||
inst_uses_each(inst, |used_val| {
|
||||
let idx = used_val.0 as usize;
|
||||
if idx < use_sites.len() {
|
||||
let bucket = &mut use_sites[idx];
|
||||
if bucket.last() != Some(&block.id) {
|
||||
bucket.push(block.id);
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -144,10 +161,10 @@ pub fn const_propagate(body: &SsaBody) -> ConstPropResult {
|
|||
// Evaluate phis
|
||||
for phi in &block.phis {
|
||||
if let SsaOp::Phi(operands) = &phi.op {
|
||||
let old = values.get(&phi.value).cloned().unwrap_or(ConstLattice::Top);
|
||||
let new_val = eval_phi(operands, &values, &executable_edges, block_id);
|
||||
let old = lookup(&values, phi.value);
|
||||
let new_val = eval_phi(operands, &values, &executable_preds, block_id);
|
||||
if new_val != old {
|
||||
values.insert(phi.value, new_val);
|
||||
store(&mut values, phi.value, new_val);
|
||||
ssa_worklist.push_back(phi.value);
|
||||
changed = true;
|
||||
}
|
||||
|
|
@ -156,13 +173,10 @@ pub fn const_propagate(body: &SsaBody) -> ConstPropResult {
|
|||
|
||||
// Evaluate body instructions
|
||||
for inst in &block.body {
|
||||
let old = values
|
||||
.get(&inst.value)
|
||||
.cloned()
|
||||
.unwrap_or(ConstLattice::Top);
|
||||
let old = lookup(&values, inst.value);
|
||||
let new_val = eval_inst(inst, &values);
|
||||
if new_val != old {
|
||||
values.insert(inst.value, new_val);
|
||||
store(&mut values, inst.value, new_val);
|
||||
ssa_worklist.push_back(inst.value);
|
||||
changed = true;
|
||||
}
|
||||
|
|
@ -173,7 +187,7 @@ pub fn const_propagate(body: &SsaBody) -> ConstPropResult {
|
|||
block,
|
||||
body,
|
||||
&values,
|
||||
&mut executable_edges,
|
||||
&mut executable_preds,
|
||||
&mut executable_blocks,
|
||||
&mut cfg_worklist,
|
||||
);
|
||||
|
|
@ -181,54 +195,57 @@ pub fn const_propagate(body: &SsaBody) -> ConstPropResult {
|
|||
|
||||
// Process SSA worklist
|
||||
while let Some(val) = ssa_worklist.pop_front() {
|
||||
if let Some(blocks) = use_sites.get(&val) {
|
||||
for &block_id in blocks {
|
||||
if !executable_blocks.contains(&block_id) {
|
||||
continue;
|
||||
}
|
||||
let block = body.block(block_id);
|
||||
|
||||
// Re-evaluate phis using this value
|
||||
for phi in &block.phis {
|
||||
if let SsaOp::Phi(operands) = &phi.op
|
||||
&& operands.iter().any(|(_, v)| *v == val)
|
||||
{
|
||||
let old = values.get(&phi.value).cloned().unwrap_or(ConstLattice::Top);
|
||||
let new_val = eval_phi(operands, &values, &executable_edges, block_id);
|
||||
if new_val != old {
|
||||
values.insert(phi.value, new_val);
|
||||
ssa_worklist.push_back(phi.value);
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Re-evaluate body instructions using this value
|
||||
for inst in &block.body {
|
||||
if inst_uses(inst).contains(&val) {
|
||||
let old = values
|
||||
.get(&inst.value)
|
||||
.cloned()
|
||||
.unwrap_or(ConstLattice::Top);
|
||||
let new_val = eval_inst(inst, &values);
|
||||
if new_val != old {
|
||||
values.insert(inst.value, new_val);
|
||||
ssa_worklist.push_back(inst.value);
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Re-evaluate terminator if condition changed
|
||||
process_terminator(
|
||||
block,
|
||||
body,
|
||||
&values,
|
||||
&mut executable_edges,
|
||||
&mut executable_blocks,
|
||||
&mut cfg_worklist,
|
||||
);
|
||||
let val_idx = val.0 as usize;
|
||||
if val_idx >= use_sites.len() {
|
||||
continue;
|
||||
}
|
||||
// Snapshot the use-list so we can borrow `values` mutably
|
||||
// while iterating block ids. The list is short (typically
|
||||
// 1–3 blocks) so the clone is cheap.
|
||||
let use_blocks = use_sites[val_idx].clone();
|
||||
for block_id in use_blocks {
|
||||
if !executable_blocks[block_id.0 as usize] {
|
||||
continue;
|
||||
}
|
||||
let block = body.block(block_id);
|
||||
|
||||
// Re-evaluate phis using this value
|
||||
for phi in &block.phis {
|
||||
if let SsaOp::Phi(operands) = &phi.op
|
||||
&& operands.iter().any(|(_, v)| *v == val)
|
||||
{
|
||||
let old = lookup(&values, phi.value);
|
||||
let new_val = eval_phi(operands, &values, &executable_preds, block_id);
|
||||
if new_val != old {
|
||||
store(&mut values, phi.value, new_val);
|
||||
ssa_worklist.push_back(phi.value);
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Re-evaluate body instructions using this value
|
||||
for inst in &block.body {
|
||||
if inst_has_use(inst, val) {
|
||||
let old = lookup(&values, inst.value);
|
||||
let new_val = eval_inst(inst, &values);
|
||||
if new_val != old {
|
||||
store(&mut values, inst.value, new_val);
|
||||
ssa_worklist.push_back(inst.value);
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Re-evaluate terminator if condition changed
|
||||
process_terminator(
|
||||
block,
|
||||
body,
|
||||
&values,
|
||||
&mut executable_preds,
|
||||
&mut executable_blocks,
|
||||
&mut cfg_worklist,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -237,44 +254,79 @@ pub fn const_propagate(body: &SsaBody) -> ConstPropResult {
|
|||
}
|
||||
}
|
||||
|
||||
// Compute unreachable blocks
|
||||
let unreachable_blocks: HashSet<BlockId> = (0..num_blocks)
|
||||
.map(|i| BlockId(i as u32))
|
||||
.filter(|bid| !executable_blocks.contains(bid))
|
||||
.collect();
|
||||
// Convert dense storage to the public `HashMap`-shaped result. Walks
|
||||
// the value vector exactly once. The unreachable-blocks set is small
|
||||
// (often empty), so building it from a linear scan is fine.
|
||||
let mut out_values: HashMap<SsaValue, ConstLattice> = HashMap::with_capacity(num_values);
|
||||
for (i, v) in values.into_iter().enumerate() {
|
||||
out_values.insert(SsaValue(i as u32), v);
|
||||
}
|
||||
let mut unreachable_blocks: HashSet<BlockId> = HashSet::new();
|
||||
for (i, exec) in executable_blocks.iter().enumerate() {
|
||||
if !exec {
|
||||
unreachable_blocks.insert(BlockId(i as u32));
|
||||
}
|
||||
}
|
||||
|
||||
ConstPropResult {
|
||||
values,
|
||||
values: out_values,
|
||||
unreachable_blocks,
|
||||
}
|
||||
}
|
||||
|
||||
/// Dense lattice lookup. Returns Top for out-of-range values to match the
|
||||
/// pre-refactor `HashMap::get(&v).cloned().unwrap_or(Top)` semantics.
|
||||
#[inline]
|
||||
fn lookup(values: &[ConstLattice], v: SsaValue) -> ConstLattice {
|
||||
values
|
||||
.get(v.0 as usize)
|
||||
.cloned()
|
||||
.unwrap_or(ConstLattice::Top)
|
||||
}
|
||||
|
||||
/// Dense lattice store. Out-of-range writes are silently dropped to
|
||||
/// preserve robustness against malformed SSA input — the prior HashMap
|
||||
/// path would have inserted a stray entry; the dense path leaves it
|
||||
/// implicit (Top). Either way the value is unobservable downstream
|
||||
/// because no use-map entry would point at it.
|
||||
#[inline]
|
||||
fn store(values: &mut [ConstLattice], v: SsaValue, val: ConstLattice) {
|
||||
let idx = v.0 as usize;
|
||||
if idx < values.len() {
|
||||
values[idx] = val;
|
||||
}
|
||||
}
|
||||
|
||||
/// Evaluate a phi: meet of operands from executable predecessors.
|
||||
fn eval_phi(
|
||||
operands: &[(BlockId, SsaValue)],
|
||||
values: &HashMap<SsaValue, ConstLattice>,
|
||||
executable_edges: &HashSet<(BlockId, BlockId)>,
|
||||
values: &[ConstLattice],
|
||||
executable_preds: &[SmallVec<[BlockId; 2]>],
|
||||
this_block: BlockId,
|
||||
) -> ConstLattice {
|
||||
let preds = executable_preds
|
||||
.get(this_block.0 as usize)
|
||||
.map(|p| p.as_slice())
|
||||
.unwrap_or(&[]);
|
||||
let mut result = ConstLattice::Top;
|
||||
for (pred_block, val) in operands {
|
||||
if !executable_edges.contains(&(*pred_block, this_block)) {
|
||||
if !preds.contains(pred_block) {
|
||||
continue; // skip non-executable predecessors
|
||||
}
|
||||
let operand_val = values.get(val).cloned().unwrap_or(ConstLattice::Top);
|
||||
let operand_val = lookup(values, *val);
|
||||
result = result.meet(&operand_val);
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
/// Evaluate a single instruction.
|
||||
fn eval_inst(inst: &SsaInst, values: &HashMap<SsaValue, ConstLattice>) -> ConstLattice {
|
||||
fn eval_inst(inst: &SsaInst, values: &[ConstLattice]) -> ConstLattice {
|
||||
match &inst.op {
|
||||
SsaOp::Const(Some(text)) => ConstLattice::parse(text),
|
||||
SsaOp::Const(None) => ConstLattice::Varying, // unknown constant
|
||||
SsaOp::Assign(uses) if uses.len() == 1 => {
|
||||
// Copy: propagate the source's value
|
||||
values.get(&uses[0]).cloned().unwrap_or(ConstLattice::Top)
|
||||
lookup(values, uses[0])
|
||||
}
|
||||
SsaOp::Assign(_) => ConstLattice::Varying, // expression with multiple uses
|
||||
SsaOp::Call { .. }
|
||||
|
|
@ -297,29 +349,69 @@ fn eval_inst(inst: &SsaInst, values: &HashMap<SsaValue, ConstLattice>) -> ConstL
|
|||
}
|
||||
}
|
||||
|
||||
/// Collect SSA values used by an instruction (for use-map building).
|
||||
fn inst_uses(inst: &SsaInst) -> Vec<SsaValue> {
|
||||
/// Apply a closure to every SSA value used by an instruction. Avoids the
|
||||
/// `Vec<SsaValue>` heap allocation that the previous `inst_uses(inst)`
|
||||
/// helper paid on every call (use-map build is O(num_insts), the prior
|
||||
/// path bottle-necked there).
|
||||
#[inline]
|
||||
fn inst_uses_each<F: FnMut(SsaValue)>(inst: &SsaInst, mut f: F) {
|
||||
match &inst.op {
|
||||
SsaOp::Phi(operands) => operands.iter().map(|(_, v)| *v).collect(),
|
||||
SsaOp::Assign(uses) => uses.to_vec(),
|
||||
SsaOp::Phi(operands) => {
|
||||
for (_, v) in operands {
|
||||
f(*v);
|
||||
}
|
||||
}
|
||||
SsaOp::Assign(uses) => {
|
||||
for v in uses {
|
||||
f(*v);
|
||||
}
|
||||
}
|
||||
SsaOp::Call { args, receiver, .. } => {
|
||||
let mut vals = Vec::new();
|
||||
if let Some(rv) = receiver {
|
||||
vals.push(*rv);
|
||||
f(*rv);
|
||||
}
|
||||
for arg in args {
|
||||
vals.extend(arg.iter());
|
||||
for v in arg {
|
||||
f(*v);
|
||||
}
|
||||
}
|
||||
vals
|
||||
}
|
||||
SsaOp::FieldProj { receiver, .. } => vec![*receiver],
|
||||
SsaOp::FieldProj { receiver, .. } => f(*receiver),
|
||||
SsaOp::Source
|
||||
| SsaOp::Const(_)
|
||||
| SsaOp::Param { .. }
|
||||
| SsaOp::SelfParam
|
||||
| SsaOp::CatchParam
|
||||
| SsaOp::Nop
|
||||
| SsaOp::Undef => Vec::new(),
|
||||
| SsaOp::Undef => {}
|
||||
}
|
||||
}
|
||||
|
||||
/// Zero-allocation predicate: does `inst` use `target` as an operand?
|
||||
/// Replaces the prior `inst_uses(inst).contains(&target)` shape, which
|
||||
/// allocated a fresh `Vec<SsaValue>` on every check inside the SCCP
|
||||
/// re-evaluation worklist.
|
||||
#[inline]
|
||||
fn inst_has_use(inst: &SsaInst, target: SsaValue) -> bool {
|
||||
match &inst.op {
|
||||
SsaOp::Phi(operands) => operands.iter().any(|(_, v)| *v == target),
|
||||
SsaOp::Assign(uses) => uses.contains(&target),
|
||||
SsaOp::Call { args, receiver, .. } => {
|
||||
if let Some(rv) = receiver
|
||||
&& *rv == target
|
||||
{
|
||||
return true;
|
||||
}
|
||||
args.iter().any(|arg| arg.contains(&target))
|
||||
}
|
||||
SsaOp::FieldProj { receiver, .. } => *receiver == target,
|
||||
SsaOp::Source
|
||||
| SsaOp::Const(_)
|
||||
| SsaOp::Param { .. }
|
||||
| SsaOp::SelfParam
|
||||
| SsaOp::CatchParam
|
||||
| SsaOp::Nop
|
||||
| SsaOp::Undef => false,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -327,9 +419,9 @@ fn inst_uses(inst: &SsaInst) -> Vec<SsaValue> {
|
|||
fn process_terminator(
|
||||
block: &SsaBlock,
|
||||
body: &SsaBody,
|
||||
values: &HashMap<SsaValue, ConstLattice>,
|
||||
executable_edges: &mut HashSet<(BlockId, BlockId)>,
|
||||
executable_blocks: &mut HashSet<BlockId>,
|
||||
values: &[ConstLattice],
|
||||
executable_preds: &mut [SmallVec<[BlockId; 2]>],
|
||||
executable_blocks: &mut [bool],
|
||||
cfg_worklist: &mut VecDeque<BlockId>,
|
||||
) {
|
||||
match &block.terminator {
|
||||
|
|
@ -343,7 +435,7 @@ fn process_terminator(
|
|||
mark_edge_executable(
|
||||
block.id,
|
||||
target,
|
||||
executable_edges,
|
||||
executable_preds,
|
||||
executable_blocks,
|
||||
cfg_worklist,
|
||||
);
|
||||
|
|
@ -359,7 +451,7 @@ fn process_terminator(
|
|||
let cond_val = body
|
||||
.cfg_node_map
|
||||
.get(cond)
|
||||
.and_then(|v| values.get(v))
|
||||
.map(|v| lookup(values, *v))
|
||||
.and_then(|c| c.as_bool());
|
||||
|
||||
match cond_val {
|
||||
|
|
@ -367,7 +459,7 @@ fn process_terminator(
|
|||
mark_edge_executable(
|
||||
block.id,
|
||||
*true_blk,
|
||||
executable_edges,
|
||||
executable_preds,
|
||||
executable_blocks,
|
||||
cfg_worklist,
|
||||
);
|
||||
|
|
@ -376,7 +468,7 @@ fn process_terminator(
|
|||
mark_edge_executable(
|
||||
block.id,
|
||||
*false_blk,
|
||||
executable_edges,
|
||||
executable_preds,
|
||||
executable_blocks,
|
||||
cfg_worklist,
|
||||
);
|
||||
|
|
@ -386,14 +478,14 @@ fn process_terminator(
|
|||
mark_edge_executable(
|
||||
block.id,
|
||||
*true_blk,
|
||||
executable_edges,
|
||||
executable_preds,
|
||||
executable_blocks,
|
||||
cfg_worklist,
|
||||
);
|
||||
mark_edge_executable(
|
||||
block.id,
|
||||
*false_blk,
|
||||
executable_edges,
|
||||
executable_preds,
|
||||
executable_blocks,
|
||||
cfg_worklist,
|
||||
);
|
||||
|
|
@ -417,7 +509,7 @@ fn process_terminator(
|
|||
mark_edge_executable(
|
||||
block.id,
|
||||
target,
|
||||
executable_edges,
|
||||
executable_preds,
|
||||
executable_blocks,
|
||||
cfg_worklist,
|
||||
);
|
||||
|
|
@ -432,7 +524,7 @@ fn process_terminator(
|
|||
mark_edge_executable(
|
||||
block.id,
|
||||
target,
|
||||
executable_edges,
|
||||
executable_preds,
|
||||
executable_blocks,
|
||||
cfg_worklist,
|
||||
);
|
||||
|
|
@ -444,18 +536,27 @@ fn process_terminator(
|
|||
fn mark_edge_executable(
|
||||
from: BlockId,
|
||||
to: BlockId,
|
||||
executable_edges: &mut HashSet<(BlockId, BlockId)>,
|
||||
executable_blocks: &mut HashSet<BlockId>,
|
||||
executable_preds: &mut [SmallVec<[BlockId; 2]>],
|
||||
executable_blocks: &mut [bool],
|
||||
cfg_worklist: &mut VecDeque<BlockId>,
|
||||
) {
|
||||
if executable_edges.insert((from, to)) {
|
||||
if executable_blocks.insert(to) {
|
||||
cfg_worklist.push_back(to);
|
||||
} else {
|
||||
// Block already executable but new edge, re-evaluate phis
|
||||
cfg_worklist.push_back(to);
|
||||
}
|
||||
let to_idx = to.0 as usize;
|
||||
if to_idx >= executable_preds.len() {
|
||||
return;
|
||||
}
|
||||
let preds = &mut executable_preds[to_idx];
|
||||
if preds.contains(&from) {
|
||||
return;
|
||||
}
|
||||
preds.push(from);
|
||||
let was_already_exec = executable_blocks[to_idx];
|
||||
if !was_already_exec {
|
||||
executable_blocks[to_idx] = true;
|
||||
}
|
||||
// Always re-enqueue: either the block became newly reachable, or it
|
||||
// already was but a new predecessor edge means phi operands need
|
||||
// re-meeting against the now-executable predecessor.
|
||||
cfg_worklist.push_back(to);
|
||||
}
|
||||
|
||||
/// Apply constant propagation results: prune branches where condition is known constant.
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ use super::ir::*;
|
|||
use crate::cfg::{BinOp, Cfg};
|
||||
use crate::symbol::Lang;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use smallvec::SmallVec;
|
||||
|
||||
/// Inferred type kind for an SSA value.
|
||||
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
|
||||
|
|
@ -40,6 +41,17 @@ pub enum TypeKind {
|
|||
/// `label_prefix`, never participates in label-based callee
|
||||
/// resolution.
|
||||
LocalCollection,
|
||||
/// A JPA / Hibernate Criteria API query object (`CriteriaQuery<T>`,
|
||||
/// `CriteriaUpdate<T>`, `CriteriaDelete<T>`, `Subquery<T>`,
|
||||
/// `TypedQuery<T>`). These objects are produced by the
|
||||
/// `CriteriaBuilder` and emit parameterized SQL when handed to
|
||||
/// `Session.createQuery(cq)` / `EntityManager.createQuery(cq)`. The
|
||||
/// argument is structural (predicate AST), not a string, so SQL
|
||||
/// injection cannot flow through it. Used to suppress the
|
||||
/// `cfg-unguarded-sink` finding on `session.createQuery(cq)` shapes
|
||||
/// where openmrs / xwiki / keycloak Hibernate DAOs build queries
|
||||
/// via `cb.createQuery(Foo.class)` + `Root` / `Predicate` API.
|
||||
JpaCriteriaQuery,
|
||||
/// A framework-injected DTO body whose field types are known.
|
||||
/// Populated when a parameter is recognised as a typed extractor and
|
||||
/// the DTO class / struct / Pydantic model is resolvable in scope.
|
||||
|
|
@ -86,6 +98,7 @@ impl TypeKind {
|
|||
Self::FileHandle => Some("FileHandle"),
|
||||
Self::Url => Some("URL"),
|
||||
Self::RequestBuilder => Some("RequestBuilder"),
|
||||
Self::JpaCriteriaQuery => Some("JpaCriteriaQuery"),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
|
@ -222,6 +235,111 @@ pub fn is_type_safe_for_sink(
|
|||
})
|
||||
}
|
||||
|
||||
/// Check whether any of the sink-arg SSA values is a structural query
|
||||
/// object that emits parameterized SQL by construction (currently the
|
||||
/// JPA / Hibernate Criteria API: `CriteriaQuery`, `CriteriaUpdate`,
|
||||
/// `CriteriaDelete`, `Subquery`, `TypedQuery`).
|
||||
///
|
||||
/// Used by both the SSA taint engine and the structural
|
||||
/// `cfg-unguarded-sink` analysis to suppress the SQL-injection finding
|
||||
/// on `session.createQuery(cq)` / `em.createQuery(cq)` / `executeUpdate`
|
||||
/// shapes where the argument is a Criteria object built via
|
||||
/// `CriteriaBuilder` rather than a string.
|
||||
///
|
||||
/// Returns `false` when `sink_caps` does not include `SQL_QUERY`, when
|
||||
/// `values` is empty, or when no value carries the
|
||||
/// [`TypeKind::JpaCriteriaQuery`] tag. Receiver values should be
|
||||
/// excluded by the caller, the receiver of a JPA query method is the
|
||||
/// `Session` / `EntityManager` channel, never the payload.
|
||||
pub fn is_safe_query_object_arg(
|
||||
values: &[SsaValue],
|
||||
sink_caps: crate::labels::Cap,
|
||||
type_facts: &TypeFactResult,
|
||||
) -> bool {
|
||||
use crate::labels::Cap;
|
||||
if !sink_caps.intersects(Cap::SQL_QUERY) {
|
||||
return false;
|
||||
}
|
||||
if values.is_empty() {
|
||||
return false;
|
||||
}
|
||||
values
|
||||
.iter()
|
||||
.any(|v| type_facts.is_type(*v, &TypeKind::JpaCriteriaQuery))
|
||||
}
|
||||
|
||||
/// Receiver-text-aware return-type inference for methods whose
|
||||
/// constructor mapping cannot be determined from the callee suffix
|
||||
/// alone.
|
||||
///
|
||||
/// The JPA `createQuery` suffix is overloaded between
|
||||
/// `CriteriaBuilder.createQuery(Class)` (returns `CriteriaQuery`, our
|
||||
/// safe-by-construction structural query object) and
|
||||
/// `Session.createQuery(String|Query)` (the executable-query
|
||||
/// constructor whose string overload IS a SQL sink). Class-literal
|
||||
/// arg shape (e.g. `Foo.class`) doesn't surface in `arg_uses` at the
|
||||
/// CFG layer, so we fall back to the receiver-text hint: if the
|
||||
/// callee path includes a `CriteriaBuilder` cast or a receiver
|
||||
/// variable named `cb` / `criteriaBuilder` / `builder`, treat the
|
||||
/// call as the criteria-builder overload.
|
||||
///
|
||||
/// Conservative: returns `None` for any other shape so
|
||||
/// [`constructor_type`] / `is_int_producing_callee` stay
|
||||
/// authoritative, and consumers see Unknown instead of a wrong
|
||||
/// type tag.
|
||||
///
|
||||
/// `_args` and `_consts` are kept on the signature so we can later
|
||||
/// add arg-shape narrowing when class-literal lowering captures
|
||||
/// `Foo.class` as an arg-use.
|
||||
fn arg_aware_call_type(
|
||||
lang: Lang,
|
||||
callee: &str,
|
||||
_args: &[SmallVec<[SsaValue; 2]>],
|
||||
_consts: &HashMap<SsaValue, ConstLattice>,
|
||||
) -> Option<TypeKind> {
|
||||
if !matches!(lang, Lang::Java) {
|
||||
return None;
|
||||
}
|
||||
let after_colons = callee.rsplit("::").next().unwrap_or(callee);
|
||||
let suffix = after_colons.rsplit('.').next().unwrap_or(after_colons);
|
||||
if suffix != "createQuery" {
|
||||
return None;
|
||||
}
|
||||
// Strip the trailing `.createQuery` segment and inspect the
|
||||
// receiver text for the criteria-builder hints. Conservative
|
||||
// text-level match, the SSA layer doesn't expose receiver-type
|
||||
// facts here yet.
|
||||
let prefix = callee.rsplit_once('.').map(|(p, _)| p).unwrap_or(callee);
|
||||
if prefix.contains("CriteriaBuilder") || receiver_is_criteria_builder(prefix) {
|
||||
Some(TypeKind::JpaCriteriaQuery)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Heuristic: does `receiver_text` look like a JPA `CriteriaBuilder` by name?
///
/// Two substring shortcuts are accepted anywhere in the text:
/// `getCriteriaBuilder()` and `.cb.`. Otherwise the text after the last `)`
/// is trimmed, leading dots are removed, and the final segment (split on
/// `.`, `:` or a space, with surrounding parentheses stripped) must be one of
/// `cb`, `criteriaBuilder`, `criteria_builder`, `builder` or
/// `getCriteriaBuilder`.
///
/// Note that a receiver ending in `)` leaves an empty final segment, so for
/// such text only the substring shortcuts can match.
fn receiver_is_criteria_builder(receiver_text: &str) -> bool {
    const BUILDER_NAMES: [&str; 5] = [
        "cb",
        "criteriaBuilder",
        "criteria_builder",
        "builder",
        "getCriteriaBuilder",
    ];

    if receiver_text.contains("getCriteriaBuilder()") || receiver_text.contains(".cb.") {
        return true;
    }

    // Keep only what follows the last closing parenthesis, if there is one.
    let after_paren = match receiver_text.rfind(')') {
        Some(pos) => &receiver_text[pos + 1..],
        None => receiver_text,
    };
    let trimmed = after_paren.trim().trim_start_matches('.');

    // All three separators are single-byte, so `pos + 1` is a char boundary.
    let ident_start = trimmed
        .rfind(|c: char| matches!(c, '.' | ':' | ' '))
        .map_or(0, |pos| pos + 1);
    let ident = trimmed[ident_start..].trim_matches(|c: char| matches!(c, '(' | ')'));

    BUILDER_NAMES.contains(&ident)
}
|
||||
|
||||
/// Infer a type from a constructor, factory, or allocator call.
|
||||
///
|
||||
/// Maps known constructor/factory/allocator patterns to security-relevant
|
||||
|
|
@ -260,6 +378,20 @@ pub(crate) fn constructor_type(lang: Lang, callee: &str) -> Option<TypeKind> {
|
|||
"FileInputStream" | "FileOutputStream" | "FileReader" | "FileWriter"
|
||||
| "BufferedReader" | "BufferedWriter" => Some(TypeKind::FileHandle),
|
||||
"getWriter" | "getOutputStream" => Some(TypeKind::HttpResponse),
|
||||
// JPA / Hibernate Criteria API factory methods. These are
|
||||
// unambiguous: `createCriteriaUpdate` / `createCriteriaDelete`
|
||||
// / `createTupleQuery` / `subquery` exist only on
|
||||
// `CriteriaBuilder` / `CriteriaQuery` and always return a
|
||||
// structural query object. `createQuery` is overloaded
|
||||
// (`CriteriaBuilder.createQuery(Class)` returns
|
||||
// `CriteriaQuery`; `Session.createQuery(String)` returns
|
||||
// `Query`), so it's resolved separately by
|
||||
// [`arg_aware_call_type`], which keys on the receiver text
|
||||
// (a `CriteriaBuilder` hint) so we don't conflate the executable-
|
||||
// query overload with the criteria builder.
|
||||
"createCriteriaUpdate" | "createCriteriaDelete" | "createTupleQuery" | "subquery" => {
|
||||
Some(TypeKind::JpaCriteriaQuery)
|
||||
}
|
||||
_ => None,
|
||||
},
|
||||
Lang::JavaScript | Lang::TypeScript => match suffix {
|
||||
|
|
@ -687,9 +819,13 @@ pub fn analyze_types_with_param_types(
|
|||
}
|
||||
SsaOp::SelfParam => TypeFact::from_kind(TypeKind::Object),
|
||||
SsaOp::CatchParam => TypeFact::from_kind(TypeKind::Object),
|
||||
SsaOp::Call { callee, .. } => {
|
||||
SsaOp::Call { callee, args, .. } => {
|
||||
if let Some(ty) = lang.and_then(|l| constructor_type(l, callee)) {
|
||||
TypeFact::from_kind(ty)
|
||||
} else if let Some(ty) =
|
||||
lang.and_then(|l| arg_aware_call_type(l, callee, args, consts))
|
||||
{
|
||||
TypeFact::from_kind(ty)
|
||||
} else if is_int_producing_callee(callee) {
|
||||
TypeFact::from_kind(TypeKind::Int)
|
||||
} else {
|
||||
|
|
@ -2227,4 +2363,171 @@ mod tests {
|
|||
&result
|
||||
));
|
||||
}
|
||||
|
||||
// ── JPA Criteria query suppression (Phase: real-repo openmrs FP) ───
|
||||
//
|
||||
// These tests pin the `TypeKind::JpaCriteriaQuery` variant + the
|
||||
// `is_safe_query_object_arg` predicate + the
|
||||
// `arg_aware_call_type` receiver-text recogniser. Together they
|
||||
// close the openmrs HibernateDAO `session.createQuery(cq)` FP
|
||||
// cluster (216 → 24 cfg-unguarded-sink in openmrs).
|
||||
|
||||
/// Pins the `label_prefix` of `TypeKind::JpaCriteriaQuery`, so that
/// type-qualified callee resolution has a prefix to attach future rules to.
#[test]
fn jpa_criteria_query_label_prefix() {
    let prefix = TypeKind::JpaCriteriaQuery.label_prefix();
    assert_eq!(prefix, Some("JpaCriteriaQuery"));
}
|
||||
|
||||
/// A `JpaCriteriaQuery`-typed value makes `is_safe_query_object_arg` answer
/// `true` for a `SQL_QUERY` sink and for nothing else. Leaving the receiver
/// out of the slice is the caller's job; only the predicate is checked here.
#[test]
fn safe_query_object_arg_suppresses_sql_query() {
    use crate::labels::Cap;

    // Builds type facts holding a single entry for value 0.
    let single_fact = |kind: TypeKind| {
        let mut facts = HashMap::new();
        facts.insert(SsaValue(0), TypeFact::from_kind(kind));
        TypeFactResult { facts }
    };
    let criteria = single_fact(TypeKind::JpaCriteriaQuery);
    let unknown = single_fact(TypeKind::Unknown);
    let arg = [SsaValue(0)];

    assert!(is_safe_query_object_arg(&arg, Cap::SQL_QUERY, &criteria));
    // Other caps stay untouched.
    assert!(!is_safe_query_object_arg(&arg, Cap::CODE_EXEC, &criteria));
    // Unknown-typed values do not trigger.
    assert!(!is_safe_query_object_arg(&arg, Cap::SQL_QUERY, &unknown));
    // Empty slice never suppresses.
    assert!(!is_safe_query_object_arg(&[], Cap::SQL_QUERY, &criteria));
}
|
||||
|
||||
/// One Criteria-typed value among otherwise unrelated values is enough: the
/// predicate is `any`, not `all`, because the criteria-object argument is the
/// only injection-bearing slot of a `createQuery(cq)` sink.
#[test]
fn safe_query_object_arg_fires_with_mixed_args() {
    use crate::labels::Cap;

    let kinds = [
        TypeKind::JpaCriteriaQuery,
        TypeKind::String,
        TypeKind::Unknown,
    ];
    let mut facts = HashMap::new();
    for (slot, kind) in kinds.into_iter().enumerate() {
        facts.insert(SsaValue(slot as u32), TypeFact::from_kind(kind));
    }
    let result = TypeFactResult { facts };

    let args = [SsaValue(0), SsaValue(1), SsaValue(2)];
    assert!(is_safe_query_object_arg(&args, Cap::SQL_QUERY, &result));
}
|
||||
|
||||
/// `arg_aware_call_type` tags `cb.createQuery(...)`,
/// `criteriaBuilder.createQuery(...)`, a `(CriteriaBuilder)` cast receiver
/// and a chained `getCriteriaBuilder()` receiver as `JpaCriteriaQuery`. The
/// overloaded `session.createQuery(...)` / `em.createQuery(...)` (the
/// executable-query overload), non-Java languages and other method names all
/// stay `None`.
#[test]
fn arg_aware_call_type_jpa_criteria_builder_recogniser() {
    let no_args: Vec<SmallVec<[SsaValue; 2]>> = Vec::new();
    let consts: HashMap<SsaValue, ConstLattice> = HashMap::new();
    let classify =
        |lang: Lang, callee: &str| arg_aware_call_type(lang, callee, &no_args, &consts);

    // Bare `cb` ident, bare `criteriaBuilder` ident, cast in the receiver
    // text, and a chained `getCriteriaBuilder()` accessor.
    let builder_callees = [
        "cb.createQuery",
        "criteriaBuilder.createQuery",
        "((CriteriaBuilder) cb).createQuery",
        "session.getCriteriaBuilder().createQuery",
    ];
    for callee in builder_callees {
        assert_eq!(
            classify(Lang::Java, callee),
            Some(TypeKind::JpaCriteriaQuery)
        );
    }

    // The executable-query overload carries no CriteriaBuilder hint in its
    // receiver text, so the type stays Unknown and suppression is left to
    // the arg-0 type fact.
    for callee in ["session.createQuery", "em.createQuery"] {
        assert_eq!(classify(Lang::Java, callee), None);
    }

    // Non-Java langs return None.
    assert_eq!(classify(Lang::Python, "cb.createQuery"), None);
    // Other suffixes return None.
    assert_eq!(classify(Lang::Java, "cb.createCriteriaUpdate"), None);
}
|
||||
|
||||
/// Criteria API methods with a unique suffix are typed as
/// `TypeKind::JpaCriteriaQuery` by [`constructor_type`] itself, with no
/// receiver hint needed: `createCriteriaUpdate` / `createCriteriaDelete` /
/// `createTupleQuery` / `subquery` exist only on `CriteriaBuilder` /
/// `CriteriaQuery` and have no overload conflict.
#[test]
fn constructor_type_unique_jpa_criteria_methods() {
    const UNIQUE_SUFFIXES: [&str; 4] = [
        "createCriteriaUpdate",
        "createCriteriaDelete",
        "createTupleQuery",
        "subquery",
    ];
    let expected = Some(TypeKind::JpaCriteriaQuery);

    for suffix in UNIQUE_SUFFIXES {
        assert_eq!(
            constructor_type(Lang::Java, suffix),
            expected,
            "suffix `{suffix}` must map to JpaCriteriaQuery"
        );
        // Same suffix prefixed by an arbitrary receiver still maps.
        let qualified = format!("cb.{suffix}");
        assert_eq!(constructor_type(Lang::Java, &qualified), expected);
    }

    // Non-criteria methods unaffected.
    assert_eq!(
        constructor_type(Lang::Java, "session.createQuery"),
        None,
        "createQuery is overloaded — must not map at constructor_type level"
    );
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue