feat(ssa): optimize branch condition handling via constant folding, enhance precision for taint analysis, and expand OWASP Benchmark support

This commit is contained in:
elipeter 2026-06-02 13:41:45 -05:00
parent ec76c9e08f
commit 9c99f6c6a9
22 changed files with 1020 additions and 17 deletions

View file

@ -624,6 +624,192 @@ pub fn apply_const_prop(body: &mut SsaBody, result: &ConstPropResult) -> usize {
pruned
}
/// Map a branch-condition variable name to the SSA value used for it at
/// `block`.
///
/// Selection rule (the same one `constraint::lower::resolve_single_var` is
/// documented to use): among all `value_defs` entries named `var_name`, take
/// the highest-indexed one defined inside `block`; if there is none, take the
/// highest-indexed one defined in any other block. Returns `None` when no
/// definition carries that name.
///
/// The logic lives here rather than being shared because `constraint` already
/// depends on `ssa`, and importing it back would create a dependency cycle.
///
/// NOTE(review): the out-of-block fallback is chosen purely by index, not by
/// dominance — confirm callers only rely on it where that is acceptable.
fn resolve_const_var(body: &SsaBody, var_name: &str, block: BlockId) -> Option<SsaValue> {
    let mut local: Option<SsaValue> = None;
    let mut foreign: Option<SsaValue> = None;

    let named_defs = body
        .value_defs
        .iter()
        .enumerate()
        .filter(|(_, def)| def.var_name.as_deref() == Some(var_name))
        .map(|(idx, def)| (SsaValue(idx as u32), def.block == block));

    for (candidate, defined_here) in named_defs {
        // Pick the bucket this definition competes in, then keep whichever
        // of (previous winner, candidate) has the larger index; ties go to
        // the candidate.
        let slot = if defined_here { &mut local } else { &mut foreign };
        *slot = match slot.take() {
            Some(prev) if prev.0 > candidate.0 => Some(prev),
            _ => Some(candidate),
        };
    }

    local.or(foreign)
}
/// Fold branch conditions that are pure integer-arithmetic comparisons over
/// constant operands, pruning the statically-dead edge.
///
/// Complements [`apply_const_prop`], which only folds a condition that lowers
/// to a single SSA boolean value. An arithmetic comparison condition such as
/// `(7*42) - num > 200` is **never** an SSA value — condition nodes lower to
/// `Nop` and the comparison is held structurally on the branch terminator — so
/// SCCP cannot reach it. This pass instead evaluates the
/// [`crate::cfg::CondArith`] tree captured at CFG-build time, resolving each
/// variable to its const-propagated integer.
///
/// Sound by construction:
/// * A branch is pruned only when its `CondArith` evaluates to a **definite**
///   boolean — every variable bound to a known integer constant and every
///   operation defined (no div-by-zero / overflow). `None`/`Varying` leaves
///   both edges intact.
/// * When the terminator is rewritten to `Goto(taken)`, the dead edge is
///   removed from both `succs` and `preds`, **and** the phi operands that the
///   untaken block received over that edge are removed with it. This matters
///   when the untaken target stays reachable through another path (e.g. an
///   `if` without `else`, whose false edge goes straight to the merge block):
///   the branch block is still reachable, so a reachability-only filter would
///   leave the stale operand in the merge phi.
/// * Afterwards every phi operand whose predecessor is no longer reachable
///   from entry is removed as well. Together these two steps are what drop the
///   dead-branch operand from a merge phi like
///   `bar = phi(then: "const", else: param)` — without them the taint engine's
///   phi fallback would still read the tainted `param`.
///
/// # Parameters
/// * `body` — SSA body rewritten in place.
/// * `cfg` — CFG whose node weights carry the optional `cond_arith` tree for
///   each branch condition node.
/// * `const_values` — per-value constant lattice produced by constant
///   propagation; only `ConstLattice::Int` entries are used.
///
/// Returns the number of branches pruned.
pub fn fold_constant_branches(
    body: &mut SsaBody,
    cfg: &crate::cfg::Cfg,
    const_values: &HashMap<SsaValue, ConstLattice>,
) -> usize {
    use crate::ssa::ir::Terminator;

    // 1. Collect definite fold decisions: (branch_block_idx, taken, untaken).
    let mut prune_ops: Vec<(usize, BlockId, BlockId)> = Vec::new();
    for (block_idx, block) in body.blocks.iter().enumerate() {
        let Terminator::Branch {
            cond,
            true_blk,
            false_blk,
            ..
        } = &block.terminator
        else {
            continue;
        };
        // Degenerate `cond ? X : X` (both edges to one block): nothing to prune.
        if true_blk == false_blk {
            continue;
        }
        let Some(cond_info) = cfg.node_weight(*cond) else {
            continue;
        };
        let Some(arith) = cond_info.cond_arith.as_ref() else {
            continue;
        };
        let branch_block = block.id;
        // A variable only resolves when it maps to a known integer constant;
        // anything else makes the whole condition indefinite.
        let resolve = |name: &str| -> Option<i64> {
            let v = resolve_const_var(body, name, branch_block)?;
            match const_values.get(&v) {
                Some(ConstLattice::Int(n)) => Some(*n),
                _ => None,
            }
        };
        match arith.eval_bool(&resolve) {
            Some(true) => prune_ops.push((block_idx, *true_blk, *false_blk)),
            Some(false) => prune_ops.push((block_idx, *false_blk, *true_blk)),
            None => {}
        }
    }
    let pruned = prune_ops.len();
    if pruned == 0 {
        return 0;
    }

    // 2. Rewrite terminators and drop the dead edge on both sides, including
    //    the phi operands that flowed over it. `true_blk != false_blk` was
    //    checked above, so `pred -> untaken` is the only edge between the two
    //    blocks and removing every operand keyed by `pred` is exact.
    for &(block_idx, taken, untaken) in &prune_ops {
        let pred_id = body.blocks[block_idx].id;
        body.blocks[block_idx].terminator = Terminator::Goto(taken);
        body.blocks[block_idx].succs.retain(|s| *s != untaken);

        let untaken_idx = untaken.0 as usize;
        if untaken_idx < body.blocks.len() {
            let dead_target = &mut body.blocks[untaken_idx];
            dead_target.preds.retain(|p| *p != pred_id);
            for phi in &mut dead_target.phis {
                if let SsaOp::Phi(operands) = &mut phi.op {
                    operands.retain(|(pred, _)| *pred != pred_id);
                }
            }
        }
    }

    // 3. Recompute reachability from entry over the (now-pruned) succ edges.
    let n = body.blocks.len();
    let mut reachable = vec![false; n];
    let mut stack = vec![body.entry];
    if (body.entry.0 as usize) < n {
        reachable[body.entry.0 as usize] = true;
    }
    while let Some(b) = stack.pop() {
        let bidx = b.0 as usize;
        if bidx >= n {
            continue;
        }
        // `reachable` and `stack` are locals, so `body.blocks` can be read
        // directly without cloning the successor list.
        for &s in body.blocks[bidx].succs.iter() {
            let sidx = s.0 as usize;
            if sidx < n && !reachable[sidx] {
                reachable[sidx] = true;
                stack.push(s);
            }
        }
    }

    // 4. Fix up every block according to its reachability.
    for (bidx, block) in body.blocks.iter_mut().enumerate() {
        if reachable[bidx] {
            // Reachable: forget predecessors that became unreachable, both in
            // `preds` and in phi operands. Operands from still-reachable
            // predecessors are untouched, so no live flow is lost.
            block.preds.retain(|p| {
                let pidx = p.0 as usize;
                pidx < n && reachable[pidx]
            });
            for phi in &mut block.phis {
                if let SsaOp::Phi(operands) = &mut phi.op {
                    operands.retain(|(pred, _)| {
                        let pidx = pred.0 as usize;
                        pidx < n && reachable[pidx]
                    });
                }
            }
        } else {
            // Unreachable: neutralise the block so the *later* optimiser
            // passes (copy-prop, base-alias grouping, type-facts, points-to)
            // and the taint transfer never observe its dead instructions. A
            // dead `else bar = param` would otherwise make copy-prop alias
            // `bar`↔`param` and let alias taint propagation poison the
            // surviving constant `bar`. Each instruction becomes `Nop` (value
            // + cfg_node preserved so `value_defs` coverage holds), the
            // terminator becomes `Unreachable`, and the block is fully
            // disconnected.
            for inst in block.phis.iter_mut().chain(block.body.iter_mut()) {
                inst.op = SsaOp::Nop;
            }
            block.terminator = Terminator::Unreachable;
            block.succs.clear();
            block.preds.clear();
        }
    }

    pruned
}
/// Collect module aliases from `require()` calls in the SSA body.
///
/// Detects patterns like `const http = require("http")` and propagates

View file

@ -101,7 +101,12 @@ pub fn optimize_ssa_with_param_types(
) -> OptimizeResult {
// 1. Constant propagation (SCCP)
let cp = const_prop::const_propagate(body);
let branches_pruned = const_prop::apply_const_prop(body, &cp);
let mut branches_pruned = const_prop::apply_const_prop(body, &cp);
// 1b. Fold pure integer-arithmetic comparison branch conditions that SCCP
// cannot reach (the comparison is held on the terminator, not an SSA
// value). Prunes statically-dead edges + their merge-phi operands so a
// dead `else bar = param` stops feeding a tainted operand into the phi.
branches_pruned += const_prop::fold_constant_branches(body, cfg, &cp.values);
// 2. Copy propagation
let (copies_eliminated, copy_map) = copy_prop::copy_propagate(body, cfg);

View file

@ -261,6 +261,33 @@ pub enum TypeKind {
/// arbitrary-receiver-name shape (`sess`, `hibernateSession`, etc.)
/// via type-qualified resolution.
HibernateSession,
/// A `java.lang.ProcessBuilder` instance produced by
/// `new ProcessBuilder(...)`. The dominant OWASP Benchmark
/// command-injection shape builds an argument `List<String>`, attaches
/// it via `pb.command(argList)`, then runs it with `pb.start()`. The
/// argument list is a separate channel from the constructor, so the
/// flat `ProcessBuilder` constructor sink never sees the tainted args.
/// Mapping the receiver to this TypeKind lets the type-qualified
/// resolver rewrite `pb.command(argList)` → `ProcessBuilder.command`
/// against the flat SHELL_ESCAPE rule in `labels/java.rs`, so tainted
/// list contents reaching the command builder are caught at the
/// `command(...)` call site.
ProcessBuilder,
/// A `java.lang.Runtime` instance produced by the static factory
/// `Runtime.getRuntime()`. The dominant OWASP Benchmark
/// command-injection shape splits the receiver across statements:
/// `Runtime r = Runtime.getRuntime(); ... r.exec(args, argsEnv)`. The
/// callee text at the sink is `r.exec`, which does not suffix-match the
/// flat `Runtime.exec` rule in `labels/java.rs` (the chained
/// `Runtime.getRuntime().exec(...)` form fires only because its callee
/// text literally contains `Runtime`). Mapping the receiver `r` to
/// this TypeKind lets the type-qualified resolver rewrite `r.exec(...)`
/// → `Runtime.exec` against the flat SHELL_ESCAPE rule, so tainted data
/// reaching the split-receiver exec is caught. No payload-arg
/// restriction: `Runtime.exec` overloads place the tainted data in
/// either the command (arg 0) or the environment array (arg 1), so the
/// default all-args sink scan must cover every position.
Runtime,
}
/// structural carrier for a recognised DTO type. Maps
@ -318,6 +345,8 @@ impl TypeKind {
Self::GormDb => Some("GormDb"),
Self::SqlxDb => Some("SqlxDb"),
Self::HibernateSession => Some("HibernateSession"),
Self::ProcessBuilder => Some("ProcessBuilder"),
Self::Runtime => Some("Runtime"),
_ => None,
}
}
@ -708,6 +737,18 @@ pub(crate) fn constructor_type(lang: Lang, callee: &str) -> Option<TypeKind> {
"openSession" | "getCurrentSession" | "openStatelessSession" => {
Some(TypeKind::HibernateSession)
}
// `new ProcessBuilder(...)` — the receiver's `command(argList)`
// setter is a command-injection sink for the list contents.
// Type-qualified resolution rewrites `pb.command(...)` →
// `ProcessBuilder.command` against the flat SHELL_ESCAPE rule.
"ProcessBuilder" => Some(TypeKind::ProcessBuilder),
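// `Runtime.getRuntime()` — the static factory returns the
// singleton `java.lang.Runtime`. The `callee.contains("Runtime")`
// guard is intended to keep an unrelated `foo.getRuntime()` method
// from being mistyped. Type-qualified resolution rewrites the
// split-receiver `r.exec(...)` → `Runtime.exec` against the flat
// SHELL_ESCAPE rule.
// NOTE(review): the method name `getRuntime` itself contains the
// substring `Runtime`, so if `callee` includes the method name this
// guard is always true and `foo.getRuntime()` is still typed as
// `Runtime` — confirm, and consider testing the receiver segment
// (e.g. a `Runtime.` prefix) instead.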
"getRuntime" if callee.contains("Runtime") => Some(TypeKind::Runtime),
_ => None,
},
Lang::JavaScript | Lang::TypeScript => {