mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-09 19:45:13 +02:00
4698 lines
190 KiB
Rust
4698 lines
190 KiB
Rust
#![allow(
|
|
clippy::collapsible_if,
|
|
clippy::if_same_then_else,
|
|
clippy::needless_range_loop,
|
|
clippy::only_used_in_recursion,
|
|
clippy::too_many_arguments,
|
|
clippy::type_complexity,
|
|
clippy::unnecessary_unwrap
|
|
)]
|
|
|
|
use crate::cfg::{Cfg, EdgeKind, StmtKind};
|
|
use petgraph::algo::dominators::{Dominators, simple_fast};
|
|
use petgraph::graph::NodeIndex;
|
|
use petgraph::prelude::*;
|
|
use petgraph::visit::EdgeRef;
|
|
use smallvec::SmallVec;
|
|
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet, VecDeque};
|
|
|
|
use super::ir::*;
|
|
|
|
/// Try to decompose a chained-receiver method call (e.g. `a.b.c.method`)
|
|
/// into a `FieldProj` chain plus a bare-method `Call`.
|
|
///
|
|
/// **Returns** `Some((final_receiver_value, bare_method_name))` on success,
|
|
/// `None` to fall back to the existing single-Call lowering (current
|
|
/// behaviour).
|
|
///
|
|
/// On success, the caller should:
|
|
/// - Construct the `Call` op with `callee = bare_method_name`,
|
|
/// `callee_text = Some(original_callee.to_string())`,
|
|
/// `receiver = Some(final_receiver_value)`.
|
|
/// - Use the returned receiver as the implicit method receiver, do NOT
|
|
/// add the chain root or any intermediate field name to `args`.
|
|
///
|
|
/// **Decomposition rules**:
|
|
/// - Skip when the callee contains zero `.` characters (no member access)
|
|
/// or only one `.` (single-dot case is handled by the existing
|
|
/// `info.call.receiver` channel without needing a `FieldProj` op).
|
|
/// - Bail when any "complex" token appears in the callee, `(`, `)`,
|
|
/// `[`, `]`, `::`, `->`, `?`, `<`, `>`, `*`, `&`, `:` (other than `::`
|
|
/// already filtered), or whitespace, signaling the callee text isn't
|
|
/// a clean `<ident>.<ident>...` chain we can safely split on `.`.
|
|
/// - The first segment must be a known SSA variable in `var_stacks`;
|
|
/// otherwise the chain root is unresolvable and we bail.
|
|
/// - Each intermediate segment becomes a `FieldProj { receiver, field }`
|
|
/// instruction emitted onto `block.body` with a fresh `SsaValue`.
|
|
/// - The last segment is the bare method name returned to the caller.
|
|
///
|
|
/// FieldProj instructions are tagged with `var_name = Some("base.f1.f2")`
|
|
/// so debug output and downstream consumers that key on `var_name` can
|
|
/// recognise the projection chain provenance.
|
|
#[allow(clippy::too_many_arguments)]
|
|
fn try_lower_field_proj_chain(
|
|
callee: &str,
|
|
var_stacks: &HashMap<String, Vec<SsaValue>>,
|
|
field_interner: &mut crate::ssa::ir::FieldInterner,
|
|
block_idx: usize,
|
|
block_id: BlockId,
|
|
next_value: &mut u32,
|
|
ssa_blocks: &mut [SsaBlock],
|
|
value_defs: &mut Vec<ValueDef>,
|
|
cfg_node: NodeIndex,
|
|
span: (usize, usize),
|
|
) -> Option<(SsaValue, String)> {
|
|
// Bail on any token that signals a complex callee expression.
|
|
// `::` (Rust/C++ paths) is folded into the broader `:` check.
|
|
for ch in callee.chars() {
|
|
match ch {
|
|
'(' | ')' | '[' | ']' | '<' | '>' | '?' | '*' | '&' | ':' | ' ' | '\t' | '\n' | '-'
|
|
| '!' | ',' | ';' | '"' | '\'' | '\\' => return None,
|
|
_ => {}
|
|
}
|
|
}
|
|
let segments: Vec<&str> = callee.split('.').collect();
|
|
// Need at least 3 segments: `base.field.method` → 1 FieldProj, 1 Call.
|
|
if segments.len() < 3 {
|
|
return None;
|
|
}
|
|
// Reject empty segments (would happen on leading/trailing/double dots).
|
|
if segments.iter().any(|s| s.is_empty()) {
|
|
return None;
|
|
}
|
|
|
|
let base = segments[0];
|
|
let mut current = *var_stacks.get(base).and_then(|s| s.last())?;
|
|
let mut chain_var = base.to_string();
|
|
|
|
// Each intermediate segment becomes a FieldProj op. segments[0] is the
|
|
// base SSA variable, segments[len-1] is the bare method name.
|
|
for field_name in &segments[1..segments.len() - 1] {
|
|
let fid = field_interner.intern(field_name);
|
|
let v = SsaValue(*next_value);
|
|
*next_value += 1;
|
|
chain_var.push('.');
|
|
chain_var.push_str(field_name);
|
|
ssa_blocks[block_idx].body.push(SsaInst {
|
|
value: v,
|
|
op: SsaOp::FieldProj {
|
|
receiver: current,
|
|
field: fid,
|
|
projected_type: None,
|
|
},
|
|
cfg_node,
|
|
var_name: Some(chain_var.clone()),
|
|
span,
|
|
});
|
|
value_defs.push(ValueDef {
|
|
var_name: Some(chain_var.clone()),
|
|
cfg_node,
|
|
block: block_id,
|
|
});
|
|
current = v;
|
|
}
|
|
|
|
let method = segments.last().unwrap().to_string();
|
|
Some((current, method))
|
|
}
|
|
|
|
/// Lower a CFG to SSA form for a single function scope.
|
|
///
|
|
/// `scope` filters nodes by `enclosing_func`:
|
|
/// - `None` → top-level code only (`enclosing_func.is_none()`)
|
|
/// - `Some(name)` → only nodes with `enclosing_func == Some(name)`
|
|
///
|
|
/// If `scope_all` is true, all nodes reachable from `entry` are included
|
|
/// regardless of `enclosing_func`.
|
|
pub fn lower_to_ssa(
|
|
cfg: &Cfg,
|
|
entry: NodeIndex,
|
|
scope: Option<&str>,
|
|
scope_all: bool,
|
|
) -> Result<SsaBody, SsaError> {
|
|
lower_to_ssa_inner(cfg, entry, scope, scope_all, false, &[], false)
|
|
}
|
|
|
|
/// Like `lower_to_ssa` but with formal parameter names supplied in declaration
|
|
/// order. External variables that match these names are placed first (in
|
|
/// declaration order) so that `Param { index }` indices 0..N correspond to
|
|
/// call-site argument positions.
|
|
pub fn lower_to_ssa_with_params(
|
|
cfg: &Cfg,
|
|
entry: NodeIndex,
|
|
scope: Option<&str>,
|
|
scope_all: bool,
|
|
formal_params: &[String],
|
|
) -> Result<SsaBody, SsaError> {
|
|
// `with_params=true` signals "callers supplied an explicit formal list,
|
|
// even if empty" (e.g. arrow `() => {…}` has zero formals). This lets
|
|
// the synthetic-externals classifier distinguish "no formals info" from
|
|
// "explicit empty formals" — closure captures of an arrow with empty
|
|
// formals are still synthetic, not formals. Bug surfaced on outline's
|
|
// jest test files: free vars bubbled up from nested arrow callbacks
|
|
// (`body`, `userId`, `server.post`) became Params at the outer arrow's
|
|
// entry, and the JS/TS auto-seed treated `userId` as a real handler
|
|
// formal, producing 934 phantom taint findings. See
|
|
// `taint/ssa_transfer/mod.rs::auto_seed_handler_params`.
|
|
lower_to_ssa_inner(cfg, entry, scope, scope_all, false, formal_params, true)
|
|
}
|
|
|
|
/// Like `lower_to_ssa` but with `scope_nop`: when true, all nodes are included
|
|
/// in the SSA body for graph connectivity, but out-of-scope nodes become Nop
|
|
/// (their defines/uses are ignored). This is used for the JS two-level solve
|
|
/// where the CFG linearizes function bodies inline.
|
|
pub fn lower_to_ssa_scoped_nop(
|
|
cfg: &Cfg,
|
|
entry: NodeIndex,
|
|
scope: Option<&str>,
|
|
) -> Result<SsaBody, SsaError> {
|
|
lower_to_ssa_inner(cfg, entry, scope, false, true, &[], false)
|
|
}
|
|
|
|
fn lower_to_ssa_inner(
|
|
cfg: &Cfg,
|
|
entry: NodeIndex,
|
|
scope: Option<&str>,
|
|
scope_all: bool,
|
|
scope_nop: bool,
|
|
formal_params: &[String],
|
|
with_params: bool,
|
|
) -> Result<SsaBody, SsaError> {
|
|
if cfg.node_count() == 0 {
|
|
return Err(SsaError::EmptyCfg);
|
|
}
|
|
|
|
// When scope_nop is set, traverse all nodes (scope_all=true) for graph connectivity
|
|
let traverse_all = scope_all || scope_nop;
|
|
|
|
// Collect reachable nodes in scope, stripping exception edges.
|
|
let (reachable, filtered_edges, raw_exception_edges) =
|
|
collect_reachable(cfg, entry, scope, traverse_all);
|
|
|
|
// Build the set of nodes that should be treated as Nop (out-of-scope but included)
|
|
let nop_nodes: HashSet<NodeIndex> = if scope_nop {
|
|
let in_scope = |node: NodeIndex| -> bool {
|
|
let info = &cfg[node];
|
|
match scope {
|
|
None => info.ast.enclosing_func.is_none(),
|
|
Some(name) => info.ast.enclosing_func.as_deref() == Some(name),
|
|
}
|
|
};
|
|
reachable
|
|
.iter()
|
|
.filter(|&&n| !in_scope(n) && !matches!(cfg[n].kind, StmtKind::Entry | StmtKind::Exit))
|
|
.copied()
|
|
.collect()
|
|
} else {
|
|
HashSet::new()
|
|
};
|
|
if reachable.is_empty() {
|
|
return Err(SsaError::EmptyCfg);
|
|
}
|
|
|
|
// 1. Form basic blocks
|
|
let (blocks_nodes, block_of_node, block_succs, block_preds) =
|
|
form_blocks(cfg, entry, &reachable, &filtered_edges);
|
|
|
|
let num_blocks = blocks_nodes.len();
|
|
if num_blocks == 0 {
|
|
return Err(SsaError::EmptyCfg);
|
|
}
|
|
|
|
// 2. Compute dominators on block-level graph
|
|
let (block_graph, block_graph_entry) = build_block_graph(num_blocks, &block_succs, BlockId(0));
|
|
let doms = simple_fast(&block_graph, block_graph_entry);
|
|
|
|
// 3. Compute dominance frontiers
|
|
let dom_frontiers = compute_dominance_frontiers(num_blocks, &block_preds, &doms, &block_graph);
|
|
|
|
// 4. Collect variable definitions per block (skip nop nodes)
|
|
let mut var_defs = collect_var_defs(cfg, &blocks_nodes, &nop_nodes);
|
|
|
|
// 4b. For per-function scope: identify external variables (used but not defined)
|
|
// and inject synthetic Param defs at entry block so rename can find them.
|
|
// When formal_params is supplied, reorder so formal params come first in
|
|
// declaration order, this makes Param indices correspond to call-site positions.
|
|
//
|
|
let external_vars = if scope.is_some() && !scope_all && !scope_nop {
|
|
let raw = identify_external_uses(cfg, &blocks_nodes, &var_defs);
|
|
reorder_external_vars(raw, formal_params)
|
|
} else {
|
|
vec![]
|
|
};
|
|
// Register external vars as defined in block 0 so phi insertion considers them
|
|
for var in &external_vars {
|
|
var_defs.entry(var.clone()).or_default().insert(0);
|
|
}
|
|
|
|
// 5. Phi insertion (Cytron algorithm)
|
|
let phi_placements = insert_phis(&var_defs, &dom_frontiers, num_blocks);
|
|
|
|
// 6. Rename variables (dominator tree preorder walk)
|
|
let dom_tree_children = build_dom_tree_children(num_blocks, &doms, &block_graph);
|
|
let (
|
|
mut ssa_blocks,
|
|
mut value_defs,
|
|
cfg_node_map,
|
|
field_interner,
|
|
field_writes,
|
|
synthetic_externals,
|
|
slot_scoped_assigns,
|
|
) = rename_variables(
|
|
cfg,
|
|
&blocks_nodes,
|
|
&block_succs,
|
|
&block_preds,
|
|
&phi_placements,
|
|
&dom_tree_children,
|
|
&filtered_edges,
|
|
&external_vars,
|
|
formal_params,
|
|
with_params,
|
|
&nop_nodes,
|
|
);
|
|
|
|
// 6b. Fill any missing phi operands with a shared Undef sentinel so
|
|
// every phi has exactly one operand per predecessor. See
|
|
// `fill_undef_phi_operands` for the invariant rationale.
|
|
fill_undef_phi_operands(
|
|
&mut ssa_blocks,
|
|
&block_preds,
|
|
&mut value_defs,
|
|
&blocks_nodes,
|
|
);
|
|
|
|
// 7. Fill in preds/succs on SsaBlocks
|
|
for bid in 0..num_blocks {
|
|
let id = BlockId(bid as u32);
|
|
ssa_blocks[bid].id = id;
|
|
ssa_blocks[bid].preds = block_preds[bid]
|
|
.iter()
|
|
.map(|&b| BlockId(b as u32))
|
|
.collect();
|
|
ssa_blocks[bid].succs = block_succs[bid]
|
|
.iter()
|
|
.map(|&b| BlockId(b as u32))
|
|
.collect();
|
|
}
|
|
|
|
// 7b. Debug assertions: verify structural invariants.
|
|
// The helper body is `debug_assert!` only, so it's a no-op in release ,
|
|
// call unconditionally to avoid a dead_code warning when the lib is
|
|
// built without `--tests`.
|
|
debug_assert_bfs_ordering(&block_preds);
|
|
// Phi operand counts are a release-level invariant: every phi must
|
|
// have exactly one operand per predecessor. Missing operands are
|
|
// filled with an explicit Undef sentinel in
|
|
// `fill_undef_phi_operands`; extra operands would reference
|
|
// nonexistent predecessors and corrupt analysis silently.
|
|
assert_phi_operand_counts(&ssa_blocks, &block_preds);
|
|
|
|
// 8. Map exception edges from CFG node indices to SSA block IDs
|
|
let exception_edges: Vec<(BlockId, BlockId)> = raw_exception_edges
|
|
.iter()
|
|
.filter_map(|(src_node, catch_node)| {
|
|
let src_block = block_of_node.get(src_node)?;
|
|
let catch_block = block_of_node.get(catch_node)?;
|
|
Some((BlockId(*src_block as u32), BlockId(*catch_block as u32)))
|
|
})
|
|
.collect();
|
|
|
|
let body = SsaBody {
|
|
blocks: ssa_blocks,
|
|
entry: BlockId(0),
|
|
value_defs,
|
|
cfg_node_map,
|
|
exception_edges,
|
|
field_interner,
|
|
field_writes,
|
|
synthetic_externals,
|
|
slot_scoped_assigns,
|
|
};
|
|
|
|
// 9. Catch-block reachability invariant.
|
|
//
|
|
// A CatchParam-carrying block that is neither reachable from entry nor
|
|
// listed as an exception target indicates a CFG construction bug. Debug
|
|
// builds panic loudly; release builds warn, record an engine note so
|
|
// downstream findings carry "SSA lowering bailed" provenance, and fall
|
|
// through to the existing orphan handling above (the "all definitions"
|
|
// fallback) which remains sound for taint reachability.
|
|
check_catch_block_reachability_gated(&body);
|
|
|
|
Ok(body)
|
|
}
|
|
|
|
/// Runtime gate around [`check_catch_block_reachability`] that panics in
|
|
/// debug builds and warns + records an engine note in release builds.
|
|
///
|
|
/// The current lowering's orphan handling (`process_block` fallback in
|
|
/// `rename_variables`) already widens to an "all definitions" conservative
|
|
/// state for blocks without predecessors. That preserves soundness for
|
|
/// taint reachability but masks CFG-builder bugs: this gate surfaces them.
|
|
fn check_catch_block_reachability_gated(body: &SsaBody) {
|
|
let result = super::invariants::check_catch_block_reachability(body);
|
|
if let Err(err) = result {
|
|
#[cfg(debug_assertions)]
|
|
{
|
|
if !catch_invariant_do_not_panic() {
|
|
panic!(
|
|
"SSA catch-block reachability invariant violated:\n{}",
|
|
err.joined()
|
|
);
|
|
}
|
|
}
|
|
tracing::warn!(
|
|
violations = %err.joined(),
|
|
"SSA catch-block reachability invariant violated; proceeding with \
|
|
conservative orphan fallback"
|
|
);
|
|
crate::taint::ssa_transfer::record_engine_note(
|
|
crate::engine_notes::EngineNote::SsaLoweringBailed {
|
|
reason: format!("catch_block_orphan: {}", err.joined()),
|
|
},
|
|
);
|
|
}
|
|
}
|
|
|
|
#[cfg(debug_assertions)]
thread_local! {
    /// Test-only escape hatch. While set, `check_catch_block_reachability_gated`
    /// takes the release-build path (warn + engine note, no panic) even under
    /// `debug_assertions`. Used by the invariant test that constructs a
    /// synthetic orphan catch body.
    static CATCH_INVARIANT_DO_NOT_PANIC: std::cell::Cell<bool> =
        const { std::cell::Cell::new(false) };
}
|
|
|
|
#[cfg(debug_assertions)]
|
|
#[allow(dead_code)]
|
|
pub(crate) fn set_catch_invariant_do_not_panic(on: bool) {
|
|
CATCH_INVARIANT_DO_NOT_PANIC.with(|c| c.set(on));
|
|
}
|
|
|
|
#[cfg(debug_assertions)]
|
|
fn catch_invariant_do_not_panic() -> bool {
|
|
CATCH_INVARIANT_DO_NOT_PANIC.with(|c| c.get())
|
|
}
|
|
|
|
/// Collect reachable nodes (BFS from entry), filtering by scope and stripping exception edges.
|
|
/// Returns (reachable set, filtered edges, exception edges as (src_node, catch_node)).
|
|
fn collect_reachable(
|
|
cfg: &Cfg,
|
|
entry: NodeIndex,
|
|
scope: Option<&str>,
|
|
scope_all: bool,
|
|
) -> (
|
|
HashSet<NodeIndex>,
|
|
Vec<(NodeIndex, NodeIndex, EdgeKind)>,
|
|
Vec<(NodeIndex, NodeIndex)>,
|
|
) {
|
|
let mut reachable = HashSet::new();
|
|
let mut edges = Vec::new();
|
|
let mut exception_edges = Vec::new();
|
|
let mut queue = VecDeque::new();
|
|
|
|
// Check if a node is in scope
|
|
let in_scope = |node: NodeIndex| -> bool {
|
|
if scope_all {
|
|
return true;
|
|
}
|
|
let info = &cfg[node];
|
|
match scope {
|
|
None => info.ast.enclosing_func.is_none(),
|
|
Some(name) => info.ast.enclosing_func.as_deref() == Some(name),
|
|
}
|
|
};
|
|
|
|
if !in_scope(entry) && !scope_all {
|
|
// Entry must be in scope; for top-level, Entry node often has no enclosing_func
|
|
// Accept Entry/Exit nodes regardless of scope
|
|
if !matches!(cfg[entry].kind, StmtKind::Entry | StmtKind::Exit) {
|
|
return (reachable, edges, exception_edges);
|
|
}
|
|
}
|
|
|
|
reachable.insert(entry);
|
|
queue.push_back(entry);
|
|
|
|
while let Some(node) = queue.pop_front() {
|
|
for edge in cfg.edges(node) {
|
|
let kind = *edge.weight();
|
|
let target = edge.target();
|
|
|
|
// Strip exception edges from the graph, but still visit targets
|
|
// so catch-block nodes are included in the SSA body.
|
|
if matches!(kind, EdgeKind::Exception) {
|
|
if (in_scope(target)
|
|
|| matches!(cfg[target].kind, StmtKind::Entry | StmtKind::Exit))
|
|
&& reachable.insert(target)
|
|
{
|
|
queue.push_back(target);
|
|
}
|
|
// Record exception edge for taint seeding
|
|
exception_edges.push((node, target));
|
|
continue;
|
|
}
|
|
|
|
// Allow Entry/Exit nodes and nodes in scope
|
|
if !in_scope(target) && !matches!(cfg[target].kind, StmtKind::Entry | StmtKind::Exit) {
|
|
continue;
|
|
}
|
|
|
|
edges.push((node, target, kind));
|
|
|
|
if reachable.insert(target) {
|
|
queue.push_back(target);
|
|
}
|
|
}
|
|
}
|
|
|
|
(reachable, edges, exception_edges)
|
|
}
|
|
|
|
/// Form basic blocks from filtered CFG nodes.
|
|
///
|
|
/// Returns:
|
|
/// - blocks_nodes: Vec<Vec<NodeIndex>>, nodes per block (in order)
|
|
/// - block_of_node: HashMap<NodeIndex, usize>, node → block index
|
|
/// - block_succs: Vec<Vec<usize>>, successors per block
|
|
/// - block_preds: Vec<Vec<usize>>, predecessors per block
|
|
fn form_blocks(
|
|
cfg: &Cfg,
|
|
entry: NodeIndex,
|
|
reachable: &HashSet<NodeIndex>,
|
|
filtered_edges: &[(NodeIndex, NodeIndex, EdgeKind)],
|
|
) -> (
|
|
Vec<Vec<NodeIndex>>,
|
|
HashMap<NodeIndex, usize>,
|
|
Vec<Vec<usize>>,
|
|
Vec<Vec<usize>>,
|
|
) {
|
|
// Build adjacency from filtered edges
|
|
let mut successors: HashMap<NodeIndex, Vec<(NodeIndex, EdgeKind)>> = HashMap::new();
|
|
let mut in_degree: HashMap<NodeIndex, usize> = HashMap::new();
|
|
let mut has_branching_in: HashMap<NodeIndex, bool> = HashMap::new();
|
|
|
|
for node in reachable {
|
|
in_degree.entry(*node).or_insert(0);
|
|
has_branching_in.entry(*node).or_insert(false);
|
|
}
|
|
|
|
// CFG construction wires every Return / Throw node to the synthetic
|
|
// function-exit node via a `Seq` edge so the underlying graph is a single
|
|
// connected component. Those edges are bookkeeping only: control flow
|
|
// does not actually fall through a Return into the exit block. Treating
|
|
// them as block successors causes an early-return block to share its
|
|
// post-exit body with the function's fall-through tail, silently merging
|
|
// two distinct paths into one (the "merged-return" defect). Strip them
|
|
// here so block-level adjacency reflects real control flow; the SSA
|
|
// terminator for the containing block becomes Return / Unreachable
|
|
// instead of Goto(exit).
|
|
let is_terminating =
|
|
|n: NodeIndex| -> bool { matches!(cfg[n].kind, StmtKind::Return | StmtKind::Throw) };
|
|
|
|
for &(src, tgt, kind) in filtered_edges {
|
|
if is_terminating(src) {
|
|
continue;
|
|
}
|
|
successors.entry(src).or_default().push((tgt, kind));
|
|
*in_degree.entry(tgt).or_insert(0) += 1;
|
|
if matches!(kind, EdgeKind::True | EdgeKind::False | EdgeKind::Back) {
|
|
*has_branching_in.entry(tgt).or_insert(false) = true;
|
|
}
|
|
}
|
|
|
|
// Determine block leaders
|
|
let mut is_leader: HashSet<NodeIndex> = HashSet::new();
|
|
is_leader.insert(entry); // entry is always a leader
|
|
|
|
for &node in reachable {
|
|
let in_deg = in_degree.get(&node).copied().unwrap_or(0);
|
|
if in_deg > 1 || has_branching_in.get(&node).copied().unwrap_or(false) {
|
|
is_leader.insert(node);
|
|
}
|
|
// Orphan nodes (reachable via exception edges but no filtered predecessors)
|
|
// must be leaders so they get their own block (e.g. catch block entries).
|
|
if in_deg == 0 && node != entry {
|
|
is_leader.insert(node);
|
|
}
|
|
// Node following a multi-exit node
|
|
let succs = successors.get(&node).map(|s| s.len()).unwrap_or(0);
|
|
if succs > 1 {
|
|
for &(tgt, _) in successors.get(&node).unwrap_or(&vec![]) {
|
|
is_leader.insert(tgt);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Build blocks by following single-successor Seq edges from each leader
|
|
let mut blocks_nodes: Vec<Vec<NodeIndex>> = Vec::new();
|
|
let mut block_of_node: HashMap<NodeIndex, usize> = HashMap::new();
|
|
let mut visited: HashSet<NodeIndex> = HashSet::new();
|
|
|
|
// BFS order to assign blocks deterministically (entry first)
|
|
let mut leader_queue: VecDeque<NodeIndex> = VecDeque::new();
|
|
leader_queue.push_back(entry);
|
|
let mut leader_visited: HashSet<NodeIndex> = HashSet::new();
|
|
leader_visited.insert(entry);
|
|
|
|
// Discover leaders in BFS order over `cfg`, but skip edges whose
|
|
// source is a terminating (Return / Throw) node. Walking the raw
|
|
// `cfg` directly here would re-introduce the bookkeeping
|
|
// Return/Throw → fn_exit edges we just stripped, fn_exit (or any
|
|
// post-return join) would be discovered through them and assigned a
|
|
// block ID before its true block-level predecessors, breaking the
|
|
// BFS-forward-pred invariant (`debug_assert_bfs_ordering`).
|
|
//
|
|
// We can't simply BFS our `successors` map because that excludes
|
|
// exception edges entirely (collect_reachable strips them and records
|
|
// them separately in `exception_edges`). Catch-block nodes are still
|
|
// in `reachable` and must be discoverable as leaders via the
|
|
// try-body → catch path, only the terminating-source bookkeeping
|
|
// edges are bogus.
|
|
{
|
|
let mut bfs_queue: VecDeque<NodeIndex> = VecDeque::new();
|
|
let mut bfs_seen: HashSet<NodeIndex> = HashSet::new();
|
|
bfs_queue.push_back(entry);
|
|
bfs_seen.insert(entry);
|
|
while let Some(node) = bfs_queue.pop_front() {
|
|
if reachable.contains(&node) && is_leader.contains(&node) && leader_visited.insert(node)
|
|
{
|
|
leader_queue.push_back(node);
|
|
}
|
|
if is_terminating(node) {
|
|
continue;
|
|
}
|
|
for edge in cfg.edges(node) {
|
|
let tgt = edge.target();
|
|
if reachable.contains(&tgt) && bfs_seen.insert(tgt) {
|
|
bfs_queue.push_back(tgt);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Belt-and-braces: any leader still unvisited gets appended in
|
|
// CFG-node-index order so block-ID assignment remains
|
|
// deterministic. We do NOT include the synthetic function-exit
|
|
// node when it is unreachable through filtered edges, that
|
|
// happens whenever every path in the body terminates explicitly
|
|
// (e.g. a function whose only return is `return buf.toString()`
|
|
// at the tail). Including it would emit an orphan SSA block
|
|
// with no real predecessors and no semantic meaning, which the
|
|
// structural reachability invariant correctly rejects.
|
|
// Genuine orphan handlers (catch blocks reached via stripped
|
|
// exception edges) keep their entries here.
|
|
let mut orphan_leaders: Vec<NodeIndex> = is_leader
|
|
.iter()
|
|
.copied()
|
|
.filter(|n| !leader_visited.contains(n))
|
|
.filter(|n| !matches!(cfg[*n].kind, StmtKind::Exit))
|
|
.collect();
|
|
orphan_leaders.sort_by_key(|n| n.index());
|
|
for n in orphan_leaders {
|
|
if leader_visited.insert(n) {
|
|
leader_queue.push_back(n);
|
|
}
|
|
}
|
|
}
|
|
|
|
for leader in leader_queue {
|
|
if visited.contains(&leader) {
|
|
continue;
|
|
}
|
|
|
|
let block_idx = blocks_nodes.len();
|
|
let mut block = vec![leader];
|
|
visited.insert(leader);
|
|
block_of_node.insert(leader, block_idx);
|
|
|
|
// Follow single-successor Seq edges
|
|
let mut current = leader;
|
|
loop {
|
|
let succs = successors.get(¤t).cloned().unwrap_or_default();
|
|
if succs.len() == 1
|
|
&& matches!(succs[0].1, EdgeKind::Seq)
|
|
&& !is_leader.contains(&succs[0].0)
|
|
{
|
|
let next = succs[0].0;
|
|
if visited.insert(next) {
|
|
block.push(next);
|
|
block_of_node.insert(next, block_idx);
|
|
current = next;
|
|
} else {
|
|
break;
|
|
}
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
|
|
blocks_nodes.push(block);
|
|
}
|
|
|
|
// Build block-level successor/predecessor lists
|
|
let num_blocks = blocks_nodes.len();
|
|
let mut block_succs: Vec<Vec<usize>> = vec![vec![]; num_blocks];
|
|
let mut block_preds: Vec<Vec<usize>> = vec![vec![]; num_blocks];
|
|
|
|
for &(src, tgt, _kind) in filtered_edges {
|
|
// Mirror the adjacency-construction filter above: edges out of
|
|
// Return/Throw CFG nodes are not real successors at the block level.
|
|
if is_terminating(src) {
|
|
continue;
|
|
}
|
|
if let (Some(&src_blk), Some(&tgt_blk)) = (block_of_node.get(&src), block_of_node.get(&tgt))
|
|
{
|
|
if src_blk != tgt_blk && !block_succs[src_blk].contains(&tgt_blk) {
|
|
block_succs[src_blk].push(tgt_blk);
|
|
block_preds[tgt_blk].push(src_blk);
|
|
}
|
|
}
|
|
}
|
|
|
|
(blocks_nodes, block_of_node, block_succs, block_preds)
|
|
}
|
|
|
|
/// Build a block-level petgraph for dominator computation.
|
|
fn build_block_graph(
|
|
num_blocks: usize,
|
|
block_succs: &[Vec<usize>],
|
|
_entry: BlockId,
|
|
) -> (Graph<BlockId, ()>, NodeIndex) {
|
|
let mut g: Graph<BlockId, ()> = Graph::new();
|
|
let mut block_nodes: Vec<NodeIndex> = Vec::with_capacity(num_blocks);
|
|
|
|
for i in 0..num_blocks {
|
|
block_nodes.push(g.add_node(BlockId(i as u32)));
|
|
}
|
|
|
|
for (i, succs) in block_succs.iter().enumerate() {
|
|
for &s in succs {
|
|
g.add_edge(block_nodes[i], block_nodes[s], ());
|
|
}
|
|
}
|
|
|
|
let entry_gnode = block_nodes[0]; // block 0 is always entry
|
|
(g, entry_gnode)
|
|
}
|
|
|
|
/// Compute dominance frontiers for all blocks.
|
|
fn compute_dominance_frontiers(
|
|
num_blocks: usize,
|
|
block_preds: &[Vec<usize>],
|
|
doms: &Dominators<NodeIndex>,
|
|
block_graph: &Graph<BlockId, ()>,
|
|
) -> Vec<HashSet<usize>> {
|
|
let mut df: Vec<HashSet<usize>> = vec![HashSet::new(); num_blocks];
|
|
|
|
// Map block index → graph NodeIndex
|
|
let block_node: Vec<NodeIndex> = block_graph.node_indices().collect();
|
|
|
|
for n in 0..num_blocks {
|
|
let preds = &block_preds[n];
|
|
if preds.len() >= 2 {
|
|
for &p in preds {
|
|
let mut runner = p;
|
|
// idom(n) in the block graph
|
|
let n_gnode = block_node[n];
|
|
let idom_n = doms.immediate_dominator(n_gnode);
|
|
loop {
|
|
let runner_gnode = block_node[runner];
|
|
if idom_n == Some(runner_gnode) {
|
|
break;
|
|
}
|
|
df[runner].insert(n);
|
|
// Move runner to its immediate dominator
|
|
match doms.immediate_dominator(runner_gnode) {
|
|
Some(idom_runner) if idom_runner != runner_gnode => {
|
|
// Find block index from graph node
|
|
runner = block_graph[idom_runner].0 as usize;
|
|
}
|
|
_ => break, // reached root
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
df
|
|
}
|
|
|
|
/// Identify variables used but not defined within the scoped blocks.
|
|
/// These represent external (e.g. global/top-level) variables that need
|
|
/// synthetic Param instructions so the SSA rename pass can reference them.
|
|
fn identify_external_uses(
|
|
cfg: &Cfg,
|
|
blocks_nodes: &[Vec<NodeIndex>],
|
|
var_defs: &BTreeMap<String, HashSet<usize>>,
|
|
) -> Vec<String> {
|
|
let mut used: HashSet<String> = HashSet::new();
|
|
for nodes in blocks_nodes {
|
|
for &node in nodes {
|
|
for u in &cfg[node].taint.uses {
|
|
used.insert(u.clone());
|
|
}
|
|
}
|
|
}
|
|
// External = used but never defined in any block
|
|
let mut external: Vec<String> = used
|
|
.into_iter()
|
|
.filter(|u| !var_defs.contains_key(u))
|
|
.collect();
|
|
external.sort(); // deterministic order
|
|
external
|
|
}
|
|
|
|
/// Report whether `name` is a language-reserved method receiver identifier:
/// `self` (Rust/Python) or `this` (JS/TS/Java/PHP/C++).
///
/// Receivers have a dedicated IR node ([`SsaOp::SelfParam`]) and are tracked
/// as a channel separate from positional parameters. Funnelling the check
/// through this one helper keeps the set of receiver names consistent
/// across lowering and summary extraction.
pub(crate) fn is_receiver_name(name: &str) -> bool {
    ["self", "this"].contains(&name)
}
|
|
|
|
/// Reorder external variables so the receiver (`self`/`this`) comes first,
|
|
/// followed by formal positional parameters in declaration order, followed
|
|
/// by remaining external vars in alphabetical order.
|
|
///
|
|
/// This fixed order is what the synthetic-parameter injection step relies
|
|
/// on to emit one [`SsaOp::SelfParam`] (for the leading receiver slot, when
|
|
/// present) followed by a contiguous run of [`SsaOp::Param { index }`] values
|
|
/// whose indices 0..N correspond exactly to positional call-site argument
|
|
/// positions, no receiver offset required anywhere downstream.
|
|
///
|
|
/// W1.b: every formal parameter gets a Param op even when the body never
|
|
/// references it directly. Without this, the *first* `obj.f = rhs` on a
|
|
/// formal `obj` whose body never reads `obj` produces no W1
|
|
/// `field_writes` entry, `var_stacks["obj"]` is empty when the synth
|
|
/// Assign runs because no external-use path interned `obj`. Subsequent
|
|
/// writes work because the synth Assign itself defines `obj`, so the
|
|
/// gap is exactly the FIRST write. Always emitting a formal Param at
|
|
/// block 0 closes that gap.
|
|
fn reorder_external_vars(external: Vec<String>, formal_params: &[String]) -> Vec<String> {
|
|
if formal_params.is_empty() {
|
|
return external; // no reordering, preserve existing alphabetical sort
|
|
}
|
|
let ext_set: HashSet<&str> = external.iter().map(|s| s.as_str()).collect();
|
|
let formal_set: HashSet<&str> = formal_params.iter().map(|s| s.as_str()).collect();
|
|
let mut result = Vec::with_capacity(external.len());
|
|
// Receiver first (highest priority), regardless of whether it appears in
|
|
// formal_params or was discovered purely as an external reference.
|
|
// Languages with explicit self (Rust/Python) put it in formal_params;
|
|
// languages with implicit this (JS/TS/Java/PHP) have it only as an
|
|
// external reference. Either way, SelfParam should be emitted first.
|
|
if ext_set.contains("self") || formal_set.contains("self") {
|
|
result.push("self".to_string());
|
|
} else if ext_set.contains("this") || formal_set.contains("this") {
|
|
result.push("this".to_string());
|
|
}
|
|
// Formal positional params next (declaration order), skipping any
|
|
// receiver that was already emitted above. W1.b: include EVERY
|
|
// formal regardless of whether the body uses it externally, an
|
|
// unused formal that gets field-written via `obj.cache = rhs` still
|
|
// needs a Param op so the synth Assign loop sees its prior reaching
|
|
// def in `var_stacks`.
|
|
for p in formal_params {
|
|
if is_receiver_name(p) {
|
|
continue;
|
|
}
|
|
result.push(p.clone());
|
|
}
|
|
// Remaining external vars alphabetically (external is already sorted),
|
|
// excluding anything already placed.
|
|
let placed: HashSet<String> = result.iter().cloned().collect();
|
|
for v in external {
|
|
if placed.contains(&v) {
|
|
continue;
|
|
}
|
|
if !formal_set.contains(v.as_str()) && !is_receiver_name(&v) {
|
|
result.push(v);
|
|
}
|
|
}
|
|
result
|
|
}
|
|
|
|
/// Collect variable definitions per block: var_name → set of block indices.
|
|
/// Nodes in `nop_nodes` are skipped (they won't define variables in SSA).
|
|
fn collect_var_defs(
|
|
cfg: &Cfg,
|
|
blocks_nodes: &[Vec<NodeIndex>],
|
|
nop_nodes: &HashSet<NodeIndex>,
|
|
) -> BTreeMap<String, HashSet<usize>> {
|
|
let mut defs: BTreeMap<String, HashSet<usize>> = BTreeMap::new();
|
|
|
|
for (block_idx, nodes) in blocks_nodes.iter().enumerate() {
|
|
for &node in nodes {
|
|
if nop_nodes.contains(&node) {
|
|
continue;
|
|
}
|
|
if let Some(ref d) = cfg[node].taint.defines {
|
|
defs.entry(d.clone()).or_default().insert(block_idx);
|
|
// Register parent prefixes for synthetic base updates on field writes.
|
|
// E.g. `obj.data` also registers `obj` so phi insertion works correctly.
|
|
let mut path = d.as_str();
|
|
while let Some(dot_pos) = path.rfind('.') {
|
|
path = &path[..dot_pos];
|
|
defs.entry(path.to_string()).or_default().insert(block_idx);
|
|
}
|
|
}
|
|
// Register extra defines from destructuring patterns.
|
|
for ed in &cfg[node].taint.extra_defines {
|
|
defs.entry(ed.clone()).or_default().insert(block_idx);
|
|
}
|
|
// Implicit definitions for uninitialized declarations (e.g., C/C++
|
|
// `char buf[256]`). The variable appears in uses but not defines
|
|
// because def_use() doesn't treat declarations without initializers
|
|
// as definitions. Registering here ensures phi insertion at join points.
|
|
if cfg[node].taint.defines.is_none()
|
|
&& cfg[node].call.callee.is_none()
|
|
&& cfg[node].kind == StmtKind::Seq
|
|
&& cfg[node].taint.uses.len() == 1
|
|
{
|
|
defs.entry(cfg[node].taint.uses[0].clone())
|
|
.or_default()
|
|
.insert(block_idx);
|
|
}
|
|
}
|
|
}
|
|
|
|
defs
|
|
}
|
|
|
|
/// Cytron-style phi placement over iterated dominance frontiers.
///
/// For each variable in `var_defs`, a phi is placed in every block reachable
/// by repeatedly following `dom_frontiers` from the variable's defining
/// blocks. The result is indexed by block: `result[b]` is the set of variable
/// names that need a phi at the top of block `b`.
///
/// The number of blocks is taken from `dom_frontiers.len()`; `_num_blocks` is
/// accepted for signature compatibility and is not read.
///
/// Each per-block set is a `BTreeSet<String>` so that consumers iterating it
/// see the names in sorted order regardless of hasher state.
fn insert_phis(
    var_defs: &BTreeMap<String, HashSet<usize>>,
    dom_frontiers: &[HashSet<usize>],
    _num_blocks: usize,
) -> Vec<BTreeSet<String>> {
    let mut placements: Vec<BTreeSet<String>> = vec![BTreeSet::new(); dom_frontiers.len()];

    for (name, defining_blocks) in var_defs {
        // Blocks whose dominance frontier still has to be visited.
        let mut pending: VecDeque<usize> = defining_blocks.iter().copied().collect();
        // Blocks that already received a phi for `name`.
        let mut phi_blocks: HashSet<usize> = HashSet::new();

        while let Some(block) = pending.pop_front() {
            for &frontier in &dom_frontiers[block] {
                if !phi_blocks.insert(frontier) {
                    continue;
                }
                placements[frontier].insert(name.clone());
                // A phi is itself a definition. Blocks that were defining
                // blocks from the start are already queued, so only enqueue
                // the genuinely new ones.
                if !defining_blocks.contains(&frontier) {
                    pending.push_back(frontier);
                }
            }
        }
    }

    placements
}
|
|
|
|
/// Build dominator tree children lists.
|
|
fn build_dom_tree_children(
|
|
num_blocks: usize,
|
|
doms: &Dominators<NodeIndex>,
|
|
block_graph: &Graph<BlockId, ()>,
|
|
) -> Vec<Vec<usize>> {
|
|
let mut children: Vec<Vec<usize>> = vec![vec![]; num_blocks];
|
|
let block_nodes: Vec<NodeIndex> = block_graph.node_indices().collect();
|
|
|
|
for i in 0..num_blocks {
|
|
if let Some(idom) = doms.immediate_dominator(block_nodes[i]) {
|
|
let idom_idx = block_graph[idom].0 as usize;
|
|
if idom_idx != i {
|
|
children[idom_idx].push(i);
|
|
}
|
|
}
|
|
}
|
|
|
|
children
|
|
}
|
|
|
|
/// Rename variables: dominator tree preorder walk with per-variable stacks.
|
|
///
|
|
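/// Returns a 7-tuple whose first three components are (ssa_blocks, value_defs, cfg_node_map),
/// followed by a `FieldInterner`, a `HashMap<SsaValue, (SsaValue, FieldId)>`, and two
/// `HashSet<SsaValue>`s (see the return type in the signature).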
|
|
fn rename_variables(
|
|
cfg: &Cfg,
|
|
blocks_nodes: &[Vec<NodeIndex>],
|
|
block_succs: &[Vec<usize>],
|
|
block_preds: &[Vec<usize>],
|
|
phi_placements: &[BTreeSet<String>],
|
|
dom_tree_children: &[Vec<usize>],
|
|
filtered_edges: &[(NodeIndex, NodeIndex, EdgeKind)],
|
|
external_vars: &[String],
|
|
formal_params: &[String],
|
|
with_params: bool,
|
|
nop_nodes: &HashSet<NodeIndex>,
|
|
) -> (
|
|
Vec<SsaBlock>,
|
|
Vec<ValueDef>,
|
|
HashMap<NodeIndex, SsaValue>,
|
|
crate::ssa::ir::FieldInterner,
|
|
HashMap<SsaValue, (SsaValue, crate::ssa::ir::FieldId)>,
|
|
HashSet<SsaValue>,
|
|
HashSet<SsaValue>,
|
|
) {
|
|
let num_blocks = blocks_nodes.len();
|
|
let mut next_value: u32 = 0;
|
|
let mut value_defs: Vec<ValueDef> = Vec::new();
|
|
let mut cfg_node_map: HashMap<NodeIndex, SsaValue> = HashMap::new();
|
|
// Per-body interner for FieldProj field names; populated when the
|
|
// member-access decomposition (try_lower_field_proj_chain) emits a
|
|
// chain for chained-receiver method calls (`a.b.c()`), and remains
|
|
// empty otherwise so existing per-statement Call lowering is
|
|
// bit-for-bit unchanged.
|
|
let mut field_interner = crate::ssa::ir::FieldInterner::new();
|
|
//side-table mapping each synthetic base-update
|
|
// [`SsaOp::Assign`]'s defined value to its `(receiver, field)` pair.
|
|
// Populated below at the synthetic-Assign emission site. Read by
|
|
// the taint engine to lift the assign into a structural field WRITE.
|
|
let mut field_writes: HashMap<SsaValue, (SsaValue, crate::ssa::ir::FieldId)> = HashMap::new();
|
|
// SSA values whose `Assign` comes from a bare-array destructure
|
|
// slot-scoped kill arm; the taint engine consults this set to skip
|
|
// outer-node Source label pickup while still unioning operand taint.
|
|
let mut slot_scoped_assigns: HashSet<SsaValue> = HashSet::new();
|
|
|
|
// Per-variable rename stacks
|
|
let mut var_stacks: HashMap<String, Vec<SsaValue>> = HashMap::new();
|
|
|
|
// Pre-allocate SSA blocks
|
|
let mut ssa_blocks: Vec<SsaBlock> = (0..num_blocks)
|
|
.map(|i| SsaBlock {
|
|
id: BlockId(i as u32),
|
|
phis: Vec::new(),
|
|
body: Vec::new(),
|
|
terminator: Terminator::Unreachable,
|
|
preds: SmallVec::new(),
|
|
succs: SmallVec::new(),
|
|
})
|
|
.collect();
|
|
|
|
// `BTreeMap` guarantees a deterministic (alphabetical) iteration order when
|
|
// pushing phi values onto `var_stacks` and when filling operands on
|
|
// successor phis, both sites are observable in SSA numbering if they
|
|
// reordered between runs.
|
|
let mut phi_values: Vec<BTreeMap<String, SsaValue>> = vec![BTreeMap::new(); num_blocks];
|
|
|
|
// Pre-create phi instructions for all blocks (operands filled during rename)
|
|
for (block_idx, vars) in phi_placements.iter().enumerate() {
|
|
let block_id = BlockId(block_idx as u32);
|
|
let cfg_node = blocks_nodes[block_idx][0]; // anchor to first node
|
|
for var in vars {
|
|
let v = SsaValue(next_value);
|
|
next_value += 1;
|
|
value_defs.push(ValueDef {
|
|
var_name: Some(var.clone()),
|
|
cfg_node,
|
|
block: block_id,
|
|
});
|
|
phi_values[block_idx].insert(var.clone(), v);
|
|
ssa_blocks[block_idx].phis.push(SsaInst {
|
|
value: v,
|
|
op: SsaOp::Phi(SmallVec::new()),
|
|
cfg_node,
|
|
var_name: Some(var.clone()),
|
|
span: cfg[cfg_node].ast.span,
|
|
});
|
|
}
|
|
}
|
|
|
|
// Process blocks in dominator tree preorder
|
|
// We need to track stack depths to restore after processing subtrees
|
|
// Use iterative approach: process block, then process children, restore
|
|
|
|
// Simpler approach: preorder walk with explicit save/restore
|
|
fn process_block(
|
|
block_idx: usize,
|
|
cfg: &Cfg,
|
|
blocks_nodes: &[Vec<NodeIndex>],
|
|
block_succs: &[Vec<usize>],
|
|
block_preds: &[Vec<usize>],
|
|
phi_placements: &[BTreeSet<String>],
|
|
dom_tree_children: &[Vec<usize>],
|
|
filtered_edges: &[(NodeIndex, NodeIndex, EdgeKind)],
|
|
var_stacks: &mut HashMap<String, Vec<SsaValue>>,
|
|
ssa_blocks: &mut [SsaBlock],
|
|
phi_values: &mut [BTreeMap<String, SsaValue>],
|
|
value_defs: &mut Vec<ValueDef>,
|
|
cfg_node_map: &mut HashMap<NodeIndex, SsaValue>,
|
|
next_value: &mut u32,
|
|
nop_nodes: &HashSet<NodeIndex>,
|
|
field_interner: &mut crate::ssa::ir::FieldInterner,
|
|
field_writes: &mut HashMap<SsaValue, (SsaValue, crate::ssa::ir::FieldId)>,
|
|
slot_scoped_assigns: &mut HashSet<SsaValue>,
|
|
) {
|
|
let block_id = BlockId(block_idx as u32);
|
|
|
|
// Save stack depths for rollback
|
|
let saved: Vec<(String, usize)> = var_stacks
|
|
.iter()
|
|
.map(|(k, v)| (k.clone(), v.len()))
|
|
.collect();
|
|
|
|
// 1. Push pre-created phi values onto var stacks
|
|
for (var, &v) in &phi_values[block_idx] {
|
|
var_stacks.entry(var.clone()).or_default().push(v);
|
|
}
|
|
|
|
// 2. Process body nodes
|
|
for &node in &blocks_nodes[block_idx] {
|
|
let info = &cfg[node];
|
|
|
|
// Helper: build Call args from arg_uses, falling back to info.taint.uses
|
|
let build_call_args = |info: &crate::cfg::NodeInfo,
|
|
var_stacks: &HashMap<String, Vec<SsaValue>>|
|
|
-> (Vec<SmallVec<[SsaValue; 2]>>, Option<SsaValue>) {
|
|
let receiver = info
|
|
.call
|
|
.receiver
|
|
.as_ref()
|
|
.and_then(|r| var_stacks.get(r).and_then(|s| s.last().copied()));
|
|
let args = if !info.call.arg_uses.is_empty() {
|
|
let mut args: Vec<SmallVec<[SsaValue; 2]>> = info
|
|
.call
|
|
.arg_uses
|
|
.iter()
|
|
.map(|arg_idents| {
|
|
arg_idents
|
|
.iter()
|
|
.filter_map(|ident| {
|
|
var_stacks.get(ident).and_then(|s| s.last().copied())
|
|
})
|
|
.collect()
|
|
})
|
|
.collect();
|
|
// For chained calls (e.g. fetch(url).then(fn)), arg_uses only
|
|
// captures the final call's args. Variables used by intermediate
|
|
// calls (like `url` in fetch) are in info.taint.uses but not arg_uses.
|
|
// Add them as an extra group so sink detection can see them.
|
|
//
|
|
// Exclude the receiver ident: it's carried on its own typed
|
|
// channel (`SsaOp::Call.receiver`). Callers that care about
|
|
// positional arity must read it from `info.call.arg_uses.len()`,
|
|
// not `args.len()`, since this implicit group inflates args.
|
|
let arg_uses_flat: HashSet<&str> = info
|
|
.call
|
|
.arg_uses
|
|
.iter()
|
|
.flat_map(|g| g.iter().map(|s| s.as_str()))
|
|
.collect();
|
|
let receiver_ident = info.call.receiver.as_deref();
|
|
let implicit: SmallVec<[SsaValue; 2]> = info
|
|
.taint
|
|
.uses
|
|
.iter()
|
|
.filter(|u| !arg_uses_flat.contains(u.as_str()))
|
|
.filter(|u| Some(u.as_str()) != receiver_ident)
|
|
.filter_map(|u| var_stacks.get(u).and_then(|s| s.last().copied()))
|
|
.collect();
|
|
if !implicit.is_empty() {
|
|
args.push(implicit);
|
|
}
|
|
args
|
|
} else {
|
|
// Fallback: treat all uses as a single argument group
|
|
let all_uses: SmallVec<[SsaValue; 2]> = info
|
|
.taint
|
|
.uses
|
|
.iter()
|
|
.filter_map(|u| var_stacks.get(u).and_then(|s| s.last().copied()))
|
|
.collect();
|
|
if all_uses.is_empty() {
|
|
vec![]
|
|
} else {
|
|
vec![all_uses]
|
|
}
|
|
};
|
|
(args, receiver)
|
|
};
|
|
|
|
// Determine operation and collect uses
|
|
// Out-of-scope nodes (nop_nodes) become Nop: they preserve graph
|
|
// connectivity but don't participate in taint flow.
|
|
let op = if nop_nodes.contains(&node) {
|
|
SsaOp::Nop
|
|
} else if info.catch_param {
|
|
SsaOp::CatchParam
|
|
} else if info
|
|
.taint
|
|
.labels
|
|
.iter()
|
|
.any(|l| matches!(l, crate::labels::DataLabel::Source(_)))
|
|
&& info.call.callee.is_none()
|
|
{
|
|
// Pure source (e.g. $_GET, env var), no callee, so no args to track.
|
|
// Source-labeled calls (e.g. file_get_contents) fall through to Call
|
|
// so argument taint and sink detection still work.
|
|
SsaOp::Source
|
|
} else if info.call.callee.is_some() {
|
|
let callee = info.call.callee.as_deref().unwrap_or("").to_string();
|
|
let (mut args, mut receiver) = build_call_args(info, var_stacks);
|
|
// try decomposing chained-receiver method calls
|
|
// (`a.b.c()`) into a FieldProj chain plus a bare-method Call
|
|
// so downstream consumers can read the receiver structure
|
|
// without re-parsing the callee text. Bails to None on any
|
|
// non-chain receiver (current behaviour preserved).
|
|
let (final_callee, callee_text) = match try_lower_field_proj_chain(
|
|
&callee,
|
|
var_stacks,
|
|
field_interner,
|
|
block_idx,
|
|
block_id,
|
|
next_value,
|
|
ssa_blocks,
|
|
value_defs,
|
|
node,
|
|
info.ast.span,
|
|
) {
|
|
Some((recv_v, bare_method)) => {
|
|
receiver = Some(recv_v);
|
|
// Strip any positional arg group that exactly matches the
|
|
// chain root identifier, it has been replaced by the
|
|
// FieldProj chain receiver, and re-listing it as an
|
|
// argument would inflate arity / double-taint.
|
|
if let Some(base_ident) = callee.split('.').next() {
|
|
if let Some(base_v) = var_stacks.get(base_ident).and_then(|s| s.last())
|
|
{
|
|
args.retain(|grp| !(grp.len() == 1 && grp.first() == Some(base_v)));
|
|
}
|
|
}
|
|
(bare_method, Some(callee.clone()))
|
|
}
|
|
None => (callee, None),
|
|
};
|
|
SsaOp::Call {
|
|
callee: final_callee,
|
|
callee_text,
|
|
args,
|
|
receiver,
|
|
}
|
|
} else if info.taint.defines.is_some()
|
|
&& info.taint.uses.is_empty()
|
|
&& !info
|
|
.taint
|
|
.labels
|
|
.iter()
|
|
.any(|l| matches!(l, crate::labels::DataLabel::Source(_)))
|
|
{
|
|
// Reassignment kill: a node that defines a variable but has no
|
|
// uses (operands) and is not a source is a constant/literal
|
|
// assignment. SSA rename allocates a fresh SsaValue, so
|
|
// downstream references see this new (untainted) value, the
|
|
// prior tainted definition is implicitly dead.
|
|
SsaOp::Const(info.taint.const_text.clone())
|
|
} else if info.taint.defines.is_some() {
|
|
let mut uses: SmallVec<[SsaValue; 4]> = info
|
|
.taint
|
|
.uses
|
|
.iter()
|
|
.filter_map(|u| var_stacks.get(u).and_then(|s| s.last().copied()))
|
|
.collect();
|
|
// Inject Const for binary expression literal operand.
|
|
// When a binary expression has one identifier and one numeric literal
|
|
// (e.g., `flags & 0x07`), the literal isn't in `uses`. Inject a
|
|
// synthetic Const instruction so the Assign has 2 uses, preventing
|
|
// copy propagation from eliminating the operation.
|
|
if uses.len() == 1 && info.bin_op.is_some() && info.bin_op_const.is_some() {
|
|
let const_val = info.bin_op_const.unwrap();
|
|
let const_v = SsaValue(*next_value);
|
|
*next_value += 1;
|
|
let const_inst = SsaInst {
|
|
value: const_v,
|
|
op: SsaOp::Const(Some(const_val.to_string())),
|
|
cfg_node: node,
|
|
var_name: None,
|
|
span: info.ast.span,
|
|
};
|
|
ssa_blocks[block_idx].body.push(const_inst);
|
|
value_defs.push(ValueDef {
|
|
var_name: None,
|
|
cfg_node: node,
|
|
block: block_id,
|
|
});
|
|
uses.push(const_v);
|
|
}
|
|
SsaOp::Assign(uses)
|
|
} else if matches!(info.kind, StmtKind::Return | StmtKind::Throw)
|
|
&& !info.taint.uses.is_empty()
|
|
{
|
|
// `return s` / `throw e` with identifier uses: emit an
|
|
// `Assign(uses)` so the SSA carries an explicit pass-through
|
|
// for the returned/thrown value. Without this, the Return
|
|
// node was lowered as a `Nop` and the terminator-setup
|
|
// "last non-Nop body inst" search returned None, producing
|
|
// `Terminator::Return(None)` for a function that visibly
|
|
// returns an identifier. That broke per-return-path
|
|
// PathFact narrowing for non-Rust languages where the
|
|
// returned identifier wasn't computed in the same block
|
|
// (e.g. Python `def f(s): return s`, `s` is a Param in
|
|
// block 0, the Return block itself has no body insts).
|
|
let uses: SmallVec<[SsaValue; 4]> = info
|
|
.taint
|
|
.uses
|
|
.iter()
|
|
.filter_map(|u| var_stacks.get(u).and_then(|s| s.last().copied()))
|
|
.collect();
|
|
if uses.is_empty() {
|
|
SsaOp::Nop
|
|
} else {
|
|
SsaOp::Assign(uses)
|
|
}
|
|
} else if info.is_await_forward
|
|
&& info.call.callee.is_none()
|
|
&& !info.taint.uses.is_empty()
|
|
{
|
|
// `await x` resolves to the same value as `x` — model as a 1:1
|
|
// copy so taint, origins, and abstract-domain facts forward
|
|
// unchanged. Gated on `callee.is_none()` so an await-wrapped
|
|
// call still lowers as a Call op rather than being collapsed
|
|
// to Assign (today CFG splits `await foo(x)` into two nodes,
|
|
// but the guard keeps the invariant explicit).
|
|
let uses: SmallVec<[SsaValue; 4]> = info
|
|
.taint
|
|
.uses
|
|
.iter()
|
|
.filter_map(|u| var_stacks.get(u).and_then(|s| s.last().copied()))
|
|
.collect();
|
|
if uses.is_empty() {
|
|
SsaOp::Nop
|
|
} else {
|
|
SsaOp::Assign(uses)
|
|
}
|
|
} else if matches!(
|
|
info.kind,
|
|
StmtKind::Entry
|
|
| StmtKind::Exit
|
|
| StmtKind::If
|
|
| StmtKind::Loop
|
|
| StmtKind::Break
|
|
| StmtKind::Continue
|
|
| StmtKind::Return
|
|
| StmtKind::Throw
|
|
) {
|
|
SsaOp::Nop
|
|
} else if info.call.callee.is_some() {
|
|
let callee = info.call.callee.as_deref().unwrap_or("").to_string();
|
|
let (mut args, mut receiver) = build_call_args(info, var_stacks);
|
|
// same FieldProj-chain decomposition as the primary
|
|
// Call branch above, kept in sync because this fallback
|
|
// path also constructs SSA Call ops (used for control-flow
|
|
// wrapper calls that landed past the earlier match arms).
|
|
let (final_callee, callee_text) = match try_lower_field_proj_chain(
|
|
&callee,
|
|
var_stacks,
|
|
field_interner,
|
|
block_idx,
|
|
block_id,
|
|
next_value,
|
|
ssa_blocks,
|
|
value_defs,
|
|
node,
|
|
info.ast.span,
|
|
) {
|
|
Some((recv_v, bare_method)) => {
|
|
receiver = Some(recv_v);
|
|
if let Some(base_ident) = callee.split('.').next() {
|
|
if let Some(base_v) = var_stacks.get(base_ident).and_then(|s| s.last())
|
|
{
|
|
args.retain(|grp| !(grp.len() == 1 && grp.first() == Some(base_v)));
|
|
}
|
|
}
|
|
(bare_method, Some(callee.clone()))
|
|
}
|
|
None => (callee, None),
|
|
};
|
|
SsaOp::Call {
|
|
callee: final_callee,
|
|
callee_text,
|
|
args,
|
|
receiver,
|
|
}
|
|
} else {
|
|
SsaOp::Nop
|
|
};
|
|
|
|
// Allocate SSA value
|
|
let v = SsaValue(*next_value);
|
|
*next_value += 1;
|
|
let var_name_for_ssa = if nop_nodes.contains(&node) {
|
|
None
|
|
} else if info.taint.defines.is_some() {
|
|
info.taint.defines.clone()
|
|
} else if info.kind == StmtKind::Seq
|
|
&& info.call.callee.is_none()
|
|
&& info.taint.uses.len() == 1
|
|
&& !var_stacks.contains_key(&info.taint.uses[0])
|
|
{
|
|
// Implicit definition for uninitialized declarations (e.g.,
|
|
// C/C++ `char buf[256]`). Creates a reaching definition so
|
|
// output-parameter sources like fgets() can taint the buffer
|
|
// and subsequent uses (e.g., system(buf)) see the tainted value.
|
|
Some(info.taint.uses[0].clone())
|
|
} else {
|
|
None
|
|
};
|
|
value_defs.push(ValueDef {
|
|
var_name: var_name_for_ssa.clone(),
|
|
cfg_node: node,
|
|
block: block_id,
|
|
});
|
|
|
|
// Push defined variable onto stack (skip nop nodes)
|
|
if let Some(ref d) = var_name_for_ssa {
|
|
var_stacks.entry(d.clone()).or_default().push(v);
|
|
}
|
|
|
|
cfg_node_map.insert(node, v);
|
|
|
|
// Promise.all-style array-destructure precision: when a CallWrapper
|
|
// node binds an array_pattern (`const [a, b] = await Promise.all(
|
|
// [x, y])` or `let (a, b) = tokio::join!(x, y)`) and the value is a
|
|
// promise combinator that produces an array/tuple of per-element
|
|
// results (`Promise.all`, `Promise.allSettled`, `asyncio.gather`,
|
|
// `tokio::join!` and friends), rewrite the per-binding SSA so each
|
|
// binding sees only its own index's taint instead of the scalar
|
|
// union that `try_apply_promise_combinator` would produce.
|
|
//
|
|
// Two argument shapes are supported:
|
|
// (a) literal-array (JS/Python): one positional arg whose
|
|
// collected idents represent the array elements in order,
|
|
// e.g. `Promise.all([x, y])` → args = [[x, y]].
|
|
// (b) positional (Rust macros): N positional args, each one
|
|
// ident, e.g. `tokio::join!(x, y)` → args = [[x], [y]].
|
|
//
|
|
// `Promise.race` and `Promise.resolve` are excluded: the awaited
|
|
// value of a race is whichever promise wins (a single value, not
|
|
// an array), and destructuring that value index-by-index does not
|
|
// correspond to the args.
|
|
// The rewrite fires when:
|
|
// - the call is a promise combinator that produces an array of
|
|
// per-element results (`All` / `AllSettled`), AND
|
|
// - the LHS destructures into >= 2 bindings (sequential case
|
|
// where `extra_defines` is non-empty), OR
|
|
// - the LHS is an array_pattern with at least one skip slot
|
|
// (`array_pattern_indices` is non-empty, even if `extra_defines`
|
|
// itself is empty — `const [, b]` is a single-binding pattern
|
|
// whose index is 1, not 0).
|
|
let is_combinator_rewrite_target = matches!(
|
|
info.call
|
|
.callee
|
|
.as_deref()
|
|
.and_then(crate::labels::is_any_promise_combinator),
|
|
Some(
|
|
crate::labels::PromiseCombinatorKind::All
|
|
| crate::labels::PromiseCombinatorKind::AllSettled
|
|
)
|
|
);
|
|
// Indices for each binding in source order: primary at index 0,
|
|
// then extras. Falls back to sequential 0..N when the AST didn't
|
|
// record explicit indices (non-array_pattern destructures and
|
|
// tuple_pattern shapes that contain no wildcards).
|
|
let binding_indices: SmallVec<[usize; 4]> =
|
|
if !info.taint.array_pattern_indices.is_empty() {
|
|
info.taint.array_pattern_indices.clone()
|
|
} else if !info.taint.extra_defines.is_empty() {
|
|
(0..=info.taint.extra_defines.len()).collect()
|
|
} else {
|
|
SmallVec::new()
|
|
};
|
|
let promise_destruct_args: Option<SmallVec<[SsaValue; 4]>> =
|
|
if is_combinator_rewrite_target && !binding_indices.is_empty() {
|
|
let max_index = binding_indices.iter().copied().max().unwrap_or(0);
|
|
let needed = max_index + 1;
|
|
// Use `info.call.arg_uses` directly rather than the
|
|
// build_call_args-derived `args`, which may include an
|
|
// implicit "uses not in arg_uses" group appended for chain
|
|
// bookkeeping that would inflate the apparent arity.
|
|
let arg_uses = &info.call.arg_uses;
|
|
let map_idents = |idents: &[String]| -> Option<SmallVec<[SsaValue; 4]>> {
|
|
let mapped: SmallVec<[SsaValue; 4]> = idents
|
|
.iter()
|
|
.take(needed)
|
|
.filter_map(|ident| {
|
|
var_stacks.get(ident).and_then(|s| s.last().copied())
|
|
})
|
|
.collect();
|
|
if mapped.len() == needed {
|
|
Some(mapped)
|
|
} else {
|
|
None
|
|
}
|
|
};
|
|
if arg_uses.len() == 1 && arg_uses[0].len() >= needed {
|
|
// Shape (a): single positional arg whose idents are the
|
|
// array elements in source order (`Promise.all([x, y])`,
|
|
// `asyncio.gather([x, y])`).
|
|
map_idents(&arg_uses[0])
|
|
} else if arg_uses.len() >= needed
|
|
&& arg_uses.iter().take(needed).all(|g| g.len() == 1)
|
|
{
|
|
// Shape (b): N positional args, each with one ident
|
|
// (`tokio::join!(x, y)`).
|
|
let names: Vec<&String> =
|
|
arg_uses.iter().take(needed).map(|g| &g[0]).collect();
|
|
let mapped: SmallVec<[SsaValue; 4]> = names
|
|
.iter()
|
|
.filter_map(|ident| {
|
|
var_stacks
|
|
.get(ident.as_str())
|
|
.and_then(|s| s.last().copied())
|
|
})
|
|
.collect();
|
|
if mapped.len() == needed {
|
|
Some(mapped)
|
|
} else {
|
|
None
|
|
}
|
|
} else {
|
|
None
|
|
}
|
|
} else {
|
|
None
|
|
};
|
|
|
|
// Bare-array RHS destructure precision: when the LHS is an
|
|
// array_pattern / tuple_pattern / pattern_list / left_assignment_list
|
|
// AND the RHS is a bare array-literal, build per-source-position
|
|
// ops so each binding sees only its index's element instead of
|
|
// the scalar union of every RHS ident.
|
|
//
|
|
// Three slot shapes are recognised by `collect_rhs_array_literal_elements`:
|
|
//
|
|
// * `Ident(name)` — bare identifier. Emit `Assign(reaching_def)`.
|
|
// * `Literal` — syntactic literal (string/number/etc.). Emit
|
|
// `Const(None)` so the binding carries no taint.
|
|
// * `Complex(uses)` — call / binary / subscript / member access /
|
|
// interpolated string / nested array literal / etc. Emit
|
|
// `Assign(union of inner ident reaching defs)` — slot-scoped
|
|
// union, not the whole-RHS union the legacy path produced.
|
|
// Falls back to `Const(None)` when no inner idents resolve
|
|
// (pure literal subexpression like `1 + 2`).
|
|
//
|
|
// Closes FPs like `const [a, b] = [safe, tainted]; exec(b);`
|
|
// (Ident shape) and `const [c, d] = [fn(req.x), 'lit']; exec(d);`
|
|
// (Complex shape) where the legacy union painted the safe binding.
|
|
//
|
|
// The promise-combinator path above has already populated
|
|
// `promise_destruct_args` when its preconditions held, so the
|
|
// mutual exclusion is gated through `promise_destruct_args.is_none()`
|
|
// rather than `info.call.callee.is_none()`. The earlier
|
|
// callee-none gate was wrong because the outer
|
|
// variable_declarator node picks up `info.call.callee` whenever
|
|
// the RHS text matches a Source label — which is exactly the
|
|
// case where we need the per-slot rewrite most.
|
|
// The outer node may carry a `DataLabel::Source(_)` whose
|
|
// classification matched somewhere in the RHS expression text
|
|
// (`req.body.cmd`, `process.env.X`, etc.). For multi-slot
|
|
// RHS we can't statically partition WHICH slot caused that
|
|
// match, but it must originate from a Complex slot (Literal
|
|
// and bare-Ident slots whose names resolve through
|
|
// `var_stacks` carry their own SsaValue identity). Treat
|
|
// Complex slots as Source-emitting when the outer label set
|
|
// included Source — strict precision improvement over the
|
|
// legacy union path which painted EVERY slot, including
|
|
// Literal, with the outer Source.
|
|
let outer_is_source = info
|
|
.taint
|
|
.labels
|
|
.iter()
|
|
.any(|l| matches!(l, crate::labels::DataLabel::Source(_)));
|
|
|
|
// Per-slot Source classification (see `RhsArraySlot::Complex.source_cap`):
|
|
// when at least one Complex slot's own subtree classified as
|
|
// Source, we know which slot(s) carried the source pattern, so
|
|
// sibling Complex slots without their own source_cap stay
|
|
// slot-scoped (Assign / Const). Otherwise (the outer node
|
|
// matched but no per-slot classifier fired — typical of subscript
|
|
// chains and other shapes whose source flows via reaching-def
|
|
// rather than static text), fall back to the conservative
|
|
// "all-Complex-are-Source" emission for legacy preservation.
|
|
use crate::cfg::RhsArraySlot;
|
|
let any_slot_has_source_cap = info.taint.rhs_array_elements.iter().any(|s| {
|
|
matches!(
|
|
s,
|
|
RhsArraySlot::Complex { source_cap, .. }
|
|
if !source_cap.is_empty()
|
|
)
|
|
});
|
|
let effective_outer_fallback = outer_is_source && !any_slot_has_source_cap;
|
|
|
|
let bare_array_ops: Option<(SmallVec<[SsaOp; 4]>, SmallVec<[bool; 4]>)> =
|
|
if !info.taint.rhs_array_elements.is_empty()
|
|
&& !binding_indices.is_empty()
|
|
&& promise_destruct_args.is_none()
|
|
{
|
|
let max_index = binding_indices.iter().copied().max().unwrap_or(0);
|
|
let needed = max_index + 1;
|
|
if info.taint.rhs_array_elements.len() < needed {
|
|
None
|
|
} else {
|
|
let mut per_pos: SmallVec<[SsaOp; 4]> = SmallVec::new();
|
|
let mut slot_scoped_mask: SmallVec<[bool; 4]> = SmallVec::new();
|
|
let mut bail = false;
|
|
for slot in info.taint.rhs_array_elements.iter().take(needed) {
|
|
let mut is_slot_scoped = false;
|
|
let slot_op = match slot {
|
|
RhsArraySlot::Ident(ident) => {
|
|
match var_stacks
|
|
.get(ident.as_str())
|
|
.and_then(|s| s.last().copied())
|
|
{
|
|
Some(sv) => SsaOp::Assign(SmallVec::from_elem(sv, 1)),
|
|
None => {
|
|
bail = true;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
RhsArraySlot::Literal => SsaOp::Const(None),
|
|
RhsArraySlot::Complex {
|
|
uses: inner_uses,
|
|
source_cap,
|
|
} => {
|
|
let mut mapped: SmallVec<[SsaValue; 4]> = SmallVec::new();
|
|
for ident in inner_uses.iter() {
|
|
if let Some(sv) = var_stacks
|
|
.get(ident.as_str())
|
|
.and_then(|s| s.last().copied())
|
|
{
|
|
if !mapped.contains(&sv) {
|
|
mapped.push(sv);
|
|
}
|
|
}
|
|
}
|
|
if !source_cap.is_empty() {
|
|
// Per-slot classification found a Source
|
|
// pattern (e.g. `req.body.cmd`) inside
|
|
// THIS slot's subtree. Emit Source so the
|
|
// binding inherits the outer-node Source
|
|
// caps for this slot's index.
|
|
SsaOp::Source
|
|
} else if outer_is_source && any_slot_has_source_cap {
|
|
// Some OTHER slot's subtree classified as
|
|
// Source; this slot did NOT. Emit
|
|
// Assign(mapped) and mark the slot as
|
|
// slot-scoped so the taint transfer's
|
|
// Assign arm skips outer-node Source
|
|
// label pickup for this binding (without
|
|
// losing transitive taint through inner
|
|
// uses). When `mapped` is empty, fall
|
|
// back to Const(None) — the binding
|
|
// carries no taint anyway.
|
|
if mapped.is_empty() {
|
|
SsaOp::Const(None)
|
|
} else {
|
|
is_slot_scoped = true;
|
|
SsaOp::Assign(mapped.clone())
|
|
}
|
|
} else if effective_outer_fallback {
|
|
// Outer-node Source label but no
|
|
// per-slot classifier fired on any slot
|
|
// (typical of subscript-on-tainted-local
|
|
// shapes). Preserve legacy conservative
|
|
// emission for unrecognised shapes.
|
|
SsaOp::Source
|
|
} else if mapped.is_empty() {
|
|
SsaOp::Const(None)
|
|
} else {
|
|
SsaOp::Assign(mapped)
|
|
}
|
|
}
|
|
};
|
|
per_pos.push(slot_op);
|
|
slot_scoped_mask.push(is_slot_scoped);
|
|
}
|
|
if bail {
|
|
None
|
|
} else {
|
|
Some((per_pos, slot_scoped_mask))
|
|
}
|
|
}
|
|
} else {
|
|
None
|
|
};
|
|
|
|
// Clone op for potential extra_defines before moving into SsaInst.
|
|
// For the destructure-promise / bare-array rewrites, the
|
|
// per-extra ops are built explicitly below, so the shared clone
|
|
// path is bypassed.
|
|
let primary_op_for_extras = if info.taint.extra_defines.is_empty()
|
|
|| promise_destruct_args.is_some()
|
|
|| bare_array_ops.is_some()
|
|
{
|
|
None
|
|
} else {
|
|
Some(op.clone())
|
|
};
|
|
|
|
// Override primary op to single-operand Assign when the
|
|
// destructure-promise rewrite fires. The primary's source-order
|
|
// index is `binding_indices[0]` — non-zero for skip-leading
|
|
// patterns like `const [, b]` where `b` is the FIRST (and only)
|
|
// binding but lives at pattern position 1.
|
|
let primary_op = if let Some(ref args) = promise_destruct_args {
|
|
let primary_idx = binding_indices.first().copied().unwrap_or(0);
|
|
let pick = args.get(primary_idx).copied().unwrap_or(args[0]);
|
|
SsaOp::Assign(SmallVec::from_elem(pick, 1))
|
|
} else if let Some((ref per_pos, ref slot_scoped_mask)) = bare_array_ops {
|
|
let primary_idx = binding_indices.first().copied().unwrap_or(0);
|
|
if slot_scoped_mask.get(primary_idx).copied().unwrap_or(false) {
|
|
slot_scoped_assigns.insert(v);
|
|
}
|
|
per_pos
|
|
.get(primary_idx)
|
|
.cloned()
|
|
.unwrap_or(SsaOp::Const(None))
|
|
} else {
|
|
op
|
|
};
|
|
|
|
ssa_blocks[block_idx].body.push(SsaInst {
|
|
value: v,
|
|
op: primary_op,
|
|
cfg_node: node,
|
|
var_name: var_name_for_ssa.clone(),
|
|
span: info.ast.span,
|
|
});
|
|
|
|
// Synthetic base update: when a dotted path is defined (e.g. `obj.data`),
|
|
// create synthetic Assign instructions for parent prefixes (e.g. `obj`)
|
|
// so that subsequent reads of the base variable see the field write.
|
|
// Only includes the new field value (not the old base) so that field
|
|
// overwrites properly kill taint: if obj.data is re-assigned to a
|
|
// constant, the base `obj` no longer carries that field's taint.
|
|
//
|
|
//each synthetic Assign also records its
|
|
// structural identity into `field_writes`, `(receiver_old_value,
|
|
// FieldId(field_name))`, so the taint engine can recognise the
|
|
// synthetic assign as a field WRITE and mirror the rhs taint
|
|
// into the matching `(loc, field)` cell on `SsaTaintState`.
|
|
// The "old" parent value is the reaching def of `parent` BEFORE
|
|
// we push the new `synth_v`; when no prior def exists (the
|
|
// parent is undefined at this point), we skip the side-table
|
|
// entry so the consumer's `pt(receiver)` walk produces no work.
|
|
if !nop_nodes.contains(&node) {
|
|
if let Some(ref d) = info.taint.defines {
|
|
let mut current = d.as_str();
|
|
let mut child_value = v;
|
|
while let Some(dot_pos) = current.rfind('.') {
|
|
let parent = ¤t[..dot_pos];
|
|
let field_name = ¤t[dot_pos + 1..];
|
|
// Snapshot prior reaching def of `parent` BEFORE we
|
|
// push the new synth_v. Used by the field-write
|
|
// side-table as the receiver SsaValue.
|
|
let prior_parent_value: Option<SsaValue> =
|
|
var_stacks.get(parent).and_then(|s| s.last().copied());
|
|
let synth_v = SsaValue(*next_value);
|
|
*next_value += 1;
|
|
let synth_uses: SmallVec<[SsaValue; 4]> =
|
|
SmallVec::from_elem(child_value, 1);
|
|
value_defs.push(ValueDef {
|
|
var_name: Some(parent.to_string()),
|
|
cfg_node: node,
|
|
block: block_id,
|
|
});
|
|
var_stacks
|
|
.entry(parent.to_string())
|
|
.or_default()
|
|
.push(synth_v);
|
|
ssa_blocks[block_idx].body.push(SsaInst {
|
|
value: synth_v,
|
|
op: SsaOp::Assign(synth_uses),
|
|
cfg_node: node,
|
|
var_name: Some(parent.to_string()),
|
|
span: info.ast.span,
|
|
});
|
|
// Record `(synth_v -> (prior_parent, field_id))` so
|
|
// the taint engine can lift the synthetic assign
|
|
// into a field-write hook. The field name is
|
|
// interned through the per-body `FieldInterner` so
|
|
// FieldProj reads downstream resolve to the same id.
|
|
if let Some(rcv) = prior_parent_value {
|
|
let fid = field_interner.intern(field_name);
|
|
field_writes.insert(synth_v, (rcv, fid));
|
|
}
|
|
child_value = synth_v;
|
|
current = parent;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Emit extra SSA instructions for destructuring bindings.
|
|
// Each extra define inherits the same op (Source/Call/Assign) as the primary.
|
|
//
|
|
// For the destructure-promise rewrite, each extra emits an Assign
|
|
// on its corresponding indexed argument so per-element taint is
|
|
// preserved instead of the scalar union. The source-order index
|
|
// for `extra_defines[i]` is `binding_indices[i + 1]` — accounts
|
|
// for skip slots like `const [a, , b]` where `b` sits at index 2,
|
|
// not at index 1.
|
|
if let Some(ref pd_args) = promise_destruct_args {
|
|
for (i, extra_def) in info.taint.extra_defines.iter().enumerate() {
|
|
let ev = SsaValue(*next_value);
|
|
*next_value += 1;
|
|
value_defs.push(ValueDef {
|
|
var_name: Some(extra_def.clone()),
|
|
cfg_node: node,
|
|
block: block_id,
|
|
});
|
|
var_stacks.entry(extra_def.clone()).or_default().push(ev);
|
|
let extra_idx = binding_indices.get(i + 1).copied().unwrap_or(i + 1);
|
|
let arg = pd_args.get(extra_idx).copied().unwrap_or(pd_args[0]);
|
|
ssa_blocks[block_idx].body.push(SsaInst {
|
|
value: ev,
|
|
op: SsaOp::Assign(SmallVec::from_elem(arg, 1)),
|
|
cfg_node: node,
|
|
var_name: Some(extra_def.clone()),
|
|
span: info.ast.span,
|
|
});
|
|
}
|
|
} else if let Some((ref per_pos, ref slot_scoped_mask)) = bare_array_ops {
|
|
// Bare-array RHS destructure: each extra emits the op for its
|
|
// source-order RHS position. Ident slots emit Assign of the
|
|
// ident's reaching SSA value; literal slots emit Const(None).
|
|
// Slot-scoped Assigns are registered in
|
|
// `slot_scoped_assigns` so the taint transfer skips
|
|
// outer-node Source pickup for those bindings.
|
|
for (i, extra_def) in info.taint.extra_defines.iter().enumerate() {
|
|
let ev = SsaValue(*next_value);
|
|
*next_value += 1;
|
|
value_defs.push(ValueDef {
|
|
var_name: Some(extra_def.clone()),
|
|
cfg_node: node,
|
|
block: block_id,
|
|
});
|
|
var_stacks.entry(extra_def.clone()).or_default().push(ev);
|
|
let extra_idx = binding_indices.get(i + 1).copied().unwrap_or(i + 1);
|
|
let op_for_extra = per_pos
|
|
.get(extra_idx)
|
|
.cloned()
|
|
.unwrap_or(SsaOp::Const(None));
|
|
if slot_scoped_mask.get(extra_idx).copied().unwrap_or(false) {
|
|
slot_scoped_assigns.insert(ev);
|
|
}
|
|
ssa_blocks[block_idx].body.push(SsaInst {
|
|
value: ev,
|
|
op: op_for_extra,
|
|
cfg_node: node,
|
|
var_name: Some(extra_def.clone()),
|
|
span: info.ast.span,
|
|
});
|
|
}
|
|
} else if let Some(ref primary_op) = primary_op_for_extras {
|
|
for extra_def in &info.taint.extra_defines {
|
|
let ev = SsaValue(*next_value);
|
|
*next_value += 1;
|
|
value_defs.push(ValueDef {
|
|
var_name: Some(extra_def.clone()),
|
|
cfg_node: node,
|
|
block: block_id,
|
|
});
|
|
var_stacks.entry(extra_def.clone()).or_default().push(ev);
|
|
ssa_blocks[block_idx].body.push(SsaInst {
|
|
value: ev,
|
|
op: primary_op.clone(),
|
|
cfg_node: node,
|
|
var_name: Some(extra_def.clone()),
|
|
span: info.ast.span,
|
|
});
|
|
}
|
|
}
|
|
}
|
|
|
|
// 3. Set terminator
|
|
let succs = &block_succs[block_idx];
|
|
let last_node = *blocks_nodes[block_idx].last().unwrap();
|
|
|
|
ssa_blocks[block_idx].terminator = if succs.is_empty() {
|
|
// A block with no successors at the block level is one of:
|
|
// (1) a block containing a Throw, terminates with an
|
|
// exception; no normal fall-through.
|
|
// (2) a block containing a Return, terminates with a value
|
|
// (or void). After form_blocks strips the bookkeeping
|
|
// Seq edge from Return → fn_exit, every explicit-return
|
|
// block lands here, including `if cond { return X; }`
|
|
// early returns.
|
|
// (3) the function-exit (fn_exit) block itself when the
|
|
// function falls off the end (implicit return).
|
|
//
|
|
// Distinguish them by inspecting the block's CFG nodes.
|
|
let return_node = blocks_nodes[block_idx]
|
|
.iter()
|
|
.copied()
|
|
.find(|&n| cfg[n].kind == StmtKind::Return);
|
|
let has_throw_node = blocks_nodes[block_idx]
|
|
.iter()
|
|
.any(|&n| cfg[n].kind == StmtKind::Throw);
|
|
|
|
if has_throw_node && return_node.is_none() {
|
|
// Throw terminates control flow with an exception. No
|
|
// structured Throw terminator exists today; downstream
|
|
// analyses rely on `exception_edges` (recorded separately)
|
|
// for catch-block dispatch. Mark the normal-flow exit as
|
|
// Unreachable so successor consumers do not invent a
|
|
// synthetic fall-through edge.
|
|
Terminator::Unreachable
|
|
} else if let Some(rn) = return_node {
|
|
let return_info = &cfg[rn];
|
|
// Return-value resolution. Mirror the legacy
|
|
// `has_const_return` path so callers see exactly the same
|
|
// SSA shape they did before the merged-return fix, only
|
|
// the *terminator* changes (Goto(exit) → Return(_)), not
|
|
// the value selection.
|
|
//
|
|
// (a) Literal return (`return 'x'`, `return None`,
|
|
// `return []`, `return;`). Marked by
|
|
// `taint.uses.is_empty()` on the Return CFG node.
|
|
// Emit a synthetic Const inst so taint never leaks
|
|
// from an unrelated inst earlier in the same block
|
|
// (regression guard: C-1 inline-return precision).
|
|
// (b) Computed / passthrough return, last non-Nop body
|
|
// inst. Covers `return foo()` (Call sits before the
|
|
// Return Nop), `return x + y` (Assign), and the
|
|
// implicit tail expression collapsed into a single
|
|
// block by the leader-following loop. When the
|
|
// Return carries identifier uses (`return req`,
|
|
// `return { req.session, ... }`), the SSA defs for
|
|
// those identifiers are already on the body as
|
|
// Param / Assign / Source insts, picking the last
|
|
// one matches pre-fix behaviour exactly.
|
|
// (c) Void / unresolved, `Return(None)`.
|
|
if return_info.taint.uses.is_empty() {
|
|
let const_text = return_info.taint.const_text.clone();
|
|
let const_v = SsaValue(*next_value);
|
|
*next_value += 1;
|
|
let block_id = BlockId(block_idx as u32);
|
|
value_defs.push(ValueDef {
|
|
var_name: None,
|
|
cfg_node: rn,
|
|
block: block_id,
|
|
});
|
|
ssa_blocks[block_idx].body.push(SsaInst {
|
|
value: const_v,
|
|
op: SsaOp::Const(const_text),
|
|
cfg_node: rn,
|
|
var_name: None,
|
|
span: return_info.ast.span,
|
|
});
|
|
Terminator::Return(Some(const_v))
|
|
} else {
|
|
let from_body = ssa_blocks[block_idx]
|
|
.body
|
|
.iter()
|
|
.rev()
|
|
.find(|inst| !matches!(inst.op, SsaOp::Nop))
|
|
.map(|inst| inst.value);
|
|
Terminator::Return(from_body)
|
|
}
|
|
} else {
|
|
// (3) fn_exit / true fall-off, no Return CFG node in this
|
|
// block. Use the last non-Nop body instruction as the
|
|
// implicit return value (e.g. the function's tail-position
|
|
// expression in Rust).
|
|
let ret_val = ssa_blocks[block_idx]
|
|
.body
|
|
.iter()
|
|
.rev()
|
|
.find(|inst| !matches!(inst.op, SsaOp::Nop))
|
|
.map(|inst| inst.value);
|
|
Terminator::Return(ret_val)
|
|
}
|
|
} else if succs.len() == 1 {
|
|
Terminator::Goto(BlockId(succs[0] as u32))
|
|
} else if succs.len() == 2 {
|
|
// Find the If/Loop node that branches
|
|
let cond_node = blocks_nodes[block_idx]
|
|
.iter()
|
|
.rev()
|
|
.find(|&&n| matches!(cfg[n].kind, StmtKind::If | StmtKind::Loop))
|
|
.copied()
|
|
.unwrap_or(last_node);
|
|
|
|
// Determine which successor is true/false by looking at edge kinds
|
|
let mut true_blk = succs[0];
|
|
let mut false_blk = succs[1];
|
|
|
|
// Check filtered edges from any node in this block to successors
|
|
for &(src, tgt, kind) in filtered_edges {
|
|
if blocks_nodes[block_idx].contains(&src) {
|
|
let tgt_blk_opt = succs.iter().position(|&s| {
|
|
blocks_nodes
|
|
.get(s)
|
|
.is_some_and(|nodes| nodes.contains(&tgt))
|
|
});
|
|
if let Some(tgt_blk_pos) = tgt_blk_opt {
|
|
match kind {
|
|
EdgeKind::True => true_blk = succs[tgt_blk_pos],
|
|
EdgeKind::False => false_blk = succs[tgt_blk_pos],
|
|
_ => {}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Lower structured condition from CFG metadata
|
|
let cond_info = &cfg[cond_node];
|
|
let condition = if cond_info.condition_text.is_some()
|
|
&& !cond_info.condition_vars.is_empty()
|
|
{
|
|
let expr =
|
|
crate::constraint::lower::lower_condition_with_stacks(cond_info, var_stacks);
|
|
if matches!(expr, crate::constraint::lower::ConditionExpr::Unknown) {
|
|
None
|
|
} else {
|
|
Some(Box::new(expr))
|
|
}
|
|
} else {
|
|
None
|
|
};
|
|
|
|
Terminator::Branch {
|
|
cond: cond_node,
|
|
true_blk: BlockId(true_blk as u32),
|
|
false_blk: BlockId(false_blk as u32),
|
|
condition,
|
|
}
|
|
} else {
|
|
// More than 2 successors, model as a multi-way Switch.
|
|
//
|
|
// This replaces the previous `Goto(first)` collapse: the
|
|
// structured terminator now enumerates every target instead
|
|
// of hiding N-1 of them behind `block.succs`. Flow consumers
|
|
// (taint, const-prop, symex) still iterate `succs` as
|
|
// authoritative, but downstream tooling that inspects the
|
|
// terminator shape gets the full fanout.
|
|
//
|
|
// Note: today's switch-statement CFG construction decomposes
|
|
// cases into a cascade of binary `Branch` headers (see
|
|
// `build_switch` in src/cfg.rs), so real switch statements
|
|
// never reach this arm. Folding the cascade back into a
|
|
// single Switch node is a follow-up; in the meantime, this
|
|
// arm fires only on genuine multi-way CFG fanouts (e.g.
|
|
// future Go-switch / Java-arrow / Rust-match lowerings).
|
|
//
|
|
// Scrutinee: use the primary SSA value defined at the last
|
|
// node in this block when one exists; fall back to
|
|
// `SsaValue(0)` (a valid index, SSA numbering is 1-based
|
|
// only conceptually, and value 0 is always present in a
|
|
// non-empty body) when no value is defined. Downstream
|
|
// consumers that care about the scrutinee (abstract interp,
|
|
// symex per-case constraints) treat a missing/degenerate
|
|
// scrutinee as "unknown" rather than panicking.
|
|
let scrutinee = cfg_node_map.get(&last_node).copied().unwrap_or(SsaValue(0));
|
|
let targets: SmallVec<[BlockId; 4]> =
|
|
succs.iter().skip(1).map(|&s| BlockId(s as u32)).collect();
|
|
let default = BlockId(succs[0] as u32);
|
|
// Synthetic ≥3-way fanouts have no per-case literal metadata;
|
|
// every entry is None (unknown), so the executor falls back to
|
|
// first-reachable behavior on this terminator.
|
|
let case_values: SmallVec<[Option<crate::constraint::domain::ConstValue>; 4]> =
|
|
std::iter::repeat_with(|| None)
|
|
.take(targets.len())
|
|
.collect();
|
|
tracing::debug!(
|
|
block = block_idx,
|
|
num_succs = succs.len(),
|
|
"emitting Terminator::Switch for ≥3-way fanout",
|
|
);
|
|
Terminator::Switch {
|
|
scrutinee,
|
|
targets,
|
|
default,
|
|
case_values,
|
|
}
|
|
};
|
|
|
|
// 4. Fill phi operands in successor blocks
|
|
for &succ in succs {
|
|
for (var, &phi_val) in &phi_values[succ] {
|
|
// The version of `var` reaching from this block
|
|
let reaching_val = var_stacks.get(var).and_then(|s| s.last().copied());
|
|
if let Some(rv) = reaching_val {
|
|
// Find the phi instruction and add this operand
|
|
for phi in &mut ssa_blocks[succ].phis {
|
|
if phi.value == phi_val {
|
|
if let SsaOp::Phi(ref mut operands) = phi.op {
|
|
operands.push((block_id, rv));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// 5. Recurse into dominator tree children
|
|
for &child in &dom_tree_children[block_idx] {
|
|
process_block(
|
|
child,
|
|
cfg,
|
|
blocks_nodes,
|
|
block_succs,
|
|
block_preds,
|
|
phi_placements,
|
|
dom_tree_children,
|
|
filtered_edges,
|
|
var_stacks,
|
|
ssa_blocks,
|
|
phi_values,
|
|
value_defs,
|
|
cfg_node_map,
|
|
next_value,
|
|
nop_nodes,
|
|
field_interner,
|
|
field_writes,
|
|
slot_scoped_assigns,
|
|
);
|
|
}
|
|
|
|
// 6. Restore stacks
|
|
for (var, depth) in &saved {
|
|
if let Some(stack) = var_stacks.get_mut(var) {
|
|
stack.truncate(*depth);
|
|
}
|
|
}
|
|
// Remove any new variables that weren't in saved
|
|
let saved_vars: HashSet<&String> = saved.iter().map(|(k, _)| k).collect();
|
|
var_stacks.retain(|k, _| saved_vars.contains(k));
|
|
}
|
|
|
|
// Inject synthetic Param instructions at START of block 0 for external variables.
|
|
// These create SSA definitions so the rename pass can reference them.
|
|
// Pre-seed var_stacks so process_block sees them.
|
|
//
|
|
// `external_vars` contains both real formal parameters and free / closure-
|
|
// captured variables (variables read by the body but not declared as a
|
|
// formal and not assigned anywhere). Both end up emitted as
|
|
// [`SsaOp::Param`] in block 0; we record the SSA values that correspond
|
|
// to free vars in `synthetic_externals` so downstream analyses (the JS/TS
|
|
// handler-name auto-seed in particular) can avoid treating closure
|
|
// captures as if they were parameters of the function under analysis.
|
|
//
|
|
// **Conservative behaviour when the caller didn't supply formal-param
|
|
// info.** Several call sites (`lower_to_ssa`, `lower_to_ssa_scoped_nop`)
|
|
// don't supply formal parameter names; in that case we cannot distinguish
|
|
// formals from free vars structurally, so we leave `synthetic_externals`
|
|
// empty and the auto-seed pass keeps its pre-fix behaviour of treating
|
|
// every `Param` op as a candidate. Callers that opt in via
|
|
// `lower_to_ssa_with_params` set `with_params=true`, signalling that
|
|
// `formal_params` is the authoritative formal list — even when empty
|
|
// (arrow `() => {…}`). In that case every external becomes synthetic
|
|
// unless it appears in `formal_params`, so the auto-seed pass cannot
|
|
// mistake a bubbled-up free var (like `userId` lifted from a nested
|
|
// jest test callback) for a formal of the outer body.
|
|
let mut synthetic_externals: HashSet<SsaValue> = HashSet::new();
|
|
let formal_set: HashSet<&str> = formal_params.iter().map(|s| s.as_str()).collect();
|
|
let track_synthetic = with_params;
|
|
if !external_vars.is_empty() {
|
|
let entry_cfg_node = blocks_nodes[0][0];
|
|
let mut synthetic_body = Vec::with_capacity(external_vars.len());
|
|
let mut positional_idx: usize = 0;
|
|
for var in external_vars.iter() {
|
|
let v = SsaValue(next_value);
|
|
next_value += 1;
|
|
value_defs.push(ValueDef {
|
|
var_name: Some(var.clone()),
|
|
cfg_node: entry_cfg_node,
|
|
block: BlockId(0),
|
|
});
|
|
let is_receiver = is_receiver_name(var);
|
|
let op = if is_receiver {
|
|
SsaOp::SelfParam
|
|
} else {
|
|
let op = SsaOp::Param {
|
|
index: positional_idx,
|
|
};
|
|
positional_idx += 1;
|
|
op
|
|
};
|
|
// A non-receiver var is "synthetic" (a free / closure capture)
|
|
// when it is *not* one of the function's declared formals AND
|
|
// not a dotted access on a formal (`input.cmd` where `input` is
|
|
// a formal — it represents a structural projection of the
|
|
// formal, not a free variable; the auto-seed should still treat
|
|
// it as part of the formal's own taint surface). Receivers are
|
|
// intentionally excluded: `this` / `self` represent the implicit
|
|
// receiver, which always belongs to the function.
|
|
//
|
|
// Only fire when the caller supplied formal-parameter names; see
|
|
// the `track_synthetic` rationale above.
|
|
let root_is_formal = var
|
|
.split_once('.')
|
|
.map(|(root, _)| formal_set.contains(root))
|
|
.unwrap_or(false);
|
|
if track_synthetic
|
|
&& !is_receiver
|
|
&& !formal_set.contains(var.as_str())
|
|
&& !root_is_formal
|
|
{
|
|
synthetic_externals.insert(v);
|
|
}
|
|
synthetic_body.push(SsaInst {
|
|
value: v,
|
|
op,
|
|
cfg_node: entry_cfg_node,
|
|
var_name: Some(var.clone()),
|
|
span: (0, 0),
|
|
});
|
|
var_stacks.entry(var.clone()).or_default().push(v);
|
|
}
|
|
// Prepend synthetic params before any existing body instructions
|
|
synthetic_body.append(&mut ssa_blocks[0].body);
|
|
ssa_blocks[0].body = synthetic_body;
|
|
}
|
|
|
|
process_block(
|
|
0, // entry block
|
|
cfg,
|
|
blocks_nodes,
|
|
block_succs,
|
|
block_preds,
|
|
phi_placements,
|
|
dom_tree_children,
|
|
filtered_edges,
|
|
&mut var_stacks,
|
|
&mut ssa_blocks,
|
|
&mut phi_values,
|
|
&mut value_defs,
|
|
&mut cfg_node_map,
|
|
&mut next_value,
|
|
nop_nodes,
|
|
&mut field_interner,
|
|
&mut field_writes,
|
|
&mut slot_scoped_assigns,
|
|
);
|
|
|
|
// Process orphan blocks (e.g. catch blocks disconnected after exception edge removal).
|
|
// These blocks have no predecessors and weren't reached by the dominator tree walk.
|
|
//
|
|
// Rebuild var_stacks from already-processed instructions so that catch blocks
|
|
// can reference variables defined before the try block (e.g. `userInput`).
|
|
let has_orphans =
|
|
(1..num_blocks).any(|bid| block_preds[bid].is_empty() && ssa_blocks[bid].body.is_empty());
|
|
if has_orphans {
|
|
// Rebuild var_stacks from all SSA instructions created during the main walk.
|
|
// This gives orphan blocks access to all variable definitions.
|
|
var_stacks.clear();
|
|
for block in &ssa_blocks {
|
|
for inst in block.phis.iter().chain(block.body.iter()) {
|
|
if let Some(ref name) = inst.var_name {
|
|
var_stacks.entry(name.clone()).or_default().push(inst.value);
|
|
}
|
|
}
|
|
}
|
|
|
|
for bid in 1..num_blocks {
|
|
if block_preds[bid].is_empty() && ssa_blocks[bid].body.is_empty() {
|
|
process_block(
|
|
bid,
|
|
cfg,
|
|
blocks_nodes,
|
|
block_succs,
|
|
block_preds,
|
|
phi_placements,
|
|
dom_tree_children,
|
|
filtered_edges,
|
|
&mut var_stacks,
|
|
&mut ssa_blocks,
|
|
&mut phi_values,
|
|
&mut value_defs,
|
|
&mut cfg_node_map,
|
|
&mut next_value,
|
|
nop_nodes,
|
|
&mut field_interner,
|
|
&mut field_writes,
|
|
&mut slot_scoped_assigns,
|
|
);
|
|
}
|
|
}
|
|
}
|
|
|
|
(
|
|
ssa_blocks,
|
|
value_defs,
|
|
cfg_node_map,
|
|
field_interner,
|
|
field_writes,
|
|
synthetic_externals,
|
|
slot_scoped_assigns,
|
|
)
|
|
}
|
|
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
// Debug invariant checkers
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
|
|
/// Debug-build sanity check on block numbering.
///
/// `block_preds[i]` lists the predecessor block indices of block `i`. Every
/// block other than the entry (index 0) that has at least one predecessor
/// must have some predecessor whose index is smaller than its own. Blocks
/// with an empty predecessor list (orphans, e.g. a catch block reached only
/// via an exception edge) are exempt.
///
/// # Panics
///
/// Panics through `debug_assert!` on the first block that violates the
/// ordering; with debug assertions disabled the check never fires.
fn debug_assert_bfs_ordering(block_preds: &[Vec<usize>]) {
    let checked = block_preds
        .iter()
        .enumerate()
        .skip(1) // index 0 is the entry block
        .filter(|(_, preds)| !preds.is_empty()); // orphans have nothing to check

    for (blk, preds) in checked {
        let has_forward_pred = preds.iter().any(|&p| p < blk);
        debug_assert!(
            has_forward_pred,
            "Block {} has no forward predecessor — BFS ordering violated. Preds: {:?}",
            blk, preds
        );
    }
}
|
|
|
|
/// Verify phi operand counts: each phi must have exactly one operand
|
|
/// per predecessor, and every operand must reference an actual
|
|
/// predecessor of the block.
|
|
///
|
|
/// Runs in release builds because phi-operand mismatches are
|
|
/// load-bearing for soundness, downstream taint, const, and abstract
|
|
/// analyses iterate phi operands by `(pred_blk, value)` pairs, and
|
|
/// either a missing operand (silent "no contribution" on that edge)
|
|
/// or a phantom operand (garbage into the join) corrupts analysis
|
|
/// without surfacing.
|
|
///
|
|
/// The invariant is strict equality. Predecessors that carry no
|
|
/// reaching definition for the phi's variable are filled with the
|
|
/// [`SsaOp::Undef`] sentinel in `fill_undef_phi_operands`, rather than
|
|
/// being dropped, so consumers that look up by `(pred_blk, value)`
|
|
/// see a real operand for every control-flow edge.
|
|
fn assert_phi_operand_counts(ssa_blocks: &[SsaBlock], block_preds: &[Vec<usize>]) {
|
|
use std::collections::HashSet;
|
|
for (i, block) in ssa_blocks.iter().enumerate() {
|
|
let pred_set: HashSet<u32> = block_preds[i].iter().map(|&p| p as u32).collect();
|
|
for phi in &block.phis {
|
|
if let SsaOp::Phi(ref operands) = phi.op {
|
|
assert_eq!(
|
|
operands.len(),
|
|
block_preds[i].len(),
|
|
"SSA phi operand count does not match predecessor count: block {} phi v{} \
|
|
(var={:?}) has {} operands but block has {} predecessors. \
|
|
preds={:?}, operand_preds={:?}",
|
|
i,
|
|
phi.value.0,
|
|
phi.var_name,
|
|
operands.len(),
|
|
block_preds[i].len(),
|
|
block_preds[i],
|
|
operands.iter().map(|(b, _)| b.0).collect::<Vec<_>>(),
|
|
);
|
|
// Each operand's pred block must be an actual predecessor,
|
|
// and no predecessor may appear more than once.
|
|
let mut seen: HashSet<u32> = HashSet::new();
|
|
for (pred_blk, _) in operands.iter() {
|
|
assert!(
|
|
pred_set.contains(&pred_blk.0),
|
|
"SSA phi operand references nonexistent predecessor: block {} phi v{} \
|
|
references pred B{} but block predecessors are {:?}",
|
|
i,
|
|
phi.value.0,
|
|
pred_blk.0,
|
|
block_preds[i],
|
|
);
|
|
assert!(
|
|
seen.insert(pred_blk.0),
|
|
"SSA phi operand duplicates predecessor: block {} phi v{} has two \
|
|
operands for pred B{}",
|
|
i,
|
|
phi.value.0,
|
|
pred_blk.0,
|
|
);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Post-rename pass: ensure every phi has one operand per predecessor.
|
|
///
|
|
/// During rename, phi operands are only pushed when the variable has a
|
|
/// live reaching definition on that predecessor edge. Edges where the
|
|
/// variable is not yet defined (e.g. a try-body rejoining after a
|
|
/// catch-only binding, an early-return branch on a later-defined
|
|
/// variable, an orphan catch block's implicit predecessors) leave the
|
|
/// phi with fewer operands than the block has predecessors.
|
|
///
|
|
/// This pass scans all phis, and for every missing `(pred_block, _)`
|
|
/// slot, pushes `(pred_block, undef_val)` where `undef_val` is a
|
|
/// single shared sentinel instruction ([`SsaOp::Undef`]) synthesized
|
|
/// at the end of block 0's body. Consumers iterate phi operands by
|
|
/// `(pred_blk, value)` and therefore see a real operand on every
|
|
/// control-flow edge, no implicit "missing = empty" semantics.
|
|
///
|
|
/// The Undef instruction is created lazily (only when at least one phi
|
|
/// has a gap) so functions with fully-dominating definitions pay zero
|
|
/// cost. All phis share the same Undef value: a phi operand is
|
|
/// identified by its `(pred_block, value)` pair, so sharing the value
|
|
/// across phis is safe and keeps the synthesized-instruction count at
|
|
/// most one per function body.
|
|
fn fill_undef_phi_operands(
|
|
ssa_blocks: &mut [SsaBlock],
|
|
block_preds: &[Vec<usize>],
|
|
value_defs: &mut Vec<ValueDef>,
|
|
blocks_nodes: &[Vec<NodeIndex>],
|
|
) {
|
|
// Fast path: detect whether any phi has a gap. Avoid allocating
|
|
// the Undef value in the common case where every phi is saturated.
|
|
let needs_undef = ssa_blocks.iter().enumerate().any(|(bi, block)| {
|
|
block.phis.iter().any(|phi| {
|
|
if let SsaOp::Phi(ref operands) = phi.op {
|
|
operands.len() < block_preds[bi].len()
|
|
} else {
|
|
false
|
|
}
|
|
})
|
|
});
|
|
if !needs_undef {
|
|
return;
|
|
}
|
|
|
|
// Anchor the synthetic Undef instruction to the entry block's first
|
|
// CFG node so span lookups don't hit an invalid NodeIndex.
|
|
let anchor_node = blocks_nodes
|
|
.first()
|
|
.and_then(|b| b.first())
|
|
.copied()
|
|
.expect("entry block has at least one CFG node");
|
|
|
|
let undef_val = SsaValue(value_defs.len() as u32);
|
|
value_defs.push(ValueDef {
|
|
var_name: None,
|
|
cfg_node: anchor_node,
|
|
block: BlockId(0),
|
|
});
|
|
// Place the Undef instruction at the end of block 0's body so it
|
|
// appears after any synthetic Param / SelfParam emissions, its
|
|
// only role is to anchor the SsaValue; ordering relative to other
|
|
// body instructions is cosmetic (no consumer depends on its
|
|
// position, only on the value lookup).
|
|
ssa_blocks[0].body.push(SsaInst {
|
|
value: undef_val,
|
|
op: SsaOp::Undef,
|
|
cfg_node: anchor_node,
|
|
var_name: None,
|
|
span: (0, 0),
|
|
});
|
|
|
|
// Fill missing operand slots. Iterate `block_preds[bi]` in its
|
|
// natural order so the resulting phi operand list is deterministic
|
|
// across runs.
|
|
for (bi, block) in ssa_blocks.iter_mut().enumerate() {
|
|
for phi in block.phis.iter_mut() {
|
|
if let SsaOp::Phi(ref mut operands) = phi.op {
|
|
if operands.len() == block_preds[bi].len() {
|
|
continue;
|
|
}
|
|
use std::collections::HashSet;
|
|
let present: HashSet<u32> = operands.iter().map(|(b, _)| b.0).collect();
|
|
for &pred in &block_preds[bi] {
|
|
let pid = pred as u32;
|
|
if !present.contains(&pid) {
|
|
operands.push((BlockId(pid), undef_val));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
|
|
mod tests {
|
|
use super::*;
|
|
use crate::cfg::{EdgeKind, NodeInfo, StmtKind, TaintMeta};
|
|
use petgraph::Graph;
|
|
|
|
fn make_node(kind: StmtKind) -> NodeInfo {
|
|
NodeInfo {
|
|
kind,
|
|
..Default::default()
|
|
}
|
|
}
|
|
|
|
#[test]
fn linear_cfg_no_phis() {
    // Straight-line graph: Entry → x=1 → y=x → Exit.
    let mut cfg: Cfg = Graph::new();
    let entry = cfg.add_node(make_node(StmtKind::Entry));
    let def_x = cfg.add_node(NodeInfo {
        taint: TaintMeta {
            defines: Some("x".into()),
            ..Default::default()
        },
        ..make_node(StmtKind::Seq)
    });
    let def_y = cfg.add_node(NodeInfo {
        taint: TaintMeta {
            defines: Some("y".into()),
            uses: vec!["x".into()],
            ..Default::default()
        },
        ..make_node(StmtKind::Seq)
    });
    let exit = cfg.add_node(make_node(StmtKind::Exit));

    // Chain the four nodes with sequential edges only.
    for (from, to) in [(entry, def_x), (def_x, def_y), (def_y, exit)] {
        cfg.add_edge(from, to, EdgeKind::Seq);
    }

    let ssa = lower_to_ssa(&cfg, entry, None, true).unwrap();

    // With nothing but Seq edges the whole graph collapses into one block.
    assert_eq!(ssa.blocks.len(), 1);
    let only_block = &ssa.blocks[0];
    // No join points, hence no phis.
    assert!(only_block.phis.is_empty());
    // One body instruction per CFG node (entry, x=1, y=x, exit).
    assert_eq!(only_block.body.len(), 4);
}
|
|
|
|
#[test]
fn diamond_cfg_produces_phi() {
    // Entry → x=1 → If → [True: x=2] [False: x=3] → Join → Exit
    let assigns_x = || NodeInfo {
        taint: TaintMeta {
            defines: Some("x".into()),
            ..Default::default()
        },
        ..make_node(StmtKind::Seq)
    };

    let mut cfg: Cfg = Graph::new();
    let entry = cfg.add_node(make_node(StmtKind::Entry));
    let def_x = cfg.add_node(assigns_x());
    let if_node = cfg.add_node(make_node(StmtKind::If));
    let true_node = cfg.add_node(assigns_x());
    let false_node = cfg.add_node(assigns_x());
    let join = cfg.add_node(make_node(StmtKind::Seq));
    let exit = cfg.add_node(make_node(StmtKind::Exit));

    for (from, to, kind) in [
        (entry, def_x, EdgeKind::Seq),
        (def_x, if_node, EdgeKind::Seq),
        (if_node, true_node, EdgeKind::True),
        (if_node, false_node, EdgeKind::False),
        (true_node, join, EdgeKind::Seq),
        (false_node, join, EdgeKind::Seq),
        (join, exit, EdgeKind::Seq),
    ] {
        cfg.add_edge(from, to, kind);
    }

    let ssa = lower_to_ssa(&cfg, entry, None, true).unwrap();

    // The branch must split the graph into several blocks.
    assert!(ssa.blocks.len() >= 3);

    // The first block that carries any phi is taken to be the join block.
    let join_block = ssa
        .blocks
        .iter()
        .find(|b| !b.phis.is_empty())
        .expect("should have a block with a phi");
    assert_eq!(join_block.phis.len(), 1);

    let x_phi = &join_block.phis[0];
    assert_eq!(x_phi.var_name.as_deref(), Some("x"));

    // One operand per incoming branch (true and false).
    let SsaOp::Phi(ref operands) = x_phi.op else {
        panic!("expected Phi op");
    };
    assert_eq!(operands.len(), 2);
}
|
|
|
|
#[test]
fn loop_cfg_produces_phi() {
    // Entry → x=0 → Loop header → [Back: x=x+1] → Exit
    let mut cfg: Cfg = Graph::new();
    let entry = cfg.add_node(make_node(StmtKind::Entry));
    let def_x = cfg.add_node(NodeInfo {
        taint: TaintMeta {
            defines: Some("x".into()),
            ..Default::default()
        },
        ..make_node(StmtKind::Seq)
    });
    let loop_header = cfg.add_node(make_node(StmtKind::Loop));
    // Loop body both reads and redefines `x`.
    let body = cfg.add_node(NodeInfo {
        taint: TaintMeta {
            defines: Some("x".into()),
            uses: vec!["x".into()],
            ..Default::default()
        },
        ..make_node(StmtKind::Seq)
    });
    let exit = cfg.add_node(make_node(StmtKind::Exit));

    for (from, to, kind) in [
        (entry, def_x, EdgeKind::Seq),
        (def_x, loop_header, EdgeKind::Seq),
        (loop_header, body, EdgeKind::True),
        (body, loop_header, EdgeKind::Back),
        (loop_header, exit, EdgeKind::False),
    ] {
        cfg.add_edge(from, to, kind);
    }

    let ssa = lower_to_ssa(&cfg, entry, None, true).unwrap();

    // `x` reaches the header both from the initial definition and around the
    // back edge, so some block must carry a phi. Only the first such block is
    // inspected.
    let Some(header) = ssa.blocks.iter().find(|b| !b.phis.is_empty()) else {
        panic!("loop header should have a phi for x");
    };

    let has_x_phi = header
        .phis
        .iter()
        .any(|p| p.var_name.as_deref() == Some("x"));
    assert!(has_x_phi, "should have phi for variable x");
}
|
|
|
|
#[test]
fn multiple_reassignments_distinct_values() {
    // Entry → x=1 → x=2 → x=3 → Exit
    let assigns_x = || NodeInfo {
        taint: TaintMeta {
            defines: Some("x".into()),
            ..Default::default()
        },
        ..make_node(StmtKind::Seq)
    };

    let mut cfg: Cfg = Graph::new();
    let entry = cfg.add_node(make_node(StmtKind::Entry));

    // Append three consecutive definitions of `x`, each chained to the
    // previous node by a Seq edge.
    let mut tail = entry;
    for _ in 0..3 {
        let def = cfg.add_node(assigns_x());
        cfg.add_edge(tail, def, EdgeKind::Seq);
        tail = def;
    }
    let exit = cfg.add_node(make_node(StmtKind::Exit));
    cfg.add_edge(tail, exit, EdgeKind::Seq);

    let ssa = lower_to_ssa(&cfg, entry, None, true).unwrap();

    // Each `value_defs` entry named `x` is mapped to the SsaValue with the
    // same index.
    let x_values: Vec<_> = ssa
        .value_defs
        .iter()
        .enumerate()
        .filter_map(|(i, vd)| (vd.var_name.as_deref() == Some("x")).then(|| SsaValue(i as u32)))
        .collect();
    assert_eq!(x_values.len(), 3, "three definitions of x");

    // NOTE(review): the values are derived from distinct `value_defs`
    // indices, so this distinctness check holds by construction.
    let unique: HashSet<_> = x_values.iter().collect();
    assert_eq!(unique.len(), 3, "all SsaValues should be distinct");
}
|
|
|
|
#[test]
fn empty_cfg_returns_error() {
    // A graph with no nodes at all: index 0 does not refer to any node, and
    // lowering is expected to report an error.
    let empty: Cfg = Graph::new();
    let entry = NodeIndex::new(0);
    assert!(lower_to_ssa(&empty, entry, None, true).is_err());
}
|
|
|
|
// ── BFS ordering and phi invariant tests ─────────────────────────────
|
|
|
|
#[test]
fn bfs_ordering_holds_for_linear_cfg() {
    // Entry → A → B → Exit, all blocks should satisfy BFS ordering
    let mut cfg: Cfg = Graph::new();
    let entry = cfg.add_node(make_node(StmtKind::Entry));
    let node_a = cfg.add_node(NodeInfo {
        taint: TaintMeta {
            defines: Some("x".into()),
            ..Default::default()
        },
        ..make_node(StmtKind::Seq)
    });
    let node_b = cfg.add_node(NodeInfo {
        taint: TaintMeta {
            defines: Some("y".into()),
            uses: vec!["x".into()],
            ..Default::default()
        },
        ..make_node(StmtKind::Seq)
    });
    let exit = cfg.add_node(make_node(StmtKind::Exit));

    for (from, to) in [(entry, node_a), (node_a, node_b), (node_b, exit)] {
        cfg.add_edge(from, to, EdgeKind::Seq);
    }

    // Lowering is expected to succeed; in debug builds this exercises
    // debug_assert_bfs_ordering along the way.
    let ssa = lower_to_ssa(&cfg, entry, None, true).unwrap();
    assert!(!ssa.blocks.is_empty());
}
|
|
|
|
/// Lowers a diamond `Entry → x → If → [True | False] → Join → Exit` in
/// which `x` is defined before the branch and again on both arms.
///
/// If any block ends up carrying phis, the first such block must have two
/// predecessors and none of its phis may list more operands than that.
/// When no block carries a phi the test passes without further checks.
#[test]
fn bfs_ordering_holds_for_diamond_cfg() {
    // Every `x`-defining node in this test has the same shape.
    let defines_x = || NodeInfo {
        taint: TaintMeta {
            defines: Some("x".into()),
            ..Default::default()
        },
        ..make_node(StmtKind::Seq)
    };

    let mut graph: Cfg = Graph::new();
    let start = graph.add_node(make_node(StmtKind::Entry));
    let initial_def = graph.add_node(defines_x());
    let branch = graph.add_node(make_node(StmtKind::If));
    let then_arm = graph.add_node(defines_x());
    let else_arm = graph.add_node(defines_x());
    let merge = graph.add_node(make_node(StmtKind::Seq));
    let end = graph.add_node(make_node(StmtKind::Exit));

    for (from, to, kind) in [
        (start, initial_def, EdgeKind::Seq),
        (initial_def, branch, EdgeKind::Seq),
        (branch, then_arm, EdgeKind::True),
        (branch, else_arm, EdgeKind::False),
        (then_arm, merge, EdgeKind::Seq),
        (else_arm, merge, EdgeKind::Seq),
        (merge, end, EdgeKind::Seq),
    ] {
        graph.add_edge(from, to, kind);
    }

    // Lowering itself exercises the BFS-ordering and phi-operand assertions.
    let lowered = lower_to_ssa(&graph, start, None, true).unwrap();

    if let Some(with_phis) = lowered.blocks.iter().find(|blk| !blk.phis.is_empty()) {
        let pred_count = with_phis.preds.len();
        assert_eq!(pred_count, 2, "join block should have 2 predecessors");

        // Only genuine phi ops are inspected; anything else is ignored.
        let operand_lists = with_phis.phis.iter().filter_map(|inst| match &inst.op {
            SsaOp::Phi(ops) => Some(ops),
            _ => None,
        });
        for ops in operand_lists {
            assert!(
                ops.len() <= pred_count,
                "phi operands should not exceed predecessor count"
            );
        }
    }
}
|
|
|
|
/// Lowers a loop whose body redefines `x` from its previous value:
/// `Entry → x → Loop ⇄ body`, with the loop header also exiting to `Exit`.
/// Lowering must succeed in the presence of the `Back` edge.
#[test]
fn bfs_ordering_holds_for_loop_with_back_edge() {
    let mut graph: Cfg = Graph::new();
    let start = graph.add_node(make_node(StmtKind::Entry));
    let initial_def = graph.add_node(NodeInfo {
        taint: TaintMeta {
            defines: Some("x".into()),
            ..Default::default()
        },
        ..make_node(StmtKind::Seq)
    });
    let header = graph.add_node(make_node(StmtKind::Loop));
    // The loop body both reads and writes `x`.
    let loop_body = graph.add_node(NodeInfo {
        taint: TaintMeta {
            defines: Some("x".into()),
            uses: vec!["x".into()],
            ..Default::default()
        },
        ..make_node(StmtKind::Seq)
    });
    let end = graph.add_node(make_node(StmtKind::Exit));

    for (from, to, kind) in [
        (start, initial_def, EdgeKind::Seq),
        (initial_def, header, EdgeKind::Seq),
        (header, loop_body, EdgeKind::True),
        (loop_body, header, EdgeKind::Back),
        (header, end, EdgeKind::False),
    ] {
        graph.add_edge(from, to, kind);
    }

    // Covers BFS ordering with a back edge and a phi candidate on the header.
    let lowered = lower_to_ssa(&graph, start, None, true).unwrap();
    assert!(!lowered.blocks.is_empty());
}
|
|
|
|
/// Lowers `Entry → body → Exit` with an extra `body → catch → Exit` path
/// whose first edge is an `Exception` edge. The catch node may therefore
/// have no normal-flow predecessor; lowering must still succeed.
#[test]
fn orphan_catch_block_does_not_violate_bfs_ordering() {
    let mut graph: Cfg = Graph::new();
    let start = graph.add_node(make_node(StmtKind::Entry));
    let guarded = graph.add_node(NodeInfo {
        taint: TaintMeta {
            defines: Some("x".into()),
            ..Default::default()
        },
        ..make_node(StmtKind::Seq)
    });
    // Catch clause binding `e`.
    let handler = graph.add_node(NodeInfo {
        catch_param: true,
        taint: TaintMeta {
            defines: Some("e".into()),
            ..Default::default()
        },
        ..make_node(StmtKind::Seq)
    });
    let end = graph.add_node(make_node(StmtKind::Exit));

    for (from, to, kind) in [
        (start, guarded, EdgeKind::Seq),
        (guarded, end, EdgeKind::Seq),
        (guarded, handler, EdgeKind::Exception),
        (handler, end, EdgeKind::Seq),
    ] {
        graph.add_edge(from, to, kind);
    }

    // The handler is reachable only through the exception edge, so it may
    // show up as an orphan block; the BFS assertion is expected to skip it.
    let lowered = lower_to_ssa(&graph, start, None, true).unwrap();
    assert!(!lowered.blocks.is_empty());
}
|
|
|
|
/// In a clean diamond where both arms define `v` and the join reads it,
/// every phi in the first phi-carrying block must list exactly one operand
/// per predecessor (strict equality, not merely an upper bound).
#[test]
fn phi_operand_count_equals_pred_count_in_diamond() {
    let defines_v = || NodeInfo {
        taint: TaintMeta {
            defines: Some("v".into()),
            ..Default::default()
        },
        ..make_node(StmtKind::Seq)
    };

    let mut graph: Cfg = Graph::new();
    let start = graph.add_node(make_node(StmtKind::Entry));
    let branch = graph.add_node(make_node(StmtKind::If));
    let then_arm = graph.add_node(defines_v());
    let else_arm = graph.add_node(defines_v());
    // The join reads `v`.
    let merge = graph.add_node(NodeInfo {
        taint: TaintMeta {
            uses: vec!["v".into()],
            ..Default::default()
        },
        ..make_node(StmtKind::Seq)
    });
    let end = graph.add_node(make_node(StmtKind::Exit));

    for (from, to, kind) in [
        (start, branch, EdgeKind::Seq),
        (branch, then_arm, EdgeKind::True),
        (branch, else_arm, EdgeKind::False),
        (then_arm, merge, EdgeKind::Seq),
        (else_arm, merge, EdgeKind::Seq),
        (merge, end, EdgeKind::Seq),
    ] {
        graph.add_edge(from, to, kind);
    }

    let lowered = lower_to_ssa(&graph, start, None, true).unwrap();
    let with_phis = lowered
        .blocks
        .iter()
        .find(|blk| !blk.phis.is_empty())
        .expect("should have a phi block");

    let operand_lists = with_phis.phis.iter().filter_map(|inst| match &inst.op {
        SsaOp::Phi(ops) => Some(ops),
        _ => None,
    });
    for ops in operand_lists {
        assert_eq!(
            ops.len(),
            with_phis.preds.len(),
            "phi operand count should equal predecessor count in a clean diamond"
        );
    }
}
|
|
|
|
/// Feeds `debug_assert_bfs_ordering` a hand-written predecessor table that
/// is expected to be accepted; the test passes as long as the helper does
/// not panic.
#[test]
fn bfs_assertion_helper_accepts_valid_orderings() {
    // Index = block id, value = ids of that block's predecessors.
    //   0: entry, no predecessors
    //   1: one forward predecessor (0)
    //   2: two forward predecessors (0 and 1)
    //   3: orphan, no predecessors
    //   4: one forward predecessor (2)
    let preds_by_block = Vec::from([vec![], vec![0], vec![0, 1], vec![], vec![2]]);

    debug_assert_bfs_ordering(&preds_by_block);
}
|
|
|
|
/// Regression guard: a join reached both from a catch block and from the
/// normal control-flow path must lower to a consistent phi. Here `x` is
/// defined before the guarded body, inside the body, and inside the catch
/// block, so the phi for `x` at the join must carry exactly two operands,
/// one per predecessor.
#[test]
fn catch_block_join_phi_has_operand_per_live_predecessor() {
    // Shape:
    //   Entry → define_x → body ──Seq──────────────→ join → Exit
    //                        └─Exception→ catch ─Seq─┘
    let defines_x = || NodeInfo {
        taint: TaintMeta {
            defines: Some("x".into()),
            ..Default::default()
        },
        ..make_node(StmtKind::Seq)
    };

    let mut graph: Cfg = Graph::new();
    let start = graph.add_node(make_node(StmtKind::Entry));
    let before_try = graph.add_node(defines_x());
    let guarded = graph.add_node(defines_x());
    let handler = graph.add_node(NodeInfo {
        catch_param: true,
        ..defines_x()
    });
    let merge = graph.add_node(NodeInfo {
        taint: TaintMeta {
            uses: vec!["x".into()],
            ..Default::default()
        },
        ..make_node(StmtKind::Seq)
    });
    let end = graph.add_node(make_node(StmtKind::Exit));

    for (from, to, kind) in [
        (start, before_try, EdgeKind::Seq),
        (before_try, guarded, EdgeKind::Seq),
        (guarded, merge, EdgeKind::Seq),
        (guarded, handler, EdgeKind::Exception),
        (handler, merge, EdgeKind::Seq),
        (merge, end, EdgeKind::Seq),
    ] {
        graph.add_edge(from, to, kind);
    }

    // Lowering must succeed; the phi invariant is enforced in release too.
    let lowered = lower_to_ssa(&graph, start, None, true).unwrap();

    let names_x = |inst: &SsaInst| inst.var_name.as_deref() == Some("x");

    // First block holding a real phi for `x`: expected to be the join.
    let with_phi = lowered
        .blocks
        .iter()
        .find(|blk| {
            blk.phis
                .iter()
                .any(|inst| names_x(inst) && matches!(inst.op, SsaOp::Phi(_)))
        })
        .expect("expected a phi for `x` at the catch/normal join");
    assert_eq!(
        with_phi.preds.len(),
        2,
        "catch/normal join block must have 2 predecessors, got {}",
        with_phi.preds.len()
    );

    let x_phi = with_phi.phis.iter().find(|inst| names_x(inst)).unwrap();
    match &x_phi.op {
        SsaOp::Phi(operands) => assert_eq!(
            operands.len(),
            2,
            "phi for `x` at the catch/normal join must have one operand per \
             predecessor, got {}",
            operands.len()
        ),
        _ => panic!("expected SsaOp::Phi for `x`"),
    }
}
|
|
|
|
/// Regression guard for the Undef fill pass. A variable that is defined on
/// only one incoming path of a join (here a catch-only binding rejoining
/// the normal path) must still get one phi operand per predecessor; the
/// path without a definition is expected to reference an instruction whose
/// op is `SsaOp::Undef` instead of being omitted.
#[test]
fn partial_phi_edge_fills_with_undef_sentinel() {
    // Shape:
    //   Entry → body ──Seq──────────────────────────→ join → Exit
    //             └─Exception→ catch (defines `e`) ─Seq─┘
    //
    // `e` has no reaching definition along body → join.
    let mut graph: Cfg = Graph::new();
    let start = graph.add_node(make_node(StmtKind::Entry));
    let guarded = graph.add_node(make_node(StmtKind::Seq));
    let handler = graph.add_node(NodeInfo {
        catch_param: true,
        taint: TaintMeta {
            defines: Some("e".into()),
            ..Default::default()
        },
        ..make_node(StmtKind::Seq)
    });
    let merge = graph.add_node(NodeInfo {
        taint: TaintMeta {
            uses: vec!["e".into()],
            ..Default::default()
        },
        ..make_node(StmtKind::Seq)
    });
    let end = graph.add_node(make_node(StmtKind::Exit));

    for (from, to, kind) in [
        (start, guarded, EdgeKind::Seq),
        (guarded, merge, EdgeKind::Seq),
        (guarded, handler, EdgeKind::Exception),
        (handler, merge, EdgeKind::Seq),
        (merge, end, EdgeKind::Seq),
    ] {
        graph.add_edge(from, to, kind);
    }

    let lowered = lower_to_ssa(&graph, start, None, true).unwrap();

    let names_e = |inst: &SsaInst| inst.var_name.as_deref() == Some("e");

    // Locate the phi for `e` and pull out its operand list.
    let with_phi = lowered
        .blocks
        .iter()
        .find(|blk| blk.phis.iter().any(|inst| names_e(inst)))
        .expect("expected a phi for `e`");
    let e_phi = with_phi.phis.iter().find(|inst| names_e(inst)).unwrap();
    let operands = match &e_phi.op {
        SsaOp::Phi(ops) => ops,
        _ => panic!("expected SsaOp::Phi for `e`"),
    };

    // Strict invariant: one operand per predecessor.
    assert_eq!(
        operands.len(),
        with_phi.preds.len(),
        "phi for `e` must have one operand per predecessor",
    );

    // At least one operand must resolve to an instruction whose op is
    // `SsaOp::Undef`. Operands with no defining instruction in any block
    // count as "not Undef".
    let references_undef = operands.iter().any(|(_, operand)| {
        let defining_inst = lowered
            .blocks
            .iter()
            .flat_map(|blk| blk.phis.iter().chain(blk.body.iter()))
            .find(|inst| inst.value == *operand);
        matches!(defining_inst, Some(inst) if matches!(inst.op, SsaOp::Undef))
    });
    assert!(
        references_undef,
        "phi for `e` at the catch-join must reference SsaOp::Undef \
         on the normal-path predecessor edge",
    );
}
|
|
|
|
/// Direct test of `assert_phi_operand_counts`: a phi whose operands match
/// the block's predecessors one-for-one must be accepted without a panic.
#[test]
fn phi_assertion_helper_accepts_exact_operand_count() {
    // Block 0: entry, jumps to the join.
    let entry_block = SsaBlock {
        id: BlockId(0),
        phis: vec![],
        body: vec![],
        terminator: Terminator::Goto(BlockId(1)),
        preds: smallvec::smallvec![],
        succs: smallvec::smallvec![BlockId(1)],
    };

    // Block 1: join with predecessors {0, 2} and a phi naming both.
    let join_phi = SsaInst {
        value: SsaValue(0),
        op: SsaOp::Phi(smallvec::smallvec![
            (BlockId(0), SsaValue(1)),
            (BlockId(2), SsaValue(2)),
        ]),
        cfg_node: NodeIndex::new(0),
        var_name: Some("x".into()),
        span: (0, 0),
    };
    let join_block = SsaBlock {
        id: BlockId(1),
        phis: vec![join_phi],
        body: vec![],
        terminator: Terminator::Unreachable,
        preds: smallvec::smallvec![BlockId(0), BlockId(2)],
        succs: smallvec::smallvec![],
    };

    // Block 2: second path from the entry into the join.
    let side_block = SsaBlock {
        id: BlockId(2),
        phis: vec![],
        body: vec![],
        terminator: Terminator::Goto(BlockId(1)),
        preds: smallvec::smallvec![BlockId(0)],
        succs: smallvec::smallvec![BlockId(1)],
    };

    let blocks = [entry_block, join_block, side_block];
    let block_preds = vec![vec![], vec![0, 2], vec![0]];
    assert_phi_operand_counts(&blocks, &block_preds);
}
|
|
|
|
/// A phi listing MORE operands than the block has predecessors names a
/// predecessor that does not exist. `assert_phi_operand_counts` must panic
/// with the operand-count message for such input.
#[test]
#[should_panic(expected = "SSA phi operand count does not match predecessor count")]
fn phi_assertion_helper_rejects_more_operands_than_preds() {
    let entry_block = SsaBlock {
        id: BlockId(0),
        phis: vec![],
        body: vec![],
        terminator: Terminator::Goto(BlockId(1)),
        preds: smallvec::smallvec![],
        succs: smallvec::smallvec![BlockId(1)],
    };

    // Three operands against two predecessors.
    let oversized_phi = SsaInst {
        value: SsaValue(0),
        op: SsaOp::Phi(smallvec::smallvec![
            (BlockId(0), SsaValue(1)),
            (BlockId(2), SsaValue(2)),
            (BlockId(3), SsaValue(3)),
        ]),
        cfg_node: NodeIndex::new(0),
        var_name: Some("x".into()),
        span: (0, 0),
    };
    let join_block = SsaBlock {
        id: BlockId(1),
        phis: vec![oversized_phi],
        body: vec![],
        terminator: Terminator::Unreachable,
        preds: smallvec::smallvec![BlockId(0), BlockId(2)],
        succs: smallvec::smallvec![],
    };

    let blocks = [entry_block, join_block];
    let block_preds = vec![vec![], vec![0, 2]];
    assert_phi_operand_counts(&blocks, &block_preds);
}
|
|
|
|
/// A phi listing FEWER operands than the block has predecessors breaks the
/// strict-equality invariant. Per the original note,
/// `fill_undef_phi_operands` is meant to fill every missing slot, so a gap
/// reaching `assert_phi_operand_counts` must make it panic.
#[test]
#[should_panic(expected = "SSA phi operand count does not match predecessor count")]
fn phi_assertion_helper_rejects_fewer_operands_than_preds() {
    let entry_block = SsaBlock {
        id: BlockId(0),
        phis: vec![],
        body: vec![],
        terminator: Terminator::Goto(BlockId(1)),
        preds: smallvec::smallvec![],
        succs: smallvec::smallvec![BlockId(1)],
    };

    // One operand against two predecessors.
    let undersized_phi = SsaInst {
        value: SsaValue(0),
        op: SsaOp::Phi(smallvec::smallvec![(BlockId(0), SsaValue(1))]),
        cfg_node: NodeIndex::new(0),
        var_name: Some("e".into()),
        span: (0, 0),
    };
    let join_block = SsaBlock {
        id: BlockId(1),
        phis: vec![undersized_phi],
        body: vec![],
        terminator: Terminator::Unreachable,
        preds: smallvec::smallvec![BlockId(0), BlockId(2)],
        succs: smallvec::smallvec![],
    };

    let blocks = [entry_block, join_block];
    let block_preds = vec![vec![], vec![0, 2]];
    assert_phi_operand_counts(&blocks, &block_preds);
}
|
|
|
|
/// A phi with the right NUMBER of operands, one of which names a block
/// that is not among the predecessors, must also be rejected, this time
/// with the "nonexistent predecessor" message.
#[test]
#[should_panic(expected = "SSA phi operand references nonexistent predecessor")]
fn phi_assertion_helper_rejects_wrong_pred_block() {
    let entry_block = SsaBlock {
        id: BlockId(0),
        phis: vec![],
        body: vec![],
        terminator: Terminator::Goto(BlockId(1)),
        preds: smallvec::smallvec![],
        succs: smallvec::smallvec![BlockId(1)],
    };

    // Predecessors are {0, 2}, but the second operand claims block 3.
    let misdirected_phi = SsaInst {
        value: SsaValue(0),
        op: SsaOp::Phi(smallvec::smallvec![
            (BlockId(0), SsaValue(1)),
            (BlockId(3), SsaValue(2)),
        ]),
        cfg_node: NodeIndex::new(0),
        var_name: Some("x".into()),
        span: (0, 0),
    };
    let join_block = SsaBlock {
        id: BlockId(1),
        phis: vec![misdirected_phi],
        body: vec![],
        terminator: Terminator::Unreachable,
        preds: smallvec::smallvec![BlockId(0), BlockId(2)],
        succs: smallvec::smallvec![],
    };

    let blocks = [entry_block, join_block];
    let block_preds = vec![vec![], vec![0, 2]];
    assert_phi_operand_counts(&blocks, &block_preds);
}
|
|
|
|
/// A CFG node with three outgoing edges must lower to a block ending in
/// `Terminator::Switch` that exposes every target: `default` is the first
/// entry of `succs` and `targets` holds the remaining ones in order.
#[test]
fn three_successor_collapse_produces_switch() {
    let mut graph: Cfg = Graph::new();
    let start = graph.add_node(make_node(StmtKind::Entry));
    let fan_out = graph.add_node(make_node(StmtKind::If));
    let arm0 = graph.add_node(make_node(StmtKind::Seq));
    let arm1 = graph.add_node(make_node(StmtKind::Seq));
    let arm2 = graph.add_node(make_node(StmtKind::Seq));
    let end = graph.add_node(make_node(StmtKind::Exit));

    // One edge in, three edges out of `fan_out`, then all arms reach Exit.
    for (from, to, kind) in [
        (start, fan_out, EdgeKind::Seq),
        (fan_out, arm0, EdgeKind::True),
        (fan_out, arm1, EdgeKind::False),
        (fan_out, arm2, EdgeKind::Seq),
        (arm0, end, EdgeKind::Seq),
        (arm1, end, EdgeKind::Seq),
        (arm2, end, EdgeKind::Seq),
    ] {
        graph.add_edge(from, to, kind);
    }

    let lowered = lower_to_ssa(&graph, start, None, true).unwrap();
    assert!(!lowered.blocks.is_empty());

    let switch_block = lowered
        .blocks
        .iter()
        .find(|blk| matches!(blk.terminator, Terminator::Switch { .. }) && blk.succs.len() >= 3)
        .expect("expected a block with a Switch terminator and ≥3 succs");

    assert_eq!(
        switch_block.succs.len(),
        3,
        "≥3-successor lowering must retain all succs on block.succs, got {:?}",
        switch_block.succs
    );

    if let Terminator::Switch {
        targets, default, ..
    } = &switch_block.terminator
    {
        assert_eq!(
            *default, switch_block.succs[0],
            "Switch default must match succs[0]"
        );
        assert_eq!(
            targets.len(),
            switch_block.succs.len() - 1,
            "Switch targets must cover every succ except default"
        );
        // The length check above guarantees the zip pairs every target
        // with `succs[i + 1]`.
        let non_default_succs = switch_block.succs.iter().skip(1);
        for (i, (target, expected)) in targets.iter().zip(non_default_succs).enumerate() {
            assert_eq!(
                *target,
                *expected,
                "Switch target[{i}] must match succs[{}]",
                i + 1
            );
        }
    }
}
|
|
|
|
/// Regression: an ordinary two-way `If` must still lower to at least one
/// block ending in `Terminator::Branch`.
#[test]
fn normal_two_successor_produces_branch() {
    let defines_x = || NodeInfo {
        taint: TaintMeta {
            defines: Some("x".into()),
            ..Default::default()
        },
        ..make_node(StmtKind::Seq)
    };

    let mut graph: Cfg = Graph::new();
    let start = graph.add_node(make_node(StmtKind::Entry));
    let branch = graph.add_node(make_node(StmtKind::If));
    let then_arm = graph.add_node(defines_x());
    let else_arm = graph.add_node(defines_x());
    let end = graph.add_node(make_node(StmtKind::Exit));

    for (from, to, kind) in [
        (start, branch, EdgeKind::Seq),
        (branch, then_arm, EdgeKind::True),
        (branch, else_arm, EdgeKind::False),
        (then_arm, end, EdgeKind::Seq),
        (else_arm, end, EdgeKind::Seq),
    ] {
        graph.add_edge(from, to, kind);
    }

    let lowered = lower_to_ssa(&graph, start, None, true).unwrap();
    let mut terminators = lowered.blocks.iter().map(|blk| &blk.terminator);
    assert!(
        terminators.any(|term| matches!(term, Terminator::Branch { .. })),
        "normal 2-successor case must produce Branch, not Goto"
    );
}
|
|
|
|
/// Regression: a block holding an explicit Return CFG node must end in
/// [`Terminator::Return`], never in a [`Terminator::Goto`] to a synthetic
/// exit block.
///
/// Background, per the original regression note: the bookkeeping
/// `Return → fn_exit` `Seq` edge used to push early-return blocks into the
/// single-successor `Goto` arm, and the fall-through tail got merged into
/// the shared exit block, so every early-return path looked as if it also
/// ran the tail. The CFG below mirrors `if cond { return X; } Y`.
#[test]
fn early_return_block_terminates_with_return_not_goto_to_exit() {
    let mut graph: Cfg = Graph::new();
    let start = graph.add_node(make_node(StmtKind::Entry));
    // Condition node: reads `x`, which nothing in this graph defines.
    let condition = graph.add_node(NodeInfo {
        taint: TaintMeta {
            uses: vec!["x".into()],
            ..Default::default()
        },
        ..make_node(StmtKind::If)
    });
    // True arm: a Return carrying only constant text (no uses). This is
    // meant to take the literal-return path, i.e. the shape that
    // `return None` / `return String::new()` yields in real Rust code.
    let early_ret = graph.add_node(NodeInfo {
        taint: TaintMeta {
            const_text: Some("\"\"".to_string()),
            ..Default::default()
        },
        ..make_node(StmtKind::Return)
    });
    // False arm: tail expression defining `y` from `x`.
    let tail = graph.add_node(NodeInfo {
        taint: TaintMeta {
            defines: Some("y".into()),
            uses: vec!["x".into()],
            ..Default::default()
        },
        ..make_node(StmtKind::Seq)
    });
    let end = graph.add_node(make_node(StmtKind::Exit));

    // The last two edges replicate the Return/tail → fn_exit `Seq` wiring
    // that `build_cfg` is described as producing, so lowering has to cope
    // with it.
    for (from, to, kind) in [
        (start, condition, EdgeKind::Seq),
        (condition, early_ret, EdgeKind::True),
        (condition, tail, EdgeKind::False),
        (early_ret, end, EdgeKind::Seq),
        (tail, end, EdgeKind::Seq),
    ] {
        graph.add_edge(from, to, kind);
    }

    let lowered = lower_to_ssa(&graph, start, None, true).unwrap();

    // First SSA block with any instruction (body or phi) from `node`.
    let block_containing = |node: NodeIndex| {
        lowered.blocks.iter().find(|blk| {
            blk.body
                .iter()
                .chain(blk.phis.iter())
                .any(|inst| inst.cfg_node == node)
        })
    };

    let early_block =
        block_containing(early_ret).expect("early-return CFG node must live in some SSA block");
    assert!(
        matches!(early_block.terminator, Terminator::Return(_)),
        "early-return block must terminate with Return, got {:?}",
        early_block.terminator
    );
    assert!(
        early_block.succs.is_empty(),
        "early-return block must have no successors at the block level, \
         got succs = {:?}",
        early_block.succs
    );

    // The tail block must not list the early-return block as a
    // predecessor; otherwise the tail would be reachable from the
    // early-return path, which is exactly the merged-return defect.
    let tail_block = block_containing(tail).expect("tail CFG node must live in some SSA block");
    let early_block_id = early_block.id;
    assert!(
        !tail_block.preds.contains(&early_block_id),
        "tail block must not have early-return block as a predecessor; \
         merged-return defect would re-emerge. tail.preds = {:?}, \
         early_block_id = {:?}",
        tail_block.preds,
        early_block_id
    );
}
|
|
|
|
/// Regression: for an OR-chain guard such as
/// `if a || b || c { return X; } Y`, the SSA block holding the rejection
/// arm's `String::new()` call must end in `Terminator::Return(_)` and have
/// no block-level successors.
///
/// Per the original regression note, that call used to share a block whose
/// only successor was the merged tail, which lost the early-return
/// semantics and diluted per-return-path PathFact narrowing.
#[test]
fn or_chain_rejection_block_terminates_with_return() {
    use crate::cfg::build_cfg;

    let src = br#"
        fn sanitize_path(s: &str) -> String {
            if s.contains("..") || s.starts_with('/') || s.starts_with('\\') {
                return String::new();
            }
            s.to_string()
        }
    "#;
    let language = tree_sitter::Language::from(tree_sitter_rust::LANGUAGE);
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&language).unwrap();
    let tree = parser.parse(src.as_slice(), None).unwrap();
    let file_cfg = build_cfg(&tree, src.as_slice(), "rust", "test.rs", None);

    // Use the second body when more than one was produced, otherwise
    // whatever `first_body()` returns.
    let body = match file_cfg.bodies.len() {
        0 | 1 => file_cfg.first_body(),
        _ => &file_cfg.bodies[1],
    };
    let cfg = &body.graph;
    let entry = body.entry;

    // Identify the rejection arm by its `String::new` call node, not by
    // block position, so the assertions survive block reordering. If
    // several nodes match, the one with the highest index is used.
    let rejection_call = cfg
        .node_indices()
        .rev()
        .find(|&idx| {
            let info = &cfg[idx];
            info.kind == StmtKind::Call
                && matches!(&info.call.callee, Some(callee) if callee.ends_with("String::new"))
        })
        .expect("CFG must contain a String::new() Call node for the rejection arm");

    let ssa = lower_to_ssa(cfg, entry, None, true).expect("SSA lowering should succeed");

    // The SSA block holding that call is the rejection-arm block.
    let rejection_block = ssa
        .blocks
        .iter()
        .find(|blk| {
            blk.body
                .iter()
                .chain(blk.phis.iter())
                .any(|inst| inst.cfg_node == rejection_call)
        })
        .expect("rejection-arm Call must live in some SSA block");

    assert!(
        rejection_block.succs.is_empty(),
        "rejection-arm block must have no block-level successors after \
         return-frontier strip; got succs = {:?}",
        rejection_block.succs
    );
    assert!(
        matches!(rejection_block.terminator, Terminator::Return(_)),
        "rejection-arm block must terminate with Terminator::Return; got {:?}",
        rejection_block.terminator
    );
}
|
|
|
|
/// Cross-language regression: the merged-return defect fixed for Rust must
/// not show up for C. The snippet follows the OR-chain shape of
/// `tests/benchmark/corpus/c/safe/safe_direct_path_sanitizer.c`: a
/// rejection arm (`return ""`) plus a tail return (`return s`). Lowering
/// must produce at least two blocks ending in `Terminator::Return(_)`, and
/// none of them may have successors.
#[test]
fn c_or_chain_both_return_arms_terminate_with_return() {
    use crate::cfg::build_cfg;

    let src = br#"
        const char *sanitize_path(const char *s) {
            if (strstr(s, "..") != NULL || s[0] == '/' || s[0] == '\\') {
                return "";
            }
            return s;
        }
    "#;
    let language = tree_sitter::Language::from(tree_sitter_c::LANGUAGE);
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&language).unwrap();
    let tree = parser.parse(src.as_slice(), None).unwrap();
    let file_cfg = build_cfg(&tree, src.as_slice(), "c", "test.c", None);
    let body = file_cfg.first_body();

    let ssa = lower_to_ssa(&body.graph, body.entry, None, true)
        .expect("SSA lowering should succeed");

    let mut return_blocks: Vec<&SsaBlock> = Vec::new();
    return_blocks.extend(
        ssa.blocks
            .iter()
            .filter(|blk| matches!(blk.terminator, Terminator::Return(_))),
    );
    assert!(
        return_blocks.len() >= 2,
        "Expected ≥2 Return-terminated blocks (rejection arm + tail); got {}: {:?}",
        return_blocks.len(),
        ssa.blocks
            .iter()
            .map(|b| (b.id, &b.terminator))
            .collect::<Vec<_>>()
    );

    // Nothing may fall through past a Return.
    return_blocks.iter().for_each(|blk| {
        assert!(
            blk.succs.is_empty(),
            "Return-terminated block id={:?} has succs={:?}",
            blk.id,
            blk.succs
        );
    });
}
|
|
|
|
// ─────────────────────────────────────────────────────────────────
|
|
// FieldProj chain lowering tests
|
|
// ─────────────────────────────────────────────────────────────────
|
|
//
|
|
// These tests pin the contract that `try_lower_field_proj_chain`
|
|
// emits a `FieldProj` chain for chained-receiver method calls
|
|
// (`a.b.c.method()`) and bails (preserving the existing single-Call
|
|
// lowering) for everything else. Per-language end-to-end coverage
|
|
// lives below in `phase2_e2e_*` tests; the unit tests here pin the
|
|
// helper's behaviour without going through tree-sitter.
|
|
|
|
/// Build a freshly-allocated empty SSA scratch state suitable for
|
|
/// invoking `try_lower_field_proj_chain` in isolation. Returns
|
|
/// `(var_stacks, field_interner, ssa_blocks, value_defs, next_value)`.
|
|
fn fresh_proj_scratch() -> (
|
|
std::collections::HashMap<String, Vec<SsaValue>>,
|
|
crate::ssa::ir::FieldInterner,
|
|
Vec<SsaBlock>,
|
|
Vec<ValueDef>,
|
|
u32,
|
|
) {
|
|
let blocks = vec![SsaBlock {
|
|
id: BlockId(0),
|
|
phis: Vec::new(),
|
|
body: Vec::new(),
|
|
terminator: Terminator::Unreachable,
|
|
preds: SmallVec::new(),
|
|
succs: SmallVec::new(),
|
|
}];
|
|
(
|
|
std::collections::HashMap::new(),
|
|
crate::ssa::ir::FieldInterner::new(),
|
|
blocks,
|
|
Vec::new(),
|
|
0,
|
|
)
|
|
}
|
|
|
|
/// Seed a single SSA value `SsaValue(0)` for `name` so the chain
|
|
/// helper's base lookup succeeds.
|
|
fn seed_var(
|
|
var_stacks: &mut std::collections::HashMap<String, Vec<SsaValue>>,
|
|
value_defs: &mut Vec<ValueDef>,
|
|
next_value: &mut u32,
|
|
name: &str,
|
|
) -> SsaValue {
|
|
let v = SsaValue(*next_value);
|
|
*next_value += 1;
|
|
value_defs.push(ValueDef {
|
|
var_name: Some(name.into()),
|
|
cfg_node: NodeIndex::new(0),
|
|
block: BlockId(0),
|
|
});
|
|
var_stacks.entry(name.into()).or_default().push(v);
|
|
v
|
|
}
|
|
|
|
/// Callees with fewer than two dots are not decomposed:
/// - `foo` (no dot) is a bare callee with no chain at all;
/// - `obj.method` (one dot) is a plain receiver call, which the existing
///   receiver channel is said to handle already.
///
/// In both cases the helper must return `None` and leave the scratch
/// state untouched.
#[test]
fn try_lower_field_proj_chain_too_few_segments_returns_none() {
    let (mut var_stacks, mut fields, mut ssa_blocks, mut value_defs, mut next_value) =
        fresh_proj_scratch();
    seed_var(&mut var_stacks, &mut value_defs, &mut next_value, "obj");

    for callee in ["foo", "obj.method"] {
        let bailed = try_lower_field_proj_chain(
            callee,
            &var_stacks,
            &mut fields,
            0,
            BlockId(0),
            &mut next_value,
            &mut ssa_blocks,
            &mut value_defs,
            NodeIndex::new(0),
            (0, 0),
        )
        .is_none();
        assert!(bailed);
    }

    // Nothing was emitted into the block and nothing was interned.
    assert!(ssa_blocks[0].body.is_empty());
    assert!(fields.is_empty());
}
|
|
|
|
/// Callee texts containing a token that breaks the plain
/// `<ident>.<ident>...` shape must make the helper bail with `None`,
/// emitting no instructions and interning no fields.
#[test]
fn try_lower_field_proj_chain_complex_token_returns_none() {
    let (mut var_stacks, mut fields, mut ssa_blocks, mut value_defs, mut next_value) =
        fresh_proj_scratch();
    seed_var(&mut var_stacks, &mut value_defs, &mut next_value, "obj");

    // One representative per kind of disqualifying token.
    let cases = [
        "Foo::bar::baz", // Rust path
        "ptr->field.f",  // C-style arrow
        "obj.f().g",     // intermediate call
        "vec[0].field",  // index expression
        "obj.f.<T>",     // template-ish
        "obj.f g",       // whitespace
        "obj?.f.g",      // optional chain
    ];

    for s in cases {
        let bailed = try_lower_field_proj_chain(
            s,
            &var_stacks,
            &mut fields,
            0,
            BlockId(0),
            &mut next_value,
            &mut ssa_blocks,
            &mut value_defs,
            NodeIndex::new(0),
            (0, 0),
        )
        .is_none();
        assert!(bailed, "expected bail on complex callee {s}");
    }

    assert!(ssa_blocks[0].body.is_empty());
    assert!(fields.is_empty());
}
|
|
|
|
/// When the first segment of the chain is not a known SSA variable the
/// root value cannot be recovered, so the helper must fall back
/// (`None`) and emit nothing.
#[test]
fn try_lower_field_proj_chain_unknown_base_returns_none() {
    // Deliberately no `seed_var` call: "ghost" has no entry in the
    // variable stacks.
    let (var_stacks, mut interner, mut blocks, mut value_defs, mut next_value) =
        fresh_proj_scratch();

    let outcome = try_lower_field_proj_chain(
        "ghost.f.method",
        &var_stacks,
        &mut interner,
        0,
        BlockId(0),
        &mut next_value,
        &mut blocks,
        &mut value_defs,
        NodeIndex::new(0),
        (0, 0),
    );
    assert!(outcome.is_none());

    assert!(blocks[0].body.is_empty());
    assert!(interner.is_empty());
}
|
|
|
|
/// `c.mu.Lock` decomposes into exactly one `FieldProj` (for `mu`) and
/// yields `(v_mu, "Lock")` as receiver value and bare method name.
#[test]
fn try_lower_field_proj_chain_basic_two_dots_emits_one_proj() {
    let (mut var_stacks, mut interner, mut blocks, mut value_defs, mut next_value) =
        fresh_proj_scratch();
    let root = seed_var(&mut var_stacks, &mut value_defs, &mut next_value, "c");

    let lowered = try_lower_field_proj_chain(
        "c.mu.Lock",
        &var_stacks,
        &mut interner,
        0,
        BlockId(0),
        &mut next_value,
        &mut blocks,
        &mut value_defs,
        NodeIndex::new(0),
        (10, 20),
    );
    let (recv, method) = lowered.expect("chain decomposition should succeed");

    // The receiver is a freshly allocated value, one past the root `c`.
    assert_eq!(recv, SsaValue(1));
    assert_eq!(method, "Lock");

    // Exactly one instruction was appended, and it is the projection
    // of `mu` off the root.
    assert_eq!(blocks[0].body.len(), 1);
    let proj = &blocks[0].body[0];
    match &proj.op {
        SsaOp::FieldProj {
            receiver,
            field,
            projected_type,
        } => {
            assert_eq!(*receiver, root);
            assert_eq!(interner.resolve(*field), "mu");
            assert!(projected_type.is_none());
        }
        other => panic!("expected FieldProj, got {other:?}"),
    }

    // The caller-supplied span and a readable dotted name are carried
    // on the instruction.
    assert_eq!(proj.span, (10, 20));
    assert_eq!(proj.var_name.as_deref(), Some("c.mu"));

    // The new SSA value is also registered in `value_defs`.
    let newest_def = value_defs.last().unwrap();
    assert_eq!(newest_def.var_name.as_deref(), Some("c.mu"));
}
|
|
|
|
/// `c.writer.header.set` produces two chained `FieldProj` ops: the
/// first reads the root `c`, the second reads the first projection.
#[test]
fn try_lower_field_proj_chain_three_dots_emits_two_projs_chained() {
    let (mut var_stacks, mut interner, mut blocks, mut value_defs, mut next_value) =
        fresh_proj_scratch();
    let root = seed_var(&mut var_stacks, &mut value_defs, &mut next_value, "c");

    let lowered = try_lower_field_proj_chain(
        "c.writer.header.set",
        &var_stacks,
        &mut interner,
        0,
        BlockId(0),
        &mut next_value,
        &mut blocks,
        &mut value_defs,
        NodeIndex::new(0),
        (0, 0),
    );
    let (recv, method) = lowered.expect("chain decomposition should succeed");
    assert_eq!(method, "set");
    // Value numbering: c = 0, c.writer = 1, c.writer.header = 2.
    assert_eq!(recv, SsaValue(2));

    assert_eq!(blocks[0].body.len(), 2, "expected 2 FieldProj ops");

    // (expected receiver, expected field, expected readable name) for
    // each emitted projection, in emission order.
    let expected = [
        (root, "writer", "c.writer"),
        (SsaValue(1), "header", "c.writer.header"),
    ];
    for (inst, (want_receiver, want_field, want_name)) in blocks[0].body.iter().zip(expected) {
        match &inst.op {
            SsaOp::FieldProj {
                receiver, field, ..
            } => {
                assert_eq!(*receiver, want_receiver);
                assert_eq!(interner.resolve(*field), want_field);
            }
            other => panic!("expected FieldProj, got {other:?}"),
        }
        // The var_names spell out the dotted prefix so far.
        assert_eq!(inst.var_name.as_deref(), Some(want_name));
    }
}
|
|
|
|
/// Two independent chains that mention the same field name must share
/// one `FieldId`: the per-body interner deduplicates names.
#[test]
fn try_lower_field_proj_chain_dedupes_field_names() {
    let (mut var_stacks, mut interner, mut blocks, mut value_defs, mut next_value) =
        fresh_proj_scratch();
    // Both chain roots need a reaching definition; the returned values
    // themselves are not inspected here.
    seed_var(&mut var_stacks, &mut value_defs, &mut next_value, "a");
    seed_var(&mut var_stacks, &mut value_defs, &mut next_value, "b");

    let _ = try_lower_field_proj_chain(
        "a.shared.f",
        &var_stacks,
        &mut interner,
        0,
        BlockId(0),
        &mut next_value,
        &mut blocks,
        &mut value_defs,
        NodeIndex::new(0),
        (0, 0),
    )
    .unwrap();
    let _ = try_lower_field_proj_chain(
        "b.shared.g",
        &var_stacks,
        &mut interner,
        0,
        BlockId(0),
        &mut next_value,
        &mut blocks,
        &mut value_defs,
        NodeIndex::new(0),
        (0, 0),
    )
    .unwrap();

    // One FieldProj per chain, both referring to the same interned id.
    assert_eq!(blocks[0].body.len(), 2);
    let field_ids: Vec<_> = blocks[0]
        .body
        .iter()
        .map(|inst| match &inst.op {
            SsaOp::FieldProj { field, .. } => *field,
            _ => panic!(),
        })
        .collect();
    assert_eq!(field_ids[0], field_ids[1], "dedup should reuse FieldId");
    assert_eq!(interner.len(), 1, "only one unique field name interned");
}
|
|
|
|
/// Defensive check: a leading, trailing, or doubled dot yields an empty
/// segment, which is not a member chain, so the helper must bail.
#[test]
fn try_lower_field_proj_chain_rejects_empty_segments() {
    let (mut var_stacks, mut interner, mut blocks, mut value_defs, mut next_value) =
        fresh_proj_scratch();
    seed_var(&mut var_stacks, &mut value_defs, &mut next_value, "x");

    let malformed = [".x.f", "x..f", "x.f."];
    for s in malformed {
        let outcome = try_lower_field_proj_chain(
            s,
            &var_stacks,
            &mut interner,
            0,
            BlockId(0),
            &mut next_value,
            &mut blocks,
            &mut value_defs,
            NodeIndex::new(0),
            (0, 0),
        );
        assert!(outcome.is_none(), "expected bail on {s}");
    }

    assert!(blocks[0].body.is_empty());
}
|
|
|
|
// ── End-to-end SSA decomposition tests via real tree-sitter parsing ──────────
|
|
//
|
|
// These exercise the integration between CFG construction (which sets
|
|
// `info.call.callee = "c.mu.Lock"`) and SSA lowering. We assert that
|
|
// the resulting SsaBody contains a `FieldProj` op whose interned name
|
|
// matches the source-level field name.
|
|
|
|
fn parse_to_first_body(
|
|
src: &[u8],
|
|
lang: &str,
|
|
ts_lang: tree_sitter::Language,
|
|
path: &str,
|
|
) -> SsaBody {
|
|
let mut parser = tree_sitter::Parser::new();
|
|
parser.set_language(&ts_lang).unwrap();
|
|
let tree = parser.parse(src, None).unwrap();
|
|
let file_cfg = crate::cfg::build_cfg(&tree, src, lang, path, None);
|
|
// Prefer the first non-top-level body (a function), fall back to top.
|
|
let body = if file_cfg.bodies.len() > 1 {
|
|
&file_cfg.bodies[1]
|
|
} else {
|
|
&file_cfg.bodies[0]
|
|
};
|
|
// Mirror the production lowering path: function bodies use
|
|
// lower_to_ssa_with_params so formal parameters get synthetic
|
|
// Param/SelfParam injections at block 0, without them, the
|
|
// FieldProj chain helper has no SSA root to anchor to.
|
|
if body.meta.name.is_some() {
|
|
let func_name = body.meta.name.clone().unwrap_or_default();
|
|
lower_to_ssa_with_params(
|
|
&body.graph,
|
|
body.entry,
|
|
Some(&func_name),
|
|
false,
|
|
&body.meta.params,
|
|
)
|
|
.expect("SSA lowering should succeed")
|
|
} else {
|
|
lower_to_ssa(&body.graph, body.entry, None, true).expect("SSA lowering should succeed")
|
|
}
|
|
}
|
|
|
|
/// Iterate every FieldProj instance in `body` along with its resolved
|
|
/// field name.
|
|
fn collect_field_projs(body: &SsaBody) -> Vec<(SsaValue, SsaValue, String)> {
|
|
let mut out = Vec::new();
|
|
for blk in &body.blocks {
|
|
for inst in blk.phis.iter().chain(blk.body.iter()) {
|
|
if let SsaOp::FieldProj {
|
|
receiver, field, ..
|
|
} = &inst.op
|
|
{
|
|
out.push((inst.value, *receiver, body.field_name(*field).to_string()));
|
|
}
|
|
}
|
|
}
|
|
out
|
|
}
|
|
|
|
/// Iterate every Call instance in `body` along with its callee + callee_text.
|
|
fn collect_calls(body: &SsaBody) -> Vec<(String, Option<String>, Option<SsaValue>)> {
|
|
let mut out = Vec::new();
|
|
for blk in &body.blocks {
|
|
for inst in blk.body.iter() {
|
|
if let SsaOp::Call {
|
|
callee,
|
|
callee_text,
|
|
receiver,
|
|
..
|
|
} = &inst.op
|
|
{
|
|
out.push((callee.clone(), callee_text.clone(), *receiver));
|
|
}
|
|
}
|
|
}
|
|
out
|
|
}
|
|
|
|
/// Go end-to-end: `c.writer.header.set(k, v)` has a three-segment
/// receiver, so lowering must emit projections for `writer` and
/// `header` and a bare `set` call that keeps the original callee text.
/// The chain root `c` is a function parameter, hence resolvable.
#[test]
fn phase2_e2e_go_chained_receiver_emits_field_proj() {
    let go_lang = tree_sitter::Language::from(tree_sitter_go::LANGUAGE);
    let src = b"package p\nfunc f(c *T, k string, v string) { c.writer.header.set(k, v) }\n";
    let body = parse_to_first_body(src, "go", go_lang, "test.go");

    let projs = collect_field_projs(&body);
    assert!(
        projs.len() >= 2,
        "expected ≥2 FieldProj ops for c.writer.header.<m>; got {projs:?}"
    );

    // The projected field names mirror the source-level field structure.
    let names: Vec<&str> = projs.iter().map(|proj| proj.2.as_str()).collect();
    assert!(
        names.iter().any(|n| *n == "writer"),
        "missing 'writer' projection in {names:?}"
    );
    assert!(
        names.iter().any(|n| *n == "header"),
        "missing 'header' projection in {names:?}"
    );

    // The Call op carries the bare method name, an SSA receiver, and
    // the full textual path in `callee_text`.
    let calls = collect_calls(&body);
    let (_, ctext, recv) = calls
        .iter()
        .find(|(c, _, _)| c == "set")
        .unwrap_or_else(|| panic!("expected a Call with bare callee 'set'; got {calls:?}"));
    assert!(recv.is_some(), "decomposed call must carry an SSA receiver");
    assert_eq!(
        ctext.as_deref(),
        Some("c.writer.header.set"),
        "callee_text should preserve the original textual path"
    );
}
|
|
|
|
/// Python end-to-end: `obj.client.session.send(p)` has a three-segment
/// receiver; expect `client` and `session` projections plus a bare
/// `send` call that retains the original callee text.
#[test]
fn phase2_e2e_python_chained_receiver_emits_field_proj() {
    let py_lang = tree_sitter::Language::from(tree_sitter_python::LANGUAGE);
    let src = b"def f(obj, p):\n obj.client.session.send(p)\n";
    let body = parse_to_first_body(src, "python", py_lang, "test.py");

    let projs = collect_field_projs(&body);
    let names: Vec<&str> = projs.iter().map(|proj| proj.2.as_str()).collect();
    let has_both = ["client", "session"]
        .iter()
        .all(|want| names.contains(want));
    assert!(
        has_both,
        "expected client + session projections, got {names:?}"
    );

    let calls = collect_calls(&body);
    let has_bare_send = calls.iter().any(|(c, ct, r)| {
        c == "send" && ct.as_deref() == Some("obj.client.session.send") && r.is_some()
    });
    assert!(
        has_bare_send,
        "expected bare 'send' Call with callee_text retained; got {calls:?}"
    );
}
|
|
|
|
/// JavaScript end-to-end: `obj.foo.bar.baz()` has a three-segment
/// receiver; expect projections for `foo` and `bar`.
#[test]
fn phase2_e2e_javascript_chained_receiver_emits_field_proj() {
    let js_lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let src = b"function f(obj) { obj.foo.bar.baz(); }";
    let body = parse_to_first_body(src, "javascript", js_lang, "test.js");

    let projs = collect_field_projs(&body);
    let names: Vec<&str> = projs.iter().map(|proj| proj.2.as_str()).collect();
    let has_both = ["foo", "bar"].iter().all(|want| names.contains(want));
    assert!(has_both, "expected foo + bar projections, got {names:?}");
}
|
|
|
|
/// Java end-to-end: `obj.config.handler.run()` is a three-segment
/// receiver chain rooted at the parameter `obj`.
///
/// `this.…` is deliberately avoided: `this` is a Java keyword (not an
/// identifier node), so it isn't extracted as an external use, which
/// puts it outside SSA decomposition's scope.
#[test]
fn phase2_e2e_java_chained_receiver_emits_field_proj() {
    let java_lang = tree_sitter::Language::from(tree_sitter_java::LANGUAGE);
    let src = b"class C { void f(Object obj) { obj.config.handler.run(); } }";
    let body = parse_to_first_body(src, "java", java_lang, "test.java");

    let projs = collect_field_projs(&body);
    let names: Vec<&str> = projs.iter().map(|proj| proj.2.as_str()).collect();
    let has_both = ["config", "handler"]
        .iter()
        .all(|want| names.contains(want));
    assert!(
        has_both,
        "expected config + handler projections, got {names:?}; full body:\n{body}"
    );

    let calls = collect_calls(&body);
    let has_bare_run = calls.iter().any(|(c, ct, r)| {
        c == "run" && ct.as_deref() == Some("obj.config.handler.run") && r.is_some()
    });
    assert!(
        has_bare_run,
        "expected bare 'run' Call with callee_text retained; got {calls:?}"
    );
}
|
|
|
|
/// Regression: a single-dot receiver call (`obj.foo()`) must not be
/// decomposed into a FieldProj chain, since the existing receiver
/// channel already covers it. The body must contain zero FieldProj ops
/// and a Call whose `callee_text` is `None`.
#[test]
fn phase2_e2e_simple_receiver_no_field_proj() {
    let js_lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let src = b"function f(obj) { obj.foo(); }";
    let body = parse_to_first_body(src, "javascript", js_lang, "test.js");

    let projs = collect_field_projs(&body);
    assert!(
        projs.is_empty(),
        "single-dot call should not generate FieldProj"
    );

    let calls = collect_calls(&body);
    let has_call_without_text = calls.iter().any(|call| call.1.is_none());
    assert!(
        has_call_without_text,
        "single-dot Call should have callee_text=None; calls={calls:?}"
    );
}
|
|
|
|
/// Regression: a free-function call (`foo(1, 2)`) must produce no
/// FieldProj ops and leave the per-body field interner empty.
#[test]
fn phase2_e2e_bare_call_no_field_proj() {
    let js_lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let src = b"function f() { foo(1, 2); }";
    let body = parse_to_first_body(src, "javascript", js_lang, "test.js");

    let projs = collect_field_projs(&body);
    assert!(projs.is_empty());
    assert!(
        body.field_interner.is_empty(),
        "no chain → interner stays empty"
    );
}
|
|
|
|
/// Regression (negative): a chain rooted at a global identifier
/// (`Math.foo.bar()`) still decomposes. The lowerer's external-var
/// synthesis makes `Math` available as a synthetic Param, so it serves
/// as the SSA receiver and the FieldProj op captures the field-access
/// structure even for global-rooted chains.
#[test]
fn phase2_e2e_global_root_chain_still_emits_field_proj() {
    let js_lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let src = b"function f() { Math.foo.bar(); }";
    let body = parse_to_first_body(src, "javascript", js_lang, "test.js");

    let projs = collect_field_projs(&body);
    let names: Vec<&str> = projs.iter().map(|proj| proj.2.as_str()).collect();
    let has_foo = names.iter().any(|n| *n == "foo");
    assert!(
        has_foo,
        "expected 'foo' projection (chain root Math is a synthesized external var); got {names:?}"
    );
}
|
|
|
|
/// Rust end-to-end: in `c.mu.lock()`, `c` is a function parameter, `mu`
/// a field and `lock` the method, so a FieldProj for `mu` is expected.
/// (Rust paths such as `std::env::var` use `::` and are excluded by the
/// helper's complex-token check.)
#[test]
fn phase2_e2e_rust_method_call_through_field_emits_field_proj() {
    let rust_lang = tree_sitter::Language::from(tree_sitter_rust::LANGUAGE);
    let src = b"fn f(c: &T) { c.mu.lock(); }";
    let body = parse_to_first_body(src, "rust", rust_lang, "test.rs");

    let projs = collect_field_projs(&body);
    let names: Vec<&str> = projs.iter().map(|proj| proj.2.as_str()).collect();
    let has_mu = names.iter().any(|n| *n == "mu");
    assert!(
        has_mu,
        "expected 'mu' projection from c.mu.lock(); got {names:?}; body:\n{body}"
    );

    let calls = collect_calls(&body);
    let has_bare_lock = calls
        .iter()
        .any(|(c, ct, r)| c == "lock" && ct.as_deref() == Some("c.mu.lock") && r.is_some());
    assert!(
        has_bare_lock,
        "expected bare 'lock' Call with callee_text='c.mu.lock'; got {calls:?}"
    );
}
|
|
|
|
/// Regression: `std::env::var(...)` is a Rust path (it uses `::`), not
/// a member-access chain, so the helper must bail and no FieldProj may
/// appear in the lowered body.
#[test]
fn phase2_e2e_rust_path_call_does_not_emit_field_proj() {
    let rust_lang = tree_sitter::Language::from(tree_sitter_rust::LANGUAGE);
    let src = br#"fn f() { let _ = std::env::var("X"); }"#;
    let body = parse_to_first_body(src, "rust", rust_lang, "test.rs");

    let projs = collect_field_projs(&body);
    assert!(
        projs.is_empty(),
        "Rust path expression must not be decomposed into FieldProj"
    );
}
|
|
|
|
/// Helper invariant: a body containing a chained call ends up with a
/// non-empty field interner, while a body with only a single-dot call
/// leaves the interner empty.
#[test]
fn phase2_e2e_field_interner_populated_only_when_chain_emitted() {
    // Lower a JavaScript snippet; both bodies go through the same path.
    let lower_js = |src: &[u8]| {
        parse_to_first_body(
            src,
            "javascript",
            tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE),
            "test.js",
        )
    };

    let body_chain = lower_js(b"function f(o) { o.a.b.c(); }");
    let body_plain = lower_js(b"function f(o) { o.foo(); }");

    assert!(
        !body_chain.field_interner.is_empty(),
        "interner should hold the chain field names"
    );
    assert!(
        body_plain.field_interner.is_empty(),
        "single-dot call should not populate interner"
    );
}
|
|
|
|
/// The FieldProj receiver chain must trace back to the chain root
/// (parameter `c` here) via `uses_iter()`; downstream consumers rely on
/// this contract for taint propagation. Concretely, the second
/// projection's receiver must be the value defined by the first.
#[test]
fn phase2_e2e_field_proj_chain_preserves_receiver_dataflow() {
    let js_lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let src = b"function f(c) { c.a.b.m(); }";
    let body = parse_to_first_body(src, "javascript", js_lang, "test.js");

    let projs = collect_field_projs(&body);
    assert_eq!(projs.len(), 2, "expected 2 FieldProj ops, got {projs:?}");

    // The first projection hangs off a parameter or external var; the
    // second one must hang off the first projection's defined value.
    let first_value = projs[0].0;
    let second_receiver = projs[1].1;
    assert_eq!(
        second_receiver, first_value,
        "second FieldProj must chain off the first's value"
    );
}
|
|
|
|
/// End-to-end: lowering an `obj.f = rhs` statement fills
/// `SsaBody.field_writes` with the `(receiver, FieldId)` mapping of the
/// synthetic base-update Assign.
///
/// A single write is enough: every formal gets a Param op at block 0,
/// so the very first write already finds the formal `obj` in
/// `var_stacks` as its prior reaching definition.
#[test]
fn w1_end_to_end_field_write_records_side_table_when_parent_has_prior_def() {
    let js_lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let src = b"function f(obj) { obj.cache = 42; }";
    let body = parse_to_first_body(src, "javascript", js_lang, "test.js");

    assert!(
        !body.field_writes.is_empty(),
        "single `obj.cache = 42` on a JS formal must populate \
         field_writes via the formal's W1.b synthetic Param; got \
         body.field_writes={:?}\nbody:\n{body}",
        body.field_writes,
    );

    // Whatever was recorded must name the field "cache".
    body.field_writes.values().for_each(|(_rcv, fid)| {
        assert_eq!(body.field_interner.resolve(*fid), "cache");
    });
}
|
|
|
|
/// W1.b, Python flavour: a lone `obj.cache = 42` on a formal parameter
/// populates `field_writes`, thanks to the formal's Param op.
#[test]
fn w1b_single_write_records_field_write_python() {
    let py_lang = tree_sitter::Language::from(tree_sitter_python::LANGUAGE);
    let src = b"def f(obj):\n obj.cache = 42\n";
    let body = parse_to_first_body(src, "python", py_lang, "test.py");

    let recorded_any = !body.field_writes.is_empty();
    assert!(
        recorded_any,
        "Python single `obj.cache = 42` must populate field_writes; \
         got body.field_writes={:?}\nbody:\n{body}",
        body.field_writes,
    );
}
|
|
|
|
/// W1.b, Rust flavour: a lone `obj.cache = 42` on a method-style formal
/// (`fn f(obj: &mut O)`) populates `field_writes` as well.
#[test]
fn w1b_single_write_records_field_write_rust() {
    let rust_lang = tree_sitter::Language::from(tree_sitter_rust::LANGUAGE);
    let src = b"struct O { cache: i32 } fn f(obj: &mut O) { obj.cache = 42; }";
    let body = parse_to_first_body(src, "rust", rust_lang, "test.rs");

    let recorded_any = !body.field_writes.is_empty();
    assert!(
        recorded_any,
        "Rust single `obj.cache = 42` must populate field_writes; \
         got body.field_writes={:?}\nbody:\n{body}",
        body.field_writes,
    );
}
|
|
|
|
/// Regression: a real handler-named formal (`userId`) must NOT be
/// recorded in `synthetic_externals`. The JS/TS / Java auto-seed pass
/// relies on that distinction to seed only real formals as
/// `Source(UserInput)` while skipping closure captures.
///
/// Companion integration coverage for the empty-formals shape (arrow
/// `() => {…}` lifting bubbled-up free vars as synthetic) lives in
/// `tests/fixtures/fp_guards/framework_jest_test_callback_arrow/`; that
/// fixture exercises the full CFG construction path, which this unit
/// test cannot reproduce in isolation.
#[test]
fn arrow_with_handler_formal_keeps_param_non_synthetic() {
    // Every node in this hand-built CFG belongs to the function "lookup".
    let lookup_ast = || crate::cfg::AstMeta {
        enclosing_func: Some("lookup".into()),
        ..Default::default()
    };

    // Entry → (use of userId) → Exit.
    let mut cfg: Cfg = Graph::new();
    let entry = cfg.add_node(NodeInfo {
        ast: lookup_ast(),
        ..make_node(StmtKind::Entry)
    });
    let use_node = cfg.add_node(NodeInfo {
        taint: TaintMeta {
            uses: vec!["userId".into()],
            ..Default::default()
        },
        ast: lookup_ast(),
        ..make_node(StmtKind::Seq)
    });
    let exit = cfg.add_node(NodeInfo {
        ast: lookup_ast(),
        ..make_node(StmtKind::Exit)
    });
    cfg.add_edge(entry, use_node, EdgeKind::Seq);
    cfg.add_edge(use_node, exit, EdgeKind::Seq);

    let formals = vec!["userId".to_string()];
    let body = lower_to_ssa_with_params(&cfg, entry, Some("lookup"), false, &formals)
        .expect("SSA lowering should succeed");

    // Locate the Param instruction for `userId` in the first block.
    let entry_block = body.blocks.first();
    let user_id_param = entry_block
        .and_then(|blk| {
            blk.body.iter().find(|inst| {
                matches!(inst.op, SsaOp::Param { .. })
                    && inst.var_name.as_deref() == Some("userId")
            })
        })
        .expect("userId Param should be present");

    let marked_synthetic = body.synthetic_externals.contains(&user_id_param.value);
    assert!(
        !marked_synthetic,
        "real formal `userId` must not be marked synthetic; \
         synthetic_externals={:?}",
        body.synthetic_externals,
    );
}
|
|
|
|
/// W1: plain, non-dotted assignments (`x = 1`) leave `field_writes`
/// empty. The feature is strictly additive, so behaviour for
/// non-field-write shapes is unchanged.
#[test]
fn w1_end_to_end_plain_assign_records_no_field_write() {
    let js_lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let src = b"function f() { let x = 1; x = 2; }";
    let body = parse_to_first_body(src, "javascript", js_lang, "test.js");

    let recorded_none = body.field_writes.is_empty();
    assert!(
        recorded_none,
        "plain assign must not populate field_writes; got {:?}",
        body.field_writes,
    );
}
|
|
|
|
// ─────────────────────────────────────────────────────────────────
|
|
// SSA edge cases: loop induction, multi-variable phis, multiple
|
|
// returns, switch-cases, and shadowing. These plug holes in the
|
|
// dominator-frontier / variable-renaming coverage.
|
|
// ─────────────────────────────────────────────────────────────────
|
|
|
|
/// Loop induction variable. `x = x + 1` inside a loop is the canonical
/// SSA challenge: the loop body reads `x` and then redefines it, so the
/// join with the entry definition needs a phi that tells the entry
/// value apart from the body's redefinition. Induction-variable pruning
/// depends on this shape being lowered correctly.
#[test]
fn loop_self_assignment_induction_phi_is_distinct() {
    // Shape: Entry → x=0 → Loop header → [Body: use x; define x] → header
    //                          └──(false)──→ Exit
    // The body node both uses and defines x, modelling `x = x + 1`.
    let init_taint = TaintMeta {
        defines: Some("x".into()),
        ..Default::default()
    };
    let step_taint = TaintMeta {
        defines: Some("x".into()),
        uses: vec!["x".into()],
        ..Default::default()
    };

    let mut cfg: Cfg = Graph::new();
    let entry = cfg.add_node(make_node(StmtKind::Entry));
    let init_x = cfg.add_node(NodeInfo {
        taint: init_taint,
        ..make_node(StmtKind::Seq)
    });
    let header = cfg.add_node(make_node(StmtKind::Loop));
    let body = cfg.add_node(NodeInfo {
        taint: step_taint,
        ..make_node(StmtKind::Seq)
    });
    let exit = cfg.add_node(make_node(StmtKind::Exit));

    cfg.add_edge(entry, init_x, EdgeKind::Seq);
    cfg.add_edge(init_x, header, EdgeKind::Seq);
    cfg.add_edge(header, body, EdgeKind::True);
    cfg.add_edge(body, header, EdgeKind::Back);
    cfg.add_edge(header, exit, EdgeKind::False);

    let ssa = lower_to_ssa(&cfg, entry, None, true).unwrap();

    // Three distinct SSA values are expected for `x`: the entry
    // definition, the body's redefinition, and the loop-header phi.
    let x_def_count = ssa
        .value_defs
        .iter()
        .filter(|vd| vd.var_name.as_deref() == Some("x"))
        .count();
    assert!(
        x_def_count >= 3,
        "expected ≥3 SSA values for x (init, phi, body-redef), got {}",
        x_def_count
    );

    // The header phi for x needs exactly two operands (entry value and
    // back-edge value), and they must differ; identical operands would
    // mean renaming collapsed the two arms.
    let x_phi = ssa
        .blocks
        .iter()
        .flat_map(|b| b.phis.iter())
        .find(|p| p.var_name.as_deref() == Some("x"));
    let phi_ops = x_phi
        .and_then(|p| match &p.op {
            SsaOp::Phi(ops) => Some(ops.clone()),
            _ => None,
        })
        .expect("expected a Phi op for x at the loop header");
    assert_eq!(
        phi_ops.len(),
        2,
        "loop header phi for x should have 2 operands, got {}",
        phi_ops.len()
    );

    let unique: HashSet<_> = phi_ops.iter().map(|(_, v)| v).collect();
    assert_eq!(
        unique.len(),
        2,
        "phi operands must be distinct (entry vs back-edge), got {:?}",
        phi_ops
    );
}
|
|
|
|
/// Diamond join where both arms define the same two variables: the
/// merge block needs a phi for EACH variable, not just one. Guards
/// against single-variable phi insertion.
#[test]
fn diamond_join_produces_phi_per_variable() {
    // Shape: Entry → cond → [True:  x=1; y=10] → join (uses x, y) → Exit
    //                     ↘ [False: x=2; y=20] ↗
    // A plain sequential node that defines `var`.
    let def_node = |var: &'static str| NodeInfo {
        taint: TaintMeta {
            defines: Some(var.into()),
            ..Default::default()
        },
        ..make_node(StmtKind::Seq)
    };

    let mut cfg: Cfg = Graph::new();
    let entry = cfg.add_node(make_node(StmtKind::Entry));
    let cond = cfg.add_node(make_node(StmtKind::If));
    let true_def = cfg.add_node(def_node("x"));
    let true_def2 = cfg.add_node(def_node("y"));
    let false_def = cfg.add_node(def_node("x"));
    let false_def2 = cfg.add_node(def_node("y"));
    let join = cfg.add_node(NodeInfo {
        taint: TaintMeta {
            uses: vec!["x".into(), "y".into()],
            ..Default::default()
        },
        ..make_node(StmtKind::Seq)
    });
    let exit = cfg.add_node(make_node(StmtKind::Exit));

    cfg.add_edge(entry, cond, EdgeKind::Seq);
    cfg.add_edge(cond, true_def, EdgeKind::True);
    cfg.add_edge(true_def, true_def2, EdgeKind::Seq);
    cfg.add_edge(true_def2, join, EdgeKind::Seq);
    cfg.add_edge(cond, false_def, EdgeKind::False);
    cfg.add_edge(false_def, false_def2, EdgeKind::Seq);
    cfg.add_edge(false_def2, join, EdgeKind::Seq);
    cfg.add_edge(join, exit, EdgeKind::Seq);

    let ssa = lower_to_ssa(&cfg, entry, None, true).unwrap();

    // Names of all variables that received a phi anywhere in the body.
    let phi_vars: HashSet<&str> = ssa
        .blocks
        .iter()
        .flat_map(|b| b.phis.iter())
        .filter_map(|p| p.var_name.as_deref())
        .collect();

    let has_x_phi = phi_vars.contains("x");
    assert!(
        has_x_phi,
        "expected phi for x at diamond join, got {:?}",
        phi_vars
    );
    let has_y_phi = phi_vars.contains("y");
    assert!(
        has_y_phi,
        "expected phi for y at diamond join, got {:?}",
        phi_vars
    );
}
|
|
|
|
/// Two reachable Return nodes on different branches must each yield a
/// `Terminator::Return`. Previously only the last CFG Return survived
/// as a real return while the others were Goto'd to Exit; this is the
/// regression test for the early-return check.
#[test]
fn two_branches_with_returns_each_terminates_with_return() {
    // Shape: Entry → cond → [True: r1=1; return r1] / [False: r2=2; return r2]
    // A sequential node that defines `var`.
    let def_node = |var: &'static str| NodeInfo {
        taint: TaintMeta {
            defines: Some(var.into()),
            ..Default::default()
        },
        ..make_node(StmtKind::Seq)
    };
    // A Return node that uses `var`.
    let ret_node = |var: &'static str| NodeInfo {
        taint: TaintMeta {
            uses: vec![var.into()],
            ..Default::default()
        },
        ..make_node(StmtKind::Return)
    };

    let mut cfg: Cfg = Graph::new();
    let entry = cfg.add_node(make_node(StmtKind::Entry));
    let cond = cfg.add_node(make_node(StmtKind::If));
    let r1 = cfg.add_node(def_node("r1"));
    let ret1 = cfg.add_node(ret_node("r1"));
    let r2 = cfg.add_node(def_node("r2"));
    let ret2 = cfg.add_node(ret_node("r2"));
    let exit = cfg.add_node(make_node(StmtKind::Exit));

    cfg.add_edge(entry, cond, EdgeKind::Seq);
    cfg.add_edge(cond, r1, EdgeKind::True);
    cfg.add_edge(r1, ret1, EdgeKind::Seq);
    cfg.add_edge(ret1, exit, EdgeKind::Seq);
    cfg.add_edge(cond, r2, EdgeKind::False);
    cfg.add_edge(r2, ret2, EdgeKind::Seq);
    cfg.add_edge(ret2, exit, EdgeKind::Seq);

    let ssa = lower_to_ssa(&cfg, entry, None, true).unwrap();

    // Tally the blocks whose terminator is `Terminator::Return(_)`.
    let mut return_blocks = 0;
    for blk in &ssa.blocks {
        if matches!(&blk.terminator, Terminator::Return(_)) {
            return_blocks += 1;
        }
    }
    assert_eq!(
        return_blocks, 2,
        "expected 2 Return-terminated blocks, got {}",
        return_blocks
    );
}
|
|
|
|
/// A variable defined in only ONE arm of a conditional is undefined on
/// the other path. If the join gets a phi for it, that phi must still
/// carry one operand per predecessor (an undef sentinel for the missing
/// arm); this guards against the renamer silently dropping the missing
/// operand.
#[test]
fn conditional_define_only_one_arm_phi_has_undef_operand() {
    // Shape: Entry → cond → [True:  x=1] → join (uses x) → Exit
    //                     ↘ [False: nop] ↗
    let mut cfg: Cfg = Graph::new();
    let entry = cfg.add_node(make_node(StmtKind::Entry));
    let cond = cfg.add_node(make_node(StmtKind::If));
    let true_def = cfg.add_node(NodeInfo {
        taint: TaintMeta {
            defines: Some("x".into()),
            ..Default::default()
        },
        ..make_node(StmtKind::Seq)
    });
    let false_nop = cfg.add_node(make_node(StmtKind::Seq));
    let join = cfg.add_node(NodeInfo {
        taint: TaintMeta {
            uses: vec!["x".into()],
            ..Default::default()
        },
        ..make_node(StmtKind::Seq)
    });
    let exit = cfg.add_node(make_node(StmtKind::Exit));

    cfg.add_edge(entry, cond, EdgeKind::Seq);
    cfg.add_edge(cond, true_def, EdgeKind::True);
    cfg.add_edge(true_def, join, EdgeKind::Seq);
    cfg.add_edge(cond, false_nop, EdgeKind::False);
    cfg.add_edge(false_nop, join, EdgeKind::Seq);
    cfg.add_edge(join, exit, EdgeKind::Seq);

    let ssa = lower_to_ssa(&cfg, entry, None, true).unwrap();

    // Look for a phi for x. The "undef" operand may show up as a
    // Nop-defined SsaValue or as a sentinel; either is acceptable. The
    // invariant checked is that the phi's arity equals the number of
    // predecessors.
    let x_phi = ssa
        .blocks
        .iter()
        .flat_map(|b| b.phis.iter())
        .find(|p| p.var_name.as_deref() == Some("x"));
    let x_phi_ops = x_phi.and_then(|p| match &p.op {
        SsaOp::Phi(ops) => Some(ops.clone()),
        _ => None,
    });

    // Acceptable alternative: lowering may skip phi insertion when one
    // arm is undef, in which case there is nothing to check here. The
    // baseline invariant (lowering does not panic) is already covered
    // by the `unwrap()` on `lower_to_ssa` above.
    if let Some(ops) = x_phi_ops {
        assert_eq!(
            ops.len(),
            2,
            "phi for x at the join must have 2 operands (one per pred), got {}",
            ops.len()
        );
    }
}
|
|
|
|
/// Lowering a CFG that defines no variables at all (just Entry → Exit)
/// must still succeed and yield a body with at least one block.
/// Regression for trivial-function lowering.
#[test]
fn empty_function_body_only_entry_and_exit_lowers_cleanly() {
    let mut cfg: Cfg = Graph::new();
    let entry = cfg.add_node(make_node(StmtKind::Entry));
    let exit = cfg.add_node(make_node(StmtKind::Exit));
    cfg.add_edge(entry, exit, EdgeKind::Seq);

    let ssa = lower_to_ssa(&cfg, entry, None, true).unwrap();

    // Phis and value_defs are intentionally not inspected (nothing
    // converges here); the point is that lowering completed without
    // panicking and produced some block.
    let produced_blocks = !ssa.blocks.is_empty();
    assert!(
        produced_blocks,
        "even an empty body should produce at least one block"
    );
}
|
|
}
|