mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-27 20:29:39 +02:00
2865 lines
111 KiB
Rust
2865 lines
111 KiB
Rust
|
|
#![allow(
|
||
|
|
clippy::collapsible_if,
|
||
|
|
clippy::if_same_then_else,
|
||
|
|
clippy::needless_range_loop,
|
||
|
|
clippy::only_used_in_recursion,
|
||
|
|
clippy::too_many_arguments,
|
||
|
|
clippy::type_complexity,
|
||
|
|
clippy::unnecessary_unwrap
|
||
|
|
)]
|
||
|
|
|
||
|
|
use crate::cfg::{Cfg, EdgeKind, StmtKind};
|
||
|
|
use petgraph::algo::dominators::{Dominators, simple_fast};
|
||
|
|
use petgraph::graph::NodeIndex;
|
||
|
|
use petgraph::prelude::*;
|
||
|
|
use petgraph::visit::EdgeRef;
|
||
|
|
use smallvec::SmallVec;
|
||
|
|
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet, VecDeque};
|
||
|
|
|
||
|
|
use super::ir::*;
|
||
|
|
|
||
|
|
/// Lower a CFG to SSA form for a single function scope.
|
||
|
|
///
|
||
|
|
/// `scope` filters nodes by `enclosing_func`:
|
||
|
|
/// - `None` → top-level code only (`enclosing_func.is_none()`)
|
||
|
|
/// - `Some(name)` → only nodes with `enclosing_func == Some(name)`
|
||
|
|
///
|
||
|
|
/// If `scope_all` is true, all nodes reachable from `entry` are included
|
||
|
|
/// regardless of `enclosing_func`.
|
||
|
|
pub fn lower_to_ssa(
|
||
|
|
cfg: &Cfg,
|
||
|
|
entry: NodeIndex,
|
||
|
|
scope: Option<&str>,
|
||
|
|
scope_all: bool,
|
||
|
|
) -> Result<SsaBody, SsaError> {
|
||
|
|
lower_to_ssa_inner(cfg, entry, scope, scope_all, false, &[])
|
||
|
|
}
|
||
|
|
|
||
|
|
/// Like `lower_to_ssa` but with formal parameter names supplied in declaration
|
||
|
|
/// order. External variables that match these names are placed first (in
|
||
|
|
/// declaration order) so that `Param { index }` indices 0..N correspond to
|
||
|
|
/// call-site argument positions.
|
||
|
|
pub fn lower_to_ssa_with_params(
|
||
|
|
cfg: &Cfg,
|
||
|
|
entry: NodeIndex,
|
||
|
|
scope: Option<&str>,
|
||
|
|
scope_all: bool,
|
||
|
|
formal_params: &[String],
|
||
|
|
) -> Result<SsaBody, SsaError> {
|
||
|
|
lower_to_ssa_inner(cfg, entry, scope, scope_all, false, formal_params)
|
||
|
|
}
|
||
|
|
|
||
|
|
/// Like `lower_to_ssa` but with `scope_nop`: when true, all nodes are included
|
||
|
|
/// in the SSA body for graph connectivity, but out-of-scope nodes become Nop
|
||
|
|
/// (their defines/uses are ignored). This is used for the JS two-level solve
|
||
|
|
/// where the CFG linearizes function bodies inline.
|
||
|
|
pub fn lower_to_ssa_scoped_nop(
|
||
|
|
cfg: &Cfg,
|
||
|
|
entry: NodeIndex,
|
||
|
|
scope: Option<&str>,
|
||
|
|
) -> Result<SsaBody, SsaError> {
|
||
|
|
lower_to_ssa_inner(cfg, entry, scope, false, true, &[])
|
||
|
|
}
|
||
|
|
|
||
|
|
fn lower_to_ssa_inner(
|
||
|
|
cfg: &Cfg,
|
||
|
|
entry: NodeIndex,
|
||
|
|
scope: Option<&str>,
|
||
|
|
scope_all: bool,
|
||
|
|
scope_nop: bool,
|
||
|
|
formal_params: &[String],
|
||
|
|
) -> Result<SsaBody, SsaError> {
|
||
|
|
if cfg.node_count() == 0 {
|
||
|
|
return Err(SsaError::EmptyCfg);
|
||
|
|
}
|
||
|
|
|
||
|
|
// When scope_nop is set, traverse all nodes (scope_all=true) for graph connectivity
|
||
|
|
let traverse_all = scope_all || scope_nop;
|
||
|
|
|
||
|
|
// Collect reachable nodes in scope, stripping exception edges.
|
||
|
|
let (reachable, filtered_edges, raw_exception_edges) =
|
||
|
|
collect_reachable(cfg, entry, scope, traverse_all);
|
||
|
|
|
||
|
|
// Build the set of nodes that should be treated as Nop (out-of-scope but included)
|
||
|
|
let nop_nodes: HashSet<NodeIndex> = if scope_nop {
|
||
|
|
let in_scope = |node: NodeIndex| -> bool {
|
||
|
|
let info = &cfg[node];
|
||
|
|
match scope {
|
||
|
|
None => info.ast.enclosing_func.is_none(),
|
||
|
|
Some(name) => info.ast.enclosing_func.as_deref() == Some(name),
|
||
|
|
}
|
||
|
|
};
|
||
|
|
reachable
|
||
|
|
.iter()
|
||
|
|
.filter(|&&n| !in_scope(n) && !matches!(cfg[n].kind, StmtKind::Entry | StmtKind::Exit))
|
||
|
|
.copied()
|
||
|
|
.collect()
|
||
|
|
} else {
|
||
|
|
HashSet::new()
|
||
|
|
};
|
||
|
|
if reachable.is_empty() {
|
||
|
|
return Err(SsaError::EmptyCfg);
|
||
|
|
}
|
||
|
|
|
||
|
|
// 1. Form basic blocks
|
||
|
|
let (blocks_nodes, block_of_node, block_succs, block_preds) =
|
||
|
|
form_blocks(cfg, entry, &reachable, &filtered_edges);
|
||
|
|
|
||
|
|
let num_blocks = blocks_nodes.len();
|
||
|
|
if num_blocks == 0 {
|
||
|
|
return Err(SsaError::EmptyCfg);
|
||
|
|
}
|
||
|
|
|
||
|
|
// 2. Compute dominators on block-level graph
|
||
|
|
let (block_graph, block_graph_entry) = build_block_graph(num_blocks, &block_succs, BlockId(0));
|
||
|
|
let doms = simple_fast(&block_graph, block_graph_entry);
|
||
|
|
|
||
|
|
// 3. Compute dominance frontiers
|
||
|
|
let dom_frontiers = compute_dominance_frontiers(num_blocks, &block_preds, &doms, &block_graph);
|
||
|
|
|
||
|
|
// 4. Collect variable definitions per block (skip nop nodes)
|
||
|
|
let mut var_defs = collect_var_defs(cfg, &blocks_nodes, &nop_nodes);
|
||
|
|
|
||
|
|
// 4b. For per-function scope: identify external variables (used but not defined)
|
||
|
|
// and inject synthetic Param defs at entry block so rename can find them.
|
||
|
|
// When formal_params is supplied, reorder so formal params come first in
|
||
|
|
// declaration order — this makes Param indices correspond to call-site positions.
|
||
|
|
//
|
||
|
|
let external_vars = if scope.is_some() && !scope_all && !scope_nop {
|
||
|
|
let raw = identify_external_uses(cfg, &blocks_nodes, &var_defs);
|
||
|
|
reorder_external_vars(raw, formal_params)
|
||
|
|
} else {
|
||
|
|
vec![]
|
||
|
|
};
|
||
|
|
// Register external vars as defined in block 0 so phi insertion considers them
|
||
|
|
for var in &external_vars {
|
||
|
|
var_defs.entry(var.clone()).or_default().insert(0);
|
||
|
|
}
|
||
|
|
|
||
|
|
// 5. Phi insertion (Cytron algorithm)
|
||
|
|
let phi_placements = insert_phis(&var_defs, &dom_frontiers, num_blocks);
|
||
|
|
|
||
|
|
// 6. Rename variables (dominator tree preorder walk)
|
||
|
|
let dom_tree_children = build_dom_tree_children(num_blocks, &doms, &block_graph);
|
||
|
|
let (mut ssa_blocks, mut value_defs, cfg_node_map) = rename_variables(
|
||
|
|
cfg,
|
||
|
|
&blocks_nodes,
|
||
|
|
&block_succs,
|
||
|
|
&block_preds,
|
||
|
|
&phi_placements,
|
||
|
|
&dom_tree_children,
|
||
|
|
&filtered_edges,
|
||
|
|
&external_vars,
|
||
|
|
&nop_nodes,
|
||
|
|
);
|
||
|
|
|
||
|
|
// 6b. Fill any missing phi operands with a shared Undef sentinel so
|
||
|
|
// every phi has exactly one operand per predecessor. See
|
||
|
|
// `fill_undef_phi_operands` for the invariant rationale.
|
||
|
|
fill_undef_phi_operands(
|
||
|
|
&mut ssa_blocks,
|
||
|
|
&block_preds,
|
||
|
|
&mut value_defs,
|
||
|
|
&blocks_nodes,
|
||
|
|
);
|
||
|
|
|
||
|
|
// 7. Fill in preds/succs on SsaBlocks
|
||
|
|
for bid in 0..num_blocks {
|
||
|
|
let id = BlockId(bid as u32);
|
||
|
|
ssa_blocks[bid].id = id;
|
||
|
|
ssa_blocks[bid].preds = block_preds[bid]
|
||
|
|
.iter()
|
||
|
|
.map(|&b| BlockId(b as u32))
|
||
|
|
.collect();
|
||
|
|
ssa_blocks[bid].succs = block_succs[bid]
|
||
|
|
.iter()
|
||
|
|
.map(|&b| BlockId(b as u32))
|
||
|
|
.collect();
|
||
|
|
}
|
||
|
|
|
||
|
|
// 7b. Debug assertions: verify structural invariants.
|
||
|
|
#[cfg(debug_assertions)]
|
||
|
|
{
|
||
|
|
debug_assert_bfs_ordering(&block_preds);
|
||
|
|
}
|
||
|
|
// Phi operand counts are a release-level invariant: every phi must
|
||
|
|
// have exactly one operand per predecessor. Missing operands are
|
||
|
|
// filled with an explicit Undef sentinel in
|
||
|
|
// `fill_undef_phi_operands`; extra operands would reference
|
||
|
|
// nonexistent predecessors and corrupt analysis silently.
|
||
|
|
assert_phi_operand_counts(&ssa_blocks, &block_preds);
|
||
|
|
|
||
|
|
// 8. Map exception edges from CFG node indices to SSA block IDs
|
||
|
|
let exception_edges: Vec<(BlockId, BlockId)> = raw_exception_edges
|
||
|
|
.iter()
|
||
|
|
.filter_map(|(src_node, catch_node)| {
|
||
|
|
let src_block = block_of_node.get(src_node)?;
|
||
|
|
let catch_block = block_of_node.get(catch_node)?;
|
||
|
|
Some((BlockId(*src_block as u32), BlockId(*catch_block as u32)))
|
||
|
|
})
|
||
|
|
.collect();
|
||
|
|
|
||
|
|
let body = SsaBody {
|
||
|
|
blocks: ssa_blocks,
|
||
|
|
entry: BlockId(0),
|
||
|
|
value_defs,
|
||
|
|
cfg_node_map,
|
||
|
|
exception_edges,
|
||
|
|
};
|
||
|
|
|
||
|
|
// 9. Catch-block reachability invariant.
|
||
|
|
//
|
||
|
|
// A CatchParam-carrying block that is neither reachable from entry nor
|
||
|
|
// listed as an exception target indicates a CFG construction bug. Debug
|
||
|
|
// builds panic loudly; release builds warn, record an engine note so
|
||
|
|
// downstream findings carry "SSA lowering bailed" provenance, and fall
|
||
|
|
// through to the existing orphan handling above (the "all definitions"
|
||
|
|
// fallback) which remains sound for taint reachability.
|
||
|
|
check_catch_block_reachability_gated(&body);
|
||
|
|
|
||
|
|
Ok(body)
|
||
|
|
}
|
||
|
|
|
||
|
|
/// Runtime gate around [`check_catch_block_reachability`] that panics in
|
||
|
|
/// debug builds and warns + records an engine note in release builds.
|
||
|
|
///
|
||
|
|
/// The current lowering's orphan handling (`process_block` fallback in
|
||
|
|
/// `rename_variables`) already widens to an "all definitions" conservative
|
||
|
|
/// state for blocks without predecessors. That preserves soundness for
|
||
|
|
/// taint reachability but masks CFG-builder bugs: this gate surfaces them.
|
||
|
|
fn check_catch_block_reachability_gated(body: &SsaBody) {
|
||
|
|
let result = super::invariants::check_catch_block_reachability(body);
|
||
|
|
if let Err(err) = result {
|
||
|
|
#[cfg(debug_assertions)]
|
||
|
|
{
|
||
|
|
if !catch_invariant_do_not_panic() {
|
||
|
|
panic!(
|
||
|
|
"SSA catch-block reachability invariant violated:\n{}",
|
||
|
|
err.joined()
|
||
|
|
);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
tracing::warn!(
|
||
|
|
violations = %err.joined(),
|
||
|
|
"SSA catch-block reachability invariant violated; proceeding with \
|
||
|
|
conservative orphan fallback"
|
||
|
|
);
|
||
|
|
crate::taint::ssa_transfer::record_engine_note(
|
||
|
|
crate::engine_notes::EngineNote::SsaLoweringBailed {
|
||
|
|
reason: format!("catch_block_orphan: {}", err.joined()),
|
||
|
|
},
|
||
|
|
);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
// Test-only escape hatch. While the flag is set,
// `check_catch_block_reachability_gated` skips its debug-build panic and
// takes the warn + engine-note path instead. Used by the invariant test that
// builds a synthetic orphan catch body. The flag is per-thread and starts
// out `false`.
#[cfg(debug_assertions)]
thread_local! {
    static CATCH_INVARIANT_DO_NOT_PANIC: std::cell::Cell<bool> =
        const { std::cell::Cell::new(false) };
}

/// Sets the current thread's "do not panic" flag for the catch-block
/// reachability gate (debug builds only).
#[cfg(debug_assertions)]
#[allow(dead_code)]
pub(crate) fn set_catch_invariant_do_not_panic(on: bool) {
    CATCH_INVARIANT_DO_NOT_PANIC.with(|flag| {
        flag.set(on);
    });
}

/// Reads the current thread's "do not panic" flag (debug builds only).
#[cfg(debug_assertions)]
fn catch_invariant_do_not_panic() -> bool {
    CATCH_INVARIANT_DO_NOT_PANIC.with(std::cell::Cell::get)
}
|
||
|
|
|
||
|
|
/// Collect reachable nodes (BFS from entry), filtering by scope and stripping exception edges.
|
||
|
|
/// Returns (reachable set, filtered edges, exception edges as (src_node, catch_node)).
|
||
|
|
fn collect_reachable(
|
||
|
|
cfg: &Cfg,
|
||
|
|
entry: NodeIndex,
|
||
|
|
scope: Option<&str>,
|
||
|
|
scope_all: bool,
|
||
|
|
) -> (
|
||
|
|
HashSet<NodeIndex>,
|
||
|
|
Vec<(NodeIndex, NodeIndex, EdgeKind)>,
|
||
|
|
Vec<(NodeIndex, NodeIndex)>,
|
||
|
|
) {
|
||
|
|
let mut reachable = HashSet::new();
|
||
|
|
let mut edges = Vec::new();
|
||
|
|
let mut exception_edges = Vec::new();
|
||
|
|
let mut queue = VecDeque::new();
|
||
|
|
|
||
|
|
// Check if a node is in scope
|
||
|
|
let in_scope = |node: NodeIndex| -> bool {
|
||
|
|
if scope_all {
|
||
|
|
return true;
|
||
|
|
}
|
||
|
|
let info = &cfg[node];
|
||
|
|
match scope {
|
||
|
|
None => info.ast.enclosing_func.is_none(),
|
||
|
|
Some(name) => info.ast.enclosing_func.as_deref() == Some(name),
|
||
|
|
}
|
||
|
|
};
|
||
|
|
|
||
|
|
if !in_scope(entry) && !scope_all {
|
||
|
|
// Entry must be in scope; for top-level, Entry node often has no enclosing_func
|
||
|
|
// Accept Entry/Exit nodes regardless of scope
|
||
|
|
if !matches!(cfg[entry].kind, StmtKind::Entry | StmtKind::Exit) {
|
||
|
|
return (reachable, edges, exception_edges);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
reachable.insert(entry);
|
||
|
|
queue.push_back(entry);
|
||
|
|
|
||
|
|
while let Some(node) = queue.pop_front() {
|
||
|
|
for edge in cfg.edges(node) {
|
||
|
|
let kind = *edge.weight();
|
||
|
|
let target = edge.target();
|
||
|
|
|
||
|
|
// Strip exception edges from the graph, but still visit targets
|
||
|
|
// so catch-block nodes are included in the SSA body.
|
||
|
|
if matches!(kind, EdgeKind::Exception) {
|
||
|
|
if (in_scope(target)
|
||
|
|
|| matches!(cfg[target].kind, StmtKind::Entry | StmtKind::Exit))
|
||
|
|
&& reachable.insert(target)
|
||
|
|
{
|
||
|
|
queue.push_back(target);
|
||
|
|
}
|
||
|
|
// Record exception edge for taint seeding
|
||
|
|
exception_edges.push((node, target));
|
||
|
|
continue;
|
||
|
|
}
|
||
|
|
|
||
|
|
// Allow Entry/Exit nodes and nodes in scope
|
||
|
|
if !in_scope(target) && !matches!(cfg[target].kind, StmtKind::Entry | StmtKind::Exit) {
|
||
|
|
continue;
|
||
|
|
}
|
||
|
|
|
||
|
|
edges.push((node, target, kind));
|
||
|
|
|
||
|
|
if reachable.insert(target) {
|
||
|
|
queue.push_back(target);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
(reachable, edges, exception_edges)
|
||
|
|
}
|
||
|
|
|
||
|
|
/// Form basic blocks from filtered CFG nodes.
|
||
|
|
///
|
||
|
|
/// Returns:
|
||
|
|
/// - blocks_nodes: Vec<Vec<NodeIndex>> — nodes per block (in order)
|
||
|
|
/// - block_of_node: HashMap<NodeIndex, usize> — node → block index
|
||
|
|
/// - block_succs: Vec<Vec<usize>> — successors per block
|
||
|
|
/// - block_preds: Vec<Vec<usize>> — predecessors per block
|
||
|
|
fn form_blocks(
|
||
|
|
cfg: &Cfg,
|
||
|
|
entry: NodeIndex,
|
||
|
|
reachable: &HashSet<NodeIndex>,
|
||
|
|
filtered_edges: &[(NodeIndex, NodeIndex, EdgeKind)],
|
||
|
|
) -> (
|
||
|
|
Vec<Vec<NodeIndex>>,
|
||
|
|
HashMap<NodeIndex, usize>,
|
||
|
|
Vec<Vec<usize>>,
|
||
|
|
Vec<Vec<usize>>,
|
||
|
|
) {
|
||
|
|
// Build adjacency from filtered edges
|
||
|
|
let mut successors: HashMap<NodeIndex, Vec<(NodeIndex, EdgeKind)>> = HashMap::new();
|
||
|
|
let mut in_degree: HashMap<NodeIndex, usize> = HashMap::new();
|
||
|
|
let mut has_branching_in: HashMap<NodeIndex, bool> = HashMap::new();
|
||
|
|
|
||
|
|
for node in reachable {
|
||
|
|
in_degree.entry(*node).or_insert(0);
|
||
|
|
has_branching_in.entry(*node).or_insert(false);
|
||
|
|
}
|
||
|
|
|
||
|
|
// CFG construction wires every Return / Throw node to the synthetic
|
||
|
|
// function-exit node via a `Seq` edge so the underlying graph is a single
|
||
|
|
// connected component. Those edges are bookkeeping only: control flow
|
||
|
|
// does not actually fall through a Return into the exit block. Treating
|
||
|
|
// them as block successors causes an early-return block to share its
|
||
|
|
// post-exit body with the function's fall-through tail, silently merging
|
||
|
|
// two distinct paths into one (the "merged-return" defect). Strip them
|
||
|
|
// here so block-level adjacency reflects real control flow; the SSA
|
||
|
|
// terminator for the containing block becomes Return / Unreachable
|
||
|
|
// instead of Goto(exit).
|
||
|
|
let is_terminating =
|
||
|
|
|n: NodeIndex| -> bool { matches!(cfg[n].kind, StmtKind::Return | StmtKind::Throw) };
|
||
|
|
|
||
|
|
for &(src, tgt, kind) in filtered_edges {
|
||
|
|
if is_terminating(src) {
|
||
|
|
continue;
|
||
|
|
}
|
||
|
|
successors.entry(src).or_default().push((tgt, kind));
|
||
|
|
*in_degree.entry(tgt).or_insert(0) += 1;
|
||
|
|
if matches!(kind, EdgeKind::True | EdgeKind::False | EdgeKind::Back) {
|
||
|
|
*has_branching_in.entry(tgt).or_insert(false) = true;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
// Determine block leaders
|
||
|
|
let mut is_leader: HashSet<NodeIndex> = HashSet::new();
|
||
|
|
is_leader.insert(entry); // entry is always a leader
|
||
|
|
|
||
|
|
for &node in reachable {
|
||
|
|
let in_deg = in_degree.get(&node).copied().unwrap_or(0);
|
||
|
|
if in_deg > 1 || has_branching_in.get(&node).copied().unwrap_or(false) {
|
||
|
|
is_leader.insert(node);
|
||
|
|
}
|
||
|
|
// Orphan nodes (reachable via exception edges but no filtered predecessors)
|
||
|
|
// must be leaders so they get their own block (e.g. catch block entries).
|
||
|
|
if in_deg == 0 && node != entry {
|
||
|
|
is_leader.insert(node);
|
||
|
|
}
|
||
|
|
// Node following a multi-exit node
|
||
|
|
let succs = successors.get(&node).map(|s| s.len()).unwrap_or(0);
|
||
|
|
if succs > 1 {
|
||
|
|
for &(tgt, _) in successors.get(&node).unwrap_or(&vec![]) {
|
||
|
|
is_leader.insert(tgt);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
// Build blocks by following single-successor Seq edges from each leader
|
||
|
|
let mut blocks_nodes: Vec<Vec<NodeIndex>> = Vec::new();
|
||
|
|
let mut block_of_node: HashMap<NodeIndex, usize> = HashMap::new();
|
||
|
|
let mut visited: HashSet<NodeIndex> = HashSet::new();
|
||
|
|
|
||
|
|
// BFS order to assign blocks deterministically (entry first)
|
||
|
|
let mut leader_queue: VecDeque<NodeIndex> = VecDeque::new();
|
||
|
|
leader_queue.push_back(entry);
|
||
|
|
let mut leader_visited: HashSet<NodeIndex> = HashSet::new();
|
||
|
|
leader_visited.insert(entry);
|
||
|
|
|
||
|
|
// Discover leaders in BFS order over `cfg`, but skip edges whose
|
||
|
|
// source is a terminating (Return / Throw) node. Walking the raw
|
||
|
|
// `cfg` directly here would re-introduce the bookkeeping
|
||
|
|
// Return/Throw → fn_exit edges we just stripped — fn_exit (or any
|
||
|
|
// post-return join) would be discovered through them and assigned a
|
||
|
|
// block ID before its true block-level predecessors, breaking the
|
||
|
|
// BFS-forward-pred invariant (`debug_assert_bfs_ordering`).
|
||
|
|
//
|
||
|
|
// We can't simply BFS our `successors` map because that excludes
|
||
|
|
// exception edges entirely (collect_reachable strips them and records
|
||
|
|
// them separately in `exception_edges`). Catch-block nodes are still
|
||
|
|
// in `reachable` and must be discoverable as leaders via the
|
||
|
|
// try-body → catch path — only the terminating-source bookkeeping
|
||
|
|
// edges are bogus.
|
||
|
|
{
|
||
|
|
let mut bfs_queue: VecDeque<NodeIndex> = VecDeque::new();
|
||
|
|
let mut bfs_seen: HashSet<NodeIndex> = HashSet::new();
|
||
|
|
bfs_queue.push_back(entry);
|
||
|
|
bfs_seen.insert(entry);
|
||
|
|
while let Some(node) = bfs_queue.pop_front() {
|
||
|
|
if reachable.contains(&node) && is_leader.contains(&node) && leader_visited.insert(node)
|
||
|
|
{
|
||
|
|
leader_queue.push_back(node);
|
||
|
|
}
|
||
|
|
if is_terminating(node) {
|
||
|
|
continue;
|
||
|
|
}
|
||
|
|
for edge in cfg.edges(node) {
|
||
|
|
let tgt = edge.target();
|
||
|
|
if reachable.contains(&tgt) && bfs_seen.insert(tgt) {
|
||
|
|
bfs_queue.push_back(tgt);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
// Belt-and-braces: any leader still unvisited gets appended in
|
||
|
|
// CFG-node-index order so block-ID assignment remains
|
||
|
|
// deterministic. We do NOT include the synthetic function-exit
|
||
|
|
// node when it is unreachable through filtered edges — that
|
||
|
|
// happens whenever every path in the body terminates explicitly
|
||
|
|
// (e.g. a function whose only return is `return buf.toString()`
|
||
|
|
// at the tail). Including it would emit an orphan SSA block
|
||
|
|
// with no real predecessors and no semantic meaning, which the
|
||
|
|
// structural reachability invariant correctly rejects.
|
||
|
|
// Genuine orphan handlers (catch blocks reached via stripped
|
||
|
|
// exception edges) keep their entries here.
|
||
|
|
let mut orphan_leaders: Vec<NodeIndex> = is_leader
|
||
|
|
.iter()
|
||
|
|
.copied()
|
||
|
|
.filter(|n| !leader_visited.contains(n))
|
||
|
|
.filter(|n| !matches!(cfg[*n].kind, StmtKind::Exit))
|
||
|
|
.collect();
|
||
|
|
orphan_leaders.sort_by_key(|n| n.index());
|
||
|
|
for n in orphan_leaders {
|
||
|
|
if leader_visited.insert(n) {
|
||
|
|
leader_queue.push_back(n);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
for leader in leader_queue {
|
||
|
|
if visited.contains(&leader) {
|
||
|
|
continue;
|
||
|
|
}
|
||
|
|
|
||
|
|
let block_idx = blocks_nodes.len();
|
||
|
|
let mut block = vec![leader];
|
||
|
|
visited.insert(leader);
|
||
|
|
block_of_node.insert(leader, block_idx);
|
||
|
|
|
||
|
|
// Follow single-successor Seq edges
|
||
|
|
let mut current = leader;
|
||
|
|
loop {
|
||
|
|
let succs = successors.get(¤t).cloned().unwrap_or_default();
|
||
|
|
if succs.len() == 1
|
||
|
|
&& matches!(succs[0].1, EdgeKind::Seq)
|
||
|
|
&& !is_leader.contains(&succs[0].0)
|
||
|
|
{
|
||
|
|
let next = succs[0].0;
|
||
|
|
if visited.insert(next) {
|
||
|
|
block.push(next);
|
||
|
|
block_of_node.insert(next, block_idx);
|
||
|
|
current = next;
|
||
|
|
} else {
|
||
|
|
break;
|
||
|
|
}
|
||
|
|
} else {
|
||
|
|
break;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
blocks_nodes.push(block);
|
||
|
|
}
|
||
|
|
|
||
|
|
// Build block-level successor/predecessor lists
|
||
|
|
let num_blocks = blocks_nodes.len();
|
||
|
|
let mut block_succs: Vec<Vec<usize>> = vec![vec![]; num_blocks];
|
||
|
|
let mut block_preds: Vec<Vec<usize>> = vec![vec![]; num_blocks];
|
||
|
|
|
||
|
|
for &(src, tgt, _kind) in filtered_edges {
|
||
|
|
// Mirror the adjacency-construction filter above: edges out of
|
||
|
|
// Return/Throw CFG nodes are not real successors at the block level.
|
||
|
|
if is_terminating(src) {
|
||
|
|
continue;
|
||
|
|
}
|
||
|
|
if let (Some(&src_blk), Some(&tgt_blk)) = (block_of_node.get(&src), block_of_node.get(&tgt))
|
||
|
|
{
|
||
|
|
if src_blk != tgt_blk && !block_succs[src_blk].contains(&tgt_blk) {
|
||
|
|
block_succs[src_blk].push(tgt_blk);
|
||
|
|
block_preds[tgt_blk].push(src_blk);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
(blocks_nodes, block_of_node, block_succs, block_preds)
|
||
|
|
}
|
||
|
|
|
||
|
|
/// Build a block-level petgraph for dominator computation.
|
||
|
|
fn build_block_graph(
|
||
|
|
num_blocks: usize,
|
||
|
|
block_succs: &[Vec<usize>],
|
||
|
|
_entry: BlockId,
|
||
|
|
) -> (Graph<BlockId, ()>, NodeIndex) {
|
||
|
|
let mut g: Graph<BlockId, ()> = Graph::new();
|
||
|
|
let mut block_nodes: Vec<NodeIndex> = Vec::with_capacity(num_blocks);
|
||
|
|
|
||
|
|
for i in 0..num_blocks {
|
||
|
|
block_nodes.push(g.add_node(BlockId(i as u32)));
|
||
|
|
}
|
||
|
|
|
||
|
|
for (i, succs) in block_succs.iter().enumerate() {
|
||
|
|
for &s in succs {
|
||
|
|
g.add_edge(block_nodes[i], block_nodes[s], ());
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
let entry_gnode = block_nodes[0]; // block 0 is always entry
|
||
|
|
(g, entry_gnode)
|
||
|
|
}
|
||
|
|
|
||
|
|
/// Compute dominance frontiers for all blocks.
|
||
|
|
fn compute_dominance_frontiers(
|
||
|
|
num_blocks: usize,
|
||
|
|
block_preds: &[Vec<usize>],
|
||
|
|
doms: &Dominators<NodeIndex>,
|
||
|
|
block_graph: &Graph<BlockId, ()>,
|
||
|
|
) -> Vec<HashSet<usize>> {
|
||
|
|
let mut df: Vec<HashSet<usize>> = vec![HashSet::new(); num_blocks];
|
||
|
|
|
||
|
|
// Map block index → graph NodeIndex
|
||
|
|
let block_node: Vec<NodeIndex> = block_graph.node_indices().collect();
|
||
|
|
|
||
|
|
for n in 0..num_blocks {
|
||
|
|
let preds = &block_preds[n];
|
||
|
|
if preds.len() >= 2 {
|
||
|
|
for &p in preds {
|
||
|
|
let mut runner = p;
|
||
|
|
// idom(n) in the block graph
|
||
|
|
let n_gnode = block_node[n];
|
||
|
|
let idom_n = doms.immediate_dominator(n_gnode);
|
||
|
|
loop {
|
||
|
|
let runner_gnode = block_node[runner];
|
||
|
|
if idom_n == Some(runner_gnode) {
|
||
|
|
break;
|
||
|
|
}
|
||
|
|
df[runner].insert(n);
|
||
|
|
// Move runner to its immediate dominator
|
||
|
|
match doms.immediate_dominator(runner_gnode) {
|
||
|
|
Some(idom_runner) if idom_runner != runner_gnode => {
|
||
|
|
// Find block index from graph node
|
||
|
|
runner = block_graph[idom_runner].0 as usize;
|
||
|
|
}
|
||
|
|
_ => break, // reached root
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
df
|
||
|
|
}
|
||
|
|
|
||
|
|
/// Identify variables used but not defined within the scoped blocks.
|
||
|
|
/// These represent external (e.g. global/top-level) variables that need
|
||
|
|
/// synthetic Param instructions so the SSA rename pass can reference them.
|
||
|
|
fn identify_external_uses(
|
||
|
|
cfg: &Cfg,
|
||
|
|
blocks_nodes: &[Vec<NodeIndex>],
|
||
|
|
var_defs: &BTreeMap<String, HashSet<usize>>,
|
||
|
|
) -> Vec<String> {
|
||
|
|
let mut used: HashSet<String> = HashSet::new();
|
||
|
|
for nodes in blocks_nodes {
|
||
|
|
for &node in nodes {
|
||
|
|
for u in &cfg[node].taint.uses {
|
||
|
|
used.insert(u.clone());
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
// External = used but never defined in any block
|
||
|
|
let mut external: Vec<String> = used
|
||
|
|
.into_iter()
|
||
|
|
.filter(|u| !var_defs.contains_key(u))
|
||
|
|
.collect();
|
||
|
|
external.sort(); // deterministic order
|
||
|
|
external
|
||
|
|
}
|
||
|
|
|
||
|
|
/// Reports whether `name` is one of the reserved method-receiver
/// identifiers: `self` (Rust/Python) or `this` (JS/TS/Java/PHP/C++).
///
/// Receivers get their own IR node ([`SsaOp::SelfParam`]) and are tracked
/// separately from positional parameters. Keeping the name test in this one
/// helper keeps the set of receiver names consistent across lowering and
/// summary extraction.
pub(crate) fn is_receiver_name(name: &str) -> bool {
    name == "self" || name == "this"
}
|
||
|
|
|
||
|
|
/// Reorder external variables so the receiver (`self`/`this`) comes first,
|
||
|
|
/// followed by formal positional parameters in declaration order, followed
|
||
|
|
/// by remaining external vars in alphabetical order.
|
||
|
|
///
|
||
|
|
/// This fixed order is what the synthetic-parameter injection step relies
|
||
|
|
/// on to emit one [`SsaOp::SelfParam`] (for the leading receiver slot, when
|
||
|
|
/// present) followed by a contiguous run of [`SsaOp::Param { index }`] values
|
||
|
|
/// whose indices 0..N correspond exactly to positional call-site argument
|
||
|
|
/// positions — no receiver offset required anywhere downstream.
|
||
|
|
fn reorder_external_vars(external: Vec<String>, formal_params: &[String]) -> Vec<String> {
|
||
|
|
if formal_params.is_empty() {
|
||
|
|
return external; // no reordering — preserve existing alphabetical sort
|
||
|
|
}
|
||
|
|
let ext_set: HashSet<&str> = external.iter().map(|s| s.as_str()).collect();
|
||
|
|
let formal_set: HashSet<&str> = formal_params.iter().map(|s| s.as_str()).collect();
|
||
|
|
let mut result = Vec::with_capacity(external.len());
|
||
|
|
// Receiver first (highest priority), regardless of whether it appears in
|
||
|
|
// formal_params or was discovered purely as an external reference.
|
||
|
|
// Languages with explicit self (Rust/Python) put it in formal_params;
|
||
|
|
// languages with implicit this (JS/TS/Java/PHP) have it only as an
|
||
|
|
// external reference. Either way, SelfParam should be emitted first.
|
||
|
|
if ext_set.contains("self") {
|
||
|
|
result.push("self".to_string());
|
||
|
|
} else if ext_set.contains("this") {
|
||
|
|
result.push("this".to_string());
|
||
|
|
}
|
||
|
|
// Formal positional params next (declaration order), skipping any
|
||
|
|
// receiver that was already emitted above.
|
||
|
|
for p in formal_params {
|
||
|
|
if is_receiver_name(p) {
|
||
|
|
continue;
|
||
|
|
}
|
||
|
|
if ext_set.contains(p.as_str()) {
|
||
|
|
result.push(p.clone());
|
||
|
|
}
|
||
|
|
}
|
||
|
|
// Remaining external vars alphabetically (external is already sorted),
|
||
|
|
// excluding anything already placed.
|
||
|
|
let placed: HashSet<String> = result.iter().cloned().collect();
|
||
|
|
for v in external {
|
||
|
|
if placed.contains(&v) {
|
||
|
|
continue;
|
||
|
|
}
|
||
|
|
if !formal_set.contains(v.as_str()) && !is_receiver_name(&v) {
|
||
|
|
result.push(v);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
result
|
||
|
|
}
|
||
|
|
|
||
|
|
/// Collect variable definitions per block: var_name → set of block indices.
|
||
|
|
/// Nodes in `nop_nodes` are skipped (they won't define variables in SSA).
|
||
|
|
fn collect_var_defs(
|
||
|
|
cfg: &Cfg,
|
||
|
|
blocks_nodes: &[Vec<NodeIndex>],
|
||
|
|
nop_nodes: &HashSet<NodeIndex>,
|
||
|
|
) -> BTreeMap<String, HashSet<usize>> {
|
||
|
|
let mut defs: BTreeMap<String, HashSet<usize>> = BTreeMap::new();
|
||
|
|
|
||
|
|
for (block_idx, nodes) in blocks_nodes.iter().enumerate() {
|
||
|
|
for &node in nodes {
|
||
|
|
if nop_nodes.contains(&node) {
|
||
|
|
continue;
|
||
|
|
}
|
||
|
|
if let Some(ref d) = cfg[node].taint.defines {
|
||
|
|
defs.entry(d.clone()).or_default().insert(block_idx);
|
||
|
|
// Register parent prefixes for synthetic base updates on field writes.
|
||
|
|
// E.g. `obj.data` also registers `obj` so phi insertion works correctly.
|
||
|
|
let mut path = d.as_str();
|
||
|
|
while let Some(dot_pos) = path.rfind('.') {
|
||
|
|
path = &path[..dot_pos];
|
||
|
|
defs.entry(path.to_string()).or_default().insert(block_idx);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
// Register extra defines from destructuring patterns.
|
||
|
|
for ed in &cfg[node].taint.extra_defines {
|
||
|
|
defs.entry(ed.clone()).or_default().insert(block_idx);
|
||
|
|
}
|
||
|
|
// Implicit definitions for uninitialized declarations (e.g., C/C++
|
||
|
|
// `char buf[256]`). The variable appears in uses but not defines
|
||
|
|
// because def_use() doesn't treat declarations without initializers
|
||
|
|
// as definitions. Registering here ensures phi insertion at join points.
|
||
|
|
if cfg[node].taint.defines.is_none()
|
||
|
|
&& cfg[node].call.callee.is_none()
|
||
|
|
&& cfg[node].kind == StmtKind::Seq
|
||
|
|
&& cfg[node].taint.uses.len() == 1
|
||
|
|
{
|
||
|
|
defs.entry(cfg[node].taint.uses[0].clone())
|
||
|
|
.or_default()
|
||
|
|
.insert(block_idx);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
defs
|
||
|
|
}
|
||
|
|
|
||
|
|
/// Cytron-style phi placement.
///
/// For each variable, starting from the blocks that define it, a phi is
/// placed in every block of their dominance frontiers. Each newly placed phi
/// is itself a definition, so its block is processed in turn.
///
/// Returns one `BTreeSet<String>` per block (sized from `dom_frontiers`;
/// `_num_blocks` is ignored) naming the variables that need a phi there.
/// The ordered set gives consumers that iterate it, notably
/// `rename_variables`, a deterministic alphabetical order regardless of
/// hasher state. The placement itself does not depend on processing order.
fn insert_phis(
    var_defs: &BTreeMap<String, HashSet<usize>>,
    dom_frontiers: &[HashSet<usize>],
    _num_blocks: usize,
) -> Vec<BTreeSet<String>> {
    let mut placements: Vec<BTreeSet<String>> = vec![BTreeSet::new(); dom_frontiers.len()];

    for (name, def_sites) in var_defs {
        let mut pending: Vec<usize> = def_sites.iter().copied().collect();
        let mut phi_sites: HashSet<usize> = HashSet::new();

        while let Some(block) = pending.pop() {
            for &frontier in &dom_frontiers[block] {
                if !phi_sites.insert(frontier) {
                    continue; // this variable already has a phi here
                }
                placements[frontier].insert(name.clone());
                // Original definition sites are already in the worklist;
                // only a block that gained a definition via this phi is new.
                if !def_sites.contains(&frontier) {
                    pending.push(frontier);
                }
            }
        }
    }

    placements
}
|
||
|
|
|
||
|
|
/// Build dominator tree children lists.
|
||
|
|
fn build_dom_tree_children(
|
||
|
|
num_blocks: usize,
|
||
|
|
doms: &Dominators<NodeIndex>,
|
||
|
|
block_graph: &Graph<BlockId, ()>,
|
||
|
|
) -> Vec<Vec<usize>> {
|
||
|
|
let mut children: Vec<Vec<usize>> = vec![vec![]; num_blocks];
|
||
|
|
let block_nodes: Vec<NodeIndex> = block_graph.node_indices().collect();
|
||
|
|
|
||
|
|
for i in 0..num_blocks {
|
||
|
|
if let Some(idom) = doms.immediate_dominator(block_nodes[i]) {
|
||
|
|
let idom_idx = block_graph[idom].0 as usize;
|
||
|
|
if idom_idx != i {
|
||
|
|
children[idom_idx].push(i);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
children
|
||
|
|
}
|
||
|
|
|
||
|
|
/// Rename variables: dominator tree preorder walk with per-variable stacks.
|
||
|
|
///
|
||
|
|
/// Returns (ssa_blocks, value_defs, cfg_node_map).
|
||
|
|
fn rename_variables(
|
||
|
|
cfg: &Cfg,
|
||
|
|
blocks_nodes: &[Vec<NodeIndex>],
|
||
|
|
block_succs: &[Vec<usize>],
|
||
|
|
block_preds: &[Vec<usize>],
|
||
|
|
phi_placements: &[BTreeSet<String>],
|
||
|
|
dom_tree_children: &[Vec<usize>],
|
||
|
|
filtered_edges: &[(NodeIndex, NodeIndex, EdgeKind)],
|
||
|
|
external_vars: &[String],
|
||
|
|
nop_nodes: &HashSet<NodeIndex>,
|
||
|
|
) -> (Vec<SsaBlock>, Vec<ValueDef>, HashMap<NodeIndex, SsaValue>) {
|
||
|
|
let num_blocks = blocks_nodes.len();
|
||
|
|
let mut next_value: u32 = 0;
|
||
|
|
let mut value_defs: Vec<ValueDef> = Vec::new();
|
||
|
|
let mut cfg_node_map: HashMap<NodeIndex, SsaValue> = HashMap::new();
|
||
|
|
|
||
|
|
// Per-variable rename stacks
|
||
|
|
let mut var_stacks: HashMap<String, Vec<SsaValue>> = HashMap::new();
|
||
|
|
|
||
|
|
// Pre-allocate SSA blocks
|
||
|
|
let mut ssa_blocks: Vec<SsaBlock> = (0..num_blocks)
|
||
|
|
.map(|i| SsaBlock {
|
||
|
|
id: BlockId(i as u32),
|
||
|
|
phis: Vec::new(),
|
||
|
|
body: Vec::new(),
|
||
|
|
terminator: Terminator::Unreachable,
|
||
|
|
preds: SmallVec::new(),
|
||
|
|
succs: SmallVec::new(),
|
||
|
|
})
|
||
|
|
.collect();
|
||
|
|
|
||
|
|
// `BTreeMap` guarantees a deterministic (alphabetical) iteration order when
|
||
|
|
// pushing phi values onto `var_stacks` and when filling operands on
|
||
|
|
// successor phis — both sites are observable in SSA numbering if they
|
||
|
|
// reordered between runs.
|
||
|
|
let mut phi_values: Vec<BTreeMap<String, SsaValue>> = vec![BTreeMap::new(); num_blocks];
|
||
|
|
|
||
|
|
// Pre-create phi instructions for all blocks (operands filled during rename)
|
||
|
|
for (block_idx, vars) in phi_placements.iter().enumerate() {
|
||
|
|
let block_id = BlockId(block_idx as u32);
|
||
|
|
let cfg_node = blocks_nodes[block_idx][0]; // anchor to first node
|
||
|
|
for var in vars {
|
||
|
|
let v = SsaValue(next_value);
|
||
|
|
next_value += 1;
|
||
|
|
value_defs.push(ValueDef {
|
||
|
|
var_name: Some(var.clone()),
|
||
|
|
cfg_node,
|
||
|
|
block: block_id,
|
||
|
|
});
|
||
|
|
phi_values[block_idx].insert(var.clone(), v);
|
||
|
|
ssa_blocks[block_idx].phis.push(SsaInst {
|
||
|
|
value: v,
|
||
|
|
op: SsaOp::Phi(SmallVec::new()),
|
||
|
|
cfg_node,
|
||
|
|
var_name: Some(var.clone()),
|
||
|
|
span: cfg[cfg_node].ast.span,
|
||
|
|
});
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
// Process blocks in dominator tree preorder
|
||
|
|
// We need to track stack depths to restore after processing subtrees
|
||
|
|
// Use iterative approach: process block, then process children, restore
|
||
|
|
|
||
|
|
// Simpler approach: preorder walk with explicit save/restore
|
||
|
|
fn process_block(
|
||
|
|
block_idx: usize,
|
||
|
|
cfg: &Cfg,
|
||
|
|
blocks_nodes: &[Vec<NodeIndex>],
|
||
|
|
block_succs: &[Vec<usize>],
|
||
|
|
block_preds: &[Vec<usize>],
|
||
|
|
phi_placements: &[BTreeSet<String>],
|
||
|
|
dom_tree_children: &[Vec<usize>],
|
||
|
|
filtered_edges: &[(NodeIndex, NodeIndex, EdgeKind)],
|
||
|
|
var_stacks: &mut HashMap<String, Vec<SsaValue>>,
|
||
|
|
ssa_blocks: &mut [SsaBlock],
|
||
|
|
phi_values: &mut [BTreeMap<String, SsaValue>],
|
||
|
|
value_defs: &mut Vec<ValueDef>,
|
||
|
|
cfg_node_map: &mut HashMap<NodeIndex, SsaValue>,
|
||
|
|
next_value: &mut u32,
|
||
|
|
nop_nodes: &HashSet<NodeIndex>,
|
||
|
|
) {
|
||
|
|
let block_id = BlockId(block_idx as u32);
|
||
|
|
|
||
|
|
// Save stack depths for rollback
|
||
|
|
let saved: Vec<(String, usize)> = var_stacks
|
||
|
|
.iter()
|
||
|
|
.map(|(k, v)| (k.clone(), v.len()))
|
||
|
|
.collect();
|
||
|
|
|
||
|
|
// 1. Push pre-created phi values onto var stacks
|
||
|
|
for (var, &v) in &phi_values[block_idx] {
|
||
|
|
var_stacks.entry(var.clone()).or_default().push(v);
|
||
|
|
}
|
||
|
|
|
||
|
|
// 2. Process body nodes
|
||
|
|
for &node in &blocks_nodes[block_idx] {
|
||
|
|
let info = &cfg[node];
|
||
|
|
|
||
|
|
// Helper: build Call args from arg_uses, falling back to info.taint.uses
|
||
|
|
let build_call_args = |info: &crate::cfg::NodeInfo,
|
||
|
|
var_stacks: &HashMap<String, Vec<SsaValue>>|
|
||
|
|
-> (Vec<SmallVec<[SsaValue; 2]>>, Option<SsaValue>) {
|
||
|
|
let receiver = info
|
||
|
|
.call
|
||
|
|
.receiver
|
||
|
|
.as_ref()
|
||
|
|
.and_then(|r| var_stacks.get(r).and_then(|s| s.last().copied()));
|
||
|
|
let args = if !info.call.arg_uses.is_empty() {
|
||
|
|
let mut args: Vec<SmallVec<[SsaValue; 2]>> = info
|
||
|
|
.call
|
||
|
|
.arg_uses
|
||
|
|
.iter()
|
||
|
|
.map(|arg_idents| {
|
||
|
|
arg_idents
|
||
|
|
.iter()
|
||
|
|
.filter_map(|ident| {
|
||
|
|
var_stacks.get(ident).and_then(|s| s.last().copied())
|
||
|
|
})
|
||
|
|
.collect()
|
||
|
|
})
|
||
|
|
.collect();
|
||
|
|
// For chained calls (e.g. fetch(url).then(fn)), arg_uses only
|
||
|
|
// captures the final call's args. Variables used by intermediate
|
||
|
|
// calls (like `url` in fetch) are in info.taint.uses but not arg_uses.
|
||
|
|
// Add them as an extra group so sink detection can see them.
|
||
|
|
//
|
||
|
|
// Exclude the receiver ident: it's carried on its own typed
|
||
|
|
// channel (`SsaOp::Call.receiver`). Callers that care about
|
||
|
|
// positional arity must read it from `info.call.arg_uses.len()`,
|
||
|
|
// not `args.len()`, since this implicit group inflates args.
|
||
|
|
let arg_uses_flat: HashSet<&str> = info
|
||
|
|
.call
|
||
|
|
.arg_uses
|
||
|
|
.iter()
|
||
|
|
.flat_map(|g| g.iter().map(|s| s.as_str()))
|
||
|
|
.collect();
|
||
|
|
let receiver_ident = info.call.receiver.as_deref();
|
||
|
|
let implicit: SmallVec<[SsaValue; 2]> = info
|
||
|
|
.taint
|
||
|
|
.uses
|
||
|
|
.iter()
|
||
|
|
.filter(|u| !arg_uses_flat.contains(u.as_str()))
|
||
|
|
.filter(|u| Some(u.as_str()) != receiver_ident)
|
||
|
|
.filter_map(|u| var_stacks.get(u).and_then(|s| s.last().copied()))
|
||
|
|
.collect();
|
||
|
|
if !implicit.is_empty() {
|
||
|
|
args.push(implicit);
|
||
|
|
}
|
||
|
|
args
|
||
|
|
} else {
|
||
|
|
// Fallback: treat all uses as a single argument group
|
||
|
|
let all_uses: SmallVec<[SsaValue; 2]> = info
|
||
|
|
.taint
|
||
|
|
.uses
|
||
|
|
.iter()
|
||
|
|
.filter_map(|u| var_stacks.get(u).and_then(|s| s.last().copied()))
|
||
|
|
.collect();
|
||
|
|
if all_uses.is_empty() {
|
||
|
|
vec![]
|
||
|
|
} else {
|
||
|
|
vec![all_uses]
|
||
|
|
}
|
||
|
|
};
|
||
|
|
(args, receiver)
|
||
|
|
};
|
||
|
|
|
||
|
|
// Determine operation and collect uses
|
||
|
|
// Out-of-scope nodes (nop_nodes) become Nop: they preserve graph
|
||
|
|
// connectivity but don't participate in taint flow.
|
||
|
|
let op = if nop_nodes.contains(&node) {
|
||
|
|
SsaOp::Nop
|
||
|
|
} else if info.catch_param {
|
||
|
|
SsaOp::CatchParam
|
||
|
|
} else if info
|
||
|
|
.taint
|
||
|
|
.labels
|
||
|
|
.iter()
|
||
|
|
.any(|l| matches!(l, crate::labels::DataLabel::Source(_)))
|
||
|
|
&& info.call.callee.is_none()
|
||
|
|
{
|
||
|
|
// Pure source (e.g. $_GET, env var) — no callee, so no args to track.
|
||
|
|
// Source-labeled calls (e.g. file_get_contents) fall through to Call
|
||
|
|
// so argument taint and sink detection still work.
|
||
|
|
SsaOp::Source
|
||
|
|
} else if info.call.callee.is_some() {
|
||
|
|
let callee = info.call.callee.as_deref().unwrap_or("").to_string();
|
||
|
|
let (args, receiver) = build_call_args(info, var_stacks);
|
||
|
|
SsaOp::Call {
|
||
|
|
callee,
|
||
|
|
args,
|
||
|
|
receiver,
|
||
|
|
}
|
||
|
|
} else if info.taint.defines.is_some()
|
||
|
|
&& info.taint.uses.is_empty()
|
||
|
|
&& !info
|
||
|
|
.taint
|
||
|
|
.labels
|
||
|
|
.iter()
|
||
|
|
.any(|l| matches!(l, crate::labels::DataLabel::Source(_)))
|
||
|
|
{
|
||
|
|
// Reassignment kill: a node that defines a variable but has no
|
||
|
|
// uses (operands) and is not a source is a constant/literal
|
||
|
|
// assignment. SSA rename allocates a fresh SsaValue, so
|
||
|
|
// downstream references see this new (untainted) value — the
|
||
|
|
// prior tainted definition is implicitly dead.
|
||
|
|
SsaOp::Const(info.taint.const_text.clone())
|
||
|
|
} else if info.taint.defines.is_some() {
|
||
|
|
let mut uses: SmallVec<[SsaValue; 4]> = info
|
||
|
|
.taint
|
||
|
|
.uses
|
||
|
|
.iter()
|
||
|
|
.filter_map(|u| var_stacks.get(u).and_then(|s| s.last().copied()))
|
||
|
|
.collect();
|
||
|
|
// Inject Const for binary expression literal operand.
|
||
|
|
// When a binary expression has one identifier and one numeric literal
|
||
|
|
// (e.g., `flags & 0x07`), the literal isn't in `uses`. Inject a
|
||
|
|
// synthetic Const instruction so the Assign has 2 uses, preventing
|
||
|
|
// copy propagation from eliminating the operation.
|
||
|
|
if uses.len() == 1 && info.bin_op.is_some() && info.bin_op_const.is_some() {
|
||
|
|
let const_val = info.bin_op_const.unwrap();
|
||
|
|
let const_v = SsaValue(*next_value);
|
||
|
|
*next_value += 1;
|
||
|
|
let const_inst = SsaInst {
|
||
|
|
value: const_v,
|
||
|
|
op: SsaOp::Const(Some(const_val.to_string())),
|
||
|
|
cfg_node: node,
|
||
|
|
var_name: None,
|
||
|
|
span: info.ast.span,
|
||
|
|
};
|
||
|
|
ssa_blocks[block_idx].body.push(const_inst);
|
||
|
|
value_defs.push(ValueDef {
|
||
|
|
var_name: None,
|
||
|
|
cfg_node: node,
|
||
|
|
block: block_id,
|
||
|
|
});
|
||
|
|
uses.push(const_v);
|
||
|
|
}
|
||
|
|
SsaOp::Assign(uses)
|
||
|
|
} else if matches!(info.kind, StmtKind::Return | StmtKind::Throw)
|
||
|
|
&& !info.taint.uses.is_empty()
|
||
|
|
{
|
||
|
|
// `return s` / `throw e` with identifier uses: emit an
|
||
|
|
// `Assign(uses)` so the SSA carries an explicit pass-through
|
||
|
|
// for the returned/thrown value. Without this, the Return
|
||
|
|
// node was lowered as a `Nop` and the terminator-setup
|
||
|
|
// "last non-Nop body inst" search returned None — producing
|
||
|
|
// `Terminator::Return(None)` for a function that visibly
|
||
|
|
// returns an identifier. That broke per-return-path
|
||
|
|
// PathFact narrowing for non-Rust languages where the
|
||
|
|
// returned identifier wasn't computed in the same block
|
||
|
|
// (e.g. Python `def f(s): return s` — `s` is a Param in
|
||
|
|
// block 0, the Return block itself has no body insts).
|
||
|
|
let uses: SmallVec<[SsaValue; 4]> = info
|
||
|
|
.taint
|
||
|
|
.uses
|
||
|
|
.iter()
|
||
|
|
.filter_map(|u| var_stacks.get(u).and_then(|s| s.last().copied()))
|
||
|
|
.collect();
|
||
|
|
if uses.is_empty() {
|
||
|
|
SsaOp::Nop
|
||
|
|
} else {
|
||
|
|
SsaOp::Assign(uses)
|
||
|
|
}
|
||
|
|
} else if matches!(
|
||
|
|
info.kind,
|
||
|
|
StmtKind::Entry
|
||
|
|
| StmtKind::Exit
|
||
|
|
| StmtKind::If
|
||
|
|
| StmtKind::Loop
|
||
|
|
| StmtKind::Break
|
||
|
|
| StmtKind::Continue
|
||
|
|
| StmtKind::Return
|
||
|
|
| StmtKind::Throw
|
||
|
|
) {
|
||
|
|
SsaOp::Nop
|
||
|
|
} else if info.call.callee.is_some() {
|
||
|
|
let callee = info.call.callee.as_deref().unwrap_or("").to_string();
|
||
|
|
let (args, receiver) = build_call_args(info, var_stacks);
|
||
|
|
SsaOp::Call {
|
||
|
|
callee,
|
||
|
|
args,
|
||
|
|
receiver,
|
||
|
|
}
|
||
|
|
} else {
|
||
|
|
SsaOp::Nop
|
||
|
|
};
|
||
|
|
|
||
|
|
// Allocate SSA value
|
||
|
|
let v = SsaValue(*next_value);
|
||
|
|
*next_value += 1;
|
||
|
|
let var_name_for_ssa = if nop_nodes.contains(&node) {
|
||
|
|
None
|
||
|
|
} else if info.taint.defines.is_some() {
|
||
|
|
info.taint.defines.clone()
|
||
|
|
} else if info.kind == StmtKind::Seq
|
||
|
|
&& info.call.callee.is_none()
|
||
|
|
&& info.taint.uses.len() == 1
|
||
|
|
&& !var_stacks.contains_key(&info.taint.uses[0])
|
||
|
|
{
|
||
|
|
// Implicit definition for uninitialized declarations (e.g.,
|
||
|
|
// C/C++ `char buf[256]`). Creates a reaching definition so
|
||
|
|
// output-parameter sources like fgets() can taint the buffer
|
||
|
|
// and subsequent uses (e.g., system(buf)) see the tainted value.
|
||
|
|
Some(info.taint.uses[0].clone())
|
||
|
|
} else {
|
||
|
|
None
|
||
|
|
};
|
||
|
|
value_defs.push(ValueDef {
|
||
|
|
var_name: var_name_for_ssa.clone(),
|
||
|
|
cfg_node: node,
|
||
|
|
block: block_id,
|
||
|
|
});
|
||
|
|
|
||
|
|
// Push defined variable onto stack (skip nop nodes)
|
||
|
|
if let Some(ref d) = var_name_for_ssa {
|
||
|
|
var_stacks.entry(d.clone()).or_default().push(v);
|
||
|
|
}
|
||
|
|
|
||
|
|
cfg_node_map.insert(node, v);
|
||
|
|
|
||
|
|
// Clone op for potential extra_defines before moving into SsaInst
|
||
|
|
let primary_op_for_extras = if info.taint.extra_defines.is_empty() {
|
||
|
|
None
|
||
|
|
} else {
|
||
|
|
Some(op.clone())
|
||
|
|
};
|
||
|
|
ssa_blocks[block_idx].body.push(SsaInst {
|
||
|
|
value: v,
|
||
|
|
op,
|
||
|
|
cfg_node: node,
|
||
|
|
var_name: var_name_for_ssa.clone(),
|
||
|
|
span: info.ast.span,
|
||
|
|
});
|
||
|
|
|
||
|
|
// Synthetic base update: when a dotted path is defined (e.g. `obj.data`),
|
||
|
|
// create synthetic Assign instructions for parent prefixes (e.g. `obj`)
|
||
|
|
// so that subsequent reads of the base variable see the field write.
|
||
|
|
// Only includes the new field value (not the old base) so that field
|
||
|
|
// overwrites properly kill taint: if obj.data is re-assigned to a
|
||
|
|
// constant, the base `obj` no longer carries that field's taint.
|
||
|
|
if !nop_nodes.contains(&node) {
|
||
|
|
if let Some(ref d) = info.taint.defines {
|
||
|
|
let mut current = d.as_str();
|
||
|
|
let mut child_value = v;
|
||
|
|
while let Some(dot_pos) = current.rfind('.') {
|
||
|
|
let parent = ¤t[..dot_pos];
|
||
|
|
let synth_v = SsaValue(*next_value);
|
||
|
|
*next_value += 1;
|
||
|
|
let synth_uses: SmallVec<[SsaValue; 4]> =
|
||
|
|
SmallVec::from_elem(child_value, 1);
|
||
|
|
value_defs.push(ValueDef {
|
||
|
|
var_name: Some(parent.to_string()),
|
||
|
|
cfg_node: node,
|
||
|
|
block: block_id,
|
||
|
|
});
|
||
|
|
var_stacks
|
||
|
|
.entry(parent.to_string())
|
||
|
|
.or_default()
|
||
|
|
.push(synth_v);
|
||
|
|
ssa_blocks[block_idx].body.push(SsaInst {
|
||
|
|
value: synth_v,
|
||
|
|
op: SsaOp::Assign(synth_uses),
|
||
|
|
cfg_node: node,
|
||
|
|
var_name: Some(parent.to_string()),
|
||
|
|
span: info.ast.span,
|
||
|
|
});
|
||
|
|
child_value = synth_v;
|
||
|
|
current = parent;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
// Emit extra SSA instructions for destructuring bindings.
|
||
|
|
// Each extra define inherits the same op (Source/Call/Assign) as the primary.
|
||
|
|
if let Some(ref primary_op) = primary_op_for_extras {
|
||
|
|
for extra_def in &info.taint.extra_defines {
|
||
|
|
let ev = SsaValue(*next_value);
|
||
|
|
*next_value += 1;
|
||
|
|
value_defs.push(ValueDef {
|
||
|
|
var_name: Some(extra_def.clone()),
|
||
|
|
cfg_node: node,
|
||
|
|
block: block_id,
|
||
|
|
});
|
||
|
|
var_stacks.entry(extra_def.clone()).or_default().push(ev);
|
||
|
|
ssa_blocks[block_idx].body.push(SsaInst {
|
||
|
|
value: ev,
|
||
|
|
op: primary_op.clone(),
|
||
|
|
cfg_node: node,
|
||
|
|
var_name: Some(extra_def.clone()),
|
||
|
|
span: info.ast.span,
|
||
|
|
});
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
// 3. Set terminator
|
||
|
|
let succs = &block_succs[block_idx];
|
||
|
|
let last_node = *blocks_nodes[block_idx].last().unwrap();
|
||
|
|
|
||
|
|
ssa_blocks[block_idx].terminator = if succs.is_empty() {
|
||
|
|
// A block with no successors at the block level is one of:
|
||
|
|
// (1) a block containing a Throw — terminates with an
|
||
|
|
// exception; no normal fall-through.
|
||
|
|
// (2) a block containing a Return — terminates with a value
|
||
|
|
// (or void). After form_blocks strips the bookkeeping
|
||
|
|
// Seq edge from Return → fn_exit, every explicit-return
|
||
|
|
// block lands here, including `if cond { return X; }`
|
||
|
|
// early returns.
|
||
|
|
// (3) the function-exit (fn_exit) block itself when the
|
||
|
|
// function falls off the end (implicit return).
|
||
|
|
//
|
||
|
|
// Distinguish them by inspecting the block's CFG nodes.
|
||
|
|
let return_node = blocks_nodes[block_idx]
|
||
|
|
.iter()
|
||
|
|
.copied()
|
||
|
|
.find(|&n| cfg[n].kind == StmtKind::Return);
|
||
|
|
let has_throw_node = blocks_nodes[block_idx]
|
||
|
|
.iter()
|
||
|
|
.any(|&n| cfg[n].kind == StmtKind::Throw);
|
||
|
|
|
||
|
|
if has_throw_node && return_node.is_none() {
|
||
|
|
// Throw terminates control flow with an exception. No
|
||
|
|
// structured Throw terminator exists today; downstream
|
||
|
|
// analyses rely on `exception_edges` (recorded separately)
|
||
|
|
// for catch-block dispatch. Mark the normal-flow exit as
|
||
|
|
// Unreachable so successor consumers do not invent a
|
||
|
|
// synthetic fall-through edge.
|
||
|
|
Terminator::Unreachable
|
||
|
|
} else if let Some(rn) = return_node {
|
||
|
|
let return_info = &cfg[rn];
|
||
|
|
// Return-value resolution. Mirror the legacy
|
||
|
|
// `has_const_return` path so callers see exactly the same
|
||
|
|
// SSA shape they did before the merged-return fix — only
|
||
|
|
// the *terminator* changes (Goto(exit) → Return(_)), not
|
||
|
|
// the value selection.
|
||
|
|
//
|
||
|
|
// (a) Literal return (`return 'x'`, `return None`,
|
||
|
|
// `return []`, `return;`). Marked by
|
||
|
|
// `taint.uses.is_empty()` on the Return CFG node.
|
||
|
|
// Emit a synthetic Const inst so taint never leaks
|
||
|
|
// from an unrelated inst earlier in the same block
|
||
|
|
// (regression guard: C-1 inline-return precision).
|
||
|
|
// (b) Computed / passthrough return — last non-Nop body
|
||
|
|
// inst. Covers `return foo()` (Call sits before the
|
||
|
|
// Return Nop), `return x + y` (Assign), and the
|
||
|
|
// implicit tail expression collapsed into a single
|
||
|
|
// block by the leader-following loop. When the
|
||
|
|
// Return carries identifier uses (`return req`,
|
||
|
|
// `return { req.session, ... }`), the SSA defs for
|
||
|
|
// those identifiers are already on the body as
|
||
|
|
// Param / Assign / Source insts — picking the last
|
||
|
|
// one matches pre-fix behaviour exactly.
|
||
|
|
// (c) Void / unresolved — `Return(None)`.
|
||
|
|
if return_info.taint.uses.is_empty() {
|
||
|
|
let const_text = return_info.taint.const_text.clone();
|
||
|
|
let const_v = SsaValue(*next_value);
|
||
|
|
*next_value += 1;
|
||
|
|
let block_id = BlockId(block_idx as u32);
|
||
|
|
value_defs.push(ValueDef {
|
||
|
|
var_name: None,
|
||
|
|
cfg_node: rn,
|
||
|
|
block: block_id,
|
||
|
|
});
|
||
|
|
ssa_blocks[block_idx].body.push(SsaInst {
|
||
|
|
value: const_v,
|
||
|
|
op: SsaOp::Const(const_text),
|
||
|
|
cfg_node: rn,
|
||
|
|
var_name: None,
|
||
|
|
span: return_info.ast.span,
|
||
|
|
});
|
||
|
|
Terminator::Return(Some(const_v))
|
||
|
|
} else {
|
||
|
|
let from_body = ssa_blocks[block_idx]
|
||
|
|
.body
|
||
|
|
.iter()
|
||
|
|
.rev()
|
||
|
|
.find(|inst| !matches!(inst.op, SsaOp::Nop))
|
||
|
|
.map(|inst| inst.value);
|
||
|
|
Terminator::Return(from_body)
|
||
|
|
}
|
||
|
|
} else {
|
||
|
|
// (3) fn_exit / true fall-off — no Return CFG node in this
|
||
|
|
// block. Use the last non-Nop body instruction as the
|
||
|
|
// implicit return value (e.g. the function's tail-position
|
||
|
|
// expression in Rust).
|
||
|
|
let ret_val = ssa_blocks[block_idx]
|
||
|
|
.body
|
||
|
|
.iter()
|
||
|
|
.rev()
|
||
|
|
.find(|inst| !matches!(inst.op, SsaOp::Nop))
|
||
|
|
.map(|inst| inst.value);
|
||
|
|
Terminator::Return(ret_val)
|
||
|
|
}
|
||
|
|
} else if succs.len() == 1 {
|
||
|
|
Terminator::Goto(BlockId(succs[0] as u32))
|
||
|
|
} else if succs.len() == 2 {
|
||
|
|
// Find the If/Loop node that branches
|
||
|
|
let cond_node = blocks_nodes[block_idx]
|
||
|
|
.iter()
|
||
|
|
.rev()
|
||
|
|
.find(|&&n| matches!(cfg[n].kind, StmtKind::If | StmtKind::Loop))
|
||
|
|
.copied()
|
||
|
|
.unwrap_or(last_node);
|
||
|
|
|
||
|
|
// Determine which successor is true/false by looking at edge kinds
|
||
|
|
let mut true_blk = succs[0];
|
||
|
|
let mut false_blk = succs[1];
|
||
|
|
|
||
|
|
// Check filtered edges from any node in this block to successors
|
||
|
|
for &(src, tgt, kind) in filtered_edges {
|
||
|
|
if blocks_nodes[block_idx].contains(&src) {
|
||
|
|
let tgt_blk_opt = succs.iter().position(|&s| {
|
||
|
|
blocks_nodes
|
||
|
|
.get(s)
|
||
|
|
.is_some_and(|nodes| nodes.contains(&tgt))
|
||
|
|
});
|
||
|
|
if let Some(tgt_blk_pos) = tgt_blk_opt {
|
||
|
|
match kind {
|
||
|
|
EdgeKind::True => true_blk = succs[tgt_blk_pos],
|
||
|
|
EdgeKind::False => false_blk = succs[tgt_blk_pos],
|
||
|
|
_ => {}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
// Lower structured condition from CFG metadata
|
||
|
|
let cond_info = &cfg[cond_node];
|
||
|
|
let condition = if cond_info.condition_text.is_some()
|
||
|
|
&& !cond_info.condition_vars.is_empty()
|
||
|
|
{
|
||
|
|
let expr =
|
||
|
|
crate::constraint::lower::lower_condition_with_stacks(cond_info, var_stacks);
|
||
|
|
if matches!(expr, crate::constraint::lower::ConditionExpr::Unknown) {
|
||
|
|
None
|
||
|
|
} else {
|
||
|
|
Some(Box::new(expr))
|
||
|
|
}
|
||
|
|
} else {
|
||
|
|
None
|
||
|
|
};
|
||
|
|
|
||
|
|
Terminator::Branch {
|
||
|
|
cond: cond_node,
|
||
|
|
true_blk: BlockId(true_blk as u32),
|
||
|
|
false_blk: BlockId(false_blk as u32),
|
||
|
|
condition,
|
||
|
|
}
|
||
|
|
} else {
|
||
|
|
// More than 2 successors — model as a multi-way Switch.
|
||
|
|
//
|
||
|
|
// This replaces the previous `Goto(first)` collapse: the
|
||
|
|
// structured terminator now enumerates every target instead
|
||
|
|
// of hiding N-1 of them behind `block.succs`. Flow consumers
|
||
|
|
// (taint, const-prop, symex) still iterate `succs` as
|
||
|
|
// authoritative, but downstream tooling that inspects the
|
||
|
|
// terminator shape gets the full fanout.
|
||
|
|
//
|
||
|
|
// Note: today's switch-statement CFG construction decomposes
|
||
|
|
// cases into a cascade of binary `Branch` headers (see
|
||
|
|
// `build_switch` in src/cfg.rs), so real switch statements
|
||
|
|
// never reach this arm. Folding the cascade back into a
|
||
|
|
// single Switch node is a follow-up; in the meantime, this
|
||
|
|
// arm fires only on genuine multi-way CFG fanouts (e.g.
|
||
|
|
// future Go-switch / Java-arrow / Rust-match lowerings).
|
||
|
|
//
|
||
|
|
// Scrutinee: use the primary SSA value defined at the last
|
||
|
|
// node in this block when one exists; fall back to
|
||
|
|
// `SsaValue(0)` (a valid index — SSA numbering is 1-based
|
||
|
|
// only conceptually, and value 0 is always present in a
|
||
|
|
// non-empty body) when no value is defined. Downstream
|
||
|
|
// consumers that care about the scrutinee (abstract interp,
|
||
|
|
// symex per-case constraints) treat a missing/degenerate
|
||
|
|
// scrutinee as "unknown" rather than panicking.
|
||
|
|
let scrutinee = cfg_node_map.get(&last_node).copied().unwrap_or(SsaValue(0));
|
||
|
|
let targets: SmallVec<[BlockId; 4]> =
|
||
|
|
succs.iter().skip(1).map(|&s| BlockId(s as u32)).collect();
|
||
|
|
let default = BlockId(succs[0] as u32);
|
||
|
|
// Synthetic ≥3-way fanouts have no per-case literal metadata —
|
||
|
|
// every entry is None (unknown), so the executor falls back to
|
||
|
|
// first-reachable behavior on this terminator.
|
||
|
|
let case_values: SmallVec<[Option<crate::constraint::domain::ConstValue>; 4]> =
|
||
|
|
std::iter::repeat_with(|| None)
|
||
|
|
.take(targets.len())
|
||
|
|
.collect();
|
||
|
|
tracing::debug!(
|
||
|
|
block = block_idx,
|
||
|
|
num_succs = succs.len(),
|
||
|
|
"emitting Terminator::Switch for ≥3-way fanout",
|
||
|
|
);
|
||
|
|
Terminator::Switch {
|
||
|
|
scrutinee,
|
||
|
|
targets,
|
||
|
|
default,
|
||
|
|
case_values,
|
||
|
|
}
|
||
|
|
};
|
||
|
|
|
||
|
|
// 4. Fill phi operands in successor blocks
|
||
|
|
for &succ in succs {
|
||
|
|
for (var, &phi_val) in &phi_values[succ] {
|
||
|
|
// The version of `var` reaching from this block
|
||
|
|
let reaching_val = var_stacks.get(var).and_then(|s| s.last().copied());
|
||
|
|
if let Some(rv) = reaching_val {
|
||
|
|
// Find the phi instruction and add this operand
|
||
|
|
for phi in &mut ssa_blocks[succ].phis {
|
||
|
|
if phi.value == phi_val {
|
||
|
|
if let SsaOp::Phi(ref mut operands) = phi.op {
|
||
|
|
operands.push((block_id, rv));
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
// 5. Recurse into dominator tree children
|
||
|
|
for &child in &dom_tree_children[block_idx] {
|
||
|
|
process_block(
|
||
|
|
child,
|
||
|
|
cfg,
|
||
|
|
blocks_nodes,
|
||
|
|
block_succs,
|
||
|
|
block_preds,
|
||
|
|
phi_placements,
|
||
|
|
dom_tree_children,
|
||
|
|
filtered_edges,
|
||
|
|
var_stacks,
|
||
|
|
ssa_blocks,
|
||
|
|
phi_values,
|
||
|
|
value_defs,
|
||
|
|
cfg_node_map,
|
||
|
|
next_value,
|
||
|
|
nop_nodes,
|
||
|
|
);
|
||
|
|
}
|
||
|
|
|
||
|
|
// 6. Restore stacks
|
||
|
|
for (var, depth) in &saved {
|
||
|
|
if let Some(stack) = var_stacks.get_mut(var) {
|
||
|
|
stack.truncate(*depth);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
// Remove any new variables that weren't in saved
|
||
|
|
let saved_vars: HashSet<&String> = saved.iter().map(|(k, _)| k).collect();
|
||
|
|
var_stacks.retain(|k, _| saved_vars.contains(k));
|
||
|
|
}
|
||
|
|
|
||
|
|
// Inject synthetic Param instructions at START of block 0 for external variables.
|
||
|
|
// These create SSA definitions so the rename pass can reference them.
|
||
|
|
// Pre-seed var_stacks so process_block sees them.
|
||
|
|
if !external_vars.is_empty() {
|
||
|
|
let entry_cfg_node = blocks_nodes[0][0];
|
||
|
|
let mut synthetic_body = Vec::with_capacity(external_vars.len());
|
||
|
|
let mut positional_idx: usize = 0;
|
||
|
|
for var in external_vars.iter() {
|
||
|
|
let v = SsaValue(next_value);
|
||
|
|
next_value += 1;
|
||
|
|
value_defs.push(ValueDef {
|
||
|
|
var_name: Some(var.clone()),
|
||
|
|
cfg_node: entry_cfg_node,
|
||
|
|
block: BlockId(0),
|
||
|
|
});
|
||
|
|
let op = if is_receiver_name(var) {
|
||
|
|
SsaOp::SelfParam
|
||
|
|
} else {
|
||
|
|
let op = SsaOp::Param {
|
||
|
|
index: positional_idx,
|
||
|
|
};
|
||
|
|
positional_idx += 1;
|
||
|
|
op
|
||
|
|
};
|
||
|
|
synthetic_body.push(SsaInst {
|
||
|
|
value: v,
|
||
|
|
op,
|
||
|
|
cfg_node: entry_cfg_node,
|
||
|
|
var_name: Some(var.clone()),
|
||
|
|
span: (0, 0),
|
||
|
|
});
|
||
|
|
var_stacks.entry(var.clone()).or_default().push(v);
|
||
|
|
}
|
||
|
|
// Prepend synthetic params before any existing body instructions
|
||
|
|
synthetic_body.append(&mut ssa_blocks[0].body);
|
||
|
|
ssa_blocks[0].body = synthetic_body;
|
||
|
|
}
|
||
|
|
|
||
|
|
process_block(
|
||
|
|
0, // entry block
|
||
|
|
cfg,
|
||
|
|
blocks_nodes,
|
||
|
|
block_succs,
|
||
|
|
block_preds,
|
||
|
|
phi_placements,
|
||
|
|
dom_tree_children,
|
||
|
|
filtered_edges,
|
||
|
|
&mut var_stacks,
|
||
|
|
&mut ssa_blocks,
|
||
|
|
&mut phi_values,
|
||
|
|
&mut value_defs,
|
||
|
|
&mut cfg_node_map,
|
||
|
|
&mut next_value,
|
||
|
|
nop_nodes,
|
||
|
|
);
|
||
|
|
|
||
|
|
// Process orphan blocks (e.g. catch blocks disconnected after exception edge removal).
|
||
|
|
// These blocks have no predecessors and weren't reached by the dominator tree walk.
|
||
|
|
//
|
||
|
|
// Rebuild var_stacks from already-processed instructions so that catch blocks
|
||
|
|
// can reference variables defined before the try block (e.g. `userInput`).
|
||
|
|
let has_orphans =
|
||
|
|
(1..num_blocks).any(|bid| block_preds[bid].is_empty() && ssa_blocks[bid].body.is_empty());
|
||
|
|
if has_orphans {
|
||
|
|
// Rebuild var_stacks from all SSA instructions created during the main walk.
|
||
|
|
// This gives orphan blocks access to all variable definitions.
|
||
|
|
var_stacks.clear();
|
||
|
|
for block in &ssa_blocks {
|
||
|
|
for inst in block.phis.iter().chain(block.body.iter()) {
|
||
|
|
if let Some(ref name) = inst.var_name {
|
||
|
|
var_stacks.entry(name.clone()).or_default().push(inst.value);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
for bid in 1..num_blocks {
|
||
|
|
if block_preds[bid].is_empty() && ssa_blocks[bid].body.is_empty() {
|
||
|
|
process_block(
|
||
|
|
bid,
|
||
|
|
cfg,
|
||
|
|
blocks_nodes,
|
||
|
|
block_succs,
|
||
|
|
block_preds,
|
||
|
|
phi_placements,
|
||
|
|
dom_tree_children,
|
||
|
|
filtered_edges,
|
||
|
|
&mut var_stacks,
|
||
|
|
&mut ssa_blocks,
|
||
|
|
&mut phi_values,
|
||
|
|
&mut value_defs,
|
||
|
|
&mut cfg_node_map,
|
||
|
|
&mut next_value,
|
||
|
|
nop_nodes,
|
||
|
|
);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
(ssa_blocks, value_defs, cfg_node_map)
|
||
|
|
}
|
||
|
|
|
||
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
||
|
|
// Debug invariant checkers
|
||
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
||
|
|
|
||
|
|
/// Check that block numbering is consistent with a BFS from the entry block.
///
/// Block 0 (the entry) and blocks with no predecessors (orphans such as catch
/// blocks reached only via exception edges) are exempt. Every other block
/// must have at least one predecessor whose index is smaller than its own;
/// otherwise a `debug_assert!` fires.
#[cfg(debug_assertions)]
fn debug_assert_bfs_ordering(block_preds: &[Vec<usize>]) {
    // `skip(1)` leaves the entry block out of the check.
    for (idx, incoming) in block_preds.iter().enumerate().skip(1) {
        if incoming.is_empty() {
            // Orphan block: nothing to compare against.
            continue;
        }
        debug_assert!(
            incoming.iter().any(|&pred| pred < idx),
            "Block {} has no forward predecessor — BFS ordering violated. Preds: {:?}",
            idx, incoming
        );
    }
}
|
||
|
|
|
||
|
|
/// Verify phi operand counts: each phi must have exactly one operand
|
||
|
|
/// per predecessor, and every operand must reference an actual
|
||
|
|
/// predecessor of the block.
|
||
|
|
///
|
||
|
|
/// Runs in release builds because phi-operand mismatches are
|
||
|
|
/// load-bearing for soundness — downstream taint, const, and abstract
|
||
|
|
/// analyses iterate phi operands by `(pred_blk, value)` pairs, and
|
||
|
|
/// either a missing operand (silent "no contribution" on that edge)
|
||
|
|
/// or a phantom operand (garbage into the join) corrupts analysis
|
||
|
|
/// without surfacing.
|
||
|
|
///
|
||
|
|
/// The invariant is strict equality. Predecessors that carry no
|
||
|
|
/// reaching definition for the phi's variable are filled with the
|
||
|
|
/// [`SsaOp::Undef`] sentinel in `fill_undef_phi_operands`, rather than
|
||
|
|
/// being dropped — so consumers that look up by `(pred_blk, value)`
|
||
|
|
/// see a real operand for every control-flow edge.
|
||
|
|
fn assert_phi_operand_counts(ssa_blocks: &[SsaBlock], block_preds: &[Vec<usize>]) {
|
||
|
|
use std::collections::HashSet;
|
||
|
|
for (i, block) in ssa_blocks.iter().enumerate() {
|
||
|
|
let pred_set: HashSet<u32> = block_preds[i].iter().map(|&p| p as u32).collect();
|
||
|
|
for phi in &block.phis {
|
||
|
|
if let SsaOp::Phi(ref operands) = phi.op {
|
||
|
|
assert_eq!(
|
||
|
|
operands.len(),
|
||
|
|
block_preds[i].len(),
|
||
|
|
"SSA phi operand count does not match predecessor count: block {} phi v{} \
|
||
|
|
(var={:?}) has {} operands but block has {} predecessors. \
|
||
|
|
preds={:?}, operand_preds={:?}",
|
||
|
|
i,
|
||
|
|
phi.value.0,
|
||
|
|
phi.var_name,
|
||
|
|
operands.len(),
|
||
|
|
block_preds[i].len(),
|
||
|
|
block_preds[i],
|
||
|
|
operands.iter().map(|(b, _)| b.0).collect::<Vec<_>>(),
|
||
|
|
);
|
||
|
|
// Each operand's pred block must be an actual predecessor,
|
||
|
|
// and no predecessor may appear more than once.
|
||
|
|
let mut seen: HashSet<u32> = HashSet::new();
|
||
|
|
for (pred_blk, _) in operands.iter() {
|
||
|
|
assert!(
|
||
|
|
pred_set.contains(&pred_blk.0),
|
||
|
|
"SSA phi operand references nonexistent predecessor: block {} phi v{} \
|
||
|
|
references pred B{} but block predecessors are {:?}",
|
||
|
|
i,
|
||
|
|
phi.value.0,
|
||
|
|
pred_blk.0,
|
||
|
|
block_preds[i],
|
||
|
|
);
|
||
|
|
assert!(
|
||
|
|
seen.insert(pred_blk.0),
|
||
|
|
"SSA phi operand duplicates predecessor: block {} phi v{} has two \
|
||
|
|
operands for pred B{}",
|
||
|
|
i,
|
||
|
|
phi.value.0,
|
||
|
|
pred_blk.0,
|
||
|
|
);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
/// Post-rename pass: ensure every phi has one operand per predecessor.
|
||
|
|
///
|
||
|
|
/// During rename, phi operands are only pushed when the variable has a
|
||
|
|
/// live reaching definition on that predecessor edge. Edges where the
|
||
|
|
/// variable is not yet defined (e.g. a try-body rejoining after a
|
||
|
|
/// catch-only binding, an early-return branch on a later-defined
|
||
|
|
/// variable, an orphan catch block's implicit predecessors) leave the
|
||
|
|
/// phi with fewer operands than the block has predecessors.
|
||
|
|
///
|
||
|
|
/// This pass scans all phis, and for every missing `(pred_block, _)`
|
||
|
|
/// slot, pushes `(pred_block, undef_val)` where `undef_val` is a
|
||
|
|
/// single shared sentinel instruction ([`SsaOp::Undef`]) synthesized
|
||
|
|
/// at the end of block 0's body. Consumers iterate phi operands by
|
||
|
|
/// `(pred_blk, value)` and therefore see a real operand on every
|
||
|
|
/// control-flow edge — no implicit "missing = empty" semantics.
|
||
|
|
///
|
||
|
|
/// The Undef instruction is created lazily (only when at least one phi
|
||
|
|
/// has a gap) so functions with fully-dominating definitions pay zero
|
||
|
|
/// cost. All phis share the same Undef value: a phi operand is
|
||
|
|
/// identified by its `(pred_block, value)` pair, so sharing the value
|
||
|
|
/// across phis is safe and keeps the synthesized-instruction count at
|
||
|
|
/// most one per function body.
|
||
|
|
fn fill_undef_phi_operands(
|
||
|
|
ssa_blocks: &mut [SsaBlock],
|
||
|
|
block_preds: &[Vec<usize>],
|
||
|
|
value_defs: &mut Vec<ValueDef>,
|
||
|
|
blocks_nodes: &[Vec<NodeIndex>],
|
||
|
|
) {
|
||
|
|
// Fast path: detect whether any phi has a gap. Avoid allocating
|
||
|
|
// the Undef value in the common case where every phi is saturated.
|
||
|
|
let needs_undef = ssa_blocks.iter().enumerate().any(|(bi, block)| {
|
||
|
|
block.phis.iter().any(|phi| {
|
||
|
|
if let SsaOp::Phi(ref operands) = phi.op {
|
||
|
|
operands.len() < block_preds[bi].len()
|
||
|
|
} else {
|
||
|
|
false
|
||
|
|
}
|
||
|
|
})
|
||
|
|
});
|
||
|
|
if !needs_undef {
|
||
|
|
return;
|
||
|
|
}
|
||
|
|
|
||
|
|
// Anchor the synthetic Undef instruction to the entry block's first
|
||
|
|
// CFG node so span lookups don't hit an invalid NodeIndex.
|
||
|
|
let anchor_node = blocks_nodes
|
||
|
|
.first()
|
||
|
|
.and_then(|b| b.first())
|
||
|
|
.copied()
|
||
|
|
.expect("entry block has at least one CFG node");
|
||
|
|
|
||
|
|
let undef_val = SsaValue(value_defs.len() as u32);
|
||
|
|
value_defs.push(ValueDef {
|
||
|
|
var_name: None,
|
||
|
|
cfg_node: anchor_node,
|
||
|
|
block: BlockId(0),
|
||
|
|
});
|
||
|
|
// Place the Undef instruction at the end of block 0's body so it
|
||
|
|
// appears after any synthetic Param / SelfParam emissions — its
|
||
|
|
// only role is to anchor the SsaValue; ordering relative to other
|
||
|
|
// body instructions is cosmetic (no consumer depends on its
|
||
|
|
// position, only on the value lookup).
|
||
|
|
ssa_blocks[0].body.push(SsaInst {
|
||
|
|
value: undef_val,
|
||
|
|
op: SsaOp::Undef,
|
||
|
|
cfg_node: anchor_node,
|
||
|
|
var_name: None,
|
||
|
|
span: (0, 0),
|
||
|
|
});
|
||
|
|
|
||
|
|
// Fill missing operand slots. Iterate `block_preds[bi]` in its
|
||
|
|
// natural order so the resulting phi operand list is deterministic
|
||
|
|
// across runs.
|
||
|
|
for (bi, block) in ssa_blocks.iter_mut().enumerate() {
|
||
|
|
for phi in block.phis.iter_mut() {
|
||
|
|
if let SsaOp::Phi(ref mut operands) = phi.op {
|
||
|
|
if operands.len() == block_preds[bi].len() {
|
||
|
|
continue;
|
||
|
|
}
|
||
|
|
use std::collections::HashSet;
|
||
|
|
let present: HashSet<u32> = operands.iter().map(|(b, _)| b.0).collect();
|
||
|
|
for &pred in &block_preds[bi] {
|
||
|
|
let pid = pred as u32;
|
||
|
|
if !present.contains(&pid) {
|
||
|
|
operands.push((BlockId(pid), undef_val));
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
#[cfg(test)]
|
||
|
|
mod tests {
|
||
|
|
use super::*;
|
||
|
|
use crate::cfg::{EdgeKind, NodeInfo, StmtKind, TaintMeta};
|
||
|
|
use petgraph::Graph;
|
||
|
|
|
||
|
|
fn make_node(kind: StmtKind) -> NodeInfo {
|
||
|
|
NodeInfo {
|
||
|
|
kind,
|
||
|
|
..Default::default()
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
#[test]
fn linear_cfg_no_phis() {
    // Straight-line shape: Entry → x=1 → y=x → Exit
    let mut graph: Cfg = Graph::new();
    let entry = graph.add_node(make_node(StmtKind::Entry));
    let assign_x = graph.add_node(NodeInfo {
        taint: TaintMeta {
            defines: Some("x".into()),
            ..Default::default()
        },
        ..make_node(StmtKind::Seq)
    });
    let assign_y = graph.add_node(NodeInfo {
        taint: TaintMeta {
            defines: Some("y".into()),
            uses: vec!["x".into()],
            ..Default::default()
        },
        ..make_node(StmtKind::Seq)
    });
    let exit = graph.add_node(make_node(StmtKind::Exit));

    for (from, to) in [(entry, assign_x), (assign_x, assign_y), (assign_y, exit)] {
        graph.add_edge(from, to, EdgeKind::Seq);
    }

    let ssa = lower_to_ssa(&graph, entry, None, true).unwrap();

    // Only Seq edges and no branches, so everything lands in one block.
    assert_eq!(ssa.blocks.len(), 1);
    let only_block = &ssa.blocks[0];
    // A linear CFG has no join, hence no phi.
    assert!(only_block.phis.is_empty());
    // One body instruction per CFG node: entry, x=1, y=x, exit.
    assert_eq!(only_block.body.len(), 4);
}
|
||
|
|
|
||
|
|
#[test]
fn diamond_cfg_produces_phi() {
    // Shape: Entry → x=1 → If → {True: x=2 | False: x=3} → Join → Exit
    let assigns_x = || NodeInfo {
        taint: TaintMeta {
            defines: Some("x".into()),
            ..Default::default()
        },
        ..make_node(StmtKind::Seq)
    };

    let mut graph: Cfg = Graph::new();
    let entry = graph.add_node(make_node(StmtKind::Entry));
    let init_x = graph.add_node(assigns_x());
    let branch = graph.add_node(make_node(StmtKind::If));
    let then_arm = graph.add_node(assigns_x());
    let else_arm = graph.add_node(assigns_x());
    let join = graph.add_node(make_node(StmtKind::Seq));
    let exit = graph.add_node(make_node(StmtKind::Exit));

    for (from, to, kind) in [
        (entry, init_x, EdgeKind::Seq),
        (init_x, branch, EdgeKind::Seq),
        (branch, then_arm, EdgeKind::True),
        (branch, else_arm, EdgeKind::False),
        (then_arm, join, EdgeKind::Seq),
        (else_arm, join, EdgeKind::Seq),
        (join, exit, EdgeKind::Seq),
    ] {
        graph.add_edge(from, to, kind);
    }

    let ssa = lower_to_ssa(&graph, entry, None, true).unwrap();

    // Branching must split the CFG into several blocks.
    assert!(ssa.blocks.len() >= 3);

    // The first block carrying any phi is taken to be the join; it must
    // hold exactly one phi, and that phi must be for "x".
    let join_block = ssa
        .blocks
        .iter()
        .find(|b| !b.phis.is_empty())
        .expect("should have a block with a phi");
    assert_eq!(join_block.phis.len(), 1);
    let phi = &join_block.phis[0];
    assert_eq!(phi.var_name.as_deref(), Some("x"));

    // One operand per arm of the diamond.
    match &phi.op {
        SsaOp::Phi(operands) => assert_eq!(operands.len(), 2),
        _ => panic!("expected Phi op"),
    }
}
|
||
|
|
|
||
|
|
#[test]
fn loop_cfg_produces_phi() {
    // Shape: Entry → x=0 → Loop header ⇄ body (x=x+1, Back edge); header → Exit
    let mut graph: Cfg = Graph::new();
    let entry = graph.add_node(make_node(StmtKind::Entry));
    let init_x = graph.add_node(NodeInfo {
        taint: TaintMeta {
            defines: Some("x".into()),
            ..Default::default()
        },
        ..make_node(StmtKind::Seq)
    });
    let header = graph.add_node(make_node(StmtKind::Loop));
    let increment = graph.add_node(NodeInfo {
        taint: TaintMeta {
            defines: Some("x".into()),
            uses: vec!["x".into()],
            ..Default::default()
        },
        ..make_node(StmtKind::Seq)
    });
    let exit = graph.add_node(make_node(StmtKind::Exit));

    for (from, to, kind) in [
        (entry, init_x, EdgeKind::Seq),
        (init_x, header, EdgeKind::Seq),
        (header, increment, EdgeKind::True),
        (increment, header, EdgeKind::Back),
        (header, exit, EdgeKind::False),
    ] {
        graph.add_edge(from, to, kind);
    }

    let ssa = lower_to_ssa(&graph, entry, None, true).unwrap();

    // The loop header merges the value from before the loop with the one
    // arriving over the back edge, so a phi-carrying block must exist.
    let Some(first_phi_block) = ssa.blocks.iter().find(|b| !b.phis.is_empty()) else {
        panic!("loop header should have a phi for x");
    };

    let has_x_phi = first_phi_block
        .phis
        .iter()
        .any(|p| p.var_name.as_deref() == Some("x"));
    assert!(has_x_phi, "should have phi for variable x");
}
|
||
|
|
|
||
|
|
#[test]
fn multiple_reassignments_distinct_values() {
    // Shape: Entry → x=1 → x=2 → x=3 → Exit
    let redefines_x = || NodeInfo {
        taint: TaintMeta {
            defines: Some("x".into()),
            ..Default::default()
        },
        ..make_node(StmtKind::Seq)
    };

    let mut graph: Cfg = Graph::new();
    let entry = graph.add_node(make_node(StmtKind::Entry));

    // All nodes are created first, in path order, then linked pairwise.
    let mut path = vec![entry];
    for _ in 0..3 {
        path.push(graph.add_node(redefines_x()));
    }
    path.push(graph.add_node(make_node(StmtKind::Exit)));
    for pair in path.windows(2) {
        graph.add_edge(pair[0], pair[1], EdgeKind::Seq);
    }

    let ssa = lower_to_ssa(&graph, entry, None, true).unwrap();

    // Collect the SSA value of every definition recorded for "x".
    let x_values: Vec<SsaValue> = ssa
        .value_defs
        .iter()
        .enumerate()
        .filter_map(|(idx, def)| {
            (def.var_name.as_deref() == Some("x")).then(|| SsaValue(idx as u32))
        })
        .collect();

    assert_eq!(x_values.len(), 3, "three definitions of x");
    // No two definitions may share a value.
    let unique: HashSet<_> = x_values.iter().collect();
    assert_eq!(unique.len(), 3, "all SsaValues should be distinct");
}
|
||
|
|
|
||
|
|
#[test]
fn empty_cfg_returns_error() {
    // The graph has no nodes, so index 0 does not refer to anything;
    // lowering must report an error.
    let empty: Cfg = Graph::new();
    let missing_entry = NodeIndex::new(0);
    assert!(lower_to_ssa(&empty, missing_entry, None, true).is_err());
}
|
||
|
|
|
||
|
|
// ── BFS ordering and phi invariant tests ─────────────────────────────
|
||
|
|
|
||
|
|
#[test]
fn bfs_ordering_holds_for_linear_cfg() {
    // Shape: Entry → A (defines x) → B (defines y from x) → Exit
    let mut graph: Cfg = Graph::new();
    let entry = graph.add_node(make_node(StmtKind::Entry));
    let node_a = graph.add_node(NodeInfo {
        taint: TaintMeta {
            defines: Some("x".into()),
            ..Default::default()
        },
        ..make_node(StmtKind::Seq)
    });
    let node_b = graph.add_node(NodeInfo {
        taint: TaintMeta {
            defines: Some("y".into()),
            uses: vec!["x".into()],
            ..Default::default()
        },
        ..make_node(StmtKind::Seq)
    });
    let exit = graph.add_node(make_node(StmtKind::Exit));

    for (from, to) in [(entry, node_a), (node_a, node_b), (node_b, exit)] {
        graph.add_edge(from, to, EdgeKind::Seq);
    }

    // In debug builds the lowering is expected to run
    // `debug_assert_bfs_ordering`; a successful, non-empty result means it
    // did not fire.
    let lowered = lower_to_ssa(&graph, entry, None, true).unwrap();
    assert!(!lowered.blocks.is_empty());
}
|
||
|
|
|
||
|
|
#[test]
fn bfs_ordering_holds_for_diamond_cfg() {
    // Shape: Entry → x=… → If → {True: x=… | False: x=…} → Join → Exit
    let assigns_x = || NodeInfo {
        taint: TaintMeta {
            defines: Some("x".into()),
            ..Default::default()
        },
        ..make_node(StmtKind::Seq)
    };

    let mut cfg: Cfg = Graph::new();
    let entry = cfg.add_node(make_node(StmtKind::Entry));
    let def_x = cfg.add_node(assigns_x());
    let if_node = cfg.add_node(make_node(StmtKind::If));
    let true_node = cfg.add_node(assigns_x());
    let false_node = cfg.add_node(assigns_x());
    let join = cfg.add_node(make_node(StmtKind::Seq));
    let exit = cfg.add_node(make_node(StmtKind::Exit));

    cfg.add_edge(entry, def_x, EdgeKind::Seq);
    cfg.add_edge(def_x, if_node, EdgeKind::Seq);
    cfg.add_edge(if_node, true_node, EdgeKind::True);
    cfg.add_edge(if_node, false_node, EdgeKind::False);
    cfg.add_edge(true_node, join, EdgeKind::Seq);
    cfg.add_edge(false_node, join, EdgeKind::Seq);
    cfg.add_edge(join, exit, EdgeKind::Seq);

    // Exercises both the BFS ordering and the phi operand count assertions.
    let ssa = lower_to_ssa(&cfg, entry, None, true).unwrap();

    // `x` is redefined on both arms, so a phi-carrying join block must
    // exist. A missing one is a failure, not something to skip over.
    let block = ssa
        .blocks
        .iter()
        .find(|b| !b.phis.is_empty())
        .expect("diamond join should have a block with a phi");
    assert_eq!(
        block.preds.len(),
        2,
        "join block should have 2 predecessors"
    );

    // The invariant is strict equality: one operand per predecessor.
    for phi in &block.phis {
        if let SsaOp::Phi(ref ops) = phi.op {
            assert_eq!(
                ops.len(),
                block.preds.len(),
                "phi operand count should equal predecessor count"
            );
        }
    }
}
|
||
|
|
|
||
|
|
#[test]
fn bfs_ordering_holds_for_loop_with_back_edge() {
    // Shape: Entry → x=0 → Loop ⇄ body (x=x+1, Back edge); Loop → Exit
    let mut graph: Cfg = Graph::new();
    let entry = graph.add_node(make_node(StmtKind::Entry));
    let init_x = graph.add_node(NodeInfo {
        taint: TaintMeta {
            defines: Some("x".into()),
            ..Default::default()
        },
        ..make_node(StmtKind::Seq)
    });
    let header = graph.add_node(make_node(StmtKind::Loop));
    let increment = graph.add_node(NodeInfo {
        taint: TaintMeta {
            defines: Some("x".into()),
            uses: vec!["x".into()],
            ..Default::default()
        },
        ..make_node(StmtKind::Seq)
    });
    let exit = graph.add_node(make_node(StmtKind::Exit));

    for (from, to, kind) in [
        (entry, init_x, EdgeKind::Seq),
        (init_x, header, EdgeKind::Seq),
        (header, increment, EdgeKind::True),
        (increment, header, EdgeKind::Back),
        (header, exit, EdgeKind::False),
    ] {
        graph.add_edge(from, to, kind);
    }

    // The back edge gives the header a predecessor besides the forward one
    // from `init_x`; lowering must still succeed and yield blocks.
    let lowered = lower_to_ssa(&graph, entry, None, true).unwrap();
    assert!(!lowered.blocks.is_empty());
}
|
||
|
|
|
||
|
|
#[test]
fn orphan_catch_block_does_not_violate_bfs_ordering() {
    // Shape: Entry → body → Exit on the normal path, plus
    //        body ─(Exception)→ catch → Exit.
    // The catch node's only incoming edge is the exception edge.
    let mut graph: Cfg = Graph::new();
    let entry = graph.add_node(make_node(StmtKind::Entry));
    let try_body = graph.add_node(NodeInfo {
        taint: TaintMeta {
            defines: Some("x".into()),
            ..Default::default()
        },
        ..make_node(StmtKind::Seq)
    });
    let handler = graph.add_node(NodeInfo {
        catch_param: true,
        taint: TaintMeta {
            defines: Some("e".into()),
            ..Default::default()
        },
        ..make_node(StmtKind::Seq)
    });
    let exit = graph.add_node(make_node(StmtKind::Exit));

    for (from, to, kind) in [
        (entry, try_body, EdgeKind::Seq),
        (try_body, exit, EdgeKind::Seq),
        (try_body, handler, EdgeKind::Exception),
        (handler, exit, EdgeKind::Seq),
    ] {
        graph.add_edge(from, to, kind);
    }

    // Because the exception edge is stripped from normal flow, the catch
    // block may show up with no predecessors. The BFS assertion should skip
    // such an orphan instead of failing.
    let lowered = lower_to_ssa(&graph, entry, None, true).unwrap();
    assert!(!lowered.blocks.is_empty());
}
|
||
|
|
|
||
|
|
#[test]
fn phi_operand_count_equals_pred_count_in_diamond() {
    // Shape: Entry → If → {True: v=… | False: v=…} → Join (uses v) → Exit.
    // Checks operand count == predecessor count, not merely <=.
    let assigns_v = || NodeInfo {
        taint: TaintMeta {
            defines: Some("v".into()),
            ..Default::default()
        },
        ..make_node(StmtKind::Seq)
    };

    let mut graph: Cfg = Graph::new();
    let entry = graph.add_node(make_node(StmtKind::Entry));
    let branch = graph.add_node(make_node(StmtKind::If));
    let then_arm = graph.add_node(assigns_v());
    let else_arm = graph.add_node(assigns_v());
    let join = graph.add_node(NodeInfo {
        taint: TaintMeta {
            uses: vec!["v".into()],
            ..Default::default()
        },
        ..make_node(StmtKind::Seq)
    });
    let exit = graph.add_node(make_node(StmtKind::Exit));

    for (from, to, kind) in [
        (entry, branch, EdgeKind::Seq),
        (branch, then_arm, EdgeKind::True),
        (branch, else_arm, EdgeKind::False),
        (then_arm, join, EdgeKind::Seq),
        (else_arm, join, EdgeKind::Seq),
        (join, exit, EdgeKind::Seq),
    ] {
        graph.add_edge(from, to, kind);
    }

    let ssa = lower_to_ssa(&graph, entry, None, true).unwrap();
    let phi_block = ssa
        .blocks
        .iter()
        .find(|b| !b.phis.is_empty())
        .expect("should have a phi block");

    let pred_count = phi_block.preds.len();
    for phi in &phi_block.phis {
        let SsaOp::Phi(ref ops) = phi.op else {
            continue;
        };
        assert_eq!(
            ops.len(),
            pred_count,
            "phi operand count should equal predecessor count in a clean diamond"
        );
    }
}
|
||
|
|
|
||
|
|
// `debug_assert_bfs_ordering` is declared under `#[cfg(debug_assertions)]`,
// so this direct call is compiled under the same condition. Without the
// gate the test would reference a function that is not compiled when debug
// assertions are off.
#[cfg(debug_assertions)]
#[test]
fn bfs_assertion_helper_accepts_valid_orderings() {
    // Direct unit test of the assertion helper with valid input.
    let block_preds = vec![
        vec![],     // block 0: entry (no preds)
        vec![0],    // block 1: pred is block 0 (forward)
        vec![0, 1], // block 2: both forward preds
        vec![],     // block 3: orphan (no preds)
        vec![2],    // block 4: forward pred
    ];
    // Should not panic.
    debug_assert_bfs_ordering(&block_preds);
}
|
||
|
|
|
||
|
|
/// Regression guard for a join fed by both a catch block and the normal
/// control-flow path. When the variable is defined before the try, and is
/// therefore live on *both* incoming edges, the phi at the join must carry
/// exactly two operands — one per predecessor — and the always-on phi
/// assertion must accept it.
#[test]
fn catch_block_join_phi_has_operand_per_live_predecessor() {
    // Normal path:    Entry → define_x → body ─(Seq)→ Join → Exit
    // Exception path:                    body ─(Exception)→ Catch ─(Seq)→ Join
    //
    // `x` is defined in define_x, body, and Catch; Join reads it. The phi for
    // `x` at Join should receive one operand from each of its two
    // predecessors.
    let assigns_x = |is_catch: bool| NodeInfo {
        catch_param: is_catch,
        taint: TaintMeta {
            defines: Some("x".into()),
            ..Default::default()
        },
        ..make_node(StmtKind::Seq)
    };

    let mut graph: Cfg = Graph::new();
    let entry = graph.add_node(make_node(StmtKind::Entry));
    let define_x = graph.add_node(NodeInfo {
        taint: TaintMeta {
            defines: Some("x".into()),
            ..Default::default()
        },
        ..make_node(StmtKind::Seq)
    });
    let try_body = graph.add_node(NodeInfo {
        taint: TaintMeta {
            defines: Some("x".into()),
            ..Default::default()
        },
        ..make_node(StmtKind::Seq)
    });
    let handler = graph.add_node(assigns_x(true));
    let join = graph.add_node(NodeInfo {
        taint: TaintMeta {
            uses: vec!["x".into()],
            ..Default::default()
        },
        ..make_node(StmtKind::Seq)
    });
    let exit = graph.add_node(make_node(StmtKind::Exit));

    for (from, to, kind) in [
        (entry, define_x, EdgeKind::Seq),
        (define_x, try_body, EdgeKind::Seq),
        (try_body, join, EdgeKind::Seq),
        (try_body, handler, EdgeKind::Exception),
        (handler, join, EdgeKind::Seq),
        (join, exit, EdgeKind::Seq),
    ] {
        graph.add_edge(from, to, kind);
    }

    // Lowering must succeed — the phi assertion is active in release too.
    let ssa = lower_to_ssa(&graph, entry, None, true).unwrap();

    // The block that holds a phi for `x` must be the join, with two
    // reachable predecessors, and that phi must have exactly two operands.
    let is_x = |inst: &SsaInst| inst.var_name.as_deref() == Some("x");
    let phi_block = ssa
        .blocks
        .iter()
        .find(|b| {
            b.phis
                .iter()
                .any(|p| is_x(p) && matches!(&p.op, SsaOp::Phi(_)))
        })
        .expect("expected a phi for `x` at the catch/normal join");
    assert_eq!(
        phi_block.preds.len(),
        2,
        "catch/normal join block must have 2 predecessors, got {}",
        phi_block.preds.len()
    );

    let phi_for_x = phi_block.phis.iter().find(|p| is_x(p)).unwrap();
    match &phi_for_x.op {
        SsaOp::Phi(operands) => assert_eq!(
            operands.len(),
            2,
            "phi for `x` at the catch/normal join must have one operand per \
             predecessor, got {}",
            operands.len()
        ),
        _ => panic!("expected SsaOp::Phi for `x`"),
    }
}
|
||
|
|
|
||
|
|
/// Regression guard for the Undef fill pass. A variable defined on only one
/// incoming branch of a join (here, a catch-only binding that rejoins the
/// normal path) must still produce a phi with one operand per predecessor:
/// the edge without a definition refers to the synthesized `SsaOp::Undef`
/// sentinel instead of being left out.
#[test]
fn partial_phi_edge_fills_with_undef_sentinel() {
    // Normal path:    Entry → Body ─(Seq)→ Join → Exit
    // Exception path:         Body ─(Exception)→ Catch (defines `e`) ─(Seq)→ Join
    //
    // `e` exists only on the exception path; along Body → Join nothing
    // defines it. The phi for `e` at Join must still have two operands, the
    // Body-side one pointing at the Undef sentinel.
    let mut graph: Cfg = Graph::new();
    let entry = graph.add_node(make_node(StmtKind::Entry));
    let try_body = graph.add_node(make_node(StmtKind::Seq));
    let handler = graph.add_node(NodeInfo {
        catch_param: true,
        taint: TaintMeta {
            defines: Some("e".into()),
            ..Default::default()
        },
        ..make_node(StmtKind::Seq)
    });
    let join = graph.add_node(NodeInfo {
        taint: TaintMeta {
            uses: vec!["e".into()],
            ..Default::default()
        },
        ..make_node(StmtKind::Seq)
    });
    let exit = graph.add_node(make_node(StmtKind::Exit));

    for (from, to, kind) in [
        (entry, try_body, EdgeKind::Seq),
        (try_body, join, EdgeKind::Seq),
        (try_body, handler, EdgeKind::Exception),
        (handler, join, EdgeKind::Seq),
        (join, exit, EdgeKind::Seq),
    ] {
        graph.add_edge(from, to, kind);
    }

    let ssa = lower_to_ssa(&graph, entry, None, true).unwrap();

    // Locate the phi for `e` and the block that hosts it.
    let is_e = |inst: &SsaInst| inst.var_name.as_deref() == Some("e");
    let phi_block = ssa
        .blocks
        .iter()
        .find(|b| b.phis.iter().any(|p| is_e(p)))
        .expect("expected a phi for `e`");
    let phi_for_e = phi_block.phis.iter().find(|p| is_e(p)).unwrap();
    let SsaOp::Phi(ref operands) = phi_for_e.op else {
        panic!("expected SsaOp::Phi for `e`");
    };

    // Strict invariant: one operand per predecessor.
    assert_eq!(
        operands.len(),
        phi_block.preds.len(),
        "phi for `e` must have one operand per predecessor",
    );

    // Resolve each operand value to the instruction that defines it (phis
    // before body, block by block) and look for an Undef among them.
    let defined_by_undef = |v: SsaValue| -> bool {
        ssa.blocks
            .iter()
            .flat_map(|b| b.phis.iter().chain(b.body.iter()))
            .find(|inst| inst.value == v)
            .is_some_and(|inst| matches!(inst.op, SsaOp::Undef))
    };
    let any_undef = operands.iter().any(|(_, v)| defined_by_undef(*v));
    assert!(
        any_undef,
        "phi for `e` at the catch-join must reference SsaOp::Undef \
         on the normal-path predecessor edge",
    );
}
|
||
|
|
|
||
|
|
#[test]
fn phi_assertion_helper_accepts_exact_operand_count() {
    // Direct call into the helper: block 1 has predecessors {0, 2} and its
    // phi names exactly those two blocks, so nothing may panic.
    let anchor = NodeIndex::new(0);

    let entry_block = SsaBlock {
        id: BlockId(0),
        phis: Vec::new(),
        body: Vec::new(),
        terminator: Terminator::Goto(BlockId(1)),
        preds: smallvec::smallvec![],
        succs: smallvec::smallvec![BlockId(1)],
    };

    let x_phi = SsaInst {
        value: SsaValue(0),
        op: SsaOp::Phi(smallvec::smallvec![
            (BlockId(0), SsaValue(1)),
            (BlockId(2), SsaValue(2)),
        ]),
        cfg_node: anchor,
        var_name: Some("x".into()),
        span: (0, 0),
    };
    let join_block = SsaBlock {
        id: BlockId(1),
        phis: vec![x_phi],
        body: Vec::new(),
        terminator: Terminator::Unreachable,
        preds: smallvec::smallvec![BlockId(0), BlockId(2)],
        succs: smallvec::smallvec![],
    };

    let side_block = SsaBlock {
        id: BlockId(2),
        phis: Vec::new(),
        body: Vec::new(),
        terminator: Terminator::Goto(BlockId(1)),
        preds: smallvec::smallvec![BlockId(0)],
        succs: smallvec::smallvec![BlockId(1)],
    };

    let blocks = [entry_block, join_block, side_block];
    let preds_by_block: Vec<Vec<usize>> = vec![vec![], vec![0, 2], vec![0]];
    assert_phi_operand_counts(&blocks, &preds_by_block);
}
|
||
|
|
|
||
|
|
#[test]
#[should_panic(expected = "SSA phi operand count does not match predecessor count")]
fn phi_assertion_helper_rejects_more_operands_than_preds() {
    // Three operands against two predecessors: the surplus operand names a
    // block that is not a predecessor. That is unsound, because downstream
    // consumers would either panic on the lookup or silently feed garbage
    // taint into the join. The strict-equality count check rejects it.
    let anchor = NodeIndex::new(0);

    let entry_block = SsaBlock {
        id: BlockId(0),
        phis: Vec::new(),
        body: Vec::new(),
        terminator: Terminator::Goto(BlockId(1)),
        preds: smallvec::smallvec![],
        succs: smallvec::smallvec![BlockId(1)],
    };

    let oversized_phi = SsaInst {
        value: SsaValue(0),
        op: SsaOp::Phi(smallvec::smallvec![
            (BlockId(0), SsaValue(1)),
            (BlockId(2), SsaValue(2)),
            (BlockId(3), SsaValue(3)),
        ]),
        cfg_node: anchor,
        var_name: Some("x".into()),
        span: (0, 0),
    };
    let join_block = SsaBlock {
        id: BlockId(1),
        phis: vec![oversized_phi],
        body: Vec::new(),
        terminator: Terminator::Unreachable,
        preds: smallvec::smallvec![BlockId(0), BlockId(2)],
        succs: smallvec::smallvec![],
    };

    let blocks = [entry_block, join_block];
    let preds_by_block: Vec<Vec<usize>> = vec![vec![], vec![0, 2]];
    assert_phi_operand_counts(&blocks, &preds_by_block);
}
|
||
|
|
|
||
|
|
#[test]
#[should_panic(expected = "SSA phi operand count does not match predecessor count")]
fn phi_assertion_helper_rejects_fewer_operands_than_preds() {
    // One operand against two predecessors breaks strict equality.
    // `fill_undef_phi_operands` is responsible for plugging every missing
    // slot with the Undef sentinel, so a finished body should never contain
    // such a gap; this test guards that post-pass.
    let anchor = NodeIndex::new(0);

    let entry_block = SsaBlock {
        id: BlockId(0),
        phis: Vec::new(),
        body: Vec::new(),
        terminator: Terminator::Goto(BlockId(1)),
        preds: smallvec::smallvec![],
        succs: smallvec::smallvec![BlockId(1)],
    };

    let short_phi = SsaInst {
        value: SsaValue(0),
        op: SsaOp::Phi(smallvec::smallvec![(BlockId(0), SsaValue(1))]),
        cfg_node: anchor,
        var_name: Some("e".into()),
        span: (0, 0),
    };
    let join_block = SsaBlock {
        id: BlockId(1),
        phis: vec![short_phi],
        body: Vec::new(),
        terminator: Terminator::Unreachable,
        preds: smallvec::smallvec![BlockId(0), BlockId(2)],
        succs: smallvec::smallvec![],
    };

    let blocks = [entry_block, join_block];
    let preds_by_block: Vec<Vec<usize>> = vec![vec![], vec![0, 2]];
    assert_phi_operand_counts(&blocks, &preds_by_block);
}
|
||
|
|
|
||
|
|
#[test]
#[should_panic(expected = "SSA phi operand references nonexistent predecessor")]
fn phi_assertion_helper_rejects_wrong_pred_block() {
    // Operand count and predecessor count agree here (two each), but the
    // second operand names BlockId(3) while the block's predecessors are
    // BlockId(0) and BlockId(2). Referencing a block that is not a
    // predecessor must fail the invariant as well.
    let placeholder_node = NodeIndex::new(0);

    // Block 0: no phis, unconditional jump into block 1.
    let entry_block = SsaBlock {
        id: BlockId(0),
        phis: vec![],
        body: vec![],
        terminator: Terminator::Goto(BlockId(1)),
        preds: smallvec::smallvec![],
        succs: smallvec::smallvec![BlockId(1)],
    };

    // Two operands, the second of which points at a non-predecessor.
    let misrouted_phi = SsaInst {
        value: SsaValue(0),
        op: SsaOp::Phi(smallvec::smallvec![
            (BlockId(0), SsaValue(1)),
            (BlockId(3), SsaValue(2)),
        ]),
        cfg_node: placeholder_node,
        var_name: Some("x".into()),
        span: (0, 0),
    };

    // Block 1: predecessors are blocks 0 and 2.
    let merge_block = SsaBlock {
        id: BlockId(1),
        phis: vec![misrouted_phi],
        body: vec![],
        terminator: Terminator::Unreachable,
        preds: smallvec::smallvec![BlockId(0), BlockId(2)],
        succs: smallvec::smallvec![],
    };

    // Per-block predecessor lists, indexed by block position.
    let pred_table = vec![vec![], vec![0, 2]];

    assert_phi_operand_counts(&[entry_block, merge_block], &pred_table);
}
|
||
|
|
|
||
|
|
#[test]
fn three_successor_collapse_produces_switch() {
    // A single CFG node fans out to three successors. The structured
    // `Terminator::Switch` replaced the old `Goto(first)` collapse, so
    // every target has to be visible on the terminator itself and not
    // only on `block.succs`.
    let mut cfg: Cfg = Graph::new();
    let entry = cfg.add_node(make_node(StmtKind::Entry));
    let branch = cfg.add_node(make_node(StmtKind::If));
    let arm_a = cfg.add_node(make_node(StmtKind::Seq));
    let arm_b = cfg.add_node(make_node(StmtKind::Seq));
    let arm_c = cfg.add_node(make_node(StmtKind::Seq));
    let exit = cfg.add_node(make_node(StmtKind::Exit));

    // Edge insertion order is kept stable: entry → branch, the three
    // fan-out edges, then the three joins into `exit`.
    cfg.add_edge(entry, branch, EdgeKind::Seq);
    for (arm, kind) in [
        (arm_a, EdgeKind::True),
        (arm_b, EdgeKind::False),
        (arm_c, EdgeKind::Seq),
    ] {
        cfg.add_edge(branch, arm, kind);
    }
    for arm in [arm_a, arm_b, arm_c] {
        cfg.add_edge(arm, exit, EdgeKind::Seq);
    }

    let ssa = lower_to_ssa(&cfg, entry, None, true).unwrap();
    assert!(!ssa.blocks.is_empty());

    let switch_block = ssa
        .blocks
        .iter()
        .find(|blk| blk.succs.len() >= 3 && matches!(blk.terminator, Terminator::Switch { .. }))
        .expect("expected a block with a Switch terminator and ≥3 succs");

    assert_eq!(
        switch_block.succs.len(),
        3,
        "≥3-successor lowering must retain all succs on block.succs, got {:?}",
        switch_block.succs
    );

    if let Terminator::Switch {
        targets, default, ..
    } = &switch_block.terminator
    {
        // The default arm is the first successor (deterministic
        // ordering); the remaining N-1 successors fill `targets` in order.
        assert_eq!(
            *default, switch_block.succs[0],
            "Switch default must match succs[0]"
        );
        assert_eq!(
            targets.len(),
            switch_block.succs.len() - 1,
            "Switch targets must cover every succ except default"
        );
        // Lengths were just checked, so pairing targets with succs[1..]
        // visits every target exactly once.
        let non_default_succs = switch_block.succs.iter().skip(1);
        for (i, (target, succ)) in targets.iter().zip(non_default_succs).enumerate() {
            assert_eq!(
                *target,
                *succ,
                "Switch target[{i}] must match succs[{}]",
                i + 1
            );
        }
    }
}
|
||
|
|
|
||
|
|
#[test]
fn normal_two_successor_produces_branch() {
    // Regression guard: an ordinary two-way split must still lower to a
    // `Terminator::Branch`.
    let mut cfg: Cfg = Graph::new();

    // Both arms are `Seq` nodes that define the variable `x`.
    let defines_x = || NodeInfo {
        taint: TaintMeta {
            defines: Some("x".into()),
            ..Default::default()
        },
        ..make_node(StmtKind::Seq)
    };

    let entry = cfg.add_node(make_node(StmtKind::Entry));
    let cond = cfg.add_node(make_node(StmtKind::If));
    let then_arm = cfg.add_node(defines_x());
    let else_arm = cfg.add_node(defines_x());
    let exit = cfg.add_node(make_node(StmtKind::Exit));

    cfg.add_edge(entry, cond, EdgeKind::Seq);
    cfg.add_edge(cond, then_arm, EdgeKind::True);
    cfg.add_edge(cond, else_arm, EdgeKind::False);
    cfg.add_edge(then_arm, exit, EdgeKind::Seq);
    cfg.add_edge(else_arm, exit, EdgeKind::Seq);

    let ssa = lower_to_ssa(&cfg, entry, None, true).unwrap();

    assert!(
        ssa.blocks
            .iter()
            .any(|blk| matches!(blk.terminator, Terminator::Branch { .. })),
        "normal 2-successor case must produce Branch, not Goto"
    );
}
|
||
|
|
|
||
|
|
/// Regression: any block that holds an explicit Return CFG node has to end
/// in [`Terminator::Return`] rather than a [`Terminator::Goto`] into a
/// synthetic exit block.
///
/// Before the fix, the bookkeeping `Return → fn_exit` `Seq` edge pushed
/// early-return blocks into the single-successor `Goto` arm, and the
/// fall-through tail expression's body was merged into the shared exit
/// block — so every early-return path looked as if it also ran the tail.
/// The CFG built here mirrors the `if cond { return X; } Y` shape that
/// motivated the fix.
#[test]
fn early_return_block_terminates_with_return_not_goto_to_exit() {
    let mut cfg: Cfg = Graph::new();
    let entry = cfg.add_node(make_node(StmtKind::Entry));

    // The condition reads `x`, modelling a parameter-style external use.
    let guard = cfg.add_node(NodeInfo {
        taint: TaintMeta {
            uses: vec!["x".into()],
            ..Default::default()
        },
        ..make_node(StmtKind::If)
    });

    // True arm: return of a constant. Empty `uses` together with
    // `const_text = Some(..)` selects the literal-return path, so the
    // block emits a synthetic Const + Return(Some(_)) — the same shape
    // that `return None` / `return String::new()` yields in real Rust.
    let early_ret = cfg.add_node(NodeInfo {
        taint: TaintMeta {
            const_text: Some("\"\"".to_string()),
            ..Default::default()
        },
        ..make_node(StmtKind::Return)
    });

    // False arm: tail expression defining `y` (the implicit function
    // return value) from `x`.
    let tail = cfg.add_node(NodeInfo {
        taint: TaintMeta {
            defines: Some("y".into()),
            uses: vec!["x".into()],
            ..Default::default()
        },
        ..make_node(StmtKind::Seq)
    });
    let fn_exit = cfg.add_node(make_node(StmtKind::Exit));

    cfg.add_edge(entry, guard, EdgeKind::Seq);
    cfg.add_edge(guard, early_ret, EdgeKind::True);
    cfg.add_edge(guard, tail, EdgeKind::False);
    // Real CFG construction in `build_cfg` wires Return / Throw to
    // fn_exit with a Seq edge, so the lowering must cope with that
    // bookkeeping edge being present.
    cfg.add_edge(early_ret, fn_exit, EdgeKind::Seq);
    cfg.add_edge(tail, fn_exit, EdgeKind::Seq);

    let ssa = lower_to_ssa(&cfg, entry, None, true).unwrap();

    // Find the block that carries the early-return CFG node; it has to
    // end in Return, not in a Goto(_) to the shared exit block.
    let early_block = ssa
        .blocks
        .iter()
        .find(|blk| {
            blk.body.iter().any(|inst| inst.cfg_node == early_ret)
                || blk.phis.iter().any(|inst| inst.cfg_node == early_ret)
        })
        .expect("early-return CFG node must live in some SSA block");
    assert!(
        matches!(early_block.terminator, Terminator::Return(_)),
        "early-return block must terminate with Return, got {:?}",
        early_block.terminator
    );
    assert!(
        early_block.succs.is_empty(),
        "early-return block must have no successors at the block level, \
         got succs = {:?}",
        early_block.succs
    );

    // The fall-through (tail) block must NOT list the early-return block
    // among its predecessors. Before the fix both paths merged into the
    // shared fn_exit block, making the tail's body reachable from the
    // early-return path — the merged-return defect.
    let tail_block = ssa
        .blocks
        .iter()
        .find(|blk| {
            blk.body.iter().any(|inst| inst.cfg_node == tail)
                || blk.phis.iter().any(|inst| inst.cfg_node == tail)
        })
        .expect("tail CFG node must live in some SSA block");
    let early_block_id = early_block.id;
    assert!(
        !tail_block.preds.contains(&early_block_id),
        "tail block must not have early-return block as a predecessor; \
         merged-return defect would re-emerge. tail.preds = {:?}, \
         early_block_id = {:?}",
        tail_block.preds,
        early_block_id
    );
}
|
||
|
|
|
||
|
|
/// Regression: for an OR-chain rejection arm such as
/// `if a || b || c { return X; } Y`, the rejection body must end in a
/// `Terminator::Return(_)` and report `succs.is_empty()`.
///
/// Before the fix, the rejection body's `String::new()` Call sat in a block
/// whose sole successor was the merged tail, which dropped the early-return
/// semantics altogether and diluted per-return-path PathFact narrowing.
#[test]
fn or_chain_rejection_block_terminates_with_return() {
    use crate::cfg::build_cfg;

    let src = br#"
        fn sanitize_path(s: &str) -> String {
            if s.contains("..") || s.starts_with('/') || s.starts_with('\\') {
                return String::new();
            }
            s.to_string()
        }
    "#;

    let rust_lang = tree_sitter::Language::from(tree_sitter_rust::LANGUAGE);
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&rust_lang).unwrap();
    let tree = parser.parse(src.as_slice(), None).unwrap();
    let file_cfg = build_cfg(&tree, src.as_slice(), "rust", "test.rs", None);

    // Use the second body when more than one exists; otherwise fall back
    // to the first.
    let body = if file_cfg.bodies.len() > 1 {
        &file_cfg.bodies[1]
    } else {
        file_cfg.first_body()
    };
    let cfg = &body.graph;
    let entry = body.entry;

    // Identify the rejection arm by its `String::new()` Call node rather
    // than by block position, so the assertions stay meaningful if block
    // ordering shifts. When several nodes match, the one with the highest
    // node index is used.
    let rejection_call: NodeIndex = cfg
        .node_indices()
        .rev()
        .find(|&idx| {
            let info = &cfg[idx];
            info.kind == StmtKind::Call
                && matches!(
                    &info.call.callee,
                    Some(callee) if callee.ends_with("String::new")
                )
        })
        .expect("CFG must contain a String::new() Call node for the rejection arm");

    let ssa = lower_to_ssa(cfg, entry, None, true).expect("SSA lowering should succeed");

    // The SSA block that holds the String::new() Call is the
    // rejection-arm block.
    let rejection_block = ssa
        .blocks
        .iter()
        .find(|blk| {
            blk.body.iter().any(|inst| inst.cfg_node == rejection_call)
                || blk.phis.iter().any(|inst| inst.cfg_node == rejection_call)
        })
        .expect("rejection-arm Call must live in some SSA block");

    assert!(
        rejection_block.succs.is_empty(),
        "rejection-arm block must have no block-level successors after \
         return-frontier strip; got succs = {:?}",
        rejection_block.succs
    );
    assert!(
        matches!(rejection_block.terminator, Terminator::Return(_)),
        "rejection-arm block must terminate with Terminator::Return; got {:?}",
        rejection_block.terminator
    );
}
|
||
|
|
|
||
|
|
/// Cross-language regression: the merged-return defect closed by the Rust
/// fix must not show up in C either.
///
/// The C OR-chain shape taken from
/// `tests/benchmark/corpus/c/safe/safe_direct_path_sanitizer.c` contains a
/// rejection arm (`return ""`) as well as a tail return (`return s`). Each
/// of them must lower to a block whose terminator is
/// `Terminator::Return(_)`.
#[test]
fn c_or_chain_both_return_arms_terminate_with_return() {
    use crate::cfg::build_cfg;

    let src = br#"
        const char *sanitize_path(const char *s) {
            if (strstr(s, "..") != NULL || s[0] == '/' || s[0] == '\\') {
                return "";
            }
            return s;
        }
    "#;

    let c_lang = tree_sitter::Language::from(tree_sitter_c::LANGUAGE);
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&c_lang).unwrap();
    let tree = parser.parse(src.as_slice(), None).unwrap();
    let file_cfg = build_cfg(&tree, src.as_slice(), "c", "test.c", None);

    let body = file_cfg.first_body();
    let cfg = &body.graph;
    let entry = body.entry;

    let ssa = lower_to_ssa(cfg, entry, None, true).expect("SSA lowering should succeed");

    // Gather every block that ends in a Return terminator.
    let return_blocks: Vec<&SsaBlock> = ssa
        .blocks
        .iter()
        .filter(|blk| matches!(blk.terminator, Terminator::Return(_)))
        .collect();

    assert!(
        return_blocks.len() >= 2,
        "Expected ≥2 Return-terminated blocks (rejection arm + tail); got {}: {:?}",
        return_blocks.len(),
        ssa.blocks
            .iter()
            .map(|blk| (blk.id, &blk.terminator))
            .collect::<Vec<_>>()
    );

    // No fall-through past a Return: each such block must carry an empty
    // successor list.
    for blk in return_blocks.iter() {
        assert!(
            blk.succs.is_empty(),
            "Return-terminated block id={:?} has succs={:?}",
            blk.id,
            blk.succs
        );
    }
}
|
||
|
|
}
|