use std::collections::HashMap;

use super::ir::*;
use crate::cfg::Cfg;
use crate::labels::DataLabel;

/// Remove dead definitions from an SSA body.
///
/// An instruction is removed when its `SsaValue` has no uses anywhere in the
/// body (instruction operands and block terminators both count), unless it
/// falls into one of the always-preserved categories checked by `is_dead`:
/// - `Source` (taint origin, must survive for correctness)
/// - `Call` (may have side effects)
/// - `CatchParam` (exception binding)
/// - any instruction whose CFG node carries a Sink, Source, or Sanitizer label
///
/// The sweep repeats until a pass removes nothing, because deleting one
/// definition can leave its operands without any remaining use.
///
/// Returns the number of instructions removed.
pub fn eliminate_dead_defs(body: &mut SsaBody, cfg: &Cfg) -> usize {
    // Total number of phi + body instructions currently held by the SSA body.
    fn inst_count(body: &SsaBody) -> usize {
        body.blocks
            .iter()
            .map(|blk| blk.phis.len() + blk.body.len())
            .sum()
    }

    let initial = inst_count(body);
    let mut remaining = initial;

    loop {
        // Use counts are recomputed per pass so that operands of instructions
        // dropped in the previous pass are seen as unused.
        let use_counts = build_use_counts(body);

        for blk in body.blocks.iter_mut() {
            blk.phis.retain(|inst| !is_dead(inst, &use_counts, cfg));
            blk.body.retain(|inst| !is_dead(inst, &use_counts, cfg));
        }

        let now = inst_count(body);
        if now == remaining {
            // Fixed point: this pass removed nothing.
            break;
        }
        remaining = now;
    }

    initial - remaining
}

/// Build a map of SsaValue → number of uses across all instructions and
/// block terminators.
///
/// Terminator uses must be counted: `Terminator::Return(rv)` references the
/// returned value, `Terminator::Branch { condition, .. }` references the
/// condition variable(s), and `Terminator::Switch { scrutinee, .. }`
/// references the scrutinee. Without counting these, a value used solely by a
/// terminator (the canonical case for short helpers like
/// `def f(s): return s`) is judged dead, and DCE strips every instruction
/// in the body — leaving empty blocks whose terminators reference
/// nonexistent SsaValues, breaking downstream analyses (per-return-path
/// PathFact narrowing, inline-summary extraction, etc.).
fn build_use_counts(body: &SsaBody) -> HashMap<SsaValue, usize> {
    let mut counts: HashMap<SsaValue, usize> = HashMap::new();
    for block in &body.blocks {
        // Operands of phis and ordinary instructions.
        for inst in block.phis.iter().chain(block.body.iter()) {
            for v in inst_used_values(inst) {
                *counts.entry(v).or_insert(0) += 1;
            }
        }
        // Values referenced only by the block terminator still count as uses.
        for v in terminator_used_values(&block.terminator) {
            *counts.entry(v).or_insert(0) += 1;
        }
    }
    counts
}

/// Get all SSA values used by a block terminator.
///
/// - `Return(Some(v))` uses `v`; `Return(None)` uses nothing.
/// - `Branch` uses the variable(s) named by its condition expression, if any;
///   an `Unknown` or absent condition contributes no uses.
/// - `Switch` uses its scrutinee.
/// - `Goto` and `Unreachable` use nothing.
fn terminator_used_values(term: &Terminator) -> Vec<SsaValue> {
    use crate::constraint::lower::{ConditionExpr, Operand};

    match term {
        Terminator::Return(Some(rv)) => vec![*rv],
        Terminator::Return(None) => Vec::new(),
        Terminator::Branch { condition, .. } => match condition.as_deref() {
            Some(ConditionExpr::BoolTest { var }) => vec![*var],
            Some(ConditionExpr::NullCheck { var, .. }) => vec![*var],
            Some(ConditionExpr::TypeCheck { var, .. }) => vec![*var],
            Some(ConditionExpr::Comparison { lhs, rhs, .. }) => {
                // Only operands that are SSA values count as uses.
                let mut out = Vec::new();
                if let Operand::Value(v) = lhs {
                    out.push(*v);
                }
                if let Operand::Value(v) = rhs {
                    out.push(*v);
                }
                out
            }
            Some(ConditionExpr::Unknown) | None => Vec::new(),
        },
        Terminator::Switch { scrutinee, .. } => vec![*scrutinee],
        Terminator::Goto(_) | Terminator::Unreachable => Vec::new(),
    }
}

/// Check if an instruction is dead and safe to remove.
///
/// Returns `false` (keep the instruction) when any of the following holds:
/// - its value has at least one recorded use in `use_counts`;
/// - its op is `Source`, `Call`, or `CatchParam`;
/// - its CFG node carries a Sink, Source, or Sanitizer label.
///
/// An instruction whose CFG node is not present in `cfg` is treated as
/// unlabeled.
fn is_dead(inst: &SsaInst, use_counts: &HashMap<SsaValue, usize>, cfg: &Cfg) -> bool {
    let uses = use_counts.get(&inst.value).copied().unwrap_or(0);
    if uses > 0 {
        return false;
    }

    // Never remove side-effectful or semantically required instructions.
    if matches!(
        &inst.op,
        SsaOp::Source | SsaOp::Call { .. } | SsaOp::CatchParam
    ) {
        return false;
    }

    // Never remove instructions whose CFG node has Sink, Source, or Sanitizer labels.
    let has_taint_label = cfg.node_weight(inst.cfg_node).is_some_and(|info| {
        info.taint.labels.iter().any(|l| {
            matches!(
                l,
                DataLabel::Sink(_) | DataLabel::Source(_) | DataLabel::Sanitizer(_)
            )
        })
    });

    !has_taint_label
}

/// Get all SSA values used by an instruction.
fn inst_used_values(inst: &SsaInst) -> Vec { match &inst.op { SsaOp::Phi(operands) => operands.iter().map(|(_, v)| *v).collect(), SsaOp::Assign(uses) => uses.to_vec(), SsaOp::Call { args, receiver, .. } => { let mut vals = Vec::new(); if let Some(rv) = receiver { vals.push(*rv); } for arg in args { vals.extend(arg.iter()); } vals } SsaOp::Source | SsaOp::Const(_) | SsaOp::Param { .. } | SsaOp::SelfParam | SsaOp::CatchParam | SsaOp::Nop | SsaOp::Undef => Vec::new(), } } #[cfg(test)] mod tests { use super::*; use crate::cfg::{NodeInfo, StmtKind}; use petgraph::Graph; use smallvec::SmallVec; fn make_cfg_node(kind: StmtKind) -> NodeInfo { NodeInfo { kind, ..Default::default() } } #[test] fn dead_const_removed() { // v0 = const("42") — unused, should be removed // v1 = source() — must survive even if unused let mut cfg: Cfg = Graph::new(); let n0 = cfg.add_node(make_cfg_node(StmtKind::Seq)); let n1 = cfg.add_node(make_cfg_node(StmtKind::Seq)); let mut body = SsaBody { blocks: vec![SsaBlock { id: BlockId(0), phis: vec![], body: vec![ SsaInst { value: SsaValue(0), op: SsaOp::Const(Some("42".into())), cfg_node: n0, var_name: Some("x".into()), span: (0, 2), }, SsaInst { value: SsaValue(1), op: SsaOp::Source, cfg_node: n1, var_name: Some("tainted".into()), span: (3, 10), }, ], terminator: Terminator::Return(None), preds: SmallVec::new(), succs: SmallVec::new(), }], entry: BlockId(0), value_defs: vec![ ValueDef { var_name: Some("x".into()), cfg_node: n0, block: BlockId(0), }, ValueDef { var_name: Some("tainted".into()), cfg_node: n1, block: BlockId(0), }, ], cfg_node_map: [(n0, SsaValue(0)), (n1, SsaValue(1))].into_iter().collect(), exception_edges: vec![], }; let removed = eliminate_dead_defs(&mut body, &cfg); assert_eq!(removed, 1); assert_eq!(body.blocks[0].body.len(), 1); // Source survives assert!(matches!(body.blocks[0].body[0].op, SsaOp::Source)); } #[test] fn dead_sanitizer_label_preserved() { // v0 has a Sanitizer label on its CFG node — must survive even if unused 
use crate::labels::{Cap, DataLabel}; let mut cfg: Cfg = Graph::new(); let n0 = cfg.add_node(NodeInfo { taint: crate::cfg::TaintMeta { labels: smallvec::smallvec![DataLabel::Sanitizer(Cap::HTML_ESCAPE)], ..Default::default() }, ..make_cfg_node(StmtKind::Seq) }); let mut body = SsaBody { blocks: vec![SsaBlock { id: BlockId(0), phis: vec![], body: vec![SsaInst { value: SsaValue(0), op: SsaOp::Assign(SmallVec::new()), cfg_node: n0, var_name: Some("sanitized".into()), span: (0, 5), }], terminator: Terminator::Return(None), preds: SmallVec::new(), succs: SmallVec::new(), }], entry: BlockId(0), value_defs: vec![ValueDef { var_name: Some("sanitized".into()), cfg_node: n0, block: BlockId(0), }], cfg_node_map: [(n0, SsaValue(0))].into_iter().collect(), exception_edges: vec![], }; let removed = eliminate_dead_defs(&mut body, &cfg); assert_eq!( removed, 0, "Sanitizer-labeled instruction must not be removed" ); assert_eq!(body.blocks[0].body.len(), 1); } #[test] fn dead_source_label_preserved() { // v0 has a Source label on its CFG node — must survive even if unused use crate::labels::{Cap, DataLabel}; let mut cfg: Cfg = Graph::new(); let n0 = cfg.add_node(NodeInfo { taint: crate::cfg::TaintMeta { labels: smallvec::smallvec![DataLabel::Source(Cap::all())], ..Default::default() }, ..make_cfg_node(StmtKind::Seq) }); let mut body = SsaBody { blocks: vec![SsaBlock { id: BlockId(0), phis: vec![], body: vec![SsaInst { value: SsaValue(0), op: SsaOp::Assign(SmallVec::new()), cfg_node: n0, var_name: Some("src".into()), span: (0, 3), }], terminator: Terminator::Return(None), preds: SmallVec::new(), succs: SmallVec::new(), }], entry: BlockId(0), value_defs: vec![ValueDef { var_name: Some("src".into()), cfg_node: n0, block: BlockId(0), }], cfg_node_map: [(n0, SsaValue(0))].into_iter().collect(), exception_edges: vec![], }; let removed = eliminate_dead_defs(&mut body, &cfg); assert_eq!(removed, 0, "Source-labeled instruction must not be removed"); } #[test] fn 
dead_sink_label_still_preserved() { // Regression: Sink-labeled dead instructions must still be kept use crate::labels::{Cap, DataLabel}; let mut cfg: Cfg = Graph::new(); let n0 = cfg.add_node(NodeInfo { taint: crate::cfg::TaintMeta { labels: smallvec::smallvec![DataLabel::Sink(Cap::SQL_QUERY)], ..Default::default() }, ..make_cfg_node(StmtKind::Seq) }); let mut body = SsaBody { blocks: vec![SsaBlock { id: BlockId(0), phis: vec![], body: vec![SsaInst { value: SsaValue(0), op: SsaOp::Assign(SmallVec::new()), cfg_node: n0, var_name: Some("q".into()), span: (0, 2), }], terminator: Terminator::Return(None), preds: SmallVec::new(), succs: SmallVec::new(), }], entry: BlockId(0), value_defs: vec![ValueDef { var_name: Some("q".into()), cfg_node: n0, block: BlockId(0), }], cfg_node_map: [(n0, SsaValue(0))].into_iter().collect(), exception_edges: vec![], }; let removed = eliminate_dead_defs(&mut body, &cfg); assert_eq!(removed, 0, "Sink-labeled instruction must not be removed"); } #[test] fn dead_unlabeled_assign_still_removed() { // Negative test: unlabeled dead assignments must still be eliminated let mut cfg: Cfg = Graph::new(); let n0 = cfg.add_node(make_cfg_node(StmtKind::Seq)); let mut body = SsaBody { blocks: vec![SsaBlock { id: BlockId(0), phis: vec![], body: vec![SsaInst { value: SsaValue(0), op: SsaOp::Assign(SmallVec::new()), cfg_node: n0, var_name: Some("dead".into()), span: (0, 4), }], terminator: Terminator::Return(None), preds: SmallVec::new(), succs: SmallVec::new(), }], entry: BlockId(0), value_defs: vec![ValueDef { var_name: Some("dead".into()), cfg_node: n0, block: BlockId(0), }], cfg_node_map: [(n0, SsaValue(0))].into_iter().collect(), exception_edges: vec![], }; let removed = eliminate_dead_defs(&mut body, &cfg); assert_eq!(removed, 1, "unlabeled dead assignment must be removed"); assert!(body.blocks[0].body.is_empty()); } #[test] fn used_def_preserved() { // v0 = const("42"), v1 = assign(v0) — v0 is used, both survive let mut cfg: Cfg = Graph::new(); let 
n0 = cfg.add_node(make_cfg_node(StmtKind::Seq)); let n1 = cfg.add_node(make_cfg_node(StmtKind::Seq)); let mut body = SsaBody { blocks: vec![SsaBlock { id: BlockId(0), phis: vec![], body: vec![ SsaInst { value: SsaValue(0), op: SsaOp::Const(Some("42".into())), cfg_node: n0, var_name: Some("x".into()), span: (0, 2), }, SsaInst { value: SsaValue(1), op: SsaOp::Assign(SmallVec::from_elem(SsaValue(0), 1)), cfg_node: n1, var_name: Some("y".into()), span: (3, 5), }, ], terminator: Terminator::Return(None), preds: SmallVec::new(), succs: SmallVec::new(), }], entry: BlockId(0), value_defs: vec![ ValueDef { var_name: Some("x".into()), cfg_node: n0, block: BlockId(0), }, ValueDef { var_name: Some("y".into()), cfg_node: n1, block: BlockId(0), }, ], cfg_node_map: [(n0, SsaValue(0)), (n1, SsaValue(1))].into_iter().collect(), exception_edges: vec![], }; let removed = eliminate_dead_defs(&mut body, &cfg); // v1 is dead (unused), but v0 is used by v1 so on first pass only v1 removed, // then v0 becomes dead on second pass assert_eq!(removed, 2); assert_eq!(body.blocks[0].body.len(), 0); } }