#![allow(clippy::collapsible_if)] use std::collections::HashMap; use super::ir::*; use crate::cfg::Cfg; /// Run copy propagation on an SSA body. /// /// Identifies `Assign([single_use])` instructions where the CFG node has no /// labels (i.e., no semantic significance like sanitizer/source), then rewrites /// all uses of the destination value to use the source value directly. /// /// Returns `(copies_eliminated, resolved_replacement_map)`. The replacement map /// maps each eliminated destination SsaValue to its transitive root source /// SsaValue, used downstream by alias analysis to recover base-variable /// aliasing relationships. pub fn copy_propagate(body: &mut SsaBody, cfg: &Cfg) -> (usize, HashMap) { // 1. Identify copies: Assign with single operand and no labels on CFG node let mut replace_map: HashMap = HashMap::new(); for block in &body.blocks { for inst in &block.body { if let SsaOp::Assign(uses) = &inst.op { if uses.len() == 1 { let src = uses[0]; let info = &cfg[inst.cfg_node]; // Skip if the node has labels — sanitizers, sources, sinks // have semantic meaning that must be preserved. if !info.taint.labels.is_empty() { continue; } // Skip numeric-length reads (`arr.length`, `map.size`, etc.): // the destination is Int-typed (a derived property of the // source) while the source is typically String/Object/ // Unknown. Copy-propagating through this Assign would // erase the Int type fact and defeat HTML_ESCAPE / SQL / // FILE_IO / SHELL sink suppression. if info.is_numeric_length_access { continue; } // Skip Assigns whose CFG node carries a `string_prefix` // (template literals or `"lit" + var` RHS recognised by // `extract_template_prefix`). The abstract-interpretation // `transfer_abstract` consumes that prefix to seed a // StringFact on the Assign's SSA value, which downstream // SSRF suppression reads. Propagating past this Assign // erases the prefix-bearing SSA value: the Call's args get // rewritten to the bare upstream variable (no prefix), and // `is_call_abstract_safe` falls through to a tainted-flow // emission even on safe fixed-host URLs. if info.string_prefix.is_some() { continue; } replace_map.insert(inst.value, src); } } } } if replace_map.is_empty() { return (0, HashMap::new()); } // 2. Build transitive replacement map: chase chains (SSA is acyclic) let mut resolved: HashMap = HashMap::new(); for &dst in replace_map.keys() { let root = resolve_root(dst, &replace_map); resolved.insert(dst, root); } // 3. Rewrite all uses let mut count = 0; for block in &mut body.blocks { // Rewrite phi operands for phi in &mut block.phis { if let SsaOp::Phi(operands) = &mut phi.op { for (_bid, val) in operands.iter_mut() { if let Some(&root) = resolved.get(val) { *val = root; } } } } // Rewrite body instructions for inst in &mut block.body { match &mut inst.op { SsaOp::Assign(uses) => { for val in uses.iter_mut() { if let Some(&root) = resolved.get(val) { *val = root; } } } SsaOp::Call { args, receiver, .. } => { if let Some(rv) = receiver { if let Some(&root) = resolved.get(rv) { *rv = root; } } for arg in args.iter_mut() { for val in arg.iter_mut() { if let Some(&root) = resolved.get(val) { *val = root; } } } } _ => {} } } } // 4. Convert copy instructions to Nop (DCE will clean up) for block in &mut body.blocks { for inst in &mut block.body { if resolved.contains_key(&inst.value) { inst.op = SsaOp::Nop; count += 1; } } } (count, resolved) } /// Chase the replacement chain to find the root value. fn resolve_root(val: SsaValue, map: &HashMap) -> SsaValue { let mut current = val; // Safety: SSA is acyclic, but cap iterations to be safe for _ in 0..1000 { match map.get(¤t) { Some(&next) if next != current => current = next, _ => break, } } current } #[cfg(test)] mod tests { use super::*; use crate::cfg::{NodeInfo, StmtKind}; use petgraph::Graph; use smallvec::SmallVec; fn make_cfg_node(kind: StmtKind) -> NodeInfo { NodeInfo { kind, ..Default::default() } } #[test] fn simple_copy_eliminated() { // v0 = const("42"), v1 = assign(v0), v2 = assign(v1) let mut cfg: Cfg = Graph::new(); let n0 = cfg.add_node(make_cfg_node(StmtKind::Seq)); let n1 = cfg.add_node(make_cfg_node(StmtKind::Seq)); let n2 = cfg.add_node(make_cfg_node(StmtKind::Seq)); let mut body = SsaBody { blocks: vec![SsaBlock { id: BlockId(0), phis: vec![], body: vec![ SsaInst { value: SsaValue(0), op: SsaOp::Const(Some("42".into())), cfg_node: n0, var_name: Some("x".into()), span: (0, 2), }, SsaInst { value: SsaValue(1), op: SsaOp::Assign(SmallVec::from_elem(SsaValue(0), 1)), cfg_node: n1, var_name: Some("y".into()), span: (3, 5), }, SsaInst { value: SsaValue(2), op: SsaOp::Assign(SmallVec::from_elem(SsaValue(1), 1)), cfg_node: n2, var_name: Some("z".into()), span: (6, 8), }, ], terminator: Terminator::Return(None), preds: SmallVec::new(), succs: SmallVec::new(), }], entry: BlockId(0), value_defs: vec![ ValueDef { var_name: Some("x".into()), cfg_node: n0, block: BlockId(0), }, ValueDef { var_name: Some("y".into()), cfg_node: n1, block: BlockId(0), }, ValueDef { var_name: Some("z".into()), cfg_node: n2, block: BlockId(0), }, ], cfg_node_map: [(n0, SsaValue(0)), (n1, SsaValue(1)), (n2, SsaValue(2))] .into_iter() .collect(), exception_edges: vec![], }; let (eliminated, copy_map) = copy_propagate(&mut body, &cfg); assert_eq!(eliminated, 2); // Both v1 and v2 should map to v0 (the root) assert_eq!(copy_map.get(&SsaValue(1)), Some(&SsaValue(0))); assert_eq!(copy_map.get(&SsaValue(2)), Some(&SsaValue(0))); // v1 and v2 should be Nop now assert!(matches!(body.blocks[0].body[1].op, SsaOp::Nop)); assert!(matches!(body.blocks[0].body[2].op, SsaOp::Nop)); } }