//! Parameter-granularity points-to analysis. //! //! Produces a [`PointsToSummary`] for a function body by walking the SSA //! once and recording two classes of aliasing: //! //! 1. **Param → Param field writes.** An `obj.field = val` where `obj` //! traces back to parameter `b` and `val` traces back to parameter `a` //! emits a `Param(a) → Param(b)` `MayAlias` edge. This captures the //! `mutating_helper` pattern, the callee mutates a shared heap cell //! through one parameter and the caller observes the mutation through //! its argument for that parameter. //! //! 2. **Param → Return aliases.** `Terminator::Return(v)` where `v` //! traces back to a parameter emits a `Param(i) → Return` edge. This //! captures the `returned_alias` pattern, the callee returns its //! argument unchanged and the caller treats the result as aliasing the //! input. //! //! Field-write detection uses the existing SSA lowering convention: a //! source-level `obj.x = val` is lowered to an `Assign` whose `var_name` //! is the dotted path `"obj.x"`, plus synthetic parent-path Assigns that //! propagate the write up to the base (`"obj"`). See //! [`crate::ssa::lower`]'s "Synthetic base update" block for the //! canonical source. //! //! The analysis is **flow-insensitive** and **bounded**: it does not //! reason about path feasibility, and it stops adding edges once the //! summary's `MAX_ALIAS_EDGES` cap is reached, the overflow flag is //! the conservative fallback that callers honour. use std::collections::{HashMap, HashSet}; use smallvec::SmallVec; use crate::summary::points_to::{AliasKind, AliasPosition, PointsToSummary}; use crate::symbol::Lang; use super::ir::{SsaBody, SsaOp, SsaValue, Terminator}; /// Map an SSA value back to its defining instruction's op. /// /// Local to this module, the taint engine has its own `build_inst_map` /// that also carries receiver info we do not need, and duplicating it /// keeps this analysis independent of that private helper's shape. fn build_op_map(ssa: &SsaBody) -> HashMap { let mut map = HashMap::with_capacity(ssa.num_values()); for block in &ssa.blocks { for inst in block.phis.iter().chain(block.body.iter()) { map.insert(inst.value, inst.op.clone()); } } map } /// Sibling of [`build_op_map`] that captures the optional `var_name` /// recorded on each SSA instruction. Used alongside the op map so a /// [`ParamHit`] can surface the underlying variable name for /// formal-index resolution. fn build_var_name_map(ssa: &SsaBody) -> HashMap> { let mut map = HashMap::with_capacity(ssa.num_values()); for block in &ssa.blocks { for inst in block.phis.iter().chain(block.body.iter()) { map.insert(inst.value, inst.var_name.clone()); } } map } /// Information about an SSA `Param { index }` node needed to resolve /// back to a caller-side positional index via formal-params lookup. #[derive(Clone, Debug)] struct ParamHit { /// The `SsaOp::Param` index as lowered. ssa_index: usize, /// The parameter's variable name (from [`SsaInst::var_name`]). Used /// to map back to the formal-declaration position, the caller's /// `args[i]` slot is keyed by declaration position, not by SSA /// index, and the two can disagree when a formal parameter is /// skipped from SSA lowering (e.g., pure-output params). var_name: Option, } /// Walk Assign/Phi chains to find a backing `Param { index }` SSA op. /// /// Returns the `SsaOp::Param`'s index *and* its var_name so callers can /// resolve the formal-positional index via the name lookup table, the /// two indices can disagree when SSA lowering skips a formal parameter /// (never used as a read), shifting subsequent param indices down. fn trace_to_param_hit( v: SsaValue, op_map: &HashMap, var_names: &HashMap>, visited: &mut HashSet, ) -> Option { if !visited.insert(v) { return None; } match op_map.get(&v)? { SsaOp::Param { index } => Some(ParamHit { ssa_index: *index, var_name: var_names.get(&v).cloned().flatten(), }), SsaOp::Assign(uses) => { for u in uses { if let Some(hit) = trace_to_param_hit(*u, op_map, var_names, visited) { return Some(hit); } } None } SsaOp::Phi(operands) => { for (_, pv) in operands { if let Some(hit) = trace_to_param_hit(*pv, op_map, var_names, visited) { return Some(hit); } } None } // Call produces a fresh identity; Const / Source / CatchParam / // SelfParam / Nop are not param-derived. _ => None, } } /// Resolve a [`ParamHit`] to a caller-side positional index using the /// formal-params name lookup. Falls back to the SSA `index` when no /// name-based match exists (e.g., extractor called without /// `formal_param_names`). fn param_hit_to_formal_index(hit: &ParamHit, params_by_name: &HashMap) -> usize { if let Some(name) = &hit.var_name && let Some(&idx) = params_by_name.get(name) { return idx; } hit.ssa_index } /// Parse the base of a dotted / indexed path into its root name. /// /// * `"obj"` → `"obj"` /// * `"obj.field"` → `"obj"` /// * `"obj.field.sub"` → `"obj"` /// * `"obj[0]"` → `"obj"` /// * `"obj.list[2].name"` → `"obj"` /// /// Used to decide whether a field-style Assign's LHS base names a /// parameter variable, we strip everything after the first separator /// and compare the remainder to the recorded param names. fn base_of_path(name: &str) -> &str { let dot = name.find('.'); let bracket = name.find('['); let end = match (dot, bracket) { (Some(d), Some(b)) => d.min(b), (Some(d), None) => d, (None, Some(b)) => b, (None, None) => return name, }; &name[..end] } /// Local receiver check duplicated to avoid depending on private /// `lower::is_receiver_name`. Must stay in sync with that helper. fn is_receiver_name_local(name: &str) -> bool { matches!(name, "self" | "this") } /// Walk Assign/Phi chains from a return value to decide whether the path /// ends at a fresh container allocation (literal or constructor call). /// /// Returns `true` the first time a qualifying allocation is found. /// Parameter-terminated paths, `Call` ops that are not container /// constructors, and constants that are not container literals all /// return `false`, soundly under-approximating, since the caller will /// simply fall back to the existing `Param(i) → Return` / store-into- /// heap channels when the flag is absent. fn trace_to_fresh_alloc( v: SsaValue, op_map: &HashMap, lang: Option, visited: &mut HashSet, ) -> bool { if !visited.insert(v) { return false; } let Some(op) = op_map.get(&v) else { return false; }; match op { SsaOp::Const(Some(text)) => crate::ssa::heap::is_container_literal_public(text), SsaOp::Call { callee, .. } => lang .map(|l| crate::ssa::heap::is_container_constructor(callee, l)) .unwrap_or(false), SsaOp::Assign(uses) => uses .iter() .any(|u| trace_to_fresh_alloc(*u, op_map, lang, visited)), SsaOp::Phi(operands) => operands .iter() .any(|(_, pv)| trace_to_fresh_alloc(*pv, op_map, lang, visited)), _ => false, } } /// Whether any `Terminator::Return(Some(v))` in the body traces back to a /// fresh container allocation. Invoked once per function; the visited /// set is fresh per return block so distinct returns do not poison each /// other's searches. fn returns_fresh_allocation( ssa: &SsaBody, op_map: &HashMap, lang: Option, ) -> bool { for block in &ssa.blocks { let Terminator::Return(Some(v)) = block.terminator else { continue; }; let mut visited = HashSet::new(); if trace_to_fresh_alloc(v, op_map, lang, &mut visited) { return true; } } false } /// Compute the parameter-granularity points-to summary for a function. /// /// `param_info` carries one `(param_index, param_name, param_ssa_value)` /// tuple per formal parameter that was emitted as [`SsaOp::Param`] in the /// lowered body. The receiver is intentionally excluded, this table /// captures positional parameters only. /// /// `formal_param_names`, when supplied, is the authoritative list of /// declared parameter names in declaration order. It matters for /// **pure-output parameters**: a param like `target` in /// `fn set(target, val): target.data = val` is never *used* in the body /// (only assigned into), so SSA lowering does not emit a `Param` node /// for it and `param_info` will not contain it. Falling back to /// `formal_param_names` lets the base-name lookup still find its index. /// /// `formal_param_count` bounds the parameter indices written to the /// summary: scoped lowering synthesises `Param` ops for module-level /// captures at indices beyond the formal arity, and those must not leak /// into the summary (they would trip `ssa_summary_fits_arity`). pub fn analyse_param_points_to( ssa: &SsaBody, param_info: &[(usize, String, SsaValue)], formal_param_count: usize, formal_param_names: Option<&[String]>, lang: Option, ) -> PointsToSummary { let mut summary = PointsToSummary::empty(); let op_map = build_op_map(ssa); let var_names = build_var_name_map(ssa); // ── 0. Fresh-container return detection ───────────────────────────── // // A return path traces back to either: // * `SsaOp::Const(text)` where `text` is a container literal // (`[]`, `{}`, `new Map()`, …), OR // * `SsaOp::Call { callee, … }` where `callee` matches a known // container constructor for `lang` (`ArrayList`, `dict`, …). // // When at least one return path matches, the callee produces a // caller-visible fresh heap identity on that path, callers // synthesise a `HeapObjectId` keyed on the call result so later // container operations have a stable heap cell. Traces that reach a // parameter are handled by the edge-based `Param(i) → Return` channel // below and do not contribute here; a mixed function emits both. // // Runs before the early-out on `formal_param_count == 0` so pure // factories (zero-param container constructors) still record the // fresh-alloc signal. if returns_fresh_allocation(ssa, &op_map, lang) { summary.returns_fresh_alloc = true; } if formal_param_count == 0 { return summary; } // Build the name→positional-index map. Summary param indices are // *positional*, they match the call-site `args[i]` position, which // excludes the receiver (`self`/`this`). When `formal_param_names` // contains a leading receiver, skip it so the remaining names align // with the SSA `SsaOp::Param { index }` convention. let mut params_by_name: HashMap = HashMap::new(); if let Some(names) = formal_param_names { let mut pos: usize = 0; for name in names { if is_receiver_name_local(name) { continue; } if pos >= formal_param_count { break; } params_by_name.insert(name.clone(), pos); pos += 1; } } // Overlay `param_info` ONLY when formal_param_names was absent. // When formal_param_names is supplied it is the authoritative // declaration-order mapping; SSA param indices can legitimately // diverge (a pure-output param is never emitted, shifting later // indices down), so trusting SSA here would mis-map the caller's // `args[i]` positional slot. if formal_param_names.is_none() { for (idx, name, _) in param_info { params_by_name.insert(name.clone(), *idx); } } // ── 1. Field-store alias edges (Param(a) → Param(b)) ──────────────── // // SSA lowering encodes `obj.field = val` as one or more Assigns whose // `var_name` is the dotted / indexed path. For every such Assign we // look up the root name, check it matches a parameter variable, and // trace each use back to a param for the `Param(a) → Param(b)` edge. for block in &ssa.blocks { for inst in block.body.iter() { let SsaOp::Assign(uses) = &inst.op else { continue; }; let Some(name) = inst.var_name.as_ref() else { continue; }; // Only field/index-style writes encode the base in var_name; // a plain `x = ...` doesn't imply aliasing with `x`'s param. if !name.contains('.') && !name.contains('[') { continue; } let base = base_of_path(name); let Some(&target_idx) = params_by_name.get(base) else { continue; }; if target_idx >= formal_param_count { continue; } for u in uses { let mut visited = HashSet::new(); let Some(hit) = trace_to_param_hit(*u, &op_map, &var_names, &mut visited) else { continue; }; let src_idx = param_hit_to_formal_index(&hit, ¶ms_by_name); if src_idx >= formal_param_count { continue; } if src_idx == target_idx { // Self-alias is uninformative, the caller's // arg-to-itself propagation is already covered by // `param_to_return`/`param_to_sink`. continue; } summary.insert( AliasPosition::Param(src_idx as u32), AliasPosition::Param(target_idx as u32), AliasKind::MayAlias, ); if summary.overflow { return summary; } } } } // ── 2. Return-alias edges (Param(i) → Return) ─────────────────────── // // `Terminator::Return(v)` with `v` tracing back to a parameter means // the call site's result aliases the corresponding argument's heap // identity. Joining across all return blocks is a plain set union. let mut return_param_indices: SmallVec<[usize; 4]> = SmallVec::new(); for block in &ssa.blocks { let Terminator::Return(Some(v)) = block.terminator else { continue; }; let mut visited = HashSet::new(); if let Some(hit) = trace_to_param_hit(v, &op_map, &var_names, &mut visited) { let idx = param_hit_to_formal_index(&hit, ¶ms_by_name); if idx < formal_param_count && !return_param_indices.contains(&idx) { return_param_indices.push(idx); } } } for idx in return_param_indices { summary.insert( AliasPosition::Param(idx as u32), AliasPosition::Return, AliasKind::MayAlias, ); if summary.overflow { return summary; } } summary } #[cfg(test)] mod tests { use super::*; use crate::ssa::ir::{BlockId, SsaBlock, SsaInst}; use petgraph::graph::NodeIndex; use smallvec::smallvec; fn mk_body(blocks: Vec, num_values: u32) -> SsaBody { use crate::ssa::ir::ValueDef; let value_defs = (0..num_values) .map(|_| ValueDef { var_name: None, cfg_node: NodeIndex::new(0), block: BlockId(0), }) .collect(); SsaBody { blocks, entry: BlockId(0), value_defs, cfg_node_map: HashMap::new(), exception_edges: vec![], field_interner: crate::ssa::ir::FieldInterner::default(), field_writes: std::collections::HashMap::new(), synthetic_externals: std::collections::HashSet::new(), } } fn inst(v: u32, op: SsaOp, var_name: Option<&str>) -> SsaInst { SsaInst { value: SsaValue(v), op, cfg_node: NodeIndex::new(0), var_name: var_name.map(String::from), span: (0, 0), } } #[test] fn field_write_param_to_param_emits_edge() { // Simulate: // fn f(a, b): // b.data = a # Assign var_name="b.data" uses=[a_ssa] // synthetic: b = b.data # Assign var_name="b" uses=[assign0] // return let block = SsaBlock { id: BlockId(0), phis: vec![], body: vec![ inst(0, SsaOp::Param { index: 0 }, Some("a")), inst(1, SsaOp::Param { index: 1 }, Some("b")), inst(2, SsaOp::Assign(smallvec![SsaValue(0)]), Some("b.data")), inst(3, SsaOp::Assign(smallvec![SsaValue(2)]), Some("b")), ], terminator: Terminator::Return(None), preds: smallvec![], succs: smallvec![], }; let body = mk_body(vec![block], 4); let pinfo = vec![ (0usize, "a".to_string(), SsaValue(0)), (1usize, "b".to_string(), SsaValue(1)), ]; let s = analyse_param_points_to(&body, &pinfo, 2, None, None); assert!(!s.overflow, "unexpected overflow: {s:?}"); assert!( s.edges.iter().any(|e| e.source == AliasPosition::Param(0) && e.target == AliasPosition::Param(1) && e.kind == AliasKind::MayAlias), "expected Param(0) → Param(1) edge, got {s:?}" ); } #[test] fn return_alias_emits_edge() { // fn f(a): return a let block = SsaBlock { id: BlockId(0), phis: vec![], body: vec![inst(0, SsaOp::Param { index: 0 }, Some("a"))], terminator: Terminator::Return(Some(SsaValue(0))), preds: smallvec![], succs: smallvec![], }; let body = mk_body(vec![block], 1); let pinfo = vec![(0usize, "a".to_string(), SsaValue(0))]; let s = analyse_param_points_to(&body, &pinfo, 1, None, None); assert!(!s.overflow); assert_eq!(s.edges.len(), 1); assert_eq!(s.edges[0].source, AliasPosition::Param(0)); assert_eq!(s.edges[0].target, AliasPosition::Return); } #[test] fn self_alias_is_dropped() { // fn f(b): b.data = b_other_field (reading b.x and writing b.y) // Both uses trace back to Param(0) and base is Param(0) → // self-alias is uninformative, no edge emitted. let block = SsaBlock { id: BlockId(0), phis: vec![], body: vec![ inst(0, SsaOp::Param { index: 0 }, Some("b")), inst(1, SsaOp::Assign(smallvec![SsaValue(0)]), Some("b.x")), inst(2, SsaOp::Assign(smallvec![SsaValue(1)]), Some("b.data")), ], terminator: Terminator::Return(None), preds: smallvec![], succs: smallvec![], }; let body = mk_body(vec![block], 3); let pinfo = vec![(0usize, "b".to_string(), SsaValue(0))]; let s = analyse_param_points_to(&body, &pinfo, 1, None, None); assert!( s.is_empty(), "self-alias edges should not be emitted: {s:?}" ); } #[test] fn out_of_range_param_rejected() { // Synthetic Param with index >= formal_param_count must not leak // into the summary (it would trip ssa_summary_fits_arity). let block = SsaBlock { id: BlockId(0), phis: vec![], body: vec![ inst(0, SsaOp::Param { index: 5 }, Some("capture")), inst(1, SsaOp::Param { index: 1 }, Some("b")), inst(2, SsaOp::Assign(smallvec![SsaValue(0)]), Some("b.data")), ], terminator: Terminator::Return(None), preds: smallvec![], succs: smallvec![], }; let body = mk_body(vec![block], 3); let pinfo = vec![ (5usize, "capture".to_string(), SsaValue(0)), (1usize, "b".to_string(), SsaValue(1)), ]; // formal_param_count = 2, index 5 is out of range. let s = analyse_param_points_to(&body, &pinfo, 2, None, None); assert!( s.is_empty(), "synthetic captures past formal arity must not emit edges: {s:?}" ); } #[test] fn bounded_graph_overflows_at_cap() { // Build MAX_ALIAS_EDGES+2 param→return edges by returning a Phi // of every param. This exercises the overflow fallback. let n = (crate::summary::points_to::MAX_ALIAS_EDGES + 2) as u32; let mut insts = Vec::new(); let mut phi_operands: SmallVec<[(BlockId, SsaValue); 2]> = SmallVec::new(); for i in 0..n { insts.push(inst( i, SsaOp::Param { index: i as usize }, Some(&format!("p{i}")), )); phi_operands.push((BlockId(0), SsaValue(i))); } let phi_v = n; insts.push(inst(phi_v, SsaOp::Phi(phi_operands), Some("ret"))); let block = SsaBlock { id: BlockId(0), phis: vec![], body: insts, terminator: Terminator::Return(Some(SsaValue(phi_v))), preds: smallvec![], succs: smallvec![], }; let body = mk_body(vec![block], n + 1); let pinfo: Vec<(usize, String, SsaValue)> = (0..n as usize) .map(|i| (i, format!("p{i}"), SsaValue(i as u32))) .collect(); // Only the first traced param is emitted (trace_to_param short- // circuits on first match), so overflow is not expected, we // instead verify the bounded behaviour: a single edge. let s = analyse_param_points_to(&body, &pinfo, n as usize, None, None); assert!(!s.overflow); assert_eq!(s.edges.len(), 1); } #[test] fn fresh_container_literal_return_sets_flag() { // fn makeBag() { return []; } // v0 = Const("[]") // terminator: Return(v0) let block = SsaBlock { id: BlockId(0), phis: vec![], body: vec![inst(0, SsaOp::Const(Some("[]".to_string())), None)], terminator: Terminator::Return(Some(SsaValue(0))), preds: smallvec![], succs: smallvec![], }; let body = mk_body(vec![block], 1); let s = analyse_param_points_to(&body, &[], 0, None, Some(Lang::JavaScript)); assert!(s.returns_fresh_alloc); assert!(s.edges.is_empty()); } #[test] fn constructor_return_sets_flag() { // fn makeList() { return list(); } // v0 = Call("list", []) // terminator: Return(v0) let block = SsaBlock { id: BlockId(0), phis: vec![], body: vec![inst( 0, SsaOp::Call { callee: "list".to_string(), callee_text: None, args: vec![], receiver: None, }, None, )], terminator: Terminator::Return(Some(SsaValue(0))), preds: smallvec![], succs: smallvec![], }; let body = mk_body(vec![block], 1); let s = analyse_param_points_to(&body, &[], 0, None, Some(Lang::Python)); assert!(s.returns_fresh_alloc); } #[test] fn return_of_param_does_not_set_fresh_flag() { // fn identity(a) { return a; } let block = SsaBlock { id: BlockId(0), phis: vec![], body: vec![inst(0, SsaOp::Param { index: 0 }, Some("a"))], terminator: Terminator::Return(Some(SsaValue(0))), preds: smallvec![], succs: smallvec![], }; let body = mk_body(vec![block], 1); let pinfo = vec![(0usize, "a".to_string(), SsaValue(0))]; let s = analyse_param_points_to(&body, &pinfo, 1, None, Some(Lang::JavaScript)); assert!( !s.returns_fresh_alloc, "param-only return must not set fresh-alloc flag" ); // But the Param(0) → Return edge must still be emitted. assert!( s.edges .iter() .any(|e| e.source == AliasPosition::Param(0) && e.target == AliasPosition::Return), "expected Param(0) → Return edge, got {s:?}" ); } }