nyx/src/ssa/param_points_to.rs

//! Parameter-granularity points-to analysis.
//!
//! Produces a [`PointsToSummary`] for a function body by walking the SSA
//! once and recording two classes of aliasing:
//!
//! 1. **Param → Param field writes.**  An `obj.field = val` where `obj`
//!    traces back to parameter `b` and `val` traces back to parameter `a`
//!    emits a `Param(a) → Param(b)` `MayAlias` edge.  This captures the
//!    `mutating_helper` pattern, the callee mutates a shared heap cell
//!    through one parameter and the caller observes the mutation through
//!    its argument for that parameter.
//!
//! 2. **Param → Return aliases.**  `Terminator::Return(v)` where `v`
//!    traces back to a parameter emits a `Param(i) → Return` edge.  This
//!    captures the `returned_alias` pattern, the callee returns its
//!    argument unchanged and the caller treats the result as aliasing the
//!    input.
//!
//! Field-write detection uses the existing SSA lowering convention: a
//! source-level `obj.x = val` is lowered to an `Assign` whose `var_name`
//! is the dotted path `"obj.x"`, plus synthetic parent-path Assigns that
//! propagate the write up to the base (`"obj"`).  See
//! [`crate::ssa::lower`]'s "Synthetic base update" block for the
//! canonical source.
//!
//! The analysis is **flow-insensitive** and **bounded**: it does not
//! reason about path feasibility, and it stops adding edges once the
//! summary's `MAX_ALIAS_EDGES` cap is reached, the overflow flag is
//! the conservative fallback that callers honour.

use std::collections::{HashMap, HashSet};

use smallvec::SmallVec;

use crate::summary::points_to::{AliasKind, AliasPosition, PointsToSummary};
use crate::symbol::Lang;

use super::ir::{SsaBody, SsaOp, SsaValue, Terminator};

/// Map an SSA value back to its defining instruction's op.
///
/// Local to this module, the taint engine has its own `build_inst_map`
/// that also carries receiver info we do not need, and duplicating it
/// keeps this analysis independent of that private helper's shape.
fn build_op_map(ssa: &SsaBody) -> HashMap<SsaValue, SsaOp> {
    let mut map = HashMap::with_capacity(ssa.num_values());
    for block in &ssa.blocks {
        for inst in block.phis.iter().chain(block.body.iter()) {
            map.insert(inst.value, inst.op.clone());
        }
    }
    map
}

/// Sibling of [`build_op_map`] that captures the optional `var_name`
/// recorded on each SSA instruction.  Used alongside the op map so a
/// [`ParamHit`] can surface the underlying variable name for
/// formal-index resolution.
fn build_var_name_map(ssa: &SsaBody) -> HashMap<SsaValue, Option<String>> {
    let mut map = HashMap::with_capacity(ssa.num_values());
    for block in &ssa.blocks {
        for inst in block.phis.iter().chain(block.body.iter()) {
            map.insert(inst.value, inst.var_name.clone());
        }
    }
    map
}

/// Information about an SSA `Param { index }` node needed to resolve
/// back to a caller-side positional index via formal-params lookup.
#[derive(Clone, Debug)]
struct ParamHit {
    /// The `SsaOp::Param` index as lowered.
    ssa_index: usize,
    /// The parameter's variable name (from [`SsaInst::var_name`]).  Used
    /// to map back to the formal-declaration position, the caller's
    /// `args[i]` slot is keyed by declaration position, not by SSA
    /// index, and the two can disagree when a formal parameter is
    /// skipped from SSA lowering (e.g., pure-output params).
    var_name: Option<String>,
}

/// Walk Assign/Phi chains to find a backing `Param { index }` SSA op.
///
/// Returns the `SsaOp::Param`'s index *and* its var_name so callers can
/// resolve the formal-positional index via the name lookup table, the
/// two indices can disagree when SSA lowering skips a formal parameter
/// (never used as a read), shifting subsequent param indices down.
fn trace_to_param_hit(
    v: SsaValue,
    op_map: &HashMap<SsaValue, SsaOp>,
    var_names: &HashMap<SsaValue, Option<String>>,
    visited: &mut HashSet<SsaValue>,
) -> Option<ParamHit> {
    if !visited.insert(v) {
        return None;
    }
    match op_map.get(&v)? {
        SsaOp::Param { index } => Some(ParamHit {
            ssa_index: *index,
            var_name: var_names.get(&v).cloned().flatten(),
        }),
        SsaOp::Assign(uses) => {
            for u in uses {
                if let Some(hit) = trace_to_param_hit(*u, op_map, var_names, visited) {
                    return Some(hit);
                }
            }
            None
        }
        SsaOp::Phi(operands) => {
            for (_, pv) in operands {
                if let Some(hit) = trace_to_param_hit(*pv, op_map, var_names, visited) {
                    return Some(hit);
                }
            }
            None
        }
        // Call produces a fresh identity; Const / Source / CatchParam /
        // SelfParam / Nop are not param-derived.
        _ => None,
    }
}

/// Resolve a [`ParamHit`] to a caller-side positional index using the
/// formal-params name lookup.  Falls back to the SSA `index` when no
/// name-based match exists (e.g., extractor called without
/// `formal_param_names`).
fn param_hit_to_formal_index(hit: &ParamHit, params_by_name: &HashMap<String, usize>) -> usize {
    if let Some(name) = &hit.var_name
        && let Some(&idx) = params_by_name.get(name)
    {
        return idx;
    }
    hit.ssa_index
}

/// Parse the base of a dotted / indexed path into its root name.
///
/// * `"obj"` → `"obj"`
/// * `"obj.field"` → `"obj"`
/// * `"obj.field.sub"` → `"obj"`
/// * `"obj[0]"` → `"obj"`
/// * `"obj.list[2].name"` → `"obj"`
///
/// Used to decide whether a field-style Assign's LHS base names a
/// parameter variable, we strip everything after the first separator
/// and compare the remainder to the recorded param names.
fn base_of_path(name: &str) -> &str {
    let dot = name.find('.');
    let bracket = name.find('[');
    let end = match (dot, bracket) {
        (Some(d), Some(b)) => d.min(b),
        (Some(d), None) => d,
        (None, Some(b)) => b,
        (None, None) => return name,
    };
    &name[..end]
}

/// Local receiver check duplicated to avoid depending on private
/// `lower::is_receiver_name`.  Must stay in sync with that helper.
fn is_receiver_name_local(name: &str) -> bool {
    matches!(name, "self" | "this")
}

/// Walk Assign/Phi chains from a return value to decide whether the path
/// ends at a fresh container allocation (literal or constructor call).
///
/// Returns `true` the first time a qualifying allocation is found.
/// Parameter-terminated paths, `Call` ops that are not container
/// constructors, and constants that are not container literals all
/// return `false`, soundly under-approximating, since the caller will
/// simply fall back to the existing `Param(i) → Return` / store-into-
/// heap channels when the flag is absent.
fn trace_to_fresh_alloc(
    v: SsaValue,
    op_map: &HashMap<SsaValue, SsaOp>,
    lang: Option<Lang>,
    visited: &mut HashSet<SsaValue>,
) -> bool {
    if !visited.insert(v) {
        return false;
    }
    let Some(op) = op_map.get(&v) else {
        return false;
    };
    match op {
        SsaOp::Const(Some(text)) => crate::ssa::heap::is_container_literal_public(text),
        SsaOp::Call { callee, .. } => lang
            .map(|l| crate::ssa::heap::is_container_constructor(callee, l))
            .unwrap_or(false),
        SsaOp::Assign(uses) => uses
            .iter()
            .any(|u| trace_to_fresh_alloc(*u, op_map, lang, visited)),
        SsaOp::Phi(operands) => operands
            .iter()
            .any(|(_, pv)| trace_to_fresh_alloc(*pv, op_map, lang, visited)),
        _ => false,
    }
}

/// Whether any `Terminator::Return(Some(v))` in the body traces back to a
/// fresh container allocation.  Invoked once per function; the visited
/// set is fresh per return block so distinct returns do not poison each
/// other's searches.
fn returns_fresh_allocation(
    ssa: &SsaBody,
    op_map: &HashMap<SsaValue, SsaOp>,
    lang: Option<Lang>,
) -> bool {
    for block in &ssa.blocks {
        let Terminator::Return(Some(v)) = block.terminator else {
            continue;
        };
        let mut visited = HashSet::new();
        if trace_to_fresh_alloc(v, op_map, lang, &mut visited) {
            return true;
        }
    }
    false
}

/// Compute the parameter-granularity points-to summary for a function.
///
/// `param_info` carries one `(param_index, param_name, param_ssa_value)`
/// tuple per formal parameter that was emitted as [`SsaOp::Param`] in the
/// lowered body.  The receiver is intentionally excluded, this table
/// captures positional parameters only.
///
/// `formal_param_names`, when supplied, is the authoritative list of
/// declared parameter names in declaration order.  It matters for
/// **pure-output parameters**: a param like `target` in
/// `fn set(target, val): target.data = val` is never *used* in the body
/// (only assigned into), so SSA lowering does not emit a `Param` node
/// for it and `param_info` will not contain it.  Falling back to
/// `formal_param_names` lets the base-name lookup still find its index.
///
/// `formal_param_count` bounds the parameter indices written to the
/// summary: scoped lowering synthesises `Param` ops for module-level
/// captures at indices beyond the formal arity, and those must not leak
/// into the summary (they would trip `ssa_summary_fits_arity`).
pub fn analyse_param_points_to(
    ssa: &SsaBody,
    param_info: &[(usize, String, SsaValue)],
    formal_param_count: usize,
    formal_param_names: Option<&[String]>,
    lang: Option<Lang>,
) -> PointsToSummary {
    let mut summary = PointsToSummary::empty();

    let op_map = build_op_map(ssa);
    let var_names = build_var_name_map(ssa);

    // ── 0. Fresh-container return detection ─────────────────────────────
    //
    // A return path traces back to either:
    //   * `SsaOp::Const(text)` where `text` is a container literal
    //     (`[]`, `{}`, `new Map()`, …), OR
    //   * `SsaOp::Call { callee, … }` where `callee` matches a known
    //     container constructor for `lang` (`ArrayList`, `dict`, …).
    //
    // When at least one return path matches, the callee produces a
    // caller-visible fresh heap identity on that path, callers
    // synthesise a `HeapObjectId` keyed on the call result so later
    // container operations have a stable heap cell.  Traces that reach a
    // parameter are handled by the edge-based `Param(i) → Return` channel
    // below and do not contribute here; a mixed function emits both.
    //
    // Runs before the early-out on `formal_param_count == 0` so pure
    // factories (zero-param container constructors) still record the
    // fresh-alloc signal.
    if returns_fresh_allocation(ssa, &op_map, lang) {
        summary.returns_fresh_alloc = true;
    }

    if formal_param_count == 0 {
        return summary;
    }
    // Build the name→positional-index map.  Summary param indices are
    // *positional*, they match the call-site `args[i]` position, which
    // excludes the receiver (`self`/`this`).  When `formal_param_names`
    // contains a leading receiver, skip it so the remaining names align
    // with the SSA `SsaOp::Param { index }` convention.
    let mut params_by_name: HashMap<String, usize> = HashMap::new();
    if let Some(names) = formal_param_names {
        let mut pos: usize = 0;
        for name in names {
            if is_receiver_name_local(name) {
                continue;
            }
            if pos >= formal_param_count {
                break;
            }
            params_by_name.insert(name.clone(), pos);
            pos += 1;
        }
    }
    // Overlay `param_info` ONLY when formal_param_names was absent.
    // When formal_param_names is supplied it is the authoritative
    // declaration-order mapping; SSA param indices can legitimately
    // diverge (a pure-output param is never emitted, shifting later
    // indices down), so trusting SSA here would mis-map the caller's
    // `args[i]` positional slot.
    if formal_param_names.is_none() {
        for (idx, name, _) in param_info {
            params_by_name.insert(name.clone(), *idx);
        }
    }

    // ── 1. Field-store alias edges (Param(a) → Param(b)) ────────────────
    //
    // SSA lowering encodes `obj.field = val` as one or more Assigns whose
    // `var_name` is the dotted / indexed path.  For every such Assign we
    // look up the root name, check it matches a parameter variable, and
    // trace each use back to a param for the `Param(a) → Param(b)` edge.
    for block in &ssa.blocks {
        for inst in block.body.iter() {
            let SsaOp::Assign(uses) = &inst.op else {
                continue;
            };
            let Some(name) = inst.var_name.as_ref() else {
                continue;
            };
            // Only field/index-style writes encode the base in var_name;
            // a plain `x = ...` doesn't imply aliasing with `x`'s param.
            if !name.contains('.') && !name.contains('[') {
                continue;
            }
            let base = base_of_path(name);
            let Some(&target_idx) = params_by_name.get(base) else {
                continue;
            };
            if target_idx >= formal_param_count {
                continue;
            }
            for u in uses {
                let mut visited = HashSet::new();
                let Some(hit) = trace_to_param_hit(*u, &op_map, &var_names, &mut visited) else {
                    continue;
                };
                let src_idx = param_hit_to_formal_index(&hit, &params_by_name);
                if src_idx >= formal_param_count {
                    continue;
                }
                if src_idx == target_idx {
                    // Self-alias is uninformative, the caller's
                    // arg-to-itself propagation is already covered by
                    // `param_to_return`/`param_to_sink`.
                    continue;
                }
                summary.insert(
                    AliasPosition::Param(src_idx as u32),
                    AliasPosition::Param(target_idx as u32),
                    AliasKind::MayAlias,
                );
                if summary.overflow {
                    return summary;
                }
            }
        }
    }

    // ── 2. Return-alias edges (Param(i) → Return) ───────────────────────
    //
    // `Terminator::Return(v)` with `v` tracing back to a parameter means
    // the call site's result aliases the corresponding argument's heap
    // identity.  Joining across all return blocks is a plain set union.
    let mut return_param_indices: SmallVec<[usize; 4]> = SmallVec::new();
    for block in &ssa.blocks {
        let Terminator::Return(Some(v)) = block.terminator else {
            continue;
        };
        let mut visited = HashSet::new();
        if let Some(hit) = trace_to_param_hit(v, &op_map, &var_names, &mut visited) {
            let idx = param_hit_to_formal_index(&hit, &params_by_name);
            if idx < formal_param_count && !return_param_indices.contains(&idx) {
                return_param_indices.push(idx);
            }
        }
    }
    for idx in return_param_indices {
        summary.insert(
            AliasPosition::Param(idx as u32),
            AliasPosition::Return,
            AliasKind::MayAlias,
        );
        if summary.overflow {
            return summary;
        }
    }

    summary
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::ssa::ir::{BlockId, SsaBlock, SsaInst};
    use petgraph::graph::NodeIndex;
    use smallvec::smallvec;

    fn mk_body(blocks: Vec<SsaBlock>, num_values: u32) -> SsaBody {
        use crate::ssa::ir::ValueDef;
        let value_defs = (0..num_values)
            .map(|_| ValueDef {
                var_name: None,
                cfg_node: NodeIndex::new(0),
                block: BlockId(0),
            })
            .collect();
        SsaBody {
            blocks,
            entry: BlockId(0),
            value_defs,
            cfg_node_map: HashMap::new(),
            exception_edges: vec![],
            field_interner: crate::ssa::ir::FieldInterner::default(),
            field_writes: std::collections::HashMap::new(),

            synthetic_externals: std::collections::HashSet::new(),
        }
    }

    fn inst(v: u32, op: SsaOp, var_name: Option<&str>) -> SsaInst {
        SsaInst {
            value: SsaValue(v),
            op,
            cfg_node: NodeIndex::new(0),
            var_name: var_name.map(String::from),
            span: (0, 0),
        }
    }

    #[test]
    fn field_write_param_to_param_emits_edge() {
        // Simulate:
        //   fn f(a, b):
        //     b.data = a        # Assign var_name="b.data" uses=[a_ssa]
        //     synthetic: b = b.data     # Assign var_name="b" uses=[assign0]
        //     return
        let block = SsaBlock {
            id: BlockId(0),
            phis: vec![],
            body: vec![
                inst(0, SsaOp::Param { index: 0 }, Some("a")),
                inst(1, SsaOp::Param { index: 1 }, Some("b")),
                inst(2, SsaOp::Assign(smallvec![SsaValue(0)]), Some("b.data")),
                inst(3, SsaOp::Assign(smallvec![SsaValue(2)]), Some("b")),
            ],
            terminator: Terminator::Return(None),
            preds: smallvec![],
            succs: smallvec![],
        };
        let body = mk_body(vec![block], 4);
        let pinfo = vec![
            (0usize, "a".to_string(), SsaValue(0)),
            (1usize, "b".to_string(), SsaValue(1)),
        ];
        let s = analyse_param_points_to(&body, &pinfo, 2, None, None);
        assert!(!s.overflow, "unexpected overflow: {s:?}");
        assert!(
            s.edges.iter().any(|e| e.source == AliasPosition::Param(0)
                && e.target == AliasPosition::Param(1)
                && e.kind == AliasKind::MayAlias),
            "expected Param(0) → Param(1) edge, got {s:?}"
        );
    }

    #[test]
    fn return_alias_emits_edge() {
        // fn f(a): return a
        let block = SsaBlock {
            id: BlockId(0),
            phis: vec![],
            body: vec![inst(0, SsaOp::Param { index: 0 }, Some("a"))],
            terminator: Terminator::Return(Some(SsaValue(0))),
            preds: smallvec![],
            succs: smallvec![],
        };
        let body = mk_body(vec![block], 1);
        let pinfo = vec![(0usize, "a".to_string(), SsaValue(0))];
        let s = analyse_param_points_to(&body, &pinfo, 1, None, None);
        assert!(!s.overflow);
        assert_eq!(s.edges.len(), 1);
        assert_eq!(s.edges[0].source, AliasPosition::Param(0));
        assert_eq!(s.edges[0].target, AliasPosition::Return);
    }

    #[test]
    fn self_alias_is_dropped() {
        // fn f(b): b.data = b_other_field (reading b.x and writing b.y)
        // Both uses trace back to Param(0) and base is Param(0) →
        // self-alias is uninformative, no edge emitted.
        let block = SsaBlock {
            id: BlockId(0),
            phis: vec![],
            body: vec![
                inst(0, SsaOp::Param { index: 0 }, Some("b")),
                inst(1, SsaOp::Assign(smallvec![SsaValue(0)]), Some("b.x")),
                inst(2, SsaOp::Assign(smallvec![SsaValue(1)]), Some("b.data")),
            ],
            terminator: Terminator::Return(None),
            preds: smallvec![],
            succs: smallvec![],
        };
        let body = mk_body(vec![block], 3);
        let pinfo = vec![(0usize, "b".to_string(), SsaValue(0))];
        let s = analyse_param_points_to(&body, &pinfo, 1, None, None);
        assert!(
            s.is_empty(),
            "self-alias edges should not be emitted: {s:?}"
        );
    }

    #[test]
    fn out_of_range_param_rejected() {
        // Synthetic Param with index >= formal_param_count must not leak
        // into the summary (it would trip ssa_summary_fits_arity).
        let block = SsaBlock {
            id: BlockId(0),
            phis: vec![],
            body: vec![
                inst(0, SsaOp::Param { index: 5 }, Some("capture")),
                inst(1, SsaOp::Param { index: 1 }, Some("b")),
                inst(2, SsaOp::Assign(smallvec![SsaValue(0)]), Some("b.data")),
            ],
            terminator: Terminator::Return(None),
            preds: smallvec![],
            succs: smallvec![],
        };
        let body = mk_body(vec![block], 3);
        let pinfo = vec![
            (5usize, "capture".to_string(), SsaValue(0)),
            (1usize, "b".to_string(), SsaValue(1)),
        ];
        // formal_param_count = 2, index 5 is out of range.
        let s = analyse_param_points_to(&body, &pinfo, 2, None, None);
        assert!(
            s.is_empty(),
            "synthetic captures past formal arity must not emit edges: {s:?}"
        );
    }

    #[test]
    fn bounded_graph_overflows_at_cap() {
        // Build MAX_ALIAS_EDGES+2 param→return edges by returning a Phi
        // of every param.  This exercises the overflow fallback.
        let n = (crate::summary::points_to::MAX_ALIAS_EDGES + 2) as u32;
        let mut insts = Vec::new();
        let mut phi_operands: SmallVec<[(BlockId, SsaValue); 2]> = SmallVec::new();
        for i in 0..n {
            insts.push(inst(
                i,
                SsaOp::Param { index: i as usize },
                Some(&format!("p{i}")),
            ));
            phi_operands.push((BlockId(0), SsaValue(i)));
        }
        let phi_v = n;
        insts.push(inst(phi_v, SsaOp::Phi(phi_operands), Some("ret")));
        let block = SsaBlock {
            id: BlockId(0),
            phis: vec![],
            body: insts,
            terminator: Terminator::Return(Some(SsaValue(phi_v))),
            preds: smallvec![],
            succs: smallvec![],
        };
        let body = mk_body(vec![block], n + 1);
        let pinfo: Vec<(usize, String, SsaValue)> = (0..n as usize)
            .map(|i| (i, format!("p{i}"), SsaValue(i as u32)))
            .collect();
        // Only the first traced param is emitted (trace_to_param short-
        // circuits on first match), so overflow is not expected, we
        // instead verify the bounded behaviour: a single edge.
        let s = analyse_param_points_to(&body, &pinfo, n as usize, None, None);
        assert!(!s.overflow);
        assert_eq!(s.edges.len(), 1);
    }

    #[test]
    fn fresh_container_literal_return_sets_flag() {
        // fn makeBag() { return []; }
        // v0 = Const("[]")
        // terminator: Return(v0)
        let block = SsaBlock {
            id: BlockId(0),
            phis: vec![],
            body: vec![inst(0, SsaOp::Const(Some("[]".to_string())), None)],
            terminator: Terminator::Return(Some(SsaValue(0))),
            preds: smallvec![],
            succs: smallvec![],
        };
        let body = mk_body(vec![block], 1);
        let s = analyse_param_points_to(&body, &[], 0, None, Some(Lang::JavaScript));
        assert!(s.returns_fresh_alloc);
        assert!(s.edges.is_empty());
    }

    #[test]
    fn constructor_return_sets_flag() {
        // fn makeList() { return list(); }
        // v0 = Call("list", [])
        // terminator: Return(v0)
        let block = SsaBlock {
            id: BlockId(0),
            phis: vec![],
            body: vec![inst(
                0,
                SsaOp::Call {
                    callee: "list".to_string(),
                    callee_text: None,
                    args: vec![],
                    receiver: None,
                },
                None,
            )],
            terminator: Terminator::Return(Some(SsaValue(0))),
            preds: smallvec![],
            succs: smallvec![],
        };
        let body = mk_body(vec![block], 1);
        let s = analyse_param_points_to(&body, &[], 0, None, Some(Lang::Python));
        assert!(s.returns_fresh_alloc);
    }

    #[test]
    fn return_of_param_does_not_set_fresh_flag() {
        // fn identity(a) { return a; }
        let block = SsaBlock {
            id: BlockId(0),
            phis: vec![],
            body: vec![inst(0, SsaOp::Param { index: 0 }, Some("a"))],
            terminator: Terminator::Return(Some(SsaValue(0))),
            preds: smallvec![],
            succs: smallvec![],
        };
        let body = mk_body(vec![block], 1);
        let pinfo = vec![(0usize, "a".to_string(), SsaValue(0))];
        let s = analyse_param_points_to(&body, &pinfo, 1, None, Some(Lang::JavaScript));
        assert!(
            !s.returns_fresh_alloc,
            "param-only return must not set fresh-alloc flag"
        );
        // But the Param(0) → Return edge must still be emitted.
        assert!(
            s.edges
                .iter()
                .any(|e| e.source == AliasPosition::Param(0) && e.target == AliasPosition::Return),
            "expected Param(0) → Return edge, got {s:?}"
        );
    }
}