//! Forward symbolic transfer over SSA instructions. //! //! Walks SSA blocks and builds `SymbolicValue` expression trees for each //! defined SSA value, while eagerly propagating taint through the root-set. //! //! Cross-file symbolic summary modeling: when a callee has an //! `SsaFuncSummary` available via `GlobalSummaries`, the Call instruction's //! return value is modeled symbolically instead of being treated as opaque. #![allow( clippy::collapsible_if, clippy::if_same_then_else, clippy::too_many_arguments )] use crate::cfg::Cfg; use crate::ssa::const_prop::ConstLattice; use crate::ssa::heap::PointsToResult; use crate::ssa::ir::{BlockId, SsaBlock, SsaBody, SsaInst, SsaOp, SsaValue}; use crate::ssa::pointsto::{ContainerOp, classify_container_op}; use crate::ssa::type_facts::TypeFactResult; use crate::summary::ssa_summary::TaintTransform; use crate::summary::{CalleeResolution, GlobalSummaries}; use crate::symbol::Lang; use super::heap::{self, FieldAccessRecord, FieldSlot, HeapKey}; use super::state::SymbolicState; use super::strings::{ StringOperandSource, TransformKind, classify_string_method, classify_transform_method, }; use super::value::{ Op, SymbolicValue, mk_binop, mk_call, mk_decode, mk_encode, mk_phi, mk_replace, mk_strlen, mk_substr, mk_to_lower, mk_to_upper, mk_trim, }; /// Context for cross-file symbolic summary modeling during transfer. /// /// When provided, Call instructions attempt to resolve callee behavior /// via `SsaFuncSummary` before falling back to the opaque `mk_call`. pub struct SymexSummaryCtx<'a> { pub global_summaries: &'a GlobalSummaries, pub lang: Lang, pub namespace: &'a str, /// Type facts for type-qualified symbolic summary resolution. /// When present, receiver types guide callee name qualification. pub type_facts: Option<&'a TypeFactResult>, } /// Context for field-sensitive heap operations during transfer. 
/// /// When provided, Assign and Call instructions attempt store/load operations /// through the symbolic heap using allocation-site identities from points-to. /// `const_values` enables per-index array slot resolution. pub struct SymexHeapCtx<'a> { pub points_to: &'a PointsToResult, pub ssa: &'a SsaBody, pub lang: Lang, pub const_values: &'a std::collections::HashMap, } /// Result of resolving a callee symbolically via its summary. struct SymbolicCallResult { value: SymbolicValue, tainted: bool, } /// Transfer a single SSA instruction: set the symbolic value and propagate taint. pub fn transfer_inst( state: &mut SymbolicState, inst: &SsaInst, cfg: &Cfg, ssa: &SsaBody, summary_ctx: Option<&SymexSummaryCtx>, heap_ctx: Option<&SymexHeapCtx>, interproc_ctx: Option<&super::interproc::InterprocCtx>, lang: Option, node_meta: Option< &std::collections::HashMap, >, ) { match &inst.op { SsaOp::Const(text) => { let sym = match text { Some(t) => match ConstLattice::parse(t) { ConstLattice::Int(n) => SymbolicValue::Concrete(n), ConstLattice::Str(s) => SymbolicValue::ConcreteStr(s), _ => SymbolicValue::Unknown, // Bool, Null, Top, Varying }, None => SymbolicValue::Unknown, }; state.set(inst.value, sym); } SsaOp::Source => { state.set(inst.value, SymbolicValue::Symbol(inst.value)); state.mark_tainted(inst.value); } SsaOp::Param { .. } => { // Params are symbolic inputs but NOT tainted by default. // Taint seeding happens via finding.flow_steps in analyse_finding_path. state.set(inst.value, SymbolicValue::Symbol(inst.value)); } SsaOp::SelfParam => { // Implicit method receiver, symbolic input, not tainted by default. 
state.set(inst.value, SymbolicValue::Symbol(inst.value)); } SsaOp::CatchParam => { if let Some(exc_val) = state.take_exception_context() { // On an exception path, seed from exception context // and mark tainted (matches taint engine: CatchParam gets Cap::all()) state.set(inst.value, exc_val); state.mark_tainted(inst.value); } else { // Normal path or no explicit exception context, still mark tainted // to match taint engine behavior (ssa_transfer.rs CatchParam gets Cap::all()) state.set(inst.value, SymbolicValue::Symbol(inst.value)); state.mark_tainted(inst.value); } } SsaOp::Nop => { // Nop does not define a meaningful value, skip. } SsaOp::Undef => { // Phi-operand sentinel for edges without a reaching // definition. No concrete value, no taint. state.set(inst.value, SymbolicValue::Unknown); } SsaOp::FieldProj { receiver, .. } => { // Symbolic field read: model `obj.field` as an opaque value // tied to the projection's SsaValue, and propagate the // receiver's taint to the result so flat root-set tracking // continues to flow taint through chained accesses. // // This pass deliberately keeps the opaque-Symbol model: without // a field-sensitive heap, a dedicated `Field { receiver, name }` // SymbolicValue variant cannot soundly carry concrete reads // across method boundaries, the witness pipeline already // reconstructs `obj.field` text from `ValueDef.var_name` // (populated by lower.rs to `"base.f1.f2"` for chain projections). // The structured variant is deferred to the field-sensitive // pointer analysis prompt, where heap loads consume `FieldProj` // directly. 
state.set(inst.value, SymbolicValue::Symbol(inst.value)); state.propagate_taint(inst.value, std::slice::from_ref(receiver)); } SsaOp::Assign(uses) => { let uses_slice: &[_] = uses; match uses_slice.len() { 0 => { state.set(inst.value, SymbolicValue::Unknown); } 1 => { // Copy let sym = state.get(uses_slice[0]); state.set(inst.value, sym); state.propagate_taint(inst.value, uses_slice); } 2 => { // Field-load pattern detection. // When RHS is a member expression, SSA produces 2 uses: // uses[0] = dotted-path SSA value (e.g., v for "user.name") // uses[1] = base variable SSA value (e.g., v for "user") // The first operand IS the field value, use it directly. if let Some(def) = ssa.value_defs.get(uses_slice[0].0 as usize) { if def.var_name.as_ref().is_some_and(|n| n.contains('.')) { let sym = state.get(uses_slice[0]); state.set(inst.value, sym); state.propagate_taint(inst.value, uses_slice); // Record heap load for cross-alias + witness try_heap_load_record(state, inst, ssa, heap_ctx); return; } } // Heap-based cross-alias load fallback. // If the instruction defines a dotted path but the first // operand doesn't have a dotted var_name (aliased object), // try loading from the symbolic heap via points-to. 
if try_heap_alias_load(state, inst, ssa, heap_ctx) { state.propagate_taint(inst.value, uses_slice); return; } // Check for binary op metadata on the CFG node let bin_op_val = if let Some(meta) = node_meta { meta.get(&(inst.cfg_node.index() as u32)) .and_then(|m| m.info.bin_op) } else { cfg[inst.cfg_node].bin_op }; if let Some(bin_op) = bin_op_val { let lhs = state.get(uses_slice[0]); let rhs = state.get(uses_slice[1]); let sym = mk_binop(Op::from(bin_op), lhs, rhs); state.set(inst.value, sym); } else { // No structural info, conservative Unknown state.set(inst.value, SymbolicValue::Unknown); } state.propagate_taint(inst.value, uses_slice); } _ => { // 3+ operands, complex expression state.set(inst.value, SymbolicValue::Unknown); state.propagate_taint(inst.value, uses_slice); } } // If this instruction defines a dotted path, record // the store in the symbolic heap for cross-alias resolution. try_heap_field_store(state, inst, ssa, heap_ctx); } SsaOp::Call { callee, args, receiver, .. } => { // Collect symbolic values for arguments let mut arg_syms: Vec = Vec::new(); let mut all_operands: Vec<_> = Vec::new(); if let Some(recv) = receiver { arg_syms.push(state.get(*recv)); all_operands.push(*recv); } for arg_slot in args { if let Some(&first_val) = arg_slot.first() { arg_syms.push(state.get(first_val)); all_operands.push(first_val); } } // Container store/load via symbolic heap. // Resolve index_arg via const_values for per-index precision when // the index is a known constant. 
if let Some(hctx) = heap_ctx { if let Some(container_op) = classify_container_op(callee, hctx.lang) { let recv_obj = receiver .and_then(|rv| hctx.points_to.get(rv)) .filter(|pts| pts.len() == 1) .and_then(|pts| pts.iter().next().copied()); if let Some(obj_id) = recv_obj { match container_op { ContainerOp::Store { ref value_args, index_arg, } => { let field = index_arg .and_then(|pos| { args.get(pos).and_then(|slot| slot.first()).map(|&v| { heap::resolve_index_slot(v, hctx.const_values) }) }) .unwrap_or(FieldSlot::Elements); let key = HeapKey { object: obj_id, field, }; let val_sym = value_args .first() .and_then(|&idx| args.get(idx)) .and_then(|slot| slot.first()) .map(|&v| state.get(v)) .unwrap_or(SymbolicValue::Unknown); let any_tainted = value_args.iter().any(|&idx| { args.get(idx) .and_then(|slot| slot.first()) .map(|&v| state.is_tainted(v)) .unwrap_or(false) }); state.heap_mut().store(key, val_sym, any_tainted); // Fall through to normal Call for return value } ContainerOp::Load { index_arg } => { let field = index_arg .and_then(|pos| { args.get(pos).and_then(|slot| slot.first()).map(|&v| { heap::resolve_index_slot(v, hctx.const_values) }) }) .unwrap_or(FieldSlot::Elements); let key = HeapKey { object: obj_id, field, }; let loaded = state.heap().load(&key); if !matches!(loaded, SymbolicValue::Unknown) { state.set(inst.value, loaded); if state.heap().is_tainted(&key) { state.mark_tainted(inst.value); } return; } // Fall through to normal Call } ContainerOp::Writeback { .. } => { // Symex doesn't model writeback yet, taint // engine handles the destination-arg taint // directly. Fall through to normal Call. 
} } } } } // String method recognition if let Some(result) = try_string_method(state, callee, receiver, &arg_syms, &all_operands, lang) { state.set(inst.value, result.value); if result.tainted { state.mark_tainted(inst.value); } return; } // Encoding/decoding transform recognition if let Some(result) = try_transform_method(state, callee, receiver, &arg_syms, &all_operands, lang) { state.set(inst.value, result.value); if result.tainted { state.mark_tainted(inst.value); } return; } // Interprocedural symbolic execution. // Execute callee body when available, full state propagation. if let Some(ictx) = interproc_ctx { let mut callee_args: Vec<(crate::ssa::ir::SsaValue, SymbolicValue, bool)> = Vec::new(); for (i, op) in all_operands.iter().enumerate() { callee_args.push(( *op, arg_syms.get(i).cloned().unwrap_or(SymbolicValue::Unknown), state.is_tainted(*op), )); } if let Some(outcome) = super::interproc::execute_callee( ictx, callee, &callee_args, state.heap(), 0, // depth: caller is at depth 0 &[], summary_ctx, heap_ctx, ) { if !outcome.exit_states.is_empty() { let policy = super::interproc::select_merge_policy( outcome.exit_states.len(), !outcome.cutoff_reasons.is_empty(), ); let merged = super::interproc::merge_exit_states(&outcome.exit_states, policy); state.set(inst.value, merged.return_value); if merged.return_tainted { state.mark_tainted(inst.value); } // Apply heap delta: callee writes become visible to caller for mutation in &merged.heap_delta { state.heap_mut().store( mutation.key.clone(), mutation.value.clone(), mutation.tainted, ); } return; } } } // Try cross-file summary modeling before falling back to mk_call if let Some(ctx) = summary_ctx { if let Some(result) = resolve_callee_symbolically( ctx, callee, &arg_syms, &all_operands, state, inst.value, *receiver, ) { state.set(inst.value, result.value); if result.tainted { state.mark_tainted(inst.value); } return; } } // Fallback: opaque call let sym = mk_call(callee.clone(), arg_syms); state.set(inst.value, 
sym); state.propagate_taint(inst.value, &all_operands); } SsaOp::Phi(operands) => { let phi_ops: Vec<_> = operands .iter() .map(|(bid, v)| (*bid, state.get(*v))) .collect(); let operand_vals: Vec<_> = operands.iter().map(|(_, v)| *v).collect(); let sym = mk_phi(phi_ops); state.set(inst.value, sym); state.propagate_taint(inst.value, &operand_vals); } } } // ───────────────────────────────────────────────────────────────────────────── // Heap helpers // ───────────────────────────────────────────────────────────────────────────── /// Record a field store in the symbolic heap when the instruction defines /// a dotted path (e.g., `user.name`). fn try_heap_field_store( state: &mut SymbolicState, inst: &SsaInst, _ssa: &SsaBody, heap_ctx: Option<&SymexHeapCtx>, ) { let hctx = match heap_ctx { Some(hctx) => hctx, None => return, }; let vn = match inst.var_name.as_deref() { Some(vn) => vn, None => return, }; let (recv_name, field_name) = match heap::split_field_access(vn) { Some(pair) => pair, None => return, }; let recv_ssa = match heap::resolve_receiver_ssa(recv_name, hctx.ssa, inst.value) { Some(v) => v, None => return, }; let obj_id = match heap::resolve_singleton_object(recv_ssa, hctx.points_to) { Some(id) => id, None => return, }; let key = HeapKey { object: obj_id, field: FieldSlot::Named(field_name.to_string()), }; let sym = state.get(inst.value); let tainted = state.is_tainted(inst.value); state.heap_mut().store(key, sym, tainted); state.heap_mut().record_access(FieldAccessRecord { object_name: recv_name.to_string(), field_name: field_name.to_string(), ssa_value: inst.value, }); } /// Record a field access from a successful field-load pattern. fn try_heap_load_record( state: &mut SymbolicState, inst: &SsaInst, ssa: &SsaBody, _heap_ctx: Option<&SymexHeapCtx>, ) { // The uses[0] var_name has the dotted path. 
let uses = match &inst.op { SsaOp::Assign(u) => u, _ => return, }; if let Some(&first) = uses.first() { if let Some(def) = ssa.value_defs.get(first.0 as usize) { if let Some(ref dotted) = def.var_name { if let Some((recv_name, field_name)) = heap::split_field_access(dotted) { state.heap_mut().record_access(FieldAccessRecord { object_name: recv_name.to_string(), field_name: field_name.to_string(), ssa_value: inst.value, }); } } } } } /// Try to resolve a 2-use Assign via heap cross-alias lookup. /// /// When `inst.var_name` is a dotted path (e.g., `obj.field`) but the first /// operand doesn't have a dotted def (the alias case), check the heap via /// points-to resolution. Returns `true` if the heap provided a value. fn try_heap_alias_load( state: &mut SymbolicState, inst: &SsaInst, _ssa: &SsaBody, heap_ctx: Option<&SymexHeapCtx>, ) -> bool { let hctx = match heap_ctx { Some(hctx) => hctx, None => return false, }; let vn = match inst.var_name.as_deref() { Some(vn) => vn, None => return false, }; let (recv_name, field_name) = match heap::split_field_access(vn) { Some(pair) => pair, None => return false, }; let recv_ssa = match heap::resolve_receiver_ssa(recv_name, hctx.ssa, inst.value) { Some(v) => v, None => return false, }; let obj_id = match heap::resolve_singleton_object(recv_ssa, hctx.points_to) { Some(id) => id, None => return false, }; let key = HeapKey { object: obj_id, field: FieldSlot::Named(field_name.to_string()), }; let loaded = state.heap().load(&key); if matches!(loaded, SymbolicValue::Unknown) { return false; } state.set(inst.value, loaded); if state.heap().is_tainted(&key) { state.mark_tainted(inst.value); } state.heap_mut().record_access(FieldAccessRecord { object_name: recv_name.to_string(), field_name: field_name.to_string(), ssa_value: inst.value, }); true } /// Transfer a single SSA instruction with optional predecessor context. 
///
/// Only phi instructions use predecessor-sensitive selection: when
/// `predecessor` is `Some(bid)`, the phi resolves to the operand from
/// that specific predecessor block instead of building a `Phi(...)`
/// expression. All non-phi instructions, and phis with no predecessor,
/// delegate to [`transfer_inst`].
pub fn transfer_inst_with_predecessor(
    state: &mut SymbolicState,
    inst: &SsaInst,
    cfg: &Cfg,
    ssa: &SsaBody,
    predecessor: Option<BlockId>,
    summary_ctx: Option<&SymexSummaryCtx>,
    heap_ctx: Option<&SymexHeapCtx>,
    interproc_ctx: Option<&super::interproc::InterprocCtx>,
    lang: Option<Lang>,
    // NOTE(review): the HashMap type arguments are missing in the reviewed
    // text and cannot be recovered from this function; it only forwards the
    // map to `transfer_inst`.
    node_meta: Option<
        &std::collections::HashMap,
    >,
) {
    if let (SsaOp::Phi(operands), Some(pred)) = (&inst.op, predecessor) {
        let sym = state.resolve_phi_from_predecessor(operands, pred);
        state.set(inst.value, sym);

        match operands.iter().find(|(bid, _)| *bid == pred) {
            // Taint: propagate only from the matched predecessor operand
            Some((_, v)) => state.propagate_taint(inst.value, &[*v]),
            // Predecessor not found among operands, propagate from all (fallback)
            None => {
                let operand_vals: Vec<_> = operands.iter().map(|(_, v)| *v).collect();
                state.propagate_taint(inst.value, &operand_vals);
            }
        }
        return;
    }

    transfer_inst(
        state,
        inst,
        cfg,
        ssa,
        summary_ctx,
        heap_ctx,
        interproc_ctx,
        lang,
        node_meta,
    );
}

/// Transfer all instructions in a block with predecessor context.
///
/// Phis use predecessor-aware transfer; body instructions use standard
/// [`transfer_inst`]. See [`transfer_inst_with_predecessor`] for details.
pub fn transfer_block_with_predecessor(
    state: &mut SymbolicState,
    block: &SsaBlock,
    cfg: &Cfg,
    ssa: &SsaBody,
    predecessor: Option<BlockId>,
    summary_ctx: Option<&SymexSummaryCtx>,
    heap_ctx: Option<&SymexHeapCtx>,
    interproc_ctx: Option<&super::interproc::InterprocCtx>,
    lang: Option<Lang>,
    // NOTE(review): the HashMap type arguments are missing in the reviewed
    // text and cannot be recovered from this function; the map is only
    // forwarded to the per-instruction transfer functions.
    node_meta: Option<
        &std::collections::HashMap,
    >,
) {
    // Phis first: they are the only instructions that depend on which
    // predecessor edge was taken.
    for inst in &block.phis {
        transfer_inst_with_predecessor(
            state,
            inst,
            cfg,
            ssa,
            predecessor,
            summary_ctx,
            heap_ctx,
            interproc_ctx,
            lang,
            node_meta,
        );
    }
    // Body instructions never consult the predecessor.
    for inst in &block.body {
        transfer_inst(
            state,
            inst,
            cfg,
            ssa,
            summary_ctx,
            heap_ctx,
            interproc_ctx,
            lang,
            node_meta,
        );
    }
}

/// Transfer all instructions in a block: phis first, then body.
///
/// Equivalent to [`transfer_block_with_predecessor`] with no predecessor and
/// no node metadata: without a predecessor every phi falls through to
/// [`transfer_inst`], which is exactly what this function used to call
/// directly.
pub fn transfer_block(
    state: &mut SymbolicState,
    block: &SsaBlock,
    cfg: &Cfg,
    ssa: &SsaBody,
    summary_ctx: Option<&SymexSummaryCtx>,
    heap_ctx: Option<&SymexHeapCtx>,
    interproc_ctx: Option<&super::interproc::InterprocCtx>,
    lang: Option<Lang>,
) {
    transfer_block_with_predecessor(
        state,
        block,
        cfg,
        ssa,
        None,
        summary_ctx,
        heap_ctx,
        interproc_ctx,
        lang,
        None,
    );
}

// ─────────────────────────────────────────────────────────────────────────────
//  String method dispatch
// ─────────────────────────────────────────────────────────────────────────────

/// Attempt to model a callee as a recognized string operation.
///
/// Returns `Some(SymbolicCallResult)` if the callee is a known string method
/// with structurally-modelable arguments. Otherwise returns `None`.
fn try_string_method( state: &SymbolicState, callee: &str, receiver: &Option, arg_syms: &[SymbolicValue], all_operands: &[SsaValue], lang: Option, ) -> Option { let lang = lang?; let info = classify_string_method(callee, arg_syms, lang)?; // Get the string operand based on the operand source let (string_sym, string_ssa) = match info.operand_source { StringOperandSource::Receiver => { let recv = (*receiver)?; (state.get(recv), recv) } StringOperandSource::FirstArg => { // For free functions, first arg is the string. // If receiver was prepended to arg_syms, it's at index 0; // otherwise first explicit arg is at index 0. if let Some(recv) = receiver { // Receiver was prepended, it IS the string operand (state.get(*recv), *recv) } else if let Some(&first_op) = all_operands.first() { ( arg_syms.first().cloned().unwrap_or(SymbolicValue::Unknown), first_op, ) } else { return None; } } }; // Build the structured SymbolicValue via smart constructors let value = match info.method { super::strings::StringMethod::Trim => mk_trim(string_sym), super::strings::StringMethod::ToLower => mk_to_lower(string_sym), super::strings::StringMethod::ToUpper => mk_to_upper(string_sym), super::strings::StringMethod::Replace { pattern, replacement, } => mk_replace(string_sym, pattern, replacement), super::strings::StringMethod::Substr => { // Extract start and end indices from args let arg_offset = match info.operand_source { StringOperandSource::Receiver => 1, // args[0] = receiver, args[1] = start StringOperandSource::FirstArg => { if receiver.is_some() { 1 } else { 1 } // args[0] = string, args[1] = start } }; let start = arg_syms .get(arg_offset) .cloned() .unwrap_or(SymbolicValue::Concrete(0)); let end = arg_syms.get(arg_offset + 1).cloned(); mk_substr(string_sym, start, end) } super::strings::StringMethod::StrLen => mk_strlen(string_sym), }; // Taint: string operations preserve taint from the string operand let tainted = state.is_tainted(string_ssa); Some(SymbolicCallResult { value, 
tainted }) } /// Recognize encoding/decoding transforms and build structured /// `Encode`/`Decode` nodes instead of opaque `Call`. /// /// Taint is always propagated from the operand, encoding preserves taint /// unconditionally. This function does NOT sanitize. fn try_transform_method( state: &SymbolicState, callee: &str, receiver: &Option, arg_syms: &[SymbolicValue], all_operands: &[SsaValue], lang: Option, ) -> Option { let lang = lang?; let info = classify_transform_method(callee, lang)?; // Extract the operand the same way as try_string_method let (operand_sym, operand_ssa) = match info.operand_source { StringOperandSource::Receiver => { let recv = (*receiver)?; (state.get(recv), recv) } StringOperandSource::FirstArg => { if let Some(recv) = receiver { (state.get(*recv), *recv) } else if let Some(&first_op) = all_operands.first() { ( arg_syms.first().cloned().unwrap_or(SymbolicValue::Unknown), first_op, ) } else { return None; } } }; // Build structured Encode or Decode node via smart constructors let value = match info.kind { TransformKind::Base64Decode | TransformKind::UrlDecode => mk_decode(info.kind, operand_sym), _ => mk_encode(info.kind, operand_sym), }; // Encoding preserves taint unconditionally let tainted = state.is_tainted(operand_ssa); Some(SymbolicCallResult { value, tainted }) } // ───────────────────────────────────────────────────────────────────────────── // Cross-file symbolic summary resolution // ───────────────────────────────────────────────────────────────────────────── /// Model a callee's return value from its SSA summary. /// /// Shared by both type-qualified and bare-name resolution paths. 
/// /// Resolution rules: /// - **Exactly one `Identity`**: pass through that argument's symbolic value /// - **Multiple `Identity` entries**: ambiguous → fall back (do NOT pick arbitrarily) /// - **`StripBits`**: sanitizer → `Unknown`, not tainted /// - **`AddBits` or `source_caps != empty`**: source → fresh tainted Symbol /// - **`NotFound` / `Ambiguous`**: hard fallback to mk_call fn model_from_summary( summary: &crate::summary::ssa_summary::SsaFuncSummary, arg_syms: &[SymbolicValue], all_operands: &[SsaValue], state: &SymbolicState, result_value: SsaValue, ) -> Option { // Check for source-producing function if !summary.source_caps.is_empty() { return Some(SymbolicCallResult { value: SymbolicValue::Symbol(result_value), tainted: true, }); } // Inspect param_to_return transforms if summary.param_to_return.is_empty() { return None; } // Collect identity mappings let identities: Vec<_> = summary .param_to_return .iter() .filter(|(_, t)| matches!(t, TaintTransform::Identity)) .collect(); // Check for StripBits (sanitizer) let has_strip = summary .param_to_return .iter() .any(|(_, t)| matches!(t, TaintTransform::StripBits(_))); // Check for AddBits (source introduction) let has_add = summary .param_to_return .iter() .any(|(_, t)| matches!(t, TaintTransform::AddBits(_))); if has_add { return Some(SymbolicCallResult { value: SymbolicValue::Symbol(result_value), tainted: true, }); } if has_strip && identities.is_empty() { return Some(SymbolicCallResult { value: SymbolicValue::Unknown, tainted: false, }); } if identities.len() == 1 { let (param_idx, _) = identities[0]; if let Some(sym) = arg_syms.get(*param_idx) { let is_tainted = all_operands .get(*param_idx) .map(|v| state.is_tainted(*v)) .unwrap_or(false); return Some(SymbolicCallResult { value: sym.clone(), tainted: is_tainted, }); } } // Multiple Identity entries or other ambiguous cases: fall back None } /// Attempt to resolve a callee's return value symbolically using its /// `SsaFuncSummary` from 
`GlobalSummaries`. /// /// Returns `Some(SymbolicCallResult)` if the summary provides actionable /// modeling. Returns `None` to fall through to the opaque `mk_call` path. /// /// When a receiver has a known type via type facts, tries type-qualified /// callee name (e.g., `"HttpClient.send"`) before bare-name resolution. This /// improves summary-based modeling only, not general virtual dispatch. fn resolve_callee_symbolically( ctx: &SymexSummaryCtx, callee: &str, arg_syms: &[SymbolicValue], all_operands: &[SsaValue], state: &SymbolicState, result_value: SsaValue, receiver: Option, ) -> Option { // Type-qualified symbolic resolution when receiver has a known type. // Improves summary-based modeling only, not general virtual dispatch. // Precedence: exact qualified > type-aided disambiguation > bare-name fallback. if let (Some(tf), Some(recv)) = (ctx.type_facts, receiver) && let Some(receiver_type) = tf.get_type(recv) && let Some(prefix) = receiver_type.label_prefix() { let method = crate::callgraph::callee_leaf_name(callee); let qualified = format!("{}.{}", prefix, method); // Attempt 1: Exact lookup under type-qualified name. // Arity=None to avoid receiver-in-operands vs formal-param mismatch. let resolution = ctx.global_summaries .resolve_callee_key(&qualified, ctx.lang, ctx.namespace, None); if let CalleeResolution::Resolved(key) = resolution && let Some(summary) = ctx.global_summaries.get_ssa(&key) { return model_from_summary(summary, arg_syms, all_operands, state, result_value); } // Attempt 2: Disambiguate among ambiguous bare-name candidates. // Only select when a candidate's FuncKey.name EXACTLY equals the // qualified name, no substring matching, never guess. 
let bare_resolution = ctx.global_summaries .resolve_callee_key(method, ctx.lang, ctx.namespace, None); if let CalleeResolution::Ambiguous(candidates) = bare_resolution { let exact_match: Vec<_> = candidates.iter().filter(|k| k.name == qualified).collect(); if exact_match.len() == 1 && let Some(summary) = ctx.global_summaries.get_ssa(exact_match[0]) { return model_from_summary(summary, arg_syms, all_operands, state, result_value); } // >1 or 0 exact matches: do NOT guess, fall through } // Fall through to existing bare-name resolution } // Existing bare-name resolution path let normalized = crate::callgraph::callee_leaf_name(callee); let resolution = ctx.global_summaries.resolve_callee_key( normalized, ctx.lang, ctx.namespace, Some(all_operands.len()), ); let key = match resolution { CalleeResolution::Resolved(k) => k, CalleeResolution::NotFound | CalleeResolution::Ambiguous(_) => return None, }; let summary = ctx.global_summaries.get_ssa(&key)?; model_from_summary(summary, arg_syms, all_operands, state, result_value) } // ───────────────────────────────────────────────────────────────────────────── // Tests // ───────────────────────────────────────────────────────────────────────────── #[cfg(test)] mod tests { use super::*; use crate::cfg::{BinOp, Cfg, NodeInfo, StmtKind}; use crate::ssa::ir::{BlockId, SsaBlock, SsaInst, SsaValue, Terminator}; use petgraph::graph::NodeIndex; use smallvec::smallvec; /// Create a minimal Cfg with a single node that has the given bin_op. 
fn cfg_with_node(bin_op: Option) -> (Cfg, NodeIndex) { let mut cfg = Cfg::new(); let info = NodeInfo { kind: StmtKind::Seq, bin_op, ..Default::default() }; let idx = cfg.add_node(info); (cfg, idx) } fn make_inst(value: u32, op: SsaOp, cfg_node: NodeIndex) -> SsaInst { SsaInst { value: SsaValue(value), op, cfg_node, var_name: None, span: (0, 0), } } fn empty_ssa() -> SsaBody { SsaBody { blocks: vec![], entry: BlockId(0), value_defs: vec![], cfg_node_map: std::collections::HashMap::new(), exception_edges: vec![], field_interner: crate::ssa::ir::FieldInterner::default(), field_writes: std::collections::HashMap::new(), synthetic_externals: std::collections::HashSet::new(), slot_scoped_assigns: std::collections::HashSet::new(), } } #[test] fn transfer_const_int() { let (cfg, node) = cfg_with_node(None); let ssa = empty_ssa(); let mut state = SymbolicState::new(); let inst = make_inst(0, SsaOp::Const(Some("42".into())), node); transfer_inst(&mut state, &inst, &cfg, &ssa, None, None, None, None, None); assert_eq!(state.get(SsaValue(0)), SymbolicValue::Concrete(42)); assert!(!state.is_tainted(SsaValue(0))); } #[test] fn transfer_const_string() { let (cfg, node) = cfg_with_node(None); let ssa = empty_ssa(); let mut state = SymbolicState::new(); let inst = make_inst(0, SsaOp::Const(Some("\"hello\"".into())), node); transfer_inst(&mut state, &inst, &cfg, &ssa, None, None, None, None, None); assert_eq!( state.get(SsaValue(0)), SymbolicValue::ConcreteStr("hello".into()) ); } #[test] fn transfer_const_bool_fallback() { let (cfg, node) = cfg_with_node(None); let ssa = empty_ssa(); let mut state = SymbolicState::new(); let inst = make_inst(0, SsaOp::Const(Some("true".into())), node); transfer_inst(&mut state, &inst, &cfg, &ssa, None, None, None, None, None); assert_eq!(state.get(SsaValue(0)), SymbolicValue::Unknown); } #[test] fn transfer_const_none() { let (cfg, node) = cfg_with_node(None); let ssa = empty_ssa(); let mut state = SymbolicState::new(); let inst = make_inst(0, 
SsaOp::Const(None), node);
    transfer_inst(&mut state, &inst, &cfg, &ssa, None, None, None, None, None);
    assert_eq!(state.get(SsaValue(0)), SymbolicValue::Unknown);
}

/// A `Source` instruction yields a symbol named after its own SSA value and
/// marks that value tainted.
#[test]
fn transfer_source_tainted() {
    let (cfg, node) = cfg_with_node(None);
    let ssa = empty_ssa();
    let mut state = SymbolicState::new();
    let inst = make_inst(0, SsaOp::Source, node);
    transfer_inst(&mut state, &inst, &cfg, &ssa, None, None, None, None, None);
    assert_eq!(state.get(SsaValue(0)), SymbolicValue::Symbol(SsaValue(0)));
    assert!(state.is_tainted(SsaValue(0)));
}

/// A `Param` instruction yields a symbol for its own SSA value but leaves it
/// untainted.
#[test]
fn transfer_param_not_tainted() {
    let (cfg, node) = cfg_with_node(None);
    let ssa = empty_ssa();
    let mut state = SymbolicState::new();
    let inst = make_inst(0, SsaOp::Param { index: 0 }, node);
    transfer_inst(&mut state, &inst, &cfg, &ssa, None, None, None, None, None);
    assert_eq!(state.get(SsaValue(0)), SymbolicValue::Symbol(SsaValue(0)));
    assert!(!state.is_tainted(SsaValue(0)));
}

/// A single-operand `Assign` copies both the operand's symbolic value and its
/// taint to the defined value.
#[test]
fn transfer_assign_copy() {
    let (cfg, node) = cfg_with_node(None);
    let ssa = empty_ssa();
    let mut state = SymbolicState::new();
    // Set up source value
    state.set(SsaValue(0), SymbolicValue::Concrete(7));
    state.mark_tainted(SsaValue(0));
    let inst = make_inst(1, SsaOp::Assign(smallvec![SsaValue(0)]), node);
    transfer_inst(&mut state, &inst, &cfg, &ssa, None, None, None, None, None);
    assert_eq!(state.get(SsaValue(1)), SymbolicValue::Concrete(7));
    assert!(state.is_tainted(SsaValue(1)));
}

/// A two-operand `Assign` on a CFG node carrying `BinOp::Mul` builds a
/// `BinOp(Mul, lhs, rhs)` expression and is tainted when an operand is.
#[test]
fn transfer_assign_binop() {
    let (cfg, node) = cfg_with_node(Some(BinOp::Mul));
    let ssa = empty_ssa();
    let mut state = SymbolicState::new();
    state.set(SsaValue(0), SymbolicValue::Symbol(SsaValue(0)));
    state.mark_tainted(SsaValue(0));
    state.set(SsaValue(1), SymbolicValue::Concrete(2));
    let inst = make_inst(2, SsaOp::Assign(smallvec![SsaValue(0), SsaValue(1)]), node);
    transfer_inst(&mut state, &inst, &cfg, &ssa, None, None, None, None, None);
    let expected = SymbolicValue::BinOp(
        Op::Mul,
        Box::new(SymbolicValue::Symbol(SsaValue(0))),
        Box::new(SymbolicValue::Concrete(2)),
    );
    assert_eq!(state.get(SsaValue(2)), expected);
    assert!(state.is_tainted(SsaValue(2)));
}

/// A two-operand `Assign` on a CFG node without a binary operator cannot be
/// modeled and ends up `Unknown`.
#[test]
fn transfer_assign_no_binop_is_unknown() {
    let (cfg, node) = cfg_with_node(None);
    let ssa = empty_ssa();
    let mut state = SymbolicState::new();
    state.set(SsaValue(0), SymbolicValue::Symbol(SsaValue(0)));
    state.set(SsaValue(1), SymbolicValue::Concrete(2));
    let inst = make_inst(2, SsaOp::Assign(smallvec![SsaValue(0), SsaValue(1)]), node);
    transfer_inst(&mut state, &inst, &cfg, &ssa, None, None, None, None, None);
    assert_eq!(state.get(SsaValue(2)), SymbolicValue::Unknown);
}

/// Without any summary context a call becomes an opaque `Call(name, args)`
/// expression and inherits taint from its tainted argument.
#[test]
fn transfer_call() {
    let (cfg, node) = cfg_with_node(None);
    let ssa = empty_ssa();
    let mut state = SymbolicState::new();
    state.set(SsaValue(0), SymbolicValue::Symbol(SsaValue(0)));
    state.mark_tainted(SsaValue(0));
    let inst = make_inst(
        1,
        SsaOp::Call {
            callee: "parseInt".into(),
            callee_text: None,
            args: vec![smallvec![SsaValue(0)]],
            receiver: None,
        },
        node,
    );
    transfer_inst(&mut state, &inst, &cfg, &ssa, None, None, None, None, None);
    let expected =
        SymbolicValue::Call("parseInt".into(), vec![SymbolicValue::Symbol(SsaValue(0))]);
    assert_eq!(state.get(SsaValue(1)), expected);
    assert!(state.is_tainted(SsaValue(1)));
}

/// For a method call the receiver's symbolic value is placed ahead of the
/// explicit arguments in the resulting `Call` expression.
#[test]
fn transfer_call_with_receiver() {
    let (cfg, node) = cfg_with_node(None);
    let ssa = empty_ssa();
    let mut state = SymbolicState::new();
    state.set(SsaValue(0), SymbolicValue::Symbol(SsaValue(0))); // receiver
    state.set(SsaValue(1), SymbolicValue::Concrete(42)); // arg
    let inst = make_inst(
        2,
        SsaOp::Call {
            callee: "send".into(),
            callee_text: None,
            args: vec![smallvec![SsaValue(1)]],
            receiver: Some(SsaValue(0)),
        },
        node,
    );
    transfer_inst(&mut state, &inst, &cfg, &ssa, None, None, None, None, None);
    let expected = SymbolicValue::Call(
        "send".into(),
        vec![
            SymbolicValue::Symbol(SsaValue(0)),
            SymbolicValue::Concrete(42),
        ],
    );
    assert_eq!(state.get(SsaValue(2)), expected);
}

/// A phi over differing operands builds a `Phi` expression listing each
/// (block, value) pair and is tainted when any operand is tainted.
#[test]
fn transfer_phi() {
    let (cfg, node) = cfg_with_node(None);
    let ssa = empty_ssa();
    let mut state = SymbolicState::new();
    state.set(SsaValue(0), SymbolicValue::Concrete(1));
    state.set(SsaValue(1), SymbolicValue::Symbol(SsaValue(1)));
    state.mark_tainted(SsaValue(1));
    let inst = make_inst(
        2,
        SsaOp::Phi(smallvec![
            (BlockId(0), SsaValue(0)),
            (BlockId(1), SsaValue(1))
        ]),
        node,
    );
    transfer_inst(&mut state, &inst, &cfg, &ssa, None, None, None, None, None);
    let expected = SymbolicValue::Phi(vec![
        (BlockId(0), SymbolicValue::Concrete(1)),
        (BlockId(1), SymbolicValue::Symbol(SsaValue(1))),
    ]);
    assert_eq!(state.get(SsaValue(2)), expected);
    assert!(state.is_tainted(SsaValue(2)));
}

/// Taint introduced by a source survives a copy, a multiplication with an
/// untainted constant, and a subsequent call.
#[test]
fn taint_propagation_chain() {
    // Build a cfg with two nodes: one plain (for source/copy/const), one with Mul
    let mut cfg = Cfg::new();
    let node_plain = cfg.add_node(NodeInfo {
        kind: StmtKind::Seq,
        ..Default::default()
    });
    let node_mul = cfg.add_node(NodeInfo {
        kind: StmtKind::Seq,
        bin_op: Some(BinOp::Mul),
        ..Default::default()
    });
    let ssa = empty_ssa();
    let mut state = SymbolicState::new();

    // v0: source (tainted)
    let i0 = make_inst(0, SsaOp::Source, node_plain);
    transfer_inst(&mut state, &i0, &cfg, &ssa, None, None, None, None, None);
    assert!(state.is_tainted(SsaValue(0)));

    // v1: copy of v0
    let i1 = make_inst(1, SsaOp::Assign(smallvec![SsaValue(0)]), node_plain);
    transfer_inst(&mut state, &i1, &cfg, &ssa, None, None, None, None, None);
    assert!(state.is_tainted(SsaValue(1)));

    // v2: constant (not tainted)
    let i2 = make_inst(2, SsaOp::Const(Some("3".into())), node_plain);
    transfer_inst(&mut state, &i2, &cfg, &ssa, None, None, None, None, None);
    assert!(!state.is_tainted(SsaValue(2)));

    // v3: v1 * v2 (tainted because v1 is tainted)
    let i3 = make_inst(
        3,
        SsaOp::Assign(smallvec![SsaValue(1), SsaValue(2)]),
        node_mul,
    );
    transfer_inst(&mut state, &i3, &cfg, &ssa, None, None, None, None, None);
    assert!(state.is_tainted(SsaValue(3)));
    let expected = SymbolicValue::BinOp(
        Op::Mul,
        Box::new(SymbolicValue::Symbol(SsaValue(0))), // v1 was a copy of v0 (Symbol)
        Box::new(SymbolicValue::Concrete(3)),
    );
    assert_eq!(state.get(SsaValue(3)), expected);

    // v4: call using v3 (still tainted)
    let i4 = make_inst(
        4,
        SsaOp::Call {
            callee: "toString".into(),
            callee_text: None,
            args: vec![smallvec![SsaValue(3)]],
            receiver: None,
        },
        node_plain,
    );
    transfer_inst(&mut state, &i4, &cfg, &ssa, None, None, None, None, None);
    assert!(state.is_tainted(SsaValue(4)));
}

/// A `Nop` instruction leaves a previously stored symbolic value untouched.
#[test]
fn transfer_nop_skipped() {
    let (cfg, node) = cfg_with_node(None);
    let ssa = empty_ssa();
    let mut state = SymbolicState::new();
    state.set(SsaValue(0), SymbolicValue::Concrete(99));
    let inst = make_inst(0, SsaOp::Nop, node);
    transfer_inst(&mut state, &inst, &cfg, &ssa, None, None, None, None, None);
    // Nop does not overwrite existing value
    assert_eq!(state.get(SsaValue(0)), SymbolicValue::Concrete(99));
}

/// `transfer_block` handles both the phi list and the body of a block; a phi
/// whose operands all carry the same value folds to that value.
#[test]
fn transfer_block_processes_phis_then_body() {
    let (cfg, node) = cfg_with_node(None);
    let ssa = empty_ssa();
    let mut state = SymbolicState::new();
    // Set up predecessor values for phi
    state.set(SsaValue(0), SymbolicValue::Concrete(1));
    state.set(SsaValue(1), SymbolicValue::Concrete(1));
    let block = SsaBlock {
        id: BlockId(0),
        phis: vec![make_inst(
            2,
            SsaOp::Phi(smallvec![
                (BlockId(0), SsaValue(0)),
                (BlockId(1), SsaValue(1))
            ]),
            node,
        )],
        body: vec![make_inst(3, SsaOp::Const(Some("10".into())), node)],
        terminator: Terminator::Return(None),
        preds: smallvec![],
        succs: smallvec![],
    };
    transfer_block(&mut state, &block, &cfg, &ssa, None, None, None, None);
    // Phi with all-same should fold to Concrete(1)
    assert_eq!(state.get(SsaValue(2)), SymbolicValue::Concrete(1));
    // Body const should be set
    assert_eq!(state.get(SsaValue(3)), SymbolicValue::Concrete(10));
}

/// When the incoming predecessor block is known, a phi resolves directly to
/// the operand that flows in from that predecessor.
#[test]
fn transfer_phi_with_predecessor_resolves_to_operand() {
    let (cfg, node) = cfg_with_node(None);
    let ssa = empty_ssa();
    let mut state = SymbolicState::new();
    // Set up different values for each predecessor
    state.set(SsaValue(0), SymbolicValue::Concrete(10));
    state.set(SsaValue(1), SymbolicValue::Concrete(20));
    let inst = make_inst(
        2,
        SsaOp::Phi(smallvec![
            (BlockId(0), SsaValue(0)),
            (BlockId(1), SsaValue(1))
        ]),
        node,
    );
    // With predecessor B1, should resolve to SsaValue(1) → Concrete(20)
    transfer_inst_with_predecessor(
        &mut state,
        &inst,
        &cfg,
        &ssa,
        Some(BlockId(1)),
        None,
        None,
        None,
        None,
        None,
    );
    assert_eq!(state.get(SsaValue(2)), SymbolicValue::Concrete(20));
}

/// With a known predecessor, taint comes only from the selected operand: an
/// untainted selection stays untainted even if another operand is tainted.
#[test]
fn transfer_phi_with_predecessor_taint_from_selected_only() {
    let (cfg, node) = cfg_with_node(None);
    let ssa = empty_ssa();
    let mut state = SymbolicState::new();
    // B0's operand is NOT tainted, B1's operand IS tainted
    state.set(SsaValue(0), SymbolicValue::Concrete(10));
    state.set(SsaValue(1), SymbolicValue::Symbol(SsaValue(1)));
    state.mark_tainted(SsaValue(1));
    let inst = make_inst(
        2,
        SsaOp::Phi(smallvec![
            (BlockId(0), SsaValue(0)),
            (BlockId(1), SsaValue(1))
        ]),
        node,
    );
    // With predecessor B0 (untainted), result should NOT be tainted
    transfer_inst_with_predecessor(
        &mut state,
        &inst,
        &cfg,
        &ssa,
        Some(BlockId(0)),
        None,
        None,
        None,
        None,
        None,
    );
    assert!(!state.is_tainted(SsaValue(2)));
}

/// With a known predecessor whose operand is tainted, the phi result is
/// tainted.
#[test]
fn transfer_phi_with_predecessor_taint_from_tainted_pred() {
    let (cfg, node) = cfg_with_node(None);
    let ssa = empty_ssa();
    let mut state = SymbolicState::new();
    state.set(SsaValue(0), SymbolicValue::Concrete(10));
    state.set(SsaValue(1), SymbolicValue::Symbol(SsaValue(1)));
    state.mark_tainted(SsaValue(1));
    let inst = make_inst(
        2,
        SsaOp::Phi(smallvec![
            (BlockId(0), SsaValue(0)),
            (BlockId(1), SsaValue(1))
        ]),
        node,
    );
    // With predecessor B1 (tainted), result SHOULD be tainted
    transfer_inst_with_predecessor(
        &mut state,
        &inst,
        &cfg,
        &ssa,
        Some(BlockId(1)),
        None,
        None,
        None,
        None,
        None,
    );
    assert!(state.is_tainted(SsaValue(2)));
}

/// Passing no predecessor to `transfer_inst_with_predecessor` produces the
/// full `Phi` expression over all operands.
#[test]
fn transfer_phi_without_predecessor_builds_phi_expr() {
    let (cfg, node) = cfg_with_node(None);
    let ssa = empty_ssa();
    let mut state = SymbolicState::new();
    state.set(SsaValue(0), SymbolicValue::Concrete(10));
    state.set(SsaValue(1), SymbolicValue::Concrete(20));
    let inst = make_inst(
        2,
        SsaOp::Phi(smallvec![
            (BlockId(0), SsaValue(0)),
            (BlockId(1), SsaValue(1))
        ]),
        node,
    );
    // Without predecessor (None), falls back to Phi(...) expression
    transfer_inst_with_predecessor(
        &mut state,
        &inst,
        &cfg,
        &ssa,
        None,
        None,
        None,
        None,
        None,
        None,
    );
    let expected = SymbolicValue::Phi(vec![
        (BlockId(0), SymbolicValue::Concrete(10)),
        (BlockId(1), SymbolicValue::Concrete(20)),
    ]);
    assert_eq!(state.get(SsaValue(2)), expected);
}

/// A supplied predecessor has no effect on a non-phi instruction: the constant
/// is still parsed to its concrete value.
#[test]
fn transfer_non_phi_ignores_predecessor() {
    // Non-phi instructions should behave identically regardless of predecessor
    let (cfg, node) = cfg_with_node(None);
    let ssa = empty_ssa();
    let mut state = SymbolicState::new();
    let inst = make_inst(0, SsaOp::Const(Some("42".into())), node);
    transfer_inst_with_predecessor(
        &mut state,
        &inst,
        &cfg,
        &ssa,
        Some(BlockId(5)),
        None,
        None,
        None,
        None,
        None,
    );
    assert_eq!(state.get(SsaValue(0)), SymbolicValue::Concrete(42));
}

// ─── Cross-file summary resolution tests ─────────────────────────

use crate::labels::Cap;
use crate::ssa::type_facts::TypeKind;
use crate::summary::FuncSummary;
use crate::summary::GlobalSummaries;
use crate::summary::ssa_summary::{SsaFuncSummary, TaintTransform};
use crate::symbol::{FuncKey, Lang};

/// Build a summary context over `gs` for a JavaScript caller in namespace
/// `"test.js"`, with no type facts attached.
fn make_summary_ctx(gs: &GlobalSummaries) -> SymexSummaryCtx<'_> {
    SymexSummaryCtx {
        global_summaries: gs,
        lang: Lang::JavaScript,
        namespace: "test.js",
        type_facts: None,
    }
}

/// Build the `FuncKey` for a JavaScript function `name` with the given arity,
/// placed in namespace `"helper.js"`; remaining key fields take their defaults.
fn make_func_key(name: &str, arity: usize) -> FuncKey {
    FuncKey {
        lang: Lang::JavaScript,
        namespace: "helper.js".into(),
        name: name.into(),
        arity: Some(arity),
        ..Default::default()
    }
}

/// Insert both a regular FuncSummary (for resolve_callee_key lookup) and
/// an SsaFuncSummary (for the actual symbolic modeling).
fn insert_summary(gs: &mut GlobalSummaries, name: &str, arity: usize, ssa: SsaFuncSummary) { let key = make_func_key(name, arity); // Regular summary needed for by_lang_name index used by resolve_callee_key gs.insert( key.clone(), FuncSummary { name: name.into(), file_path: "helper.js".into(), lang: "javascript".into(), param_count: arity, param_names: vec![], source_caps: 0, sanitizer_caps: 0, sink_caps: 0, propagating_params: vec![], propagates_taint: false, tainted_sink_params: vec![], callees: vec![], ..Default::default() }, ); gs.insert_ssa(key, ssa); } #[test] fn transfer_call_identity_summary() { let (cfg, node) = cfg_with_node(None); let ssa = empty_ssa(); let mut state = SymbolicState::new(); // Arg v0 is tainted state.set(SsaValue(0), SymbolicValue::Symbol(SsaValue(0))); state.mark_tainted(SsaValue(0)); // Build GlobalSummaries with exactly one Identity(param 0) let mut gs = GlobalSummaries::new(); insert_summary( &mut gs, "passthrough", 1, SsaFuncSummary { param_to_return: vec![(0, TaintTransform::Identity)], param_to_sink: vec![], source_caps: Cap::empty(), param_to_sink_param: vec![], param_container_to_return: vec![], param_to_container_store: vec![], return_type: None, return_abstract: None, source_to_callback: vec![], receiver_to_return: None, receiver_to_sink: Cap::empty(), abstract_transfer: vec![], param_return_paths: vec![], points_to: Default::default(), field_points_to: Default::default(), return_path_facts: smallvec::SmallVec::new(), typed_call_receivers: vec![], validated_params_to_return: smallvec::SmallVec::new(), param_to_gate_filters: vec![], entry_kind: None, }, ); let ctx = make_summary_ctx(&gs); let inst = make_inst( 1, SsaOp::Call { callee: "passthrough".into(), callee_text: None, args: vec![smallvec![SsaValue(0)]], receiver: None, }, node, ); transfer_inst( &mut state, &inst, &cfg, &ssa, Some(&ctx), None, None, None, None, ); // Should pass through arg's symbolic value assert_eq!(state.get(SsaValue(1)), 
SymbolicValue::Symbol(SsaValue(0))); assert!(state.is_tainted(SsaValue(1))); } #[test] fn transfer_call_multiple_identity_fallback() { let (cfg, node) = cfg_with_node(None); let ssa = empty_ssa(); let mut state = SymbolicState::new(); state.set(SsaValue(0), SymbolicValue::Symbol(SsaValue(0))); state.mark_tainted(SsaValue(0)); state.set(SsaValue(1), SymbolicValue::Concrete(42)); // Two Identity entries, should fall back to mk_call, NOT pick one let mut gs = GlobalSummaries::new(); insert_summary( &mut gs, "ambig", 2, SsaFuncSummary { param_to_return: vec![(0, TaintTransform::Identity), (1, TaintTransform::Identity)], param_to_sink: vec![], source_caps: Cap::empty(), param_to_sink_param: vec![], param_container_to_return: vec![], param_to_container_store: vec![], return_type: None, return_abstract: None, source_to_callback: vec![], receiver_to_return: None, receiver_to_sink: Cap::empty(), abstract_transfer: vec![], param_return_paths: vec![], points_to: Default::default(), field_points_to: Default::default(), return_path_facts: smallvec::SmallVec::new(), typed_call_receivers: vec![], validated_params_to_return: smallvec::SmallVec::new(), param_to_gate_filters: vec![], entry_kind: None, }, ); let ctx = make_summary_ctx(&gs); let inst = make_inst( 2, SsaOp::Call { callee: "ambig".into(), callee_text: None, args: vec![smallvec![SsaValue(0)], smallvec![SsaValue(1)]], receiver: None, }, node, ); transfer_inst( &mut state, &inst, &cfg, &ssa, Some(&ctx), None, None, None, None, ); // Should fall back to Call expression, not Symbol pass-through match state.get(SsaValue(2)) { SymbolicValue::Call(name, _) => assert_eq!(name, "ambig"), other => panic!("expected Call fallback, got {:?}", other), } } #[test] fn transfer_call_stripbits_summary() { let (cfg, node) = cfg_with_node(None); let ssa = empty_ssa(); let mut state = SymbolicState::new(); state.set(SsaValue(0), SymbolicValue::Symbol(SsaValue(0))); state.mark_tainted(SsaValue(0)); let mut gs = GlobalSummaries::new(); 
insert_summary( &mut gs, "sanitize", 1, SsaFuncSummary { param_to_return: vec![(0, TaintTransform::StripBits(Cap::SQL_QUERY))], param_to_sink: vec![], source_caps: Cap::empty(), param_to_sink_param: vec![], param_container_to_return: vec![], param_to_container_store: vec![], return_type: None, return_abstract: None, source_to_callback: vec![], receiver_to_return: None, receiver_to_sink: Cap::empty(), abstract_transfer: vec![], param_return_paths: vec![], points_to: Default::default(), field_points_to: Default::default(), return_path_facts: smallvec::SmallVec::new(), typed_call_receivers: vec![], validated_params_to_return: smallvec::SmallVec::new(), param_to_gate_filters: vec![], entry_kind: None, }, ); let ctx = make_summary_ctx(&gs); let inst = make_inst( 1, SsaOp::Call { callee: "sanitize".into(), callee_text: None, args: vec![smallvec![SsaValue(0)]], receiver: None, }, node, ); transfer_inst( &mut state, &inst, &cfg, &ssa, Some(&ctx), None, None, None, None, ); // StripBits → Unknown, not tainted assert_eq!(state.get(SsaValue(1)), SymbolicValue::Unknown); assert!(!state.is_tainted(SsaValue(1))); } #[test] fn transfer_call_addbits_summary() { let (cfg, node) = cfg_with_node(None); let ssa = empty_ssa(); let mut state = SymbolicState::new(); let mut gs = GlobalSummaries::new(); insert_summary( &mut gs, "enrich", 1, SsaFuncSummary { param_to_return: vec![(0, TaintTransform::AddBits(Cap::ENV_VAR))], param_to_sink: vec![], source_caps: Cap::empty(), param_to_sink_param: vec![], param_container_to_return: vec![], param_to_container_store: vec![], return_type: None, return_abstract: None, source_to_callback: vec![], receiver_to_return: None, receiver_to_sink: Cap::empty(), abstract_transfer: vec![], param_return_paths: vec![], points_to: Default::default(), field_points_to: Default::default(), return_path_facts: smallvec::SmallVec::new(), typed_call_receivers: vec![], validated_params_to_return: smallvec::SmallVec::new(), param_to_gate_filters: vec![], entry_kind: 
None, }, ); let ctx = make_summary_ctx(&gs); let inst = make_inst( 1, SsaOp::Call { callee: "enrich".into(), callee_text: None, args: vec![smallvec![SsaValue(0)]], receiver: None, }, node, ); transfer_inst( &mut state, &inst, &cfg, &ssa, Some(&ctx), None, None, None, None, ); // AddBits → fresh Symbol, tainted assert_eq!(state.get(SsaValue(1)), SymbolicValue::Symbol(SsaValue(1))); assert!(state.is_tainted(SsaValue(1))); } #[test] fn transfer_call_source_summary() { let (cfg, node) = cfg_with_node(None); let ssa = empty_ssa(); let mut state = SymbolicState::new(); let mut gs = GlobalSummaries::new(); insert_summary( &mut gs, "readEnv", 0, SsaFuncSummary { param_to_return: vec![], param_to_sink: vec![], source_caps: Cap::ENV_VAR, param_to_sink_param: vec![], param_container_to_return: vec![], param_to_container_store: vec![], return_type: None, return_abstract: None, source_to_callback: vec![], receiver_to_return: None, receiver_to_sink: Cap::empty(), abstract_transfer: vec![], param_return_paths: vec![], points_to: Default::default(), field_points_to: Default::default(), return_path_facts: smallvec::SmallVec::new(), typed_call_receivers: vec![], validated_params_to_return: smallvec::SmallVec::new(), param_to_gate_filters: vec![], entry_kind: None, }, ); let ctx = make_summary_ctx(&gs); let inst = make_inst( 0, SsaOp::Call { callee: "readEnv".into(), callee_text: None, args: vec![], receiver: None, }, node, ); transfer_inst( &mut state, &inst, &cfg, &ssa, Some(&ctx), None, None, None, None, ); // source_caps non-empty → tainted Symbol assert_eq!(state.get(SsaValue(0)), SymbolicValue::Symbol(SsaValue(0))); assert!(state.is_tainted(SsaValue(0))); } #[test] fn transfer_call_no_summary_fallback() { let (cfg, node) = cfg_with_node(None); let ssa = empty_ssa(); let mut state = SymbolicState::new(); state.set(SsaValue(0), SymbolicValue::Symbol(SsaValue(0))); // Empty GlobalSummaries → NotFound → mk_call fallback let gs = GlobalSummaries::new(); let ctx = 
make_summary_ctx(&gs); let inst = make_inst( 1, SsaOp::Call { callee: "unknown_func".into(), callee_text: None, args: vec![smallvec![SsaValue(0)]], receiver: None, }, node, ); transfer_inst( &mut state, &inst, &cfg, &ssa, Some(&ctx), None, None, None, None, ); match state.get(SsaValue(1)) { SymbolicValue::Call(name, _) => assert_eq!(name, "unknown_func"), other => panic!("expected Call fallback, got {:?}", other), } } #[test] fn transfer_call_none_summary_ctx_fallback() { let (cfg, node) = cfg_with_node(None); let ssa = empty_ssa(); let mut state = SymbolicState::new(); state.set(SsaValue(0), SymbolicValue::Symbol(SsaValue(0))); state.mark_tainted(SsaValue(0)); // No summary ctx at all → mk_call let inst = make_inst( 1, SsaOp::Call { callee: "foo".into(), callee_text: None, args: vec![smallvec![SsaValue(0)]], receiver: None, }, node, ); transfer_inst(&mut state, &inst, &cfg, &ssa, None, None, None, None, None); match state.get(SsaValue(1)) { SymbolicValue::Call(name, _) => assert_eq!(name, "foo"), other => panic!("expected Call fallback, got {:?}", other), } assert!(state.is_tainted(SsaValue(1))); } // ─── Type-qualified symbolic resolution tests ────────── use crate::ssa::type_facts::{TypeFact, TypeFactResult}; use std::collections::HashMap; fn make_type_facts(entries: Vec<(SsaValue, TypeKind)>) -> TypeFactResult { let facts = entries .into_iter() .map(|(v, kind)| { ( v, TypeFact { kind, nullable: false, }, ) }) .collect::>(); TypeFactResult { facts } } fn insert_java_summary( gs: &mut GlobalSummaries, name: &str, namespace: &str, arity: usize, ssa: SsaFuncSummary, ) { let key = FuncKey { lang: Lang::Java, namespace: namespace.into(), name: name.into(), arity: Some(arity), ..Default::default() }; gs.insert( key.clone(), FuncSummary { name: name.into(), file_path: namespace.into(), lang: "java".into(), param_count: arity, param_names: vec![], source_caps: 0, sanitizer_caps: 0, sink_caps: 0, propagating_params: vec![], propagates_taint: false, tainted_sink_params: 
vec![], callees: vec![], ..Default::default() }, ); gs.insert_ssa(key, ssa); } #[test] fn transfer_call_type_qualified_resolution() { // Receiver v1 typed as HttpClient, callee "send" → qualified "HttpClient.send" // Summary registered under "HttpClient.send" should be found. let (cfg, node) = cfg_with_node(None); let ssa = empty_ssa(); let mut state = SymbolicState::new(); // v0 = tainted URL argument state.set(SsaValue(0), SymbolicValue::Symbol(SsaValue(0))); state.mark_tainted(SsaValue(0)); // v1 = receiver (HttpClient instance) state.set(SsaValue(1), SymbolicValue::Symbol(SsaValue(1))); let mut gs = GlobalSummaries::new(); insert_java_summary( &mut gs, "HttpClient.send", "HttpClient.java", 1, SsaFuncSummary { param_to_return: vec![(0, TaintTransform::Identity)], param_to_sink: vec![], source_caps: Cap::empty(), param_to_sink_param: vec![], param_container_to_return: vec![], param_to_container_store: vec![], return_type: None, return_abstract: None, source_to_callback: vec![], receiver_to_return: None, receiver_to_sink: Cap::empty(), abstract_transfer: vec![], param_return_paths: vec![], points_to: Default::default(), field_points_to: Default::default(), return_path_facts: smallvec::SmallVec::new(), typed_call_receivers: vec![], validated_params_to_return: smallvec::SmallVec::new(), param_to_gate_filters: vec![], entry_kind: None, }, ); let tf = make_type_facts(vec![(SsaValue(1), TypeKind::HttpClient)]); let ctx = SymexSummaryCtx { global_summaries: &gs, lang: Lang::Java, namespace: "Caller.java", type_facts: Some(&tf), }; // v2 = v1.send(v0) let inst = make_inst( 2, SsaOp::Call { callee: "send".into(), callee_text: None, args: vec![smallvec![SsaValue(0)]], receiver: Some(SsaValue(1)), }, node, ); transfer_inst( &mut state, &inst, &cfg, &ssa, Some(&ctx), None, None, Some(Lang::Java), None, ); // Identity(0) maps to arg_syms[0] which is the receiver (prepended). // So return value should be the receiver's symbolic value. 
assert_eq!(state.get(SsaValue(2)), SymbolicValue::Symbol(SsaValue(1))); } #[test] fn transfer_call_type_qualified_fallback_no_type() { // Receiver has no known type → type-qualified resolution does not fire, // bare-name resolution works normally. let (cfg, node) = cfg_with_node(None); let ssa = empty_ssa(); let mut state = SymbolicState::new(); state.set(SsaValue(0), SymbolicValue::Symbol(SsaValue(0))); state.mark_tainted(SsaValue(0)); // Register summary under bare name "passthrough" (Java, arity 1) let mut gs = GlobalSummaries::new(); insert_java_summary( &mut gs, "passthrough", "helper.java", 1, SsaFuncSummary { param_to_return: vec![(0, TaintTransform::Identity)], param_to_sink: vec![], source_caps: Cap::empty(), param_to_sink_param: vec![], param_container_to_return: vec![], param_to_container_store: vec![], return_type: None, return_abstract: None, source_to_callback: vec![], receiver_to_return: None, receiver_to_sink: Cap::empty(), abstract_transfer: vec![], param_return_paths: vec![], points_to: Default::default(), field_points_to: Default::default(), return_path_facts: smallvec::SmallVec::new(), typed_call_receivers: vec![], validated_params_to_return: smallvec::SmallVec::new(), param_to_gate_filters: vec![], entry_kind: None, }, ); // Empty type facts, no receiver type info let tf = make_type_facts(vec![]); let ctx = SymexSummaryCtx { global_summaries: &gs, lang: Lang::Java, namespace: "test.java", type_facts: Some(&tf), }; let inst = make_inst( 1, SsaOp::Call { callee: "passthrough".into(), callee_text: None, args: vec![smallvec![SsaValue(0)]], receiver: None, }, node, ); transfer_inst( &mut state, &inst, &cfg, &ssa, Some(&ctx), None, None, Some(Lang::Java), None, ); // Bare-name resolution: Identity(0) → pass through arg assert_eq!(state.get(SsaValue(1)), SymbolicValue::Symbol(SsaValue(0))); assert!(state.is_tainted(SsaValue(1))); } #[test] fn transfer_call_type_qualified_disambiguation() { // Two summaries both named "send" in different namespaces. 
// One named "HttpClient.send", type disambiguation picks it. let (cfg, node) = cfg_with_node(None); let ssa = empty_ssa(); let mut state = SymbolicState::new(); state.set(SsaValue(0), SymbolicValue::Symbol(SsaValue(0))); state.mark_tainted(SsaValue(0)); state.set(SsaValue(1), SymbolicValue::Symbol(SsaValue(1))); let mut gs = GlobalSummaries::new(); // First "send", generic, in ns A (Identity: passes through) insert_java_summary( &mut gs, "send", "SocketClient.java", 1, SsaFuncSummary { param_to_return: vec![(0, TaintTransform::Identity)], param_to_sink: vec![], source_caps: Cap::empty(), param_to_sink_param: vec![], param_container_to_return: vec![], param_to_container_store: vec![], return_type: None, return_abstract: None, source_to_callback: vec![], receiver_to_return: None, receiver_to_sink: Cap::empty(), abstract_transfer: vec![], param_return_paths: vec![], points_to: Default::default(), field_points_to: Default::default(), return_path_facts: smallvec::SmallVec::new(), typed_call_receivers: vec![], validated_params_to_return: smallvec::SmallVec::new(), param_to_gate_filters: vec![], entry_kind: None, }, ); // Second "send", in ns B, also with same arity → ambiguous bare-name insert_java_summary( &mut gs, "send", "WebSocketClient.java", 1, SsaFuncSummary { param_to_return: vec![(0, TaintTransform::StripBits(Cap::HTML_ESCAPE))], param_to_sink: vec![], source_caps: Cap::empty(), param_to_sink_param: vec![], param_container_to_return: vec![], param_to_container_store: vec![], return_type: None, return_abstract: None, source_to_callback: vec![], receiver_to_return: None, receiver_to_sink: Cap::empty(), abstract_transfer: vec![], param_return_paths: vec![], points_to: Default::default(), field_points_to: Default::default(), return_path_facts: smallvec::SmallVec::new(), typed_call_receivers: vec![], validated_params_to_return: smallvec::SmallVec::new(), param_to_gate_filters: vec![], entry_kind: None, }, ); // Also register the type-qualified name so Attempt 1 can 
find it insert_java_summary( &mut gs, "HttpClient.send", "HttpClient.java", 1, SsaFuncSummary { param_to_return: vec![], param_to_sink: vec![], source_caps: Cap::ENV_VAR, // Source, distinct signal param_to_sink_param: vec![], param_container_to_return: vec![], param_to_container_store: vec![], return_type: None, return_abstract: None, source_to_callback: vec![], receiver_to_return: None, receiver_to_sink: Cap::empty(), abstract_transfer: vec![], param_return_paths: vec![], points_to: Default::default(), field_points_to: Default::default(), return_path_facts: smallvec::SmallVec::new(), typed_call_receivers: vec![], validated_params_to_return: smallvec::SmallVec::new(), param_to_gate_filters: vec![], entry_kind: None, }, ); let tf = make_type_facts(vec![(SsaValue(1), TypeKind::HttpClient)]); let ctx = SymexSummaryCtx { global_summaries: &gs, lang: Lang::Java, namespace: "Caller.java", type_facts: Some(&tf), }; // v2 = v1.send(v0), receiver v1 is HttpClient let inst = make_inst( 2, SsaOp::Call { callee: "send".into(), callee_text: None, args: vec![smallvec![SsaValue(0)]], receiver: Some(SsaValue(1)), }, node, ); transfer_inst( &mut state, &inst, &cfg, &ssa, Some(&ctx), None, None, Some(Lang::Java), None, ); // Should resolve to "HttpClient.send" summary (source_caps=ENV_VAR → tainted Symbol) assert_eq!(state.get(SsaValue(2)), SymbolicValue::Symbol(SsaValue(2))); assert!(state.is_tainted(SsaValue(2))); } #[test] fn transfer_call_type_qualified_wrong_owner() { // Receiver is HttpClient, but summary is registered as "DatabaseConnection.send". // Must NOT resolve to the wrong summary. 
let (cfg, node) = cfg_with_node(None); let ssa = empty_ssa(); let mut state = SymbolicState::new(); state.set(SsaValue(0), SymbolicValue::Symbol(SsaValue(0))); state.set(SsaValue(1), SymbolicValue::Symbol(SsaValue(1))); let mut gs = GlobalSummaries::new(); // Summary under "DatabaseConnection.send", wrong type insert_java_summary( &mut gs, "DatabaseConnection.send", "DatabaseConnection.java", 1, SsaFuncSummary { param_to_return: vec![], param_to_sink: vec![], source_caps: Cap::ENV_VAR, param_to_sink_param: vec![], param_container_to_return: vec![], param_to_container_store: vec![], return_type: None, return_abstract: None, source_to_callback: vec![], receiver_to_return: None, receiver_to_sink: Cap::empty(), abstract_transfer: vec![], param_return_paths: vec![], points_to: Default::default(), field_points_to: Default::default(), return_path_facts: smallvec::SmallVec::new(), typed_call_receivers: vec![], validated_params_to_return: smallvec::SmallVec::new(), param_to_gate_filters: vec![], entry_kind: None, }, ); // Receiver typed as HttpClient, constructs "HttpClient.send", not "DatabaseConnection.send" let tf = make_type_facts(vec![(SsaValue(1), TypeKind::HttpClient)]); let ctx = SymexSummaryCtx { global_summaries: &gs, lang: Lang::Java, namespace: "Caller.java", type_facts: Some(&tf), }; let inst = make_inst( 2, SsaOp::Call { callee: "send".into(), callee_text: None, args: vec![smallvec![SsaValue(0)]], receiver: Some(SsaValue(1)), }, node, ); transfer_inst( &mut state, &inst, &cfg, &ssa, Some(&ctx), None, None, Some(Lang::Java), None, ); // "HttpClient.send" not found, bare "send" not found → opaque mk_call fallback match state.get(SsaValue(2)) { SymbolicValue::Call(name, _) => assert_eq!(name, "send"), other => panic!("expected Call fallback, got {:?}", other), } } #[test] fn transfer_call_type_qualified_ambiguous_no_force() { // Ambiguous bare-name candidates, receiver type known, but no candidate's // name exactly matches the qualified name → must NOT 
force-pick. let (cfg, node) = cfg_with_node(None); let ssa = empty_ssa(); let mut state = SymbolicState::new(); state.set(SsaValue(0), SymbolicValue::Symbol(SsaValue(0))); state.set(SsaValue(1), SymbolicValue::Symbol(SsaValue(1))); let mut gs = GlobalSummaries::new(); // Two "send" summaries, different namespaces → ambiguous insert_java_summary( &mut gs, "send", "ModuleA.java", 1, SsaFuncSummary { param_to_return: vec![(0, TaintTransform::Identity)], param_to_sink: vec![], source_caps: Cap::empty(), param_to_sink_param: vec![], param_container_to_return: vec![], param_to_container_store: vec![], return_type: None, return_abstract: None, source_to_callback: vec![], receiver_to_return: None, receiver_to_sink: Cap::empty(), abstract_transfer: vec![], param_return_paths: vec![], points_to: Default::default(), field_points_to: Default::default(), return_path_facts: smallvec::SmallVec::new(), typed_call_receivers: vec![], validated_params_to_return: smallvec::SmallVec::new(), param_to_gate_filters: vec![], entry_kind: None, }, ); insert_java_summary( &mut gs, "send", "ModuleB.java", 1, SsaFuncSummary { param_to_return: vec![(0, TaintTransform::StripBits(Cap::HTML_ESCAPE))], param_to_sink: vec![], source_caps: Cap::empty(), param_to_sink_param: vec![], param_container_to_return: vec![], param_to_container_store: vec![], return_type: None, return_abstract: None, source_to_callback: vec![], receiver_to_return: None, receiver_to_sink: Cap::empty(), abstract_transfer: vec![], param_return_paths: vec![], points_to: Default::default(), field_points_to: Default::default(), return_path_facts: smallvec::SmallVec::new(), typed_call_receivers: vec![], validated_params_to_return: smallvec::SmallVec::new(), param_to_gate_filters: vec![], entry_kind: None, }, ); // No "HttpClient.send" summary registered, disambiguation has 0 exact matches let tf = make_type_facts(vec![(SsaValue(1), TypeKind::HttpClient)]); let ctx = SymexSummaryCtx { global_summaries: &gs, lang: Lang::Java, namespace: 
"Caller.java", type_facts: Some(&tf), }; let inst = make_inst( 2, SsaOp::Call { callee: "send".into(), callee_text: None, args: vec![smallvec![SsaValue(0)]], receiver: Some(SsaValue(1)), }, node, ); transfer_inst( &mut state, &inst, &cfg, &ssa, Some(&ctx), None, None, Some(Lang::Java), None, ); // Neither qualified lookup nor disambiguation found a match. // Bare-name path returns Ambiguous → falls through to mk_call. match state.get(SsaValue(2)) { SymbolicValue::Call(name, _) => assert_eq!(name, "send"), other => panic!("expected Call fallback for ambiguous case, got {:?}", other), } } }