// Mirror of https://github.com/elicpeter/nyx.git (synced 2026-06-06 19:35:13 +02:00).
// Original file: 2537 lines, 88 KiB, Rust.
//! Forward symbolic transfer over SSA instructions.
//!
//! Walks SSA blocks and builds `SymbolicValue` expression trees for each
//! defined SSA value, while eagerly propagating taint through the root-set.
//!
//! Cross-file symbolic summary modeling: when a callee has an
//! `SsaFuncSummary` available via `GlobalSummaries`, the Call instruction's
//! return value is modeled symbolically instead of being treated as opaque.
|
|
#![allow(
|
|
clippy::collapsible_if,
|
|
clippy::if_same_then_else,
|
|
clippy::too_many_arguments
|
|
)]
|
|
|
|
use crate::cfg::Cfg;
|
|
use crate::ssa::const_prop::ConstLattice;
|
|
use crate::ssa::heap::PointsToResult;
|
|
use crate::ssa::ir::{BlockId, SsaBlock, SsaBody, SsaInst, SsaOp, SsaValue};
|
|
use crate::ssa::pointsto::{ContainerOp, classify_container_op};
|
|
use crate::ssa::type_facts::TypeFactResult;
|
|
use crate::summary::ssa_summary::TaintTransform;
|
|
use crate::summary::{CalleeResolution, GlobalSummaries};
|
|
use crate::symbol::Lang;
|
|
|
|
use super::heap::{self, FieldAccessRecord, FieldSlot, HeapKey};
|
|
use super::state::SymbolicState;
|
|
use super::strings::{
|
|
StringOperandSource, TransformKind, classify_string_method, classify_transform_method,
|
|
};
|
|
use super::value::{
|
|
Op, SymbolicValue, mk_binop, mk_call, mk_decode, mk_encode, mk_phi, mk_replace, mk_strlen,
|
|
mk_substr, mk_to_lower, mk_to_upper, mk_trim,
|
|
};
|
|
|
|
/// Context for cross-file symbolic summary modeling during transfer.
|
|
///
|
|
/// When provided, Call instructions attempt to resolve callee behavior
|
|
/// via `SsaFuncSummary` before falling back to the opaque `mk_call`.
|
|
pub struct SymexSummaryCtx<'a> {
|
|
pub global_summaries: &'a GlobalSummaries,
|
|
pub lang: Lang,
|
|
pub namespace: &'a str,
|
|
/// Type facts for type-qualified symbolic summary resolution.
|
|
/// When present, receiver types guide callee name qualification.
|
|
pub type_facts: Option<&'a TypeFactResult>,
|
|
}
|
|
|
|
/// Context for field-sensitive heap operations during transfer.
|
|
///
|
|
/// When provided, Assign and Call instructions attempt store/load operations
|
|
/// through the symbolic heap using allocation-site identities from points-to.
|
|
/// `const_values` enables per-index array slot resolution.
|
|
pub struct SymexHeapCtx<'a> {
|
|
pub points_to: &'a PointsToResult,
|
|
pub ssa: &'a SsaBody,
|
|
pub lang: Lang,
|
|
pub const_values: &'a std::collections::HashMap<SsaValue, ConstLattice>,
|
|
}
|
|
|
|
/// Result of resolving a callee symbolically via its summary.
|
|
struct SymbolicCallResult {
|
|
value: SymbolicValue,
|
|
tainted: bool,
|
|
}
|
|
|
|
/// Transfer a single SSA instruction: set the symbolic value and propagate taint.
|
|
pub fn transfer_inst(
|
|
state: &mut SymbolicState,
|
|
inst: &SsaInst,
|
|
cfg: &Cfg,
|
|
ssa: &SsaBody,
|
|
summary_ctx: Option<&SymexSummaryCtx>,
|
|
heap_ctx: Option<&SymexHeapCtx>,
|
|
interproc_ctx: Option<&super::interproc::InterprocCtx>,
|
|
lang: Option<Lang>,
|
|
node_meta: Option<
|
|
&std::collections::HashMap<u32, crate::taint::ssa_transfer::CrossFileNodeMeta>,
|
|
>,
|
|
) {
|
|
match &inst.op {
|
|
SsaOp::Const(text) => {
|
|
let sym = match text {
|
|
Some(t) => match ConstLattice::parse(t) {
|
|
ConstLattice::Int(n) => SymbolicValue::Concrete(n),
|
|
ConstLattice::Str(s) => SymbolicValue::ConcreteStr(s),
|
|
_ => SymbolicValue::Unknown, // Bool, Null, Top, Varying
|
|
},
|
|
None => SymbolicValue::Unknown,
|
|
};
|
|
state.set(inst.value, sym);
|
|
}
|
|
|
|
SsaOp::Source => {
|
|
state.set(inst.value, SymbolicValue::Symbol(inst.value));
|
|
state.mark_tainted(inst.value);
|
|
}
|
|
|
|
SsaOp::Param { .. } => {
|
|
// Params are symbolic inputs but NOT tainted by default.
|
|
// Taint seeding happens via finding.flow_steps in analyse_finding_path.
|
|
state.set(inst.value, SymbolicValue::Symbol(inst.value));
|
|
}
|
|
|
|
SsaOp::SelfParam => {
|
|
// Implicit method receiver, symbolic input, not tainted by default.
|
|
state.set(inst.value, SymbolicValue::Symbol(inst.value));
|
|
}
|
|
|
|
SsaOp::CatchParam => {
|
|
if let Some(exc_val) = state.take_exception_context() {
|
|
// On an exception path, seed from exception context
|
|
// and mark tainted (matches taint engine: CatchParam gets Cap::all())
|
|
state.set(inst.value, exc_val);
|
|
state.mark_tainted(inst.value);
|
|
} else {
|
|
// Normal path or no explicit exception context, still mark tainted
|
|
// to match taint engine behavior (ssa_transfer.rs CatchParam gets Cap::all())
|
|
state.set(inst.value, SymbolicValue::Symbol(inst.value));
|
|
state.mark_tainted(inst.value);
|
|
}
|
|
}
|
|
|
|
SsaOp::Nop => {
|
|
// Nop does not define a meaningful value, skip.
|
|
}
|
|
|
|
SsaOp::Undef => {
|
|
// Phi-operand sentinel for edges without a reaching
|
|
// definition. No concrete value, no taint.
|
|
state.set(inst.value, SymbolicValue::Unknown);
|
|
}
|
|
|
|
SsaOp::FieldProj { receiver, .. } => {
|
|
// Symbolic field read: model `obj.field` as an opaque value
|
|
// tied to the projection's SsaValue, and propagate the
|
|
// receiver's taint to the result so flat root-set tracking
|
|
// continues to flow taint through chained accesses.
|
|
//
|
|
// This pass deliberately keeps the opaque-Symbol model: without
|
|
// a field-sensitive heap, a dedicated `Field { receiver, name }`
|
|
// SymbolicValue variant cannot soundly carry concrete reads
|
|
// across method boundaries, the witness pipeline already
|
|
// reconstructs `obj.field` text from `ValueDef.var_name`
|
|
// (populated by lower.rs to `"base.f1.f2"` for chain projections).
|
|
// The structured variant is deferred to the field-sensitive
|
|
// pointer analysis prompt, where heap loads consume `FieldProj`
|
|
// directly.
|
|
state.set(inst.value, SymbolicValue::Symbol(inst.value));
|
|
state.propagate_taint(inst.value, std::slice::from_ref(receiver));
|
|
}
|
|
|
|
SsaOp::Assign(uses) => {
|
|
let uses_slice: &[_] = uses;
|
|
match uses_slice.len() {
|
|
0 => {
|
|
state.set(inst.value, SymbolicValue::Unknown);
|
|
}
|
|
1 => {
|
|
// Copy
|
|
let sym = state.get(uses_slice[0]);
|
|
state.set(inst.value, sym);
|
|
state.propagate_taint(inst.value, uses_slice);
|
|
}
|
|
2 => {
|
|
// Field-load pattern detection.
|
|
// When RHS is a member expression, SSA produces 2 uses:
|
|
// uses[0] = dotted-path SSA value (e.g., v for "user.name")
|
|
// uses[1] = base variable SSA value (e.g., v for "user")
|
|
// The first operand IS the field value, use it directly.
|
|
if let Some(def) = ssa.value_defs.get(uses_slice[0].0 as usize) {
|
|
if def.var_name.as_ref().is_some_and(|n| n.contains('.')) {
|
|
let sym = state.get(uses_slice[0]);
|
|
state.set(inst.value, sym);
|
|
state.propagate_taint(inst.value, uses_slice);
|
|
// Record heap load for cross-alias + witness
|
|
try_heap_load_record(state, inst, ssa, heap_ctx);
|
|
return;
|
|
}
|
|
}
|
|
|
|
// Heap-based cross-alias load fallback.
|
|
// If the instruction defines a dotted path but the first
|
|
// operand doesn't have a dotted var_name (aliased object),
|
|
// try loading from the symbolic heap via points-to.
|
|
if try_heap_alias_load(state, inst, ssa, heap_ctx) {
|
|
state.propagate_taint(inst.value, uses_slice);
|
|
return;
|
|
}
|
|
|
|
// Check for binary op metadata on the CFG node
|
|
let bin_op_val = if let Some(meta) = node_meta {
|
|
meta.get(&(inst.cfg_node.index() as u32))
|
|
.and_then(|m| m.info.bin_op)
|
|
} else {
|
|
cfg[inst.cfg_node].bin_op
|
|
};
|
|
if let Some(bin_op) = bin_op_val {
|
|
let lhs = state.get(uses_slice[0]);
|
|
let rhs = state.get(uses_slice[1]);
|
|
let sym = mk_binop(Op::from(bin_op), lhs, rhs);
|
|
state.set(inst.value, sym);
|
|
} else {
|
|
// No structural info, conservative Unknown
|
|
state.set(inst.value, SymbolicValue::Unknown);
|
|
}
|
|
state.propagate_taint(inst.value, uses_slice);
|
|
}
|
|
_ => {
|
|
// 3+ operands, complex expression
|
|
state.set(inst.value, SymbolicValue::Unknown);
|
|
state.propagate_taint(inst.value, uses_slice);
|
|
}
|
|
}
|
|
|
|
// If this instruction defines a dotted path, record
|
|
// the store in the symbolic heap for cross-alias resolution.
|
|
try_heap_field_store(state, inst, ssa, heap_ctx);
|
|
}
|
|
|
|
SsaOp::Call {
|
|
callee,
|
|
args,
|
|
receiver,
|
|
..
|
|
} => {
|
|
// Collect symbolic values for arguments
|
|
let mut arg_syms: Vec<SymbolicValue> = Vec::new();
|
|
let mut all_operands: Vec<_> = Vec::new();
|
|
|
|
if let Some(recv) = receiver {
|
|
arg_syms.push(state.get(*recv));
|
|
all_operands.push(*recv);
|
|
}
|
|
|
|
for arg_slot in args {
|
|
if let Some(&first_val) = arg_slot.first() {
|
|
arg_syms.push(state.get(first_val));
|
|
all_operands.push(first_val);
|
|
}
|
|
}
|
|
|
|
// Container store/load via symbolic heap.
|
|
// Resolve index_arg via const_values for per-index precision when
|
|
// the index is a known constant.
|
|
if let Some(hctx) = heap_ctx {
|
|
if let Some(container_op) = classify_container_op(callee, hctx.lang) {
|
|
let recv_obj = receiver
|
|
.and_then(|rv| hctx.points_to.get(rv))
|
|
.filter(|pts| pts.len() == 1)
|
|
.and_then(|pts| pts.iter().next().copied());
|
|
|
|
if let Some(obj_id) = recv_obj {
|
|
match container_op {
|
|
ContainerOp::Store {
|
|
ref value_args,
|
|
index_arg,
|
|
} => {
|
|
let field = index_arg
|
|
.and_then(|pos| {
|
|
args.get(pos).and_then(|slot| slot.first()).map(|&v| {
|
|
heap::resolve_index_slot(v, hctx.const_values)
|
|
})
|
|
})
|
|
.unwrap_or(FieldSlot::Elements);
|
|
let key = HeapKey {
|
|
object: obj_id,
|
|
field,
|
|
};
|
|
|
|
let val_sym = value_args
|
|
.first()
|
|
.and_then(|&idx| args.get(idx))
|
|
.and_then(|slot| slot.first())
|
|
.map(|&v| state.get(v))
|
|
.unwrap_or(SymbolicValue::Unknown);
|
|
let any_tainted = value_args.iter().any(|&idx| {
|
|
args.get(idx)
|
|
.and_then(|slot| slot.first())
|
|
.map(|&v| state.is_tainted(v))
|
|
.unwrap_or(false)
|
|
});
|
|
state.heap_mut().store(key, val_sym, any_tainted);
|
|
// Fall through to normal Call for return value
|
|
}
|
|
ContainerOp::Load { index_arg } => {
|
|
let field = index_arg
|
|
.and_then(|pos| {
|
|
args.get(pos).and_then(|slot| slot.first()).map(|&v| {
|
|
heap::resolve_index_slot(v, hctx.const_values)
|
|
})
|
|
})
|
|
.unwrap_or(FieldSlot::Elements);
|
|
let key = HeapKey {
|
|
object: obj_id,
|
|
field,
|
|
};
|
|
|
|
let loaded = state.heap().load(&key);
|
|
if !matches!(loaded, SymbolicValue::Unknown) {
|
|
state.set(inst.value, loaded);
|
|
if state.heap().is_tainted(&key) {
|
|
state.mark_tainted(inst.value);
|
|
}
|
|
return;
|
|
}
|
|
// Fall through to normal Call
|
|
}
|
|
ContainerOp::Writeback { .. } => {
|
|
// Symex doesn't model writeback yet, taint
|
|
// engine handles the destination-arg taint
|
|
// directly. Fall through to normal Call.
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// String method recognition
|
|
if let Some(result) =
|
|
try_string_method(state, callee, receiver, &arg_syms, &all_operands, lang)
|
|
{
|
|
state.set(inst.value, result.value);
|
|
if result.tainted {
|
|
state.mark_tainted(inst.value);
|
|
}
|
|
return;
|
|
}
|
|
|
|
// Encoding/decoding transform recognition
|
|
if let Some(result) =
|
|
try_transform_method(state, callee, receiver, &arg_syms, &all_operands, lang)
|
|
{
|
|
state.set(inst.value, result.value);
|
|
if result.tainted {
|
|
state.mark_tainted(inst.value);
|
|
}
|
|
return;
|
|
}
|
|
|
|
// Interprocedural symbolic execution.
|
|
// Execute callee body when available, full state propagation.
|
|
if let Some(ictx) = interproc_ctx {
|
|
let mut callee_args: Vec<(crate::ssa::ir::SsaValue, SymbolicValue, bool)> =
|
|
Vec::new();
|
|
for (i, op) in all_operands.iter().enumerate() {
|
|
callee_args.push((
|
|
*op,
|
|
arg_syms.get(i).cloned().unwrap_or(SymbolicValue::Unknown),
|
|
state.is_tainted(*op),
|
|
));
|
|
}
|
|
if let Some(outcome) = super::interproc::execute_callee(
|
|
ictx,
|
|
callee,
|
|
&callee_args,
|
|
state.heap(),
|
|
0, // depth: caller is at depth 0
|
|
&[],
|
|
summary_ctx,
|
|
heap_ctx,
|
|
) {
|
|
if !outcome.exit_states.is_empty() {
|
|
let policy = super::interproc::select_merge_policy(
|
|
outcome.exit_states.len(),
|
|
!outcome.cutoff_reasons.is_empty(),
|
|
);
|
|
let merged =
|
|
super::interproc::merge_exit_states(&outcome.exit_states, policy);
|
|
state.set(inst.value, merged.return_value);
|
|
if merged.return_tainted {
|
|
state.mark_tainted(inst.value);
|
|
}
|
|
// Apply heap delta: callee writes become visible to caller
|
|
for mutation in &merged.heap_delta {
|
|
state.heap_mut().store(
|
|
mutation.key.clone(),
|
|
mutation.value.clone(),
|
|
mutation.tainted,
|
|
);
|
|
}
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Try cross-file summary modeling before falling back to mk_call
|
|
if let Some(ctx) = summary_ctx {
|
|
if let Some(result) = resolve_callee_symbolically(
|
|
ctx,
|
|
callee,
|
|
&arg_syms,
|
|
&all_operands,
|
|
state,
|
|
inst.value,
|
|
*receiver,
|
|
) {
|
|
state.set(inst.value, result.value);
|
|
if result.tainted {
|
|
state.mark_tainted(inst.value);
|
|
}
|
|
return;
|
|
}
|
|
}
|
|
|
|
// Fallback: opaque call
|
|
let sym = mk_call(callee.clone(), arg_syms);
|
|
state.set(inst.value, sym);
|
|
state.propagate_taint(inst.value, &all_operands);
|
|
}
|
|
|
|
SsaOp::Phi(operands) => {
|
|
let phi_ops: Vec<_> = operands
|
|
.iter()
|
|
.map(|(bid, v)| (*bid, state.get(*v)))
|
|
.collect();
|
|
let operand_vals: Vec<_> = operands.iter().map(|(_, v)| *v).collect();
|
|
|
|
let sym = mk_phi(phi_ops);
|
|
state.set(inst.value, sym);
|
|
state.propagate_taint(inst.value, &operand_vals);
|
|
}
|
|
}
|
|
}
|
|
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
// Heap helpers
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
|
|
/// Record a field store in the symbolic heap when the instruction defines
|
|
/// a dotted path (e.g., `user.name`).
|
|
fn try_heap_field_store(
|
|
state: &mut SymbolicState,
|
|
inst: &SsaInst,
|
|
_ssa: &SsaBody,
|
|
heap_ctx: Option<&SymexHeapCtx>,
|
|
) {
|
|
let hctx = match heap_ctx {
|
|
Some(hctx) => hctx,
|
|
None => return,
|
|
};
|
|
let vn = match inst.var_name.as_deref() {
|
|
Some(vn) => vn,
|
|
None => return,
|
|
};
|
|
let (recv_name, field_name) = match heap::split_field_access(vn) {
|
|
Some(pair) => pair,
|
|
None => return,
|
|
};
|
|
let recv_ssa = match heap::resolve_receiver_ssa(recv_name, hctx.ssa, inst.value) {
|
|
Some(v) => v,
|
|
None => return,
|
|
};
|
|
let obj_id = match heap::resolve_singleton_object(recv_ssa, hctx.points_to) {
|
|
Some(id) => id,
|
|
None => return,
|
|
};
|
|
|
|
let key = HeapKey {
|
|
object: obj_id,
|
|
field: FieldSlot::Named(field_name.to_string()),
|
|
};
|
|
let sym = state.get(inst.value);
|
|
let tainted = state.is_tainted(inst.value);
|
|
state.heap_mut().store(key, sym, tainted);
|
|
state.heap_mut().record_access(FieldAccessRecord {
|
|
object_name: recv_name.to_string(),
|
|
field_name: field_name.to_string(),
|
|
ssa_value: inst.value,
|
|
});
|
|
}
|
|
|
|
/// Record a field access from a successful field-load pattern.
|
|
fn try_heap_load_record(
|
|
state: &mut SymbolicState,
|
|
inst: &SsaInst,
|
|
ssa: &SsaBody,
|
|
_heap_ctx: Option<&SymexHeapCtx>,
|
|
) {
|
|
// The uses[0] var_name has the dotted path.
|
|
let uses = match &inst.op {
|
|
SsaOp::Assign(u) => u,
|
|
_ => return,
|
|
};
|
|
if let Some(&first) = uses.first() {
|
|
if let Some(def) = ssa.value_defs.get(first.0 as usize) {
|
|
if let Some(ref dotted) = def.var_name {
|
|
if let Some((recv_name, field_name)) = heap::split_field_access(dotted) {
|
|
state.heap_mut().record_access(FieldAccessRecord {
|
|
object_name: recv_name.to_string(),
|
|
field_name: field_name.to_string(),
|
|
ssa_value: inst.value,
|
|
});
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Try to resolve a 2-use Assign via heap cross-alias lookup.
|
|
///
|
|
/// When `inst.var_name` is a dotted path (e.g., `obj.field`) but the first
|
|
/// operand doesn't have a dotted def (the alias case), check the heap via
|
|
/// points-to resolution. Returns `true` if the heap provided a value.
|
|
fn try_heap_alias_load(
|
|
state: &mut SymbolicState,
|
|
inst: &SsaInst,
|
|
_ssa: &SsaBody,
|
|
heap_ctx: Option<&SymexHeapCtx>,
|
|
) -> bool {
|
|
let hctx = match heap_ctx {
|
|
Some(hctx) => hctx,
|
|
None => return false,
|
|
};
|
|
let vn = match inst.var_name.as_deref() {
|
|
Some(vn) => vn,
|
|
None => return false,
|
|
};
|
|
let (recv_name, field_name) = match heap::split_field_access(vn) {
|
|
Some(pair) => pair,
|
|
None => return false,
|
|
};
|
|
let recv_ssa = match heap::resolve_receiver_ssa(recv_name, hctx.ssa, inst.value) {
|
|
Some(v) => v,
|
|
None => return false,
|
|
};
|
|
let obj_id = match heap::resolve_singleton_object(recv_ssa, hctx.points_to) {
|
|
Some(id) => id,
|
|
None => return false,
|
|
};
|
|
|
|
let key = HeapKey {
|
|
object: obj_id,
|
|
field: FieldSlot::Named(field_name.to_string()),
|
|
};
|
|
let loaded = state.heap().load(&key);
|
|
if matches!(loaded, SymbolicValue::Unknown) {
|
|
return false;
|
|
}
|
|
state.set(inst.value, loaded);
|
|
if state.heap().is_tainted(&key) {
|
|
state.mark_tainted(inst.value);
|
|
}
|
|
state.heap_mut().record_access(FieldAccessRecord {
|
|
object_name: recv_name.to_string(),
|
|
field_name: field_name.to_string(),
|
|
ssa_value: inst.value,
|
|
});
|
|
true
|
|
}
|
|
|
|
/// Transfer a single SSA instruction with optional predecessor context.
|
|
///
|
|
/// ONLY phi instructions use predecessor-sensitive selection, when
|
|
/// `predecessor` is `Some(bid)`, the phi resolves to the operand from
|
|
/// that specific predecessor block instead of building a `Phi(...)`
|
|
/// expression. All non-phi instructions delegate to [`transfer_inst`].
|
|
pub fn transfer_inst_with_predecessor(
|
|
state: &mut SymbolicState,
|
|
inst: &SsaInst,
|
|
cfg: &Cfg,
|
|
ssa: &SsaBody,
|
|
predecessor: Option<BlockId>,
|
|
summary_ctx: Option<&SymexSummaryCtx>,
|
|
heap_ctx: Option<&SymexHeapCtx>,
|
|
interproc_ctx: Option<&super::interproc::InterprocCtx>,
|
|
lang: Option<Lang>,
|
|
node_meta: Option<
|
|
&std::collections::HashMap<u32, crate::taint::ssa_transfer::CrossFileNodeMeta>,
|
|
>,
|
|
) {
|
|
match (&inst.op, predecessor) {
|
|
(SsaOp::Phi(operands), Some(pred)) => {
|
|
let sym = state.resolve_phi_from_predecessor(operands, pred);
|
|
state.set(inst.value, sym);
|
|
// Taint: propagate only from the matched predecessor operand
|
|
for (bid, v) in operands.iter() {
|
|
if *bid == pred {
|
|
state.propagate_taint(inst.value, &[*v]);
|
|
return;
|
|
}
|
|
}
|
|
// Predecessor not found among operands, propagate from all (fallback)
|
|
let operand_vals: Vec<_> = operands.iter().map(|(_, v)| *v).collect();
|
|
state.propagate_taint(inst.value, &operand_vals);
|
|
}
|
|
_ => {
|
|
transfer_inst(
|
|
state,
|
|
inst,
|
|
cfg,
|
|
ssa,
|
|
summary_ctx,
|
|
heap_ctx,
|
|
interproc_ctx,
|
|
lang,
|
|
node_meta,
|
|
);
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Transfer all instructions in a block with predecessor context.
|
|
///
|
|
/// Phis use predecessor-aware transfer; body instructions use standard
|
|
/// [`transfer_inst`]. See [`transfer_inst_with_predecessor`] for details.
|
|
pub fn transfer_block_with_predecessor(
|
|
state: &mut SymbolicState,
|
|
block: &SsaBlock,
|
|
cfg: &Cfg,
|
|
ssa: &SsaBody,
|
|
predecessor: Option<BlockId>,
|
|
summary_ctx: Option<&SymexSummaryCtx>,
|
|
heap_ctx: Option<&SymexHeapCtx>,
|
|
interproc_ctx: Option<&super::interproc::InterprocCtx>,
|
|
lang: Option<Lang>,
|
|
node_meta: Option<
|
|
&std::collections::HashMap<u32, crate::taint::ssa_transfer::CrossFileNodeMeta>,
|
|
>,
|
|
) {
|
|
for inst in &block.phis {
|
|
transfer_inst_with_predecessor(
|
|
state,
|
|
inst,
|
|
cfg,
|
|
ssa,
|
|
predecessor,
|
|
summary_ctx,
|
|
heap_ctx,
|
|
interproc_ctx,
|
|
lang,
|
|
node_meta,
|
|
);
|
|
}
|
|
for inst in &block.body {
|
|
transfer_inst(
|
|
state,
|
|
inst,
|
|
cfg,
|
|
ssa,
|
|
summary_ctx,
|
|
heap_ctx,
|
|
interproc_ctx,
|
|
lang,
|
|
node_meta,
|
|
);
|
|
}
|
|
}
|
|
|
|
/// Transfer all instructions in a block: phis first, then body.
|
|
pub fn transfer_block(
|
|
state: &mut SymbolicState,
|
|
block: &SsaBlock,
|
|
cfg: &Cfg,
|
|
ssa: &SsaBody,
|
|
summary_ctx: Option<&SymexSummaryCtx>,
|
|
heap_ctx: Option<&SymexHeapCtx>,
|
|
interproc_ctx: Option<&super::interproc::InterprocCtx>,
|
|
lang: Option<Lang>,
|
|
) {
|
|
for inst in &block.phis {
|
|
transfer_inst(
|
|
state,
|
|
inst,
|
|
cfg,
|
|
ssa,
|
|
summary_ctx,
|
|
heap_ctx,
|
|
interproc_ctx,
|
|
lang,
|
|
None,
|
|
);
|
|
}
|
|
for inst in &block.body {
|
|
transfer_inst(
|
|
state,
|
|
inst,
|
|
cfg,
|
|
ssa,
|
|
summary_ctx,
|
|
heap_ctx,
|
|
interproc_ctx,
|
|
lang,
|
|
None,
|
|
);
|
|
}
|
|
}
|
|
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
// String method dispatch
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
|
|
/// Attempt to model a callee as a recognized string operation.
|
|
///
|
|
/// Returns `Some(SymbolicCallResult)` if the callee is a known string method
|
|
/// with structurally-modelable arguments. Otherwise returns `None`.
|
|
fn try_string_method(
|
|
state: &SymbolicState,
|
|
callee: &str,
|
|
receiver: &Option<SsaValue>,
|
|
arg_syms: &[SymbolicValue],
|
|
all_operands: &[SsaValue],
|
|
lang: Option<Lang>,
|
|
) -> Option<SymbolicCallResult> {
|
|
let lang = lang?;
|
|
let info = classify_string_method(callee, arg_syms, lang)?;
|
|
|
|
// Get the string operand based on the operand source
|
|
let (string_sym, string_ssa) = match info.operand_source {
|
|
StringOperandSource::Receiver => {
|
|
let recv = (*receiver)?;
|
|
(state.get(recv), recv)
|
|
}
|
|
StringOperandSource::FirstArg => {
|
|
// For free functions, first arg is the string.
|
|
// If receiver was prepended to arg_syms, it's at index 0;
|
|
// otherwise first explicit arg is at index 0.
|
|
if let Some(recv) = receiver {
|
|
// Receiver was prepended, it IS the string operand
|
|
(state.get(*recv), *recv)
|
|
} else if let Some(&first_op) = all_operands.first() {
|
|
(
|
|
arg_syms.first().cloned().unwrap_or(SymbolicValue::Unknown),
|
|
first_op,
|
|
)
|
|
} else {
|
|
return None;
|
|
}
|
|
}
|
|
};
|
|
|
|
// Build the structured SymbolicValue via smart constructors
|
|
let value = match info.method {
|
|
super::strings::StringMethod::Trim => mk_trim(string_sym),
|
|
super::strings::StringMethod::ToLower => mk_to_lower(string_sym),
|
|
super::strings::StringMethod::ToUpper => mk_to_upper(string_sym),
|
|
super::strings::StringMethod::Replace {
|
|
pattern,
|
|
replacement,
|
|
} => mk_replace(string_sym, pattern, replacement),
|
|
super::strings::StringMethod::Substr => {
|
|
// Extract start and end indices from args
|
|
let arg_offset = match info.operand_source {
|
|
StringOperandSource::Receiver => 1, // args[0] = receiver, args[1] = start
|
|
StringOperandSource::FirstArg => {
|
|
if receiver.is_some() { 1 } else { 1 } // args[0] = string, args[1] = start
|
|
}
|
|
};
|
|
let start = arg_syms
|
|
.get(arg_offset)
|
|
.cloned()
|
|
.unwrap_or(SymbolicValue::Concrete(0));
|
|
let end = arg_syms.get(arg_offset + 1).cloned();
|
|
mk_substr(string_sym, start, end)
|
|
}
|
|
super::strings::StringMethod::StrLen => mk_strlen(string_sym),
|
|
};
|
|
|
|
// Taint: string operations preserve taint from the string operand
|
|
let tainted = state.is_tainted(string_ssa);
|
|
|
|
Some(SymbolicCallResult { value, tainted })
|
|
}
|
|
|
|
/// Recognize encoding/decoding transforms and build structured
|
|
/// `Encode`/`Decode` nodes instead of opaque `Call`.
|
|
///
|
|
/// Taint is always propagated from the operand, encoding preserves taint
|
|
/// unconditionally. This function does NOT sanitize.
|
|
fn try_transform_method(
|
|
state: &SymbolicState,
|
|
callee: &str,
|
|
receiver: &Option<SsaValue>,
|
|
arg_syms: &[SymbolicValue],
|
|
all_operands: &[SsaValue],
|
|
lang: Option<Lang>,
|
|
) -> Option<SymbolicCallResult> {
|
|
let lang = lang?;
|
|
let info = classify_transform_method(callee, lang)?;
|
|
|
|
// Extract the operand the same way as try_string_method
|
|
let (operand_sym, operand_ssa) = match info.operand_source {
|
|
StringOperandSource::Receiver => {
|
|
let recv = (*receiver)?;
|
|
(state.get(recv), recv)
|
|
}
|
|
StringOperandSource::FirstArg => {
|
|
if let Some(recv) = receiver {
|
|
(state.get(*recv), *recv)
|
|
} else if let Some(&first_op) = all_operands.first() {
|
|
(
|
|
arg_syms.first().cloned().unwrap_or(SymbolicValue::Unknown),
|
|
first_op,
|
|
)
|
|
} else {
|
|
return None;
|
|
}
|
|
}
|
|
};
|
|
|
|
// Build structured Encode or Decode node via smart constructors
|
|
let value = match info.kind {
|
|
TransformKind::Base64Decode | TransformKind::UrlDecode => mk_decode(info.kind, operand_sym),
|
|
_ => mk_encode(info.kind, operand_sym),
|
|
};
|
|
|
|
// Encoding preserves taint unconditionally
|
|
let tainted = state.is_tainted(operand_ssa);
|
|
|
|
Some(SymbolicCallResult { value, tainted })
|
|
}
|
|
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
// Cross-file symbolic summary resolution
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
|
|
/// Model a callee's return value from its SSA summary.
|
|
///
|
|
/// Shared by both type-qualified and bare-name resolution paths.
|
|
///
|
|
/// Resolution rules:
|
|
/// - **Exactly one `Identity`**: pass through that argument's symbolic value
|
|
/// - **Multiple `Identity` entries**: ambiguous → fall back (do NOT pick arbitrarily)
|
|
/// - **`StripBits`**: sanitizer → `Unknown`, not tainted
|
|
/// - **`AddBits` or `source_caps != empty`**: source → fresh tainted Symbol
|
|
/// - **`NotFound` / `Ambiguous`**: hard fallback to mk_call
|
|
fn model_from_summary(
|
|
summary: &crate::summary::ssa_summary::SsaFuncSummary,
|
|
arg_syms: &[SymbolicValue],
|
|
all_operands: &[SsaValue],
|
|
state: &SymbolicState,
|
|
result_value: SsaValue,
|
|
) -> Option<SymbolicCallResult> {
|
|
// Check for source-producing function
|
|
if !summary.source_caps.is_empty() {
|
|
return Some(SymbolicCallResult {
|
|
value: SymbolicValue::Symbol(result_value),
|
|
tainted: true,
|
|
});
|
|
}
|
|
|
|
// Inspect param_to_return transforms
|
|
if summary.param_to_return.is_empty() {
|
|
return None;
|
|
}
|
|
|
|
// Collect identity mappings
|
|
let identities: Vec<_> = summary
|
|
.param_to_return
|
|
.iter()
|
|
.filter(|(_, t)| matches!(t, TaintTransform::Identity))
|
|
.collect();
|
|
|
|
// Check for StripBits (sanitizer)
|
|
let has_strip = summary
|
|
.param_to_return
|
|
.iter()
|
|
.any(|(_, t)| matches!(t, TaintTransform::StripBits(_)));
|
|
|
|
// Check for AddBits (source introduction)
|
|
let has_add = summary
|
|
.param_to_return
|
|
.iter()
|
|
.any(|(_, t)| matches!(t, TaintTransform::AddBits(_)));
|
|
|
|
if has_add {
|
|
return Some(SymbolicCallResult {
|
|
value: SymbolicValue::Symbol(result_value),
|
|
tainted: true,
|
|
});
|
|
}
|
|
|
|
if has_strip && identities.is_empty() {
|
|
return Some(SymbolicCallResult {
|
|
value: SymbolicValue::Unknown,
|
|
tainted: false,
|
|
});
|
|
}
|
|
|
|
if identities.len() == 1 {
|
|
let (param_idx, _) = identities[0];
|
|
if let Some(sym) = arg_syms.get(*param_idx) {
|
|
let is_tainted = all_operands
|
|
.get(*param_idx)
|
|
.map(|v| state.is_tainted(*v))
|
|
.unwrap_or(false);
|
|
return Some(SymbolicCallResult {
|
|
value: sym.clone(),
|
|
tainted: is_tainted,
|
|
});
|
|
}
|
|
}
|
|
|
|
// Multiple Identity entries or other ambiguous cases: fall back
|
|
None
|
|
}
|
|
|
|
/// Attempt to resolve a callee's return value symbolically using its
|
|
/// `SsaFuncSummary` from `GlobalSummaries`.
|
|
///
|
|
/// Returns `Some(SymbolicCallResult)` if the summary provides actionable
|
|
/// modeling. Returns `None` to fall through to the opaque `mk_call` path.
|
|
///
|
|
/// When a receiver has a known type via type facts, tries type-qualified
|
|
/// callee name (e.g., `"HttpClient.send"`) before bare-name resolution. This
|
|
/// improves summary-based modeling only, not general virtual dispatch.
|
|
fn resolve_callee_symbolically(
|
|
ctx: &SymexSummaryCtx,
|
|
callee: &str,
|
|
arg_syms: &[SymbolicValue],
|
|
all_operands: &[SsaValue],
|
|
state: &SymbolicState,
|
|
result_value: SsaValue,
|
|
receiver: Option<SsaValue>,
|
|
) -> Option<SymbolicCallResult> {
|
|
// Type-qualified symbolic resolution when receiver has a known type.
|
|
// Improves summary-based modeling only, not general virtual dispatch.
|
|
// Precedence: exact qualified > type-aided disambiguation > bare-name fallback.
|
|
if let (Some(tf), Some(recv)) = (ctx.type_facts, receiver)
|
|
&& let Some(receiver_type) = tf.get_type(recv)
|
|
&& let Some(prefix) = receiver_type.label_prefix()
|
|
{
|
|
let method = crate::callgraph::callee_leaf_name(callee);
|
|
let qualified = format!("{}.{}", prefix, method);
|
|
|
|
// Attempt 1: Exact lookup under type-qualified name.
|
|
// Arity=None to avoid receiver-in-operands vs formal-param mismatch.
|
|
let resolution =
|
|
ctx.global_summaries
|
|
.resolve_callee_key(&qualified, ctx.lang, ctx.namespace, None);
|
|
if let CalleeResolution::Resolved(key) = resolution
|
|
&& let Some(summary) = ctx.global_summaries.get_ssa(&key)
|
|
{
|
|
return model_from_summary(summary, arg_syms, all_operands, state, result_value);
|
|
}
|
|
|
|
// Attempt 2: Disambiguate among ambiguous bare-name candidates.
|
|
// Only select when a candidate's FuncKey.name EXACTLY equals the
|
|
// qualified name, no substring matching, never guess.
|
|
let bare_resolution =
|
|
ctx.global_summaries
|
|
.resolve_callee_key(method, ctx.lang, ctx.namespace, None);
|
|
if let CalleeResolution::Ambiguous(candidates) = bare_resolution {
|
|
let exact_match: Vec<_> = candidates.iter().filter(|k| k.name == qualified).collect();
|
|
if exact_match.len() == 1
|
|
&& let Some(summary) = ctx.global_summaries.get_ssa(exact_match[0])
|
|
{
|
|
return model_from_summary(summary, arg_syms, all_operands, state, result_value);
|
|
}
|
|
// >1 or 0 exact matches: do NOT guess, fall through
|
|
}
|
|
// Fall through to existing bare-name resolution
|
|
}
|
|
|
|
// Existing bare-name resolution path
|
|
let normalized = crate::callgraph::callee_leaf_name(callee);
|
|
let resolution = ctx.global_summaries.resolve_callee_key(
|
|
normalized,
|
|
ctx.lang,
|
|
ctx.namespace,
|
|
Some(all_operands.len()),
|
|
);
|
|
|
|
let key = match resolution {
|
|
CalleeResolution::Resolved(k) => k,
|
|
CalleeResolution::NotFound | CalleeResolution::Ambiguous(_) => return None,
|
|
};
|
|
|
|
let summary = ctx.global_summaries.get_ssa(&key)?;
|
|
model_from_summary(summary, arg_syms, all_operands, state, result_value)
|
|
}
|
|
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
// Tests
|
|
// ─────────────────────────────────────────────────────────────────────────────
|
|
|
|
#[cfg(test)]
|
|
mod tests {
|
|
use super::*;
|
|
use crate::cfg::{BinOp, Cfg, NodeInfo, StmtKind};
|
|
use crate::ssa::ir::{BlockId, SsaBlock, SsaInst, SsaValue, Terminator};
|
|
use petgraph::graph::NodeIndex;
|
|
use smallvec::smallvec;
|
|
|
|
/// Create a minimal Cfg with a single node that has the given bin_op.
|
|
fn cfg_with_node(bin_op: Option<BinOp>) -> (Cfg, NodeIndex) {
|
|
let mut cfg = Cfg::new();
|
|
let info = NodeInfo {
|
|
kind: StmtKind::Seq,
|
|
bin_op,
|
|
..Default::default()
|
|
};
|
|
let idx = cfg.add_node(info);
|
|
(cfg, idx)
|
|
}
|
|
|
|
fn make_inst(value: u32, op: SsaOp, cfg_node: NodeIndex) -> SsaInst {
|
|
SsaInst {
|
|
value: SsaValue(value),
|
|
op,
|
|
cfg_node,
|
|
var_name: None,
|
|
span: (0, 0),
|
|
}
|
|
}
|
|
|
|
fn empty_ssa() -> SsaBody {
|
|
SsaBody {
|
|
blocks: vec![],
|
|
entry: BlockId(0),
|
|
value_defs: vec![],
|
|
cfg_node_map: std::collections::HashMap::new(),
|
|
exception_edges: vec![],
|
|
field_interner: crate::ssa::ir::FieldInterner::default(),
|
|
field_writes: std::collections::HashMap::new(),
|
|
|
|
synthetic_externals: std::collections::HashSet::new(),
|
|
slot_scoped_assigns: std::collections::HashSet::new(),
|
|
}
|
|
}
|
|
|
|
/// An integer literal constant is modeled as `Concrete(42)` and is untainted.
#[test]
fn transfer_const_int() {
    let (graph, anchor) = cfg_with_node(None);
    let body = empty_ssa();
    let mut sym = SymbolicState::new();

    let literal = SsaOp::Const(Some("42".into()));
    let inst = make_inst(0, literal, anchor);
    transfer_inst(&mut sym, &inst, &graph, &body, None, None, None, None, None);

    let observed = sym.get(SsaValue(0));
    assert_eq!(observed, SymbolicValue::Concrete(42));
    assert!(!sym.is_tainted(SsaValue(0)));
}
|
|
|
|
/// A double-quoted literal constant is modeled as `ConcreteStr` without the quotes.
#[test]
fn transfer_const_string() {
    let (graph, anchor) = cfg_with_node(None);
    let body = empty_ssa();
    let mut sym = SymbolicState::new();

    // The constant text carries its surrounding double quotes.
    let literal = SsaOp::Const(Some("\"hello\"".into()));
    let inst = make_inst(0, literal, anchor);
    transfer_inst(&mut sym, &inst, &graph, &body, None, None, None, None, None);

    let observed = sym.get(SsaValue(0));
    assert_eq!(observed, SymbolicValue::ConcreteStr("hello".into()));
}
|
|
|
|
/// The constant text `true` is not modeled concretely and yields `Unknown`.
#[test]
fn transfer_const_bool_fallback() {
    let (graph, anchor) = cfg_with_node(None);
    let body = empty_ssa();
    let mut sym = SymbolicState::new();

    let literal = SsaOp::Const(Some("true".into()));
    let inst = make_inst(0, literal, anchor);
    transfer_inst(&mut sym, &inst, &graph, &body, None, None, None, None, None);

    let observed = sym.get(SsaValue(0));
    assert_eq!(observed, SymbolicValue::Unknown);
}
|
|
|
|
/// A constant with no text (`Const(None)`) yields `Unknown`.
#[test]
fn transfer_const_none() {
    let (graph, anchor) = cfg_with_node(None);
    let body = empty_ssa();
    let mut sym = SymbolicState::new();

    let textless = make_inst(0, SsaOp::Const(None), anchor);
    transfer_inst(&mut sym, &textless, &graph, &body, None, None, None, None, None);

    let observed = sym.get(SsaValue(0));
    assert_eq!(observed, SymbolicValue::Unknown);
}
|
|
|
|
/// A `Source` instruction defines a symbol naming itself and marks it tainted.
#[test]
fn transfer_source_tainted() {
    let (graph, anchor) = cfg_with_node(None);
    let body = empty_ssa();
    let mut sym = SymbolicState::new();

    let source = make_inst(0, SsaOp::Source, anchor);
    transfer_inst(&mut sym, &source, &graph, &body, None, None, None, None, None);

    let observed = sym.get(SsaValue(0));
    assert_eq!(observed, SymbolicValue::Symbol(SsaValue(0)));
    assert!(sym.is_tainted(SsaValue(0)));
}
|
|
|
|
/// A `Param` instruction defines a symbol naming itself but leaves it untainted.
#[test]
fn transfer_param_not_tainted() {
    let (graph, anchor) = cfg_with_node(None);
    let body = empty_ssa();
    let mut sym = SymbolicState::new();

    let first_param = SsaOp::Param { index: 0 };
    let inst = make_inst(0, first_param, anchor);
    transfer_inst(&mut sym, &inst, &graph, &body, None, None, None, None, None);

    let observed = sym.get(SsaValue(0));
    assert_eq!(observed, SymbolicValue::Symbol(SsaValue(0)));
    assert!(!sym.is_tainted(SsaValue(0)));
}
|
|
|
|
/// A single-operand `Assign` copies both the symbolic value and the taint
/// of its operand.
#[test]
fn transfer_assign_copy() {
    let (graph, anchor) = cfg_with_node(None);
    let body = empty_ssa();
    let mut sym = SymbolicState::new();

    // Operand v0: concrete 7, tainted.
    sym.set(SsaValue(0), SymbolicValue::Concrete(7));
    sym.mark_tainted(SsaValue(0));

    let copy = SsaOp::Assign(smallvec![SsaValue(0)]);
    let inst = make_inst(1, copy, anchor);
    transfer_inst(&mut sym, &inst, &graph, &body, None, None, None, None, None);

    let observed = sym.get(SsaValue(1));
    assert_eq!(observed, SymbolicValue::Concrete(7));
    assert!(sym.is_tainted(SsaValue(1)));
}
|
|
|
|
/// A two-operand `Assign` on a node carrying `BinOp::Mul` builds a `Mul`
/// expression over the operands and inherits taint from the tainted operand.
#[test]
fn transfer_assign_binop() {
    let (graph, anchor) = cfg_with_node(Some(BinOp::Mul));
    let body = empty_ssa();
    let mut sym = SymbolicState::new();

    // v0: tainted symbol; v1: untainted concrete 2.
    sym.set(SsaValue(0), SymbolicValue::Symbol(SsaValue(0)));
    sym.mark_tainted(SsaValue(0));
    sym.set(SsaValue(1), SymbolicValue::Concrete(2));

    let product = SsaOp::Assign(smallvec![SsaValue(0), SsaValue(1)]);
    let inst = make_inst(2, product, anchor);
    transfer_inst(&mut sym, &inst, &graph, &body, None, None, None, None, None);

    let lhs = Box::new(SymbolicValue::Symbol(SsaValue(0)));
    let rhs = Box::new(SymbolicValue::Concrete(2));
    let expected = SymbolicValue::BinOp(Op::Mul, lhs, rhs);
    assert_eq!(sym.get(SsaValue(2)), expected);
    assert!(sym.is_tainted(SsaValue(2)));
}
|
|
|
|
/// A two-operand `Assign` on a node without a `bin_op` yields `Unknown`.
#[test]
fn transfer_assign_no_binop_is_unknown() {
    let (graph, anchor) = cfg_with_node(None);
    let body = empty_ssa();
    let mut sym = SymbolicState::new();

    sym.set(SsaValue(0), SymbolicValue::Symbol(SsaValue(0)));
    sym.set(SsaValue(1), SymbolicValue::Concrete(2));

    let pair = SsaOp::Assign(smallvec![SsaValue(0), SsaValue(1)]);
    let inst = make_inst(2, pair, anchor);
    transfer_inst(&mut sym, &inst, &graph, &body, None, None, None, None, None);

    let observed = sym.get(SsaValue(2));
    assert_eq!(observed, SymbolicValue::Unknown);
}
|
|
|
|
/// Without a summary context, a call becomes a `Call` expression over its
/// argument values and is tainted when its argument is.
#[test]
fn transfer_call() {
    let (graph, anchor) = cfg_with_node(None);
    let body = empty_ssa();
    let mut sym = SymbolicState::new();

    // Argument v0: tainted symbol.
    sym.set(SsaValue(0), SymbolicValue::Symbol(SsaValue(0)));
    sym.mark_tainted(SsaValue(0));

    let call = SsaOp::Call {
        callee: "parseInt".into(),
        callee_text: None,
        args: vec![smallvec![SsaValue(0)]],
        receiver: None,
    };
    let inst = make_inst(1, call, anchor);
    transfer_inst(&mut sym, &inst, &graph, &body, None, None, None, None, None);

    let expected_args = vec![SymbolicValue::Symbol(SsaValue(0))];
    let expected = SymbolicValue::Call("parseInt".into(), expected_args);
    assert_eq!(sym.get(SsaValue(1)), expected);
    assert!(sym.is_tainted(SsaValue(1)));
}
|
|
|
|
/// For a method call, the receiver's value precedes the explicit arguments
/// in the resulting `Call` expression.
#[test]
fn transfer_call_with_receiver() {
    let (graph, anchor) = cfg_with_node(None);
    let body = empty_ssa();
    let mut sym = SymbolicState::new();

    // v0 is the receiver, v1 the single explicit argument.
    sym.set(SsaValue(0), SymbolicValue::Symbol(SsaValue(0)));
    sym.set(SsaValue(1), SymbolicValue::Concrete(42));

    let method_call = SsaOp::Call {
        callee: "send".into(),
        callee_text: None,
        args: vec![smallvec![SsaValue(1)]],
        receiver: Some(SsaValue(0)),
    };
    let inst = make_inst(2, method_call, anchor);
    transfer_inst(&mut sym, &inst, &graph, &body, None, None, None, None, None);

    let receiver_then_arg = vec![
        SymbolicValue::Symbol(SsaValue(0)),
        SymbolicValue::Concrete(42),
    ];
    let expected = SymbolicValue::Call("send".into(), receiver_then_arg);
    assert_eq!(sym.get(SsaValue(2)), expected);
}
|
|
|
|
/// A phi over differing operands builds a `Phi` expression and is tainted
/// when one of its operands is tainted.
#[test]
fn transfer_phi() {
    let (graph, anchor) = cfg_with_node(None);
    let body = empty_ssa();
    let mut sym = SymbolicState::new();

    // v0: untainted concrete; v1: tainted symbol.
    sym.set(SsaValue(0), SymbolicValue::Concrete(1));
    sym.set(SsaValue(1), SymbolicValue::Symbol(SsaValue(1)));
    sym.mark_tainted(SsaValue(1));

    let merge = SsaOp::Phi(smallvec![
        (BlockId(0), SsaValue(0)),
        (BlockId(1), SsaValue(1))
    ]);
    let inst = make_inst(2, merge, anchor);
    transfer_inst(&mut sym, &inst, &graph, &body, None, None, None, None, None);

    let expected = SymbolicValue::Phi(vec![
        (BlockId(0), SymbolicValue::Concrete(1)),
        (BlockId(1), SymbolicValue::Symbol(SsaValue(1))),
    ]);
    assert_eq!(sym.get(SsaValue(2)), expected);
    assert!(sym.is_tainted(SsaValue(2)));
}
|
|
|
|
/// Taint introduced by a source survives a copy, a multiplication with an
/// untainted constant, and a call that takes the product as its argument.
#[test]
fn taint_propagation_chain() {
    // Two CFG nodes: `plain` has no operator (source/copy/const/call),
    // `mul` carries `BinOp::Mul` for the multiplication step.
    let mut graph = Cfg::new();
    let plain = graph.add_node(NodeInfo {
        kind: StmtKind::Seq,
        ..Default::default()
    });
    let mul = graph.add_node(NodeInfo {
        kind: StmtKind::Seq,
        bin_op: Some(BinOp::Mul),
        ..Default::default()
    });
    let body = empty_ssa();
    let mut sym = SymbolicState::new();

    // Step 0: v0 is a taint source.
    let source = make_inst(0, SsaOp::Source, plain);
    transfer_inst(&mut sym, &source, &graph, &body, None, None, None, None, None);
    assert!(sym.is_tainted(SsaValue(0)));

    // Step 1: v1 copies v0 and inherits its taint.
    let copy = make_inst(1, SsaOp::Assign(smallvec![SsaValue(0)]), plain);
    transfer_inst(&mut sym, &copy, &graph, &body, None, None, None, None, None);
    assert!(sym.is_tainted(SsaValue(1)));

    // Step 2: v2 is a plain constant and stays clean.
    let three = make_inst(2, SsaOp::Const(Some("3".into())), plain);
    transfer_inst(&mut sym, &three, &graph, &body, None, None, None, None, None);
    assert!(!sym.is_tainted(SsaValue(2)));

    // Step 3: v3 = v1 * v2, tainted through v1.
    let product_op = SsaOp::Assign(smallvec![SsaValue(1), SsaValue(2)]);
    let product = make_inst(3, product_op, mul);
    transfer_inst(&mut sym, &product, &graph, &body, None, None, None, None, None);
    assert!(sym.is_tainted(SsaValue(3)));
    // v1 was a copy of v0, so the left operand is still the v0 symbol.
    let lhs = Box::new(SymbolicValue::Symbol(SsaValue(0)));
    let rhs = Box::new(SymbolicValue::Concrete(3));
    let expected = SymbolicValue::BinOp(Op::Mul, lhs, rhs);
    assert_eq!(sym.get(SsaValue(3)), expected);

    // Step 4: v4 = toString(v3), still tainted.
    let stringify = SsaOp::Call {
        callee: "toString".into(),
        callee_text: None,
        args: vec![smallvec![SsaValue(3)]],
        receiver: None,
    };
    let call = make_inst(4, stringify, plain);
    transfer_inst(&mut sym, &call, &graph, &body, None, None, None, None, None);
    assert!(sym.is_tainted(SsaValue(4)));
}
|
|
|
|
/// A `Nop` instruction leaves a previously stored value for its SSA id untouched.
#[test]
fn transfer_nop_skipped() {
    let (graph, anchor) = cfg_with_node(None);
    let body = empty_ssa();
    let mut sym = SymbolicState::new();

    // Pre-existing value that the Nop must not overwrite.
    sym.set(SsaValue(0), SymbolicValue::Concrete(99));

    let nop = make_inst(0, SsaOp::Nop, anchor);
    transfer_inst(&mut sym, &nop, &graph, &body, None, None, None, None, None);

    let observed = sym.get(SsaValue(0));
    assert_eq!(observed, SymbolicValue::Concrete(99));
}
|
|
|
|
/// `transfer_block` evaluates both the phi list and the body of a block:
/// a phi whose operands agree folds to that value, and the body constant is set.
#[test]
fn transfer_block_processes_phis_then_body() {
    let (graph, anchor) = cfg_with_node(None);
    let body = empty_ssa();
    let mut sym = SymbolicState::new();

    // Both phi operands hold the same concrete value.
    sym.set(SsaValue(0), SymbolicValue::Concrete(1));
    sym.set(SsaValue(1), SymbolicValue::Concrete(1));

    let merge = make_inst(
        2,
        SsaOp::Phi(smallvec![
            (BlockId(0), SsaValue(0)),
            (BlockId(1), SsaValue(1))
        ]),
        anchor,
    );
    let ten = make_inst(3, SsaOp::Const(Some("10".into())), anchor);
    let block = SsaBlock {
        id: BlockId(0),
        phis: vec![merge],
        body: vec![ten],
        terminator: Terminator::Return(None),
        preds: smallvec![],
        succs: smallvec![],
    };

    transfer_block(&mut sym, &block, &graph, &body, None, None, None, None);

    // The all-same phi collapses to the shared value.
    assert_eq!(sym.get(SsaValue(2)), SymbolicValue::Concrete(1));
    // The body constant was evaluated as well.
    assert_eq!(sym.get(SsaValue(3)), SymbolicValue::Concrete(10));
}
|
|
|
|
/// With a known predecessor block, a phi resolves to the value of the
/// operand coming from that block.
#[test]
fn transfer_phi_with_predecessor_resolves_to_operand() {
    let (graph, anchor) = cfg_with_node(None);
    let body = empty_ssa();
    let mut sym = SymbolicState::new();

    // Each incoming edge carries a distinct concrete value.
    sym.set(SsaValue(0), SymbolicValue::Concrete(10));
    sym.set(SsaValue(1), SymbolicValue::Concrete(20));

    let merge = SsaOp::Phi(smallvec![
        (BlockId(0), SsaValue(0)),
        (BlockId(1), SsaValue(1))
    ]);
    let inst = make_inst(2, merge, anchor);

    // Arriving from block 1 selects v1, i.e. Concrete(20).
    let came_from = Some(BlockId(1));
    transfer_inst_with_predecessor(
        &mut sym, &inst, &graph, &body, came_from, None, None, None, None, None,
    );

    let observed = sym.get(SsaValue(2));
    assert_eq!(observed, SymbolicValue::Concrete(20));
}
|
|
|
|
/// With a known predecessor, a phi takes taint only from the selected
/// operand: arriving via the untainted edge leaves the result untainted.
#[test]
fn transfer_phi_with_predecessor_taint_from_selected_only() {
    let (graph, anchor) = cfg_with_node(None);
    let body = empty_ssa();
    let mut sym = SymbolicState::new();

    // Block 0 supplies an untainted value, block 1 a tainted one.
    sym.set(SsaValue(0), SymbolicValue::Concrete(10));
    sym.set(SsaValue(1), SymbolicValue::Symbol(SsaValue(1)));
    sym.mark_tainted(SsaValue(1));

    let merge = SsaOp::Phi(smallvec![
        (BlockId(0), SsaValue(0)),
        (BlockId(1), SsaValue(1))
    ]);
    let inst = make_inst(2, merge, anchor);

    // Arriving from block 0 selects the untainted operand.
    let came_from = Some(BlockId(0));
    transfer_inst_with_predecessor(
        &mut sym, &inst, &graph, &body, came_from, None, None, None, None, None,
    );

    assert!(!sym.is_tainted(SsaValue(2)));
}
|
|
|
|
/// With a known predecessor, arriving via the edge whose operand is tainted
/// makes the phi result tainted.
#[test]
fn transfer_phi_with_predecessor_taint_from_tainted_pred() {
    let (graph, anchor) = cfg_with_node(None);
    let body = empty_ssa();
    let mut sym = SymbolicState::new();

    // Block 0 supplies an untainted value, block 1 a tainted one.
    sym.set(SsaValue(0), SymbolicValue::Concrete(10));
    sym.set(SsaValue(1), SymbolicValue::Symbol(SsaValue(1)));
    sym.mark_tainted(SsaValue(1));

    let merge = SsaOp::Phi(smallvec![
        (BlockId(0), SsaValue(0)),
        (BlockId(1), SsaValue(1))
    ]);
    let inst = make_inst(2, merge, anchor);

    // Arriving from block 1 selects the tainted operand.
    let came_from = Some(BlockId(1));
    transfer_inst_with_predecessor(
        &mut sym, &inst, &graph, &body, came_from, None, None, None, None, None,
    );

    assert!(sym.is_tainted(SsaValue(2)));
}
|
|
|
|
/// With no predecessor supplied, a phi over differing operands is kept as
/// a `Phi` expression listing every incoming value.
#[test]
fn transfer_phi_without_predecessor_builds_phi_expr() {
    let (graph, anchor) = cfg_with_node(None);
    let body = empty_ssa();
    let mut sym = SymbolicState::new();

    sym.set(SsaValue(0), SymbolicValue::Concrete(10));
    sym.set(SsaValue(1), SymbolicValue::Concrete(20));

    let merge = SsaOp::Phi(smallvec![
        (BlockId(0), SsaValue(0)),
        (BlockId(1), SsaValue(1))
    ]);
    let inst = make_inst(2, merge, anchor);

    // Predecessor is `None`, so no single operand can be selected.
    transfer_inst_with_predecessor(
        &mut sym, &inst, &graph, &body, None, None, None, None, None, None,
    );

    let expected = SymbolicValue::Phi(vec![
        (BlockId(0), SymbolicValue::Concrete(10)),
        (BlockId(1), SymbolicValue::Concrete(20)),
    ]);
    assert_eq!(sym.get(SsaValue(2)), expected);
}
|
|
|
|
/// A non-phi instruction produces its usual result even when a predecessor
/// block is supplied.
#[test]
fn transfer_non_phi_ignores_predecessor() {
    let (graph, anchor) = cfg_with_node(None);
    let body = empty_ssa();
    let mut sym = SymbolicState::new();

    let literal = SsaOp::Const(Some("42".into()));
    let inst = make_inst(0, literal, anchor);

    // The predecessor id is arbitrary; a constant has no use for it.
    let came_from = Some(BlockId(5));
    transfer_inst_with_predecessor(
        &mut sym, &inst, &graph, &body, came_from, None, None, None, None, None,
    );

    let observed = sym.get(SsaValue(0));
    assert_eq!(observed, SymbolicValue::Concrete(42));
}
|
|
|
|
// ─── Cross-file summary resolution tests ─────────────────────────
|
|
|
|
use crate::labels::Cap;
|
|
use crate::ssa::type_facts::TypeKind;
|
|
use crate::summary::FuncSummary;
|
|
use crate::summary::GlobalSummaries;
|
|
use crate::summary::ssa_summary::{SsaFuncSummary, TaintTransform};
|
|
use crate::symbol::{FuncKey, Lang};
|
|
|
|
fn make_summary_ctx(gs: &GlobalSummaries) -> SymexSummaryCtx<'_> {
|
|
SymexSummaryCtx {
|
|
global_summaries: gs,
|
|
lang: Lang::JavaScript,
|
|
namespace: "test.js",
|
|
type_facts: None,
|
|
}
|
|
}
|
|
|
|
fn make_func_key(name: &str, arity: usize) -> FuncKey {
|
|
FuncKey {
|
|
lang: Lang::JavaScript,
|
|
namespace: "helper.js".into(),
|
|
name: name.into(),
|
|
arity: Some(arity),
|
|
..Default::default()
|
|
}
|
|
}
|
|
|
|
/// Insert both a regular FuncSummary (for resolve_callee_key lookup) and
|
|
/// an SsaFuncSummary (for the actual symbolic modeling).
|
|
fn insert_summary(gs: &mut GlobalSummaries, name: &str, arity: usize, ssa: SsaFuncSummary) {
|
|
let key = make_func_key(name, arity);
|
|
// Regular summary needed for by_lang_name index used by resolve_callee_key
|
|
gs.insert(
|
|
key.clone(),
|
|
FuncSummary {
|
|
name: name.into(),
|
|
file_path: "helper.js".into(),
|
|
lang: "javascript".into(),
|
|
param_count: arity,
|
|
param_names: vec![],
|
|
source_caps: 0,
|
|
sanitizer_caps: 0,
|
|
sink_caps: 0,
|
|
propagating_params: vec![],
|
|
propagates_taint: false,
|
|
tainted_sink_params: vec![],
|
|
callees: vec![],
|
|
..Default::default()
|
|
},
|
|
);
|
|
gs.insert_ssa(key, ssa);
|
|
}
|
|
|
|
/// A callee whose summary has a single `Identity` transform on param 0
/// returns the argument's symbolic value and keeps its taint.
#[test]
fn transfer_call_identity_summary() {
    let (graph, anchor) = cfg_with_node(None);
    let body = empty_ssa();
    let mut sym = SymbolicState::new();

    // Argument v0: tainted symbol.
    sym.set(SsaValue(0), SymbolicValue::Symbol(SsaValue(0)));
    sym.mark_tainted(SsaValue(0));

    // Summary with exactly one Identity entry, for param 0.
    let identity = SsaFuncSummary {
        param_to_return: vec![(0, TaintTransform::Identity)],
        param_to_sink: vec![],
        source_caps: Cap::empty(),
        param_to_sink_param: vec![],
        param_container_to_return: vec![],
        param_to_container_store: vec![],
        return_type: None,
        return_abstract: None,
        source_to_callback: vec![],
        receiver_to_return: None,
        receiver_to_sink: Cap::empty(),
        abstract_transfer: vec![],
        param_return_paths: vec![],
        points_to: Default::default(),
        field_points_to: Default::default(),
        return_path_facts: smallvec::SmallVec::new(),
        typed_call_receivers: vec![],
        validated_params_to_return: smallvec::SmallVec::new(),
        param_to_gate_filters: vec![],
        entry_kind: None,
    };
    let mut gs = GlobalSummaries::new();
    insert_summary(&mut gs, "passthrough", 1, identity);
    let ctx = make_summary_ctx(&gs);

    let call = SsaOp::Call {
        callee: "passthrough".into(),
        callee_text: None,
        args: vec![smallvec![SsaValue(0)]],
        receiver: None,
    };
    let inst = make_inst(1, call, anchor);
    transfer_inst(
        &mut sym,
        &inst,
        &graph,
        &body,
        Some(&ctx),
        None,
        None,
        None,
        None,
    );

    // The result is the argument's own symbolic value, taint included.
    let observed = sym.get(SsaValue(1));
    assert_eq!(observed, SymbolicValue::Symbol(SsaValue(0)));
    assert!(sym.is_tainted(SsaValue(1)));
}
|
|
|
|
/// When a summary lists `Identity` for two parameters, the call is left as
/// an opaque `Call` expression rather than picking one argument.
#[test]
fn transfer_call_multiple_identity_fallback() {
    let (graph, anchor) = cfg_with_node(None);
    let body = empty_ssa();
    let mut sym = SymbolicState::new();

    sym.set(SsaValue(0), SymbolicValue::Symbol(SsaValue(0)));
    sym.mark_tainted(SsaValue(0));
    sym.set(SsaValue(1), SymbolicValue::Concrete(42));

    // Two Identity entries: neither may be chosen as "the" pass-through.
    let two_identities = SsaFuncSummary {
        param_to_return: vec![(0, TaintTransform::Identity), (1, TaintTransform::Identity)],
        param_to_sink: vec![],
        source_caps: Cap::empty(),
        param_to_sink_param: vec![],
        param_container_to_return: vec![],
        param_to_container_store: vec![],
        return_type: None,
        return_abstract: None,
        source_to_callback: vec![],
        receiver_to_return: None,
        receiver_to_sink: Cap::empty(),
        abstract_transfer: vec![],
        param_return_paths: vec![],
        points_to: Default::default(),
        field_points_to: Default::default(),
        return_path_facts: smallvec::SmallVec::new(),
        typed_call_receivers: vec![],
        validated_params_to_return: smallvec::SmallVec::new(),
        param_to_gate_filters: vec![],
        entry_kind: None,
    };
    let mut gs = GlobalSummaries::new();
    insert_summary(&mut gs, "ambig", 2, two_identities);
    let ctx = make_summary_ctx(&gs);

    let call = SsaOp::Call {
        callee: "ambig".into(),
        callee_text: None,
        args: vec![smallvec![SsaValue(0)], smallvec![SsaValue(1)]],
        receiver: None,
    };
    let inst = make_inst(2, call, anchor);
    transfer_inst(
        &mut sym,
        &inst,
        &graph,
        &body,
        Some(&ctx),
        None,
        None,
        None,
        None,
    );

    // Expect the opaque Call expression, not a Symbol pass-through.
    let observed = sym.get(SsaValue(2));
    match observed {
        SymbolicValue::Call(name, _) => assert_eq!(name, "ambig"),
        other => panic!("expected Call fallback, got {:?}", other),
    }
}
|
|
|
|
/// A callee summarized as `StripBits` on param 0 returns `Unknown` and the
/// result is not tainted, even though the argument was.
#[test]
fn transfer_call_stripbits_summary() {
    let (graph, anchor) = cfg_with_node(None);
    let body = empty_ssa();
    let mut sym = SymbolicState::new();

    // Argument v0: tainted symbol.
    sym.set(SsaValue(0), SymbolicValue::Symbol(SsaValue(0)));
    sym.mark_tainted(SsaValue(0));

    let stripping = SsaFuncSummary {
        param_to_return: vec![(0, TaintTransform::StripBits(Cap::SQL_QUERY))],
        param_to_sink: vec![],
        source_caps: Cap::empty(),
        param_to_sink_param: vec![],
        param_container_to_return: vec![],
        param_to_container_store: vec![],
        return_type: None,
        return_abstract: None,
        source_to_callback: vec![],
        receiver_to_return: None,
        receiver_to_sink: Cap::empty(),
        abstract_transfer: vec![],
        param_return_paths: vec![],
        points_to: Default::default(),
        field_points_to: Default::default(),
        return_path_facts: smallvec::SmallVec::new(),
        typed_call_receivers: vec![],
        validated_params_to_return: smallvec::SmallVec::new(),
        param_to_gate_filters: vec![],
        entry_kind: None,
    };
    let mut gs = GlobalSummaries::new();
    insert_summary(&mut gs, "sanitize", 1, stripping);
    let ctx = make_summary_ctx(&gs);

    let call = SsaOp::Call {
        callee: "sanitize".into(),
        callee_text: None,
        args: vec![smallvec![SsaValue(0)]],
        receiver: None,
    };
    let inst = make_inst(1, call, anchor);
    transfer_inst(
        &mut sym,
        &inst,
        &graph,
        &body,
        Some(&ctx),
        None,
        None,
        None,
        None,
    );

    // StripBits: value becomes Unknown and taint is dropped.
    let observed = sym.get(SsaValue(1));
    assert_eq!(observed, SymbolicValue::Unknown);
    assert!(!sym.is_tainted(SsaValue(1)));
}
|
|
|
|
/// A callee summarized as `AddBits` on param 0 returns a fresh symbol named
/// after the call's own SSA value, and that result is tainted.
#[test]
fn transfer_call_addbits_summary() {
    let (graph, anchor) = cfg_with_node(None);
    let body = empty_ssa();
    let mut sym = SymbolicState::new();

    let adding = SsaFuncSummary {
        param_to_return: vec![(0, TaintTransform::AddBits(Cap::ENV_VAR))],
        param_to_sink: vec![],
        source_caps: Cap::empty(),
        param_to_sink_param: vec![],
        param_container_to_return: vec![],
        param_to_container_store: vec![],
        return_type: None,
        return_abstract: None,
        source_to_callback: vec![],
        receiver_to_return: None,
        receiver_to_sink: Cap::empty(),
        abstract_transfer: vec![],
        param_return_paths: vec![],
        points_to: Default::default(),
        field_points_to: Default::default(),
        return_path_facts: smallvec::SmallVec::new(),
        typed_call_receivers: vec![],
        validated_params_to_return: smallvec::SmallVec::new(),
        param_to_gate_filters: vec![],
        entry_kind: None,
    };
    let mut gs = GlobalSummaries::new();
    insert_summary(&mut gs, "enrich", 1, adding);
    let ctx = make_summary_ctx(&gs);

    let call = SsaOp::Call {
        callee: "enrich".into(),
        callee_text: None,
        args: vec![smallvec![SsaValue(0)]],
        receiver: None,
    };
    let inst = make_inst(1, call, anchor);
    transfer_inst(
        &mut sym,
        &inst,
        &graph,
        &body,
        Some(&ctx),
        None,
        None,
        None,
        None,
    );

    // AddBits: fresh Symbol for the result, marked tainted.
    let observed = sym.get(SsaValue(1));
    assert_eq!(observed, SymbolicValue::Symbol(SsaValue(1)));
    assert!(sym.is_tainted(SsaValue(1)));
}
|
|
|
|
/// A zero-argument callee whose summary has non-empty `source_caps` returns
/// a tainted symbol named after the call's own SSA value.
#[test]
fn transfer_call_source_summary() {
    let (graph, anchor) = cfg_with_node(None);
    let body = empty_ssa();
    let mut sym = SymbolicState::new();

    let env_source = SsaFuncSummary {
        param_to_return: vec![],
        param_to_sink: vec![],
        source_caps: Cap::ENV_VAR,
        param_to_sink_param: vec![],
        param_container_to_return: vec![],
        param_to_container_store: vec![],
        return_type: None,
        return_abstract: None,
        source_to_callback: vec![],
        receiver_to_return: None,
        receiver_to_sink: Cap::empty(),
        abstract_transfer: vec![],
        param_return_paths: vec![],
        points_to: Default::default(),
        field_points_to: Default::default(),
        return_path_facts: smallvec::SmallVec::new(),
        typed_call_receivers: vec![],
        validated_params_to_return: smallvec::SmallVec::new(),
        param_to_gate_filters: vec![],
        entry_kind: None,
    };
    let mut gs = GlobalSummaries::new();
    insert_summary(&mut gs, "readEnv", 0, env_source);
    let ctx = make_summary_ctx(&gs);

    let call = SsaOp::Call {
        callee: "readEnv".into(),
        callee_text: None,
        args: vec![],
        receiver: None,
    };
    let inst = make_inst(0, call, anchor);
    transfer_inst(
        &mut sym,
        &inst,
        &graph,
        &body,
        Some(&ctx),
        None,
        None,
        None,
        None,
    );

    // Non-empty source_caps: the result is a tainted Symbol.
    let observed = sym.get(SsaValue(0));
    assert_eq!(observed, SymbolicValue::Symbol(SsaValue(0)));
    assert!(sym.is_tainted(SsaValue(0)));
}
|
|
|
|
/// With a summary context that knows no functions, a call is left as an
/// opaque `Call` expression carrying the callee name.
#[test]
fn transfer_call_no_summary_fallback() {
    let (graph, anchor) = cfg_with_node(None);
    let body = empty_ssa();
    let mut sym = SymbolicState::new();

    sym.set(SsaValue(0), SymbolicValue::Symbol(SsaValue(0)));

    // Nothing registered, so the callee cannot be resolved.
    let gs = GlobalSummaries::new();
    let ctx = make_summary_ctx(&gs);

    let call = SsaOp::Call {
        callee: "unknown_func".into(),
        callee_text: None,
        args: vec![smallvec![SsaValue(0)]],
        receiver: None,
    };
    let inst = make_inst(1, call, anchor);
    transfer_inst(
        &mut sym,
        &inst,
        &graph,
        &body,
        Some(&ctx),
        None,
        None,
        None,
        None,
    );

    let observed = sym.get(SsaValue(1));
    match observed {
        SymbolicValue::Call(name, _) => assert_eq!(name, "unknown_func"),
        other => panic!("expected Call fallback, got {:?}", other),
    }
}
|
|
|
|
/// With no summary context at all, a call is left as an opaque `Call`
/// expression and inherits taint from its tainted argument.
#[test]
fn transfer_call_none_summary_ctx_fallback() {
    let (graph, anchor) = cfg_with_node(None);
    let body = empty_ssa();
    let mut sym = SymbolicState::new();

    // Argument v0: tainted symbol.
    sym.set(SsaValue(0), SymbolicValue::Symbol(SsaValue(0)));
    sym.mark_tainted(SsaValue(0));

    let call = SsaOp::Call {
        callee: "foo".into(),
        callee_text: None,
        args: vec![smallvec![SsaValue(0)]],
        receiver: None,
    };
    let inst = make_inst(1, call, anchor);
    // Summary context argument is `None`.
    transfer_inst(&mut sym, &inst, &graph, &body, None, None, None, None, None);

    let observed = sym.get(SsaValue(1));
    match observed {
        SymbolicValue::Call(name, _) => assert_eq!(name, "foo"),
        other => panic!("expected Call fallback, got {:?}", other),
    }
    assert!(sym.is_tainted(SsaValue(1)));
}
|
|
|
|
// ─── Type-qualified symbolic resolution tests ──────────
|
|
|
|
use crate::ssa::type_facts::{TypeFact, TypeFactResult};
|
|
use std::collections::HashMap;
|
|
|
|
fn make_type_facts(entries: Vec<(SsaValue, TypeKind)>) -> TypeFactResult {
|
|
let facts = entries
|
|
.into_iter()
|
|
.map(|(v, kind)| {
|
|
(
|
|
v,
|
|
TypeFact {
|
|
kind,
|
|
nullable: false,
|
|
},
|
|
)
|
|
})
|
|
.collect::<HashMap<_, _>>();
|
|
TypeFactResult { facts }
|
|
}
|
|
|
|
fn insert_java_summary(
|
|
gs: &mut GlobalSummaries,
|
|
name: &str,
|
|
namespace: &str,
|
|
arity: usize,
|
|
ssa: SsaFuncSummary,
|
|
) {
|
|
let key = FuncKey {
|
|
lang: Lang::Java,
|
|
namespace: namespace.into(),
|
|
name: name.into(),
|
|
arity: Some(arity),
|
|
..Default::default()
|
|
};
|
|
gs.insert(
|
|
key.clone(),
|
|
FuncSummary {
|
|
name: name.into(),
|
|
file_path: namespace.into(),
|
|
lang: "java".into(),
|
|
param_count: arity,
|
|
param_names: vec![],
|
|
source_caps: 0,
|
|
sanitizer_caps: 0,
|
|
sink_caps: 0,
|
|
propagating_params: vec![],
|
|
propagates_taint: false,
|
|
tainted_sink_params: vec![],
|
|
callees: vec![],
|
|
..Default::default()
|
|
},
|
|
);
|
|
gs.insert_ssa(key, ssa);
|
|
}
|
|
|
|
/// A method call `send` on a receiver typed `HttpClient` is resolved against
/// the summary registered under the qualified name `HttpClient.send`.
#[test]
fn transfer_call_type_qualified_resolution() {
    let (graph, anchor) = cfg_with_node(None);
    let body = empty_ssa();
    let mut sym = SymbolicState::new();

    // v0: tainted URL argument.
    sym.set(SsaValue(0), SymbolicValue::Symbol(SsaValue(0)));
    sym.mark_tainted(SsaValue(0));
    // v1: the receiver, an HttpClient instance.
    sym.set(SsaValue(1), SymbolicValue::Symbol(SsaValue(1)));

    // Only the qualified name is registered; there is no bare "send".
    let identity = SsaFuncSummary {
        param_to_return: vec![(0, TaintTransform::Identity)],
        param_to_sink: vec![],
        source_caps: Cap::empty(),
        param_to_sink_param: vec![],
        param_container_to_return: vec![],
        param_to_container_store: vec![],
        return_type: None,
        return_abstract: None,
        source_to_callback: vec![],
        receiver_to_return: None,
        receiver_to_sink: Cap::empty(),
        abstract_transfer: vec![],
        param_return_paths: vec![],
        points_to: Default::default(),
        field_points_to: Default::default(),
        return_path_facts: smallvec::SmallVec::new(),
        typed_call_receivers: vec![],
        validated_params_to_return: smallvec::SmallVec::new(),
        param_to_gate_filters: vec![],
        entry_kind: None,
    };
    let mut gs = GlobalSummaries::new();
    insert_java_summary(&mut gs, "HttpClient.send", "HttpClient.java", 1, identity);

    let tf = make_type_facts(vec![(SsaValue(1), TypeKind::HttpClient)]);
    let ctx = SymexSummaryCtx {
        global_summaries: &gs,
        lang: Lang::Java,
        namespace: "Caller.java",
        type_facts: Some(&tf),
    };

    // v2 = v1.send(v0)
    let method_call = SsaOp::Call {
        callee: "send".into(),
        callee_text: None,
        args: vec![smallvec![SsaValue(0)]],
        receiver: Some(SsaValue(1)),
    };
    let inst = make_inst(2, method_call, anchor);
    transfer_inst(
        &mut sym,
        &inst,
        &graph,
        &body,
        Some(&ctx),
        None,
        None,
        Some(Lang::Java),
        None,
    );

    // Identity(0) indexes arg_syms[0], and the receiver is prepended to the
    // argument list, so the result is the receiver's symbolic value.
    let observed = sym.get(SsaValue(2));
    assert_eq!(observed, SymbolicValue::Symbol(SsaValue(1)));
}
|
|
|
|
/// With empty type facts and no receiver, type-qualified resolution does not
/// apply and the call resolves through the bare callee name as usual.
#[test]
fn transfer_call_type_qualified_fallback_no_type() {
    let (graph, anchor) = cfg_with_node(None);
    let body = empty_ssa();
    let mut sym = SymbolicState::new();

    // Argument v0: tainted symbol.
    sym.set(SsaValue(0), SymbolicValue::Symbol(SsaValue(0)));
    sym.mark_tainted(SsaValue(0));

    // Summary registered under the bare name "passthrough" (Java, arity 1).
    let identity = SsaFuncSummary {
        param_to_return: vec![(0, TaintTransform::Identity)],
        param_to_sink: vec![],
        source_caps: Cap::empty(),
        param_to_sink_param: vec![],
        param_container_to_return: vec![],
        param_to_container_store: vec![],
        return_type: None,
        return_abstract: None,
        source_to_callback: vec![],
        receiver_to_return: None,
        receiver_to_sink: Cap::empty(),
        abstract_transfer: vec![],
        param_return_paths: vec![],
        points_to: Default::default(),
        field_points_to: Default::default(),
        return_path_facts: smallvec::SmallVec::new(),
        typed_call_receivers: vec![],
        validated_params_to_return: smallvec::SmallVec::new(),
        param_to_gate_filters: vec![],
        entry_kind: None,
    };
    let mut gs = GlobalSummaries::new();
    insert_java_summary(&mut gs, "passthrough", "helper.java", 1, identity);

    // Type facts are present but empty: no receiver type is known.
    let tf = make_type_facts(vec![]);
    let ctx = SymexSummaryCtx {
        global_summaries: &gs,
        lang: Lang::Java,
        namespace: "test.java",
        type_facts: Some(&tf),
    };

    let call = SsaOp::Call {
        callee: "passthrough".into(),
        callee_text: None,
        args: vec![smallvec![SsaValue(0)]],
        receiver: None,
    };
    let inst = make_inst(1, call, anchor);
    transfer_inst(
        &mut sym,
        &inst,
        &graph,
        &body,
        Some(&ctx),
        None,
        None,
        Some(Lang::Java),
        None,
    );

    // Bare-name resolution: Identity(0) passes the argument through.
    let observed = sym.get(SsaValue(1));
    assert_eq!(observed, SymbolicValue::Symbol(SsaValue(0)));
    assert!(sym.is_tainted(SsaValue(1)));
}
|
|
|
|
#[test]
fn transfer_call_type_qualified_disambiguation() {
    // Scenario: the bare name "send" is registered twice (two namespaces, same
    // arity) and a third summary is registered under the qualified name
    // "HttpClient.send". The receiver carries an HttpClient type fact, so the
    // qualified entry is the one expected to be applied.
    let (cfg, node) = cfg_with_node(None);
    let ssa = empty_ssa();

    // The summaries in this test differ only in `param_to_return` and
    // `source_caps`; every other field is left empty.
    let summary_with = |param_to_return, source_caps| SsaFuncSummary {
        param_to_return,
        param_to_sink: vec![],
        source_caps,
        param_to_sink_param: vec![],
        param_container_to_return: vec![],
        param_to_container_store: vec![],
        return_type: None,
        return_abstract: None,
        source_to_callback: vec![],
        receiver_to_return: None,
        receiver_to_sink: Cap::empty(),
        abstract_transfer: vec![],
        param_return_paths: vec![],
        points_to: Default::default(),
        field_points_to: Default::default(),
        return_path_facts: smallvec::SmallVec::new(),
        typed_call_receivers: vec![],
        validated_params_to_return: smallvec::SmallVec::new(),
        param_to_gate_filters: vec![],
        entry_kind: None,
    };

    let mut summaries = GlobalSummaries::new();
    // First bare "send": identity pass-through of argument 0.
    insert_java_summary(
        &mut summaries,
        "send",
        "SocketClient.java",
        1,
        summary_with(vec![(0, TaintTransform::Identity)], Cap::empty()),
    );
    // Second bare "send" in another namespace with the same arity, which makes
    // a bare-name lookup ambiguous.
    insert_java_summary(
        &mut summaries,
        "send",
        "WebSocketClient.java",
        1,
        summary_with(
            vec![(0, TaintTransform::StripBits(Cap::HTML_ESCAPE))],
            Cap::empty(),
        ),
    );
    // Type-qualified entry. It acts as a source (ENV_VAR) so that its effect is
    // distinguishable from both bare "send" summaries.
    insert_java_summary(
        &mut summaries,
        "HttpClient.send",
        "HttpClient.java",
        1,
        summary_with(vec![], Cap::ENV_VAR),
    );

    let type_facts = make_type_facts(vec![(SsaValue(1), TypeKind::HttpClient)]);
    let summary_ctx = SymexSummaryCtx {
        global_summaries: &summaries,
        lang: Lang::Java,
        namespace: "Caller.java",
        type_facts: Some(&type_facts),
    };

    // v0 is the tainted argument, v1 the (untainted) receiver.
    let mut sym_state = SymbolicState::new();
    sym_state.set(SsaValue(0), SymbolicValue::Symbol(SsaValue(0)));
    sym_state.mark_tainted(SsaValue(0));
    sym_state.set(SsaValue(1), SymbolicValue::Symbol(SsaValue(1)));

    // v2 = v1.send(v0), receiver v1 is HttpClient
    let call = make_inst(
        2,
        SsaOp::Call {
            callee: "send".into(),
            callee_text: None,
            args: vec![smallvec![SsaValue(0)]],
            receiver: Some(SsaValue(1)),
        },
        node,
    );
    transfer_inst(
        &mut sym_state,
        &call,
        &cfg,
        &ssa,
        Some(&summary_ctx),
        None,
        None,
        Some(Lang::Java),
        None,
    );

    // Expect the "HttpClient.send" summary to win: source_caps = ENV_VAR yields
    // a fresh tainted Symbol for the call result.
    let result = sym_state.get(SsaValue(2));
    assert_eq!(result, SymbolicValue::Symbol(SsaValue(2)));
    assert!(sym_state.is_tainted(SsaValue(2)));
}
|
|
|
|
#[test]
fn transfer_call_type_qualified_wrong_owner() {
    // Scenario: the only registered summary is "DatabaseConnection.send", while
    // the call's receiver is typed as HttpClient. The summary belongs to a
    // different owner and must NOT be applied to this call.
    let (cfg, node) = cfg_with_node(None);
    let ssa = empty_ssa();

    // Source-like summary (ENV_VAR) registered under the wrong owner type.
    let wrong_owner_summary = SsaFuncSummary {
        param_to_return: vec![],
        param_to_sink: vec![],
        source_caps: Cap::ENV_VAR,
        param_to_sink_param: vec![],
        param_container_to_return: vec![],
        param_to_container_store: vec![],
        return_type: None,
        return_abstract: None,
        source_to_callback: vec![],
        receiver_to_return: None,
        receiver_to_sink: Cap::empty(),
        abstract_transfer: vec![],
        param_return_paths: vec![],
        points_to: Default::default(),
        field_points_to: Default::default(),
        return_path_facts: smallvec::SmallVec::new(),
        typed_call_receivers: vec![],
        validated_params_to_return: smallvec::SmallVec::new(),
        param_to_gate_filters: vec![],
        entry_kind: None,
    };
    let mut summaries = GlobalSummaries::new();
    insert_java_summary(
        &mut summaries,
        "DatabaseConnection.send",
        "DatabaseConnection.java",
        1,
        wrong_owner_summary,
    );

    // Receiver typed as HttpClient, constructs "HttpClient.send", not
    // "DatabaseConnection.send".
    let type_facts = make_type_facts(vec![(SsaValue(1), TypeKind::HttpClient)]);
    let summary_ctx = SymexSummaryCtx {
        global_summaries: &summaries,
        lang: Lang::Java,
        namespace: "Caller.java",
        type_facts: Some(&type_facts),
    };

    // v0 is the argument, v1 the receiver; neither is tainted here.
    let mut sym_state = SymbolicState::new();
    sym_state.set(SsaValue(0), SymbolicValue::Symbol(SsaValue(0)));
    sym_state.set(SsaValue(1), SymbolicValue::Symbol(SsaValue(1)));

    // v2 = v1.send(v0)
    let call = make_inst(
        2,
        SsaOp::Call {
            callee: "send".into(),
            callee_text: None,
            args: vec![smallvec![SsaValue(0)]],
            receiver: Some(SsaValue(1)),
        },
        node,
    );
    transfer_inst(
        &mut sym_state,
        &call,
        &cfg,
        &ssa,
        Some(&summary_ctx),
        None,
        None,
        Some(Lang::Java),
        None,
    );

    // "HttpClient.send" not found, bare "send" not found → opaque mk_call fallback
    let result = sym_state.get(SsaValue(2));
    match result {
        SymbolicValue::Call(callee_name, _) => assert_eq!(callee_name, "send"),
        unexpected => panic!("expected Call fallback, got {:?}", unexpected),
    }
}
|
|
|
|
#[test]
fn transfer_call_type_qualified_ambiguous_no_force() {
    // Scenario: two bare "send" summaries exist (ambiguous), the receiver type
    // is known, but no registered name equals the qualified "HttpClient.send".
    // Resolution must NOT force-pick one of the ambiguous candidates.
    let (cfg, node) = cfg_with_node(None);
    let ssa = empty_ssa();

    // Both candidates differ only in how parameter 0 flows to the return value;
    // every other field is left empty.
    let send_summary = |param_to_return| SsaFuncSummary {
        param_to_return,
        param_to_sink: vec![],
        source_caps: Cap::empty(),
        param_to_sink_param: vec![],
        param_container_to_return: vec![],
        param_to_container_store: vec![],
        return_type: None,
        return_abstract: None,
        source_to_callback: vec![],
        receiver_to_return: None,
        receiver_to_sink: Cap::empty(),
        abstract_transfer: vec![],
        param_return_paths: vec![],
        points_to: Default::default(),
        field_points_to: Default::default(),
        return_path_facts: smallvec::SmallVec::new(),
        typed_call_receivers: vec![],
        validated_params_to_return: smallvec::SmallVec::new(),
        param_to_gate_filters: vec![],
        entry_kind: None,
    };

    // Two "send" summaries, different namespaces → ambiguous
    let mut summaries = GlobalSummaries::new();
    insert_java_summary(
        &mut summaries,
        "send",
        "ModuleA.java",
        1,
        send_summary(vec![(0, TaintTransform::Identity)]),
    );
    insert_java_summary(
        &mut summaries,
        "send",
        "ModuleB.java",
        1,
        send_summary(vec![(0, TaintTransform::StripBits(Cap::HTML_ESCAPE))]),
    );
    // No "HttpClient.send" summary registered, disambiguation has 0 exact matches

    let type_facts = make_type_facts(vec![(SsaValue(1), TypeKind::HttpClient)]);
    let summary_ctx = SymexSummaryCtx {
        global_summaries: &summaries,
        lang: Lang::Java,
        namespace: "Caller.java",
        type_facts: Some(&type_facts),
    };

    // v0 is the argument, v1 the receiver; neither is tainted here.
    let mut sym_state = SymbolicState::new();
    sym_state.set(SsaValue(0), SymbolicValue::Symbol(SsaValue(0)));
    sym_state.set(SsaValue(1), SymbolicValue::Symbol(SsaValue(1)));

    // v2 = v1.send(v0)
    let call = make_inst(
        2,
        SsaOp::Call {
            callee: "send".into(),
            callee_text: None,
            args: vec![smallvec![SsaValue(0)]],
            receiver: Some(SsaValue(1)),
        },
        node,
    );
    transfer_inst(
        &mut sym_state,
        &call,
        &cfg,
        &ssa,
        Some(&summary_ctx),
        None,
        None,
        Some(Lang::Java),
        None,
    );

    // Neither qualified lookup nor disambiguation found a match.
    // Bare-name path returns Ambiguous → falls through to mk_call.
    let result = sym_state.get(SsaValue(2));
    match result {
        SymbolicValue::Call(callee_name, _) => assert_eq!(callee_name, "send"),
        unexpected => panic!(
            "expected Call fallback for ambiguous case, got {:?}",
            unexpected
        ),
    }
}
|
|
}
|