#![allow( clippy::collapsible_if, clippy::if_same_then_else, clippy::manual_flatten, clippy::needless_range_loop, clippy::only_used_in_recursion, clippy::single_match, clippy::too_many_arguments, clippy::unnecessary_map_or )] mod events; mod inline; mod state; mod summary_extract; #[cfg(test)] mod tests; pub use events::{SsaTaintEvent, ssa_events_to_findings}; pub(crate) use inline::{ArgTaintSig, InlineCache}; use inline::{CachedInlineShape, InlineResult, MAX_INLINE_BLOCKS, ReturnShape}; pub use inline::{CalleeSsaBody, CrossFileNodeMeta, populate_node_meta, rebuild_body_graph}; #[allow(unused_imports)] // retained for future shared-cache refactor / tests pub(crate) use inline::{inline_cache_clear_epoch, inline_cache_fingerprint}; pub use state::{ BindingKey, SsaTaintState, max_worklist_iterations, origins_truncation_count, reset_all_validated_spans, reset_origins_observability, reset_path_safe_suppressed_spans, reset_worklist_observability, seed_lookup, set_max_origins_override, set_worklist_cap_override, take_all_validated_spans, take_path_safe_suppressed_spans, worklist_cap_hit_count, }; use state::{ MAX_WORKLIST_ITERATIONS, ORIGINS_TRUNCATION_COUNT, WORKLIST_CAP_HITS, effective_max_origins, effective_worklist_cap, }; pub(crate) use state::{ push_origin_bounded, record_engine_note, reset_body_engine_notes, take_body_engine_notes, }; pub use summary_extract::{extract_ssa_func_summary, extract_ssa_func_summary_full}; use crate::abstract_interp::AbstractState; use crate::callgraph::{callee_container_hint, callee_leaf_name}; use crate::cfg::{BodyId, Cfg, FuncSummaries, NodeInfo}; use crate::constraint; use crate::interop::InteropEdge; use crate::labels::{Cap, DataLabel, RuntimeLabelRule, SourceKind}; use crate::ssa::heap::{HeapObjectId, HeapSlot, PointsToResult, PointsToSet}; use crate::ssa::ir::*; use crate::ssa::type_facts::InputValidatorPolarity; use crate::state::lattice::Lattice; use crate::state::symbol::SymbolInterner; use crate::summary::{CalleeQuery, 
CalleeResolution, GlobalSummaries, SinkSite}; use crate::symbol::{FuncKey, Lang}; use crate::taint::domain::{PredicateSummary, TaintOrigin, VarTaint, predicate_kind_bit}; use crate::taint::path_state::{PredicateKind, classify_condition_with_target}; use petgraph::graph::NodeIndex; use smallvec::SmallVec; use std::cell::RefCell; use std::collections::{HashMap, HashSet, VecDeque}; // ── SSA Taint Transfer ────────────────────────────────────────────────── /// Configuration for SSA taint analysis. pub struct SsaTaintTransfer<'a> { pub lang: Lang, pub namespace: &'a str, pub interner: &'a SymbolInterner, pub local_summaries: &'a FuncSummaries, pub global_summaries: Option<&'a GlobalSummaries>, pub interop_edges: &'a [InteropEdge], /// The [`BodyId`] of the body currently being analysed. Used as the /// owning scope when writing seed entries that leave this body /// (e.g. [`extract_ssa_exit_state`]) and as the identity recorded on /// engine notes. Defaults to `BodyId(0)` (top-level) for inline /// probes and unit tests that analyse a single synthetic body. pub owner_body_id: BodyId, /// The [`BodyId`] of this body's lexical parent, if any. Drives the /// `Param`-op reader's lookup into [`Self::global_seed`]: we read /// from the parent's scope first (the seed entries produced by /// [`extract_ssa_exit_state`] on the parent body), then fall back to /// `BodyId(0)` to pick up JS/TS two-level re-keyed entries (see /// [`filter_seed_to_toplevel`]). `None` for the top-level body and /// for probes with no surrounding scope. pub parent_body_id: Option, /// Taint from enclosing/parent body scope, keyed by [`BindingKey`]. /// Read-only fallback for `Param` ops representing captured or /// module-scope variables. Used in multi-body analysis for lexical /// containment propagation (top-level → function → closure). pub global_seed: Option<&'a HashMap>, /// Per-call-site parameter seed for context-sensitive inline /// analysis. 
Indexed by callee's formal [`SsaOp::Param`] index: a /// `Some(taint)` at index `i` seeds the callee's formal param `i` /// with the caller's argument taint. Out-of-range indices (e.g. /// synthetic capture params emitted by scoped lowering) fall back /// to [`Self::global_seed`]. pub param_seed: Option<&'a [Option]>, /// Per-call-site receiver seed for context-sensitive inline /// analysis. Mirrors [`Self::param_seed`] for [`SsaOp::SelfParam`] /// reads, seeds the callee's implicit `this` / `self` slot with /// the caller's method-receiver taint. pub receiver_seed: Option<&'a VarTaint>, /// Per-SSA-value constant lattice from constant propagation. /// Used for SSA-level literal suppression at sinks. pub const_values: Option<&'a HashMap>, /// Type facts from type analysis. /// Used for type-aware sink filtering (e.g., suppress SQL injection for int-typed values). pub type_facts: Option<&'a crate::ssa::type_facts::TypeFactResult>, /// XML-parser config facts. Used to suppress XXE bits at parse-class /// sinks whose receiver was provably hardened /// (`setFeature(FEATURE_SECURE_PROCESSING, true)`, etc.). Strictly /// additive: `None` falls back to the existing flat / gated XXE /// classification. pub xml_parser_config: Option<&'a crate::ssa::xml_config::XmlParserConfigResult>, /// XPath-receiver config facts. Used to suppress XPATH_INJECTION at /// `evaluate` / `compile` sinks whose receiver was provably bound to /// an `XPathVariableResolver` (parameterised-XPath shape). Strictly /// additive: `None` falls back to the existing flat / gated XPATH /// classification. pub xpath_config: Option<&'a crate::ssa::xpath_config::XPathConfigResult>, /// Precise per-function SSA summaries for intra-file callee resolution. /// Checked before legacy FuncSummary resolution. /// /// Keyed by canonical [`FuncKey`], never bare function name, so /// same-name functions in the same file cannot silently overwrite one /// another. 
pub ssa_summaries: Option<&'a HashMap>, /// Extra label rules from user config (custom sources/sanitizers/sinks). /// Used as fallback when `resolve_callee` finds no summary for an inner /// arg callee, so label-only sanitizers still reduce sink caps. pub extra_labels: Option<&'a [RuntimeLabelRule]>, /// Pre-lowered + optimized SSA bodies for intra-file functions. /// When present, enables context-sensitive inline analysis at call sites. /// /// Keyed by canonical [`FuncKey`] (same identity model as `ssa_summaries`). pub callee_bodies: Option<&'a HashMap>, /// Cache for context-sensitive inline results. Uses `RefCell` for interior /// mutability (safe: k=1 depth limit prevents re-entrancy during borrow). pub(crate) inline_cache: Option<&'a RefCell>, /// Base-variable alias groups for alias-aware sanitization propagation. /// When present, sanitization of `alias.field` also sanitizes `base.field` /// for all must-aliased base names. pub base_aliases: Option<&'a crate::ssa::alias::BaseAliasResult>, /// Current inline analysis depth (0 = top-level caller). When >= 1, /// inline analysis falls back to summary resolution (k=1 bound). pub context_depth: u8, /// Callback bindings: maps callee parameter name → resolved callee /// [`FuncKey`]. /// /// Populated during inline analysis when the caller passes a function /// reference as an argument. The value is a full `FuncKey` so that when /// the callee invokes the parameter the call resolves back to the exact /// same definition without re-entering bare-name lookup. pub callback_bindings: Option<&'a HashMap>, /// Points-to analysis result: per-SSA-value abstract heap object sets. /// When present, container taint flows through heap objects instead of /// being merged directly into SSA values. pub points_to: Option<&'a PointsToResult>, /// Dynamic points-to set: populated at call sites by inter-procedural /// container identity propagation from `param_container_to_return` summaries. 
/// Uses `RefCell` for interior mutability (same pattern as `inline_cache`). pub dynamic_pts: Option<&'a RefCell>>, /// Import alias bindings: local alias → (original name, module path). /// Used in `resolve_callee` to map aliased import names back to their /// original exported symbol before summary lookup. pub import_bindings: Option<&'a crate::cfg::ImportBindings>, /// Promisify alias bindings: `const alias = util.promisify(wrapped)` for /// JS/TS. Used in `resolve_callee` so summary lookup for `alias(...)` falls /// back to `wrapped`'s summary. Label-based sink/source detection is /// handled by a CFG post-pass that unions the wrapped callee's labels into /// every matching call-site's `info.taint.labels`. pub promisify_aliases: Option<&'a crate::cfg::PromisifyAliases>, /// Module aliases from `require()` calls: SSA value → possible module names. /// Used to resolve dynamic dispatch (e.g., `lib.request()` where /// `lib = require("http")`) for sink label matching. pub module_aliases: Option<&'a HashMap>>, /// Static-map analysis result: SSA values whose concrete string value is /// provably bounded to a finite set of literals (e.g. the result of /// `map.get(x).unwrap_or("fallback")` over an all-literal-insert map). /// When present, seeded into [`AbstractState`] at entry so downstream sink /// suppression can clear command-injection findings whose payload is /// provably metacharacter-free. pub static_map: Option<&'a crate::ssa::static_map::StaticMapResult>, /// When `true`, JS/TS formal parameters whose names strongly imply user /// input (see [`crate::labels::is_js_ts_handler_param_name`]) are /// auto-seeded with a `UserInput` source on entry. Defaults to `false` /// so summary probes and non-JS/TS pipelines keep their existing /// baseline-subtraction semantics; the findings pipeline flips this on /// to detect handler-style flows that have no registered caller. 
pub auto_seed_handler_params: bool, /// Cross-file callee bodies sourced from /// [`GlobalSummaries`]. Populated in pass 2 to enable /// context-sensitive inline re-analysis across file boundaries the /// same way `callee_bodies` enables it intra-file. `None` preserves /// non-cross-file behaviour for unit tests and non-cross-file /// construction sites. pub cross_file_bodies: Option<&'a HashMap>, /// per-body field-sensitive points-to facts. /// Populated only when [`crate::pointer::is_enabled()`]. When /// present, [`SsaOp::FieldProj`] reads consult /// [`SsaTaintState::field_taint`] for each `loc ∈ pt(receiver)`, /// unioning the field-cell taint into the projected value. Field /// writes (synthetic base-update [`SsaOp::Assign`] instructions /// emitted by SSA lowering) likewise record argument taint into /// the matching cells. Strict-additive: `None` reproduces today's /// pointer-unaware behaviour. pub pointer_facts: Option<&'a crate::pointer::PointsToFacts>, } /// Per-predecessor state tracking for path-sensitive phi evaluation. /// Maps (successor_block_idx, predecessor_block_idx) → predecessor's exit state. type PredStates = HashMap<(usize, usize), SsaTaintState>; struct SsaTaintRunResult { events: Vec, block_states: Vec>, block_exit_states: Vec>, } /// Run SSA-based taint analysis, returning events AND converged block states. pub fn run_ssa_taint_full( ssa: &SsaBody, cfg: &Cfg, transfer: &SsaTaintTransfer, ) -> (Vec, Vec>) { let result = run_ssa_taint_internal(ssa, cfg, transfer); (result.events, result.block_states) } /// Run SSA-based taint analysis, returning events plus converged entry and /// exit states for each block. Intended for debug/introspection views. 
pub fn run_ssa_taint_full_with_exits( ssa: &SsaBody, cfg: &Cfg, transfer: &SsaTaintTransfer, ) -> ( Vec, Vec>, Vec>, ) { let result = run_ssa_taint_internal(ssa, cfg, transfer); (result.events, result.block_states, result.block_exit_states) } fn run_ssa_taint_internal( ssa: &SsaBody, cfg: &Cfg, transfer: &SsaTaintTransfer, ) -> SsaTaintRunResult { let num_blocks = ssa.blocks.len(); // Detect induction variables before analysis let back_edges = detect_back_edges(ssa); let induction_vars = detect_induction_phis(ssa, &back_edges); // Per-block entry states let mut block_states: Vec> = vec![None; num_blocks]; let mut block_exit_states: Vec> = vec![None; num_blocks]; block_states[ssa.entry.0 as usize] = Some(SsaTaintState::initial()); // Seed entry block's PathEnv from optimization results if let Some(ref mut entry_state) = block_states[ssa.entry.0 as usize] { if let Some(ref mut env) = entry_state.path_env { if let (Some(cv), Some(tf)) = (transfer.const_values, transfer.type_facts) { env.seed_from_optimization(cv, tf); } } } // Seed entry block's AbstractState from optimization results if let Some(ref mut entry_state) = block_states[ssa.entry.0 as usize] { if let Some(ref mut abs) = entry_state.abstract_state { if let Some(cv) = transfer.const_values { use crate::abstract_interp::{ AbstractValue, BitFact, IntervalFact, PathFact, StringFact, }; use crate::ssa::const_prop::ConstLattice; for (v, cl) in cv { match cl { ConstLattice::Int(n) => { abs.set( *v, AbstractValue { interval: IntervalFact::exact(*n), string: StringFact::top(), bits: BitFact::from_const(*n), path: PathFact::top(), }, ); } ConstLattice::Str(s) => { abs.set( *v, AbstractValue { interval: IntervalFact::top(), string: StringFact::exact(s), bits: BitFact::top(), path: PathFact::top(), }, ); } _ => {} } } } // Static-map seeding is intentionally NOT fused into the // AbstractState here. 
A blanket `StringFact::finite_set` would // compose with `StringFact::exact` facts emitted by // `transfer_abstract` for every string literal, and downstream // suppression logic can't distinguish "single-literal exact" // from "multi-literal bounded lookup". Instead the sink check // consults `transfer.static_map` directly via the dedicated // `is_static_map_shell_safe` predicate, which only fires when // the value was proved bounded by the HashMap idiom detector. } } // Compute loop heads for widening let loop_heads: HashSet = back_edges .iter() .map(|(_, target)| target.0 as usize) .collect(); // Per-predecessor exit states for path-sensitive phi evaluation let mut pred_states: PredStates = HashMap::new(); // Fixed-point iteration let mut worklist: VecDeque = VecDeque::new(); let mut in_worklist: HashSet = HashSet::new(); worklist.push_back(ssa.entry.0 as usize); in_worklist.insert(ssa.entry.0 as usize); // Initialize orphan blocks (no predecessors, not entry) with initial state. // This handles catch blocks that are disconnected after exception edge stripping. 
for (bid, block) in ssa.blocks.iter().enumerate() { if bid != ssa.entry.0 as usize && block.preds.is_empty() { block_states[bid] = Some(SsaTaintState::initial()); worklist.push_back(bid); in_worklist.insert(bid); } } if !ssa.exception_edges.is_empty() { tracing::debug!( count = ssa.exception_edges.len(), "SSA taint: exception edges for catch-block seeding" ); } let mut iterations: usize = 0; let budget = effective_worklist_cap(); let mut worklist_capped = false; while let Some(bid) = worklist.pop_front() { in_worklist.remove(&bid); iterations += 1; if iterations >= budget { tracing::warn!("SSA taint: worklist budget exceeded"); worklist_capped = true; break; } let entry_state = match &block_states[bid] { Some(s) => s.clone(), None => continue, }; let block = &ssa.blocks[bid]; let exit_state = transfer_block( block, cfg, ssa, transfer, entry_state, &induction_vars, Some(&pred_states), ); block_exit_states[bid] = Some(exit_state.clone()); // Build per-successor states (branch-aware for Branch terminators) let succ_states = compute_succ_states(block, cfg, ssa, transfer, &exit_state); // Store predecessor-specific states before joining for &(succ_id, ref succ_state) in &succ_states { let succ_idx = succ_id.0 as usize; pred_states.insert((succ_idx, bid), succ_state.clone()); } // Propagate to successors for (succ_id, succ_state) in succ_states { let succ_idx = succ_id.0 as usize; let new_succ_state = match &block_states[succ_idx] { Some(existing) => { let mut joined = existing.join(&succ_state); // Widen abstract values at loop heads if loop_heads.contains(&succ_idx) { if let (Some(new_abs), Some(old_abs)) = (&joined.abstract_state, &existing.abstract_state) { let widened = old_abs.widen(new_abs); joined.abstract_state = Some(widened); } } joined } None => succ_state, }; let changed = block_states[succ_idx] .as_ref() .is_none_or(|existing| *existing != new_succ_state); if changed { block_states[succ_idx] = Some(new_succ_state); if in_worklist.insert(succ_idx) { 
worklist.push_back(succ_idx); } } } // Propagate taint to catch blocks via exception edges. // Mirrors legacy semantics: variable taint carries across exception // edges but predicates are cleared (exception bypasses try conditions). let bid_id = BlockId(bid as u32); for &(src_blk, catch_blk) in &ssa.exception_edges { if src_blk != bid_id { continue; } let catch_idx = catch_blk.0 as usize; let mut exc_state = exit_state.clone(); exc_state.predicates.clear(); exc_state.path_env = None; // constraints don't survive exceptions let new_catch_state = match &block_states[catch_idx] { Some(existing) => existing.join(&exc_state), None => exc_state, }; let changed = block_states[catch_idx] .as_ref() .is_none_or(|existing| *existing != new_catch_state); if changed { block_states[catch_idx] = Some(new_catch_state); if in_worklist.insert(catch_idx) { worklist.push_back(catch_idx); } } } } MAX_WORKLIST_ITERATIONS.fetch_max(iterations, std::sync::atomic::Ordering::Relaxed); if worklist_capped { WORKLIST_CAP_HITS.fetch_add(1, std::sync::atomic::Ordering::Relaxed); record_engine_note(crate::engine_notes::EngineNote::WorklistCapped { iterations: iterations as u32, }); } // Post-hoc origin-truncation detection. If any converged block state // has a `VarTaint` whose origin list reached the cap, assume at least // one origin was dropped during the fixed-point iteration. Coarse // but useful signal, `merge_origins` already emits the precise-count // note on the merge path; this complements push sites inside transfer. 
let cap = effective_max_origins(); let mut saturated = 0u32; for state in block_states.iter().flatten() { for (_v, taint) in &state.values { if taint.origins.len() >= cap { saturated = saturated.saturating_add(1); } } } if saturated > 0 { ORIGINS_TRUNCATION_COUNT .fetch_add(saturated as usize, std::sync::atomic::Ordering::Relaxed); record_engine_note(crate::engine_notes::EngineNote::OriginsTruncated { dropped: saturated, }); } // Single pass over converged states to collect events let mut events: Vec = Vec::new(); for bid in 0..num_blocks { let entry_state = match &block_states[bid] { Some(s) => s.clone(), None => continue, }; let block = &ssa.blocks[bid]; collect_block_events( block, cfg, ssa, transfer, entry_state, &mut events, &induction_vars, Some(&pred_states), ); } SsaTaintRunResult { events, block_states, block_exit_states, } } /// Convenience wrapper: returns only events (existing signature). pub fn run_ssa_taint(ssa: &SsaBody, cfg: &Cfg, transfer: &SsaTaintTransfer) -> Vec { run_ssa_taint_full(ssa, cfg, transfer).0 } /// Project SsaValue-keyed taint back to [`BindingKey`]-keyed taint via var_name. /// /// Recomputes exit states from converged entry states, then maps /// SsaValue → var_name → `BindingKey`. The returned map is suitable /// for seeding child bodies via `global_seed`. /// /// `owner_body_id` is the id of the body being summarised; it tags /// every key via [`BindingKey::new`] so that same-named bindings from /// different bodies do not silently alias when the seed is later /// merged (e.g. in the JS/TS two-level solve). 
pub fn extract_ssa_exit_state( block_states: &[Option], ssa: &SsaBody, cfg: &Cfg, transfer: &SsaTaintTransfer, owner_body_id: BodyId, ) -> HashMap { // Compute exit states by replaying transfer on converged entry states let empty_induction = HashSet::new(); let mut joined = SsaTaintState::initial(); for (bid, entry_state) in block_states.iter().enumerate() { if let Some(state) = entry_state { let exit_state = transfer_block( &ssa.blocks[bid], cfg, ssa, transfer, state.clone(), &empty_induction, None, ); joined = joined.join(&exit_state); } } // Map SsaValue → var_name → BindingKey, scoped to the owning body. let mut result: HashMap = HashMap::new(); for (val, taint) in &joined.values { let var_name = ssa .value_defs .get(val.0 as usize) .and_then(|vd| vd.var_name.as_deref()); if let Some(name) = var_name { let key = BindingKey::new(name, owner_body_id); result .entry(key) .and_modify(|existing| { existing.caps |= taint.caps; for orig in &taint.origins { push_origin_bounded(&mut existing.origins, *orig); } }) .or_insert_with(|| taint.clone()); } } // Capture source spans on all origins before the seed crosses a body // boundary. At consumption time the parent's graph is not in scope, // so we snapshot each origin's span now. Use the classification span // so the recorded origin points at the labeled sub-expression (e.g. // the inner `req.query.x` call) rather than the enclosing statement. for taint in result.values_mut() { for origin in taint.origins.iter_mut() { if origin.source_span.is_none() { if let Some(info) = cfg.node_weight(origin.node) { origin.source_span = Some(info.classification_span()); } } } } result } /// Join two [`BindingKey`]-keyed seed maps (OR caps, merge origins). 
pub fn join_seed_maps(
    a: &HashMap<BindingKey, VarTaint>,
    b: &HashMap<BindingKey, VarTaint>,
) -> HashMap<BindingKey, VarTaint> {
    // Start from a copy of `a` and fold every entry of `b` into it.  Keys
    // present in only one map are carried over unchanged; colliding keys
    // are merged by `merge_seed_entry`, which also ORs `uses_summary` so
    // the flag no longer depends on which argument an entry came from.
    let mut result = a.clone();
    for (key, taint) in b {
        merge_seed_entry(&mut result, key.clone(), taint);
    }
    result
}

/// Fold `incoming` into the `seed` entry for `key`.
///
/// A missing entry is created as a clone of `incoming`.  An existing entry
/// has its caps OR-ed, the incoming origins appended through
/// [`push_origin_bounded`], and `uses_summary` OR-ed.
fn merge_seed_entry(
    seed: &mut HashMap<BindingKey, VarTaint>,
    key: BindingKey,
    incoming: &VarTaint,
) {
    seed.entry(key)
        .and_modify(|existing| {
            existing.caps |= incoming.caps;
            for orig in &incoming.origins {
                push_origin_bounded(&mut existing.origins, *orig);
            }
            existing.uses_summary |= incoming.uses_summary;
        })
        .or_insert_with(|| incoming.clone());
}

/// Filter a per-body exit seed map down to the top-level scope.
///
/// `toplevel` is the set of binding names that appear syntactically at
/// the top level (always keyed with `BodyId(0)`). Every entry of `seed`
/// whose name is in that set is kept but **re-keyed** to `BodyId(0)`, so
/// the resulting map is single-scope: same-name entries from different
/// bodies are merged (caps OR-ed, origins pushed, `uses_summary` OR-ed)
/// instead of coexisting as distinct keys. Entries whose name is not in
/// `toplevel` are dropped.
///
/// This is the one legitimate place where a binding's owning scope
/// changes: the JS/TS two-level solve joins exit states from many
/// sibling function bodies into a single `combined_exit`, and each
/// sibling's surviving bindings conceptually belong to the top-level
/// scope they all write into. Every other writer in the pipeline
/// preserves the owner's id.
pub fn filter_seed_to_toplevel(
    seed: &HashMap<BindingKey, VarTaint>,
    toplevel: &HashSet<BindingKey>,
) -> HashMap<BindingKey, VarTaint> {
    // Only names matter for the membership test; the body id is ignored.
    let toplevel_names: HashSet<&str> = toplevel.iter().map(|k| k.name.as_str()).collect();

    let mut out: HashMap<BindingKey, VarTaint> = HashMap::new();
    for (key, taint) in seed {
        if toplevel_names.contains(key.name.as_str()) {
            let rekeyed = BindingKey::new(key.name.clone(), BodyId(0));
            merge_seed_entry(&mut out, rekeyed, taint);
        }
    }
    out
}

// ── Loop Induction Variable Detection ────────────────────────────────────

/// Detect back edges using block numbering heuristic.
/// A back edge is (pred, block) where pred.0 >= block.0, valid because
/// `form_blocks()` builds blocks in BFS order.
fn detect_back_edges(ssa: &SsaBody) -> HashSet<(BlockId, BlockId)> {
    ssa.blocks
        .iter()
        .flat_map(|block| {
            let target = block.id;
            block
                .preds
                .iter()
                .copied()
                .filter(move |pred| pred.0 >= target.0)
                .map(move |pred| (pred, target))
        })
        .collect()
}

/// Report whether `inc_val` is produced by a two-operand `Assign` that
/// combines `phi_val` with a constant, i.e. the shape of
/// `inc_val = phi_val + const` / `inc_val = phi_val - const`.
///
/// Only the first body instruction defining `inc_val` in its defining
/// block is examined. The non-phi operand counts as a constant when an
/// instruction in its own defining block (phis or body) defines it with
/// `SsaOp::Const`.
fn is_simple_increment(ssa: &SsaBody, inc_val: SsaValue, phi_val: SsaValue) -> bool {
    let defining_block = ssa.block(ssa.def_of(inc_val).block);

    // Locate the instruction that produces `inc_val`.
    let Some(def_inst) = defining_block
        .body
        .iter()
        .find(|inst| inst.value == inc_val)
    else {
        return false;
    };

    // It must be a binary assign with `phi_val` as one operand.
    let SsaOp::Assign(ref operands) = def_inst.op else {
        return false;
    };
    if operands.len() != 2 || !operands.contains(&phi_val) {
        return false;
    }

    // The remaining operand is the candidate step value.
    let step = if operands[0] == phi_val {
        operands[1]
    } else {
        operands[0]
    };

    // The step must itself be defined by a constant.
    let step_block = ssa.block(ssa.def_of(step).block);
    step_block
        .phis
        .iter()
        .chain(step_block.body.iter())
        .any(|inst| inst.value == step && matches!(inst.op, SsaOp::Const(_)))
}

/// Detect phi nodes that represent loop induction variables.
/// Returns the set of SsaValues (phi results) that are simple induction variables.
fn detect_induction_phis( ssa: &SsaBody, back_edges: &HashSet<(BlockId, BlockId)>, ) -> HashSet { let mut induction_vars = HashSet::new(); for block in &ssa.blocks { for phi in &block.phis { if let SsaOp::Phi(ref operands) = phi.op { if operands.len() != 2 { continue; } // Identify which operand comes via back edge let mut back_edge_op = None; let mut init_op = None; for &(pred_blk, operand_val) in operands { if back_edges.contains(&(pred_blk, block.id)) { back_edge_op = Some(operand_val); } else { init_op = Some(operand_val); } } if let (Some(back_val), Some(_init_val)) = (back_edge_op, init_op) { if is_simple_increment(ssa, back_val, phi.value) { induction_vars.insert(phi.value); } } } } } induction_vars } /// Transfer a single block: process phis then body, return exit state. pub(super) fn transfer_block( block: &SsaBlock, cfg: &Cfg, ssa: &SsaBody, transfer: &SsaTaintTransfer, mut state: SsaTaintState, induction_vars: &HashSet, pred_states: Option<&PredStates>, ) -> SsaTaintState { // Process phis let block_idx = block.id.0 as usize; for phi in &block.phis { if let SsaOp::Phi(ref operands) = phi.op { // Induction variable optimization: skip back-edge operands let is_induction = induction_vars.contains(&phi.value); let mut combined_caps = Cap::empty(); let mut combined_origins: SmallVec<[TaintOrigin; 2]> = SmallVec::new(); let mut all_tainted_validated = true; let mut any_tainted = false; for &(pred_blk, operand_val) in operands { // Skip back-edge operands for induction vars if is_induction && pred_blk.0 >= block.id.0 { continue; } // Skip predecessor operands from infeasible paths if let Some(ps) = pred_states { if let Some(pred_st) = ps.get(&(block_idx, pred_blk.0 as usize)) { if pred_st.path_env.as_ref().is_some_and(|e| e.is_unsat()) { continue; } } } // Use predecessor-specific state when available (path sensitivity) let operand_taint = if let Some(ps) = pred_states { ps.get(&(block_idx, pred_blk.0 as usize)) .and_then(|pred_st| pred_st.get(operand_val)) } 
else { None }; // Fall back to joined entry state let operand_taint = operand_taint.or_else(|| state.get(operand_val)); if let Some(taint) = operand_taint { any_tainted = true; combined_caps |= taint.caps; for orig in &taint.origins { push_origin_bounded(&mut combined_origins, *orig); } // Path sensitivity: check if this operand is validated in its predecessor if let Some(ps) = pred_states { if let Some(pred_st) = ps.get(&(block_idx, pred_blk.0 as usize)) { let var_name = ssa .value_defs .get(operand_val.0 as usize) .and_then(|vd| vd.var_name.as_deref()); if let Some(name) = var_name { if let Some(sym) = transfer.interner.get(name) { if !pred_st.validated_must.contains(sym) { all_tainted_validated = false; } } else { all_tainted_validated = false; } } else { all_tainted_validated = false; } } else { all_tainted_validated = false; } } else { all_tainted_validated = false; } } } if combined_caps.is_empty() { state.remove(phi.value); } else { state.set( phi.value, VarTaint { caps: combined_caps, origins: combined_origins, uses_summary: false, }, ); // Path sensitivity: if all tainted predecessors validated, propagate to phi result if any_tainted && all_tainted_validated { if let Some(name) = ssa .value_defs .get(phi.value.0 as usize) .and_then(|vd| vd.var_name.as_deref()) { if let Some(sym) = transfer.interner.get(name) { state.validated_may.insert(sym); state.validated_must.insert(sym); } } } } } } // Abstract value phi join (from predecessor exit states) if state.abstract_state.is_some() { for phi in &block.phis { if let SsaOp::Phi(ref operands) = phi.op { use crate::abstract_interp::AbstractValue; let is_induction = induction_vars.contains(&phi.value); let mut joined = AbstractValue::bottom(); let mut any_operand = false; for &(pred_blk, operand_val) in operands { if is_induction && pred_blk.0 >= block.id.0 { continue; } // Skip infeasible predecessors if let Some(ps) = pred_states { if let Some(pred_st) = ps.get(&(block_idx, pred_blk.0 as usize)) { if 
pred_st.path_env.as_ref().is_some_and(|e| e.is_unsat()) {
                                continue;
                            }
                        }
                    }

                    // Look up operand abstract value from predecessor exit state
                    let pred_abs = pred_states
                        .and_then(|ps| ps.get(&(block_idx, pred_blk.0 as usize)))
                        .and_then(|s| s.abstract_state.as_ref())
                        .map(|a| a.get(operand_val))
                        .unwrap_or_else(AbstractValue::top);
                    joined = joined.join(&pred_abs);
                    any_operand = true;
                }
                if any_operand {
                    if let Some(ref mut abs) = state.abstract_state {
                        abs.set(phi.value, joined);
                    }
                }
            }
        }
    }

    // Process body
    for inst in &block.body {
        transfer_inst(inst, cfg, ssa, transfer, &mut state);
    }

    state
}

/// Compute per-successor states with branch-aware predicate handling.
///
/// For `Branch` terminators, inspects the condition node for validation/predicate
/// info and produces specialized true/false states. For other terminators,
/// propagates the exit state uniformly.
///
/// # Parameters
/// - `block`: the block whose terminator (and `succs`) decide the outgoing edges.
/// - `cfg`: queried with `node_weight` for the branch condition's node info.
/// - `ssa`: forwarded to the narrowing helpers and to constraint lowering.
/// - `transfer`: supplies the symbol interner and `const_values`.
/// - `exit_state`: state at the end of `block`; cloned once per successor.
///
/// # Returns
/// One `(BlockId, SsaTaintState)` pair per outgoing edge.
fn compute_succ_states(
    block: &SsaBlock,
    cfg: &Cfg,
    ssa: &SsaBody,
    transfer: &SsaTaintTransfer,
    exit_state: &SsaTaintState,
) -> SmallVec<[(BlockId, SsaTaintState); 2]> {
    match &block.terminator {
        Terminator::Branch {
            cond,
            true_blk,
            false_blk,
            condition,
        } => {
            // Defensive: `cond` should always be present in `cfg`, but cross-file
            // proxy CFGs synthesized in `rebuild_body_graph` previously missed
            // Branch.cond entries (now fixed above). Falling through to uniform
            // propagation on a missing cond preserves liveness rather than
            // crashing the worker thread if a future regression re-introduces it.
            let Some(cond_info) = cfg.node_weight(*cond) else {
                return smallvec::smallvec![
                    (*true_blk, exit_state.clone()),
                    (*false_blk, exit_state.clone()),
                ];
            };

            if cond_info.kind == crate::cfg::StmtKind::If && !cond_info.condition_vars.is_empty() {
                let cond_text = cond_info.condition_text.as_deref().unwrap_or("");
                let (kind, target_var) = classify_condition_with_target(cond_text);

                // Determine which vars to apply validation to:
                // If we extracted a specific target, narrow to just that var
                // (if it's in condition_vars). Otherwise use all condition_vars.
                let effective_vars: Vec = if let Some(ref target) = target_var {
                    if cond_info.condition_vars.iter().any(|v| v == target) {
                        vec![target.clone()]
                    } else {
                        cond_info.condition_vars.clone()
                    }
                } else {
                    cond_info.condition_vars.clone()
                };

                // Both edges start from the same exit state and are then
                // specialised independently below.
                let mut true_state = exit_state.clone();
                let mut false_state = exit_state.clone();

                // Detect semantic negation that isn't captured by AST-level
                // `condition_negated` (which only detects unary `!`/`not`).
                //
                // - Python `not in`: comparison operator, not unary negation
                // - TypeCheck with `!==`/`!=`: "typeof x !== 'number'" means
                //   the true branch is the REJECT path (type mismatch)
                let cond_lower = cond_text.to_ascii_lowercase();
                let has_semantic_negation = (kind == PredicateKind::AllowlistCheck
                    && cond_lower.contains(" not in "))
                    || (kind == PredicateKind::TypeCheck
                        && (cond_lower.contains("!==") || cond_lower.contains("!=")));
                let effective_negated = if has_semantic_negation {
                    !cond_info.condition_negated
                } else {
                    cond_info.condition_negated
                };

                // True edge polarity: effective_negated XOR true
                let true_polarity = !effective_negated;
                let false_polarity = effective_negated;

                // Apply validation/predicate to true branch
                apply_branch_predicates(
                    &mut true_state,
                    &effective_vars,
                    kind,
                    true_polarity,
                    transfer.interner,
                    ssa,
                );
                // Apply validation/predicate to false branch
                apply_branch_predicates(
                    &mut false_state,
                    &effective_vars,
                    kind,
                    false_polarity,
                    transfer.interner,
                    ssa,
                );

                // PathFact branch narrowing (language-agnostic). The
                // text-level rejection patterns recognised by
                // `classify_path_rejection_atom` cover the common idioms
                // across all 10 supported languages:
                //   * `.contains("..")` (Rust, Java, JS String) /
                //     `.includes("..")` (JS/TS) / `.include?("..")` (Ruby) /
                //     `strings.Contains(s, "..")` (Go) /
                //     `".." in s` (Python).
                //   * `.starts_with('/')` (Rust) /
                //     `.startsWith("/")` (JS/TS/Java) /
                //     `.startswith("/")` (Python) /
                //     `.start_with?("/")` (Ruby) /
                //     `strings.HasPrefix(s, "/")` (Go).
                //   * `.is_absolute()` / `.isAbsolute()` /
                //     `os.path.isabs(s)` / `filepath.IsAbs(s)`.
                //
                // Rust positive-assertion `prefix_lock` recognition still
                // fires regardless of language; for non-Rust languages the
                // assertion classifier returns `None` for unfamiliar shapes.
                apply_path_fact_branch_narrowing_with_interner(
                    &mut true_state,
                    &mut false_state,
                    cond_text,
                    &effective_vars,
                    ssa,
                    Some(transfer.interner),
                    effective_negated,
                );

                // Validation-call err-check narrowing. When the condition
                // is an `err`-check (e.g. `if err != nil`) and `err` is the
                // result of a known value-producing validator
                // (`strconv.Atoi`, `parseInt`, etc.), mark the validator's
                // input argument(s) as validated on the success branch
                // (where `err` is null / `Ok` / no exception). Mirrors the
                // ValidationCall pathway but for the two-statement
                // validation idiom common in Go:
                //   `_, err := strconv.Atoi(input); if err != nil { return }`
                // post-condition: input is provably a numeric string on the
                // surviving (`err == nil`) branch, so downstream sinks like
                // `db.Query("... " + input)` should suppress.
                if matches!(kind, PredicateKind::ErrorCheck) {
                    apply_validation_err_check_narrowing(
                        &mut true_state,
                        &mut false_state,
                        cond_text,
                        &cond_info.condition_vars,
                        ssa,
                        block.id,
                        transfer.interner,
                    );
                }

                // Generic input-validator branch narrowing. Recognises the
                // two-statement idiom
                //   `const err = validate(x); if (err) throw …;`
                // (also `if (!isValid(x)) throw`), kinds the predicate
                // classifier returns Unknown / NullCheck / ErrorCheck for
                // because the if-condition is a bare result variable, not a
                // direct call expression. The narrowing only fires when
                // the condition has exactly one variable and that
                // variable's reaching SSA def is a Call to a callee
                // recognised by `classify_input_validator_callee`.
                //
                // Motivated by Novu CVE GHSA-4x48-cgf9-q33f
                // (`const ssrfError = await validateUrlSsrf(child.webhookUrl);
                //   if (ssrfError) throw …;`).
                if matches!(
                    kind,
                    PredicateKind::Unknown | PredicateKind::NullCheck | PredicateKind::ErrorCheck
                ) {
                    apply_input_validator_branch_narrowing(
                        &mut true_state,
                        &mut false_state,
                        cond_text,
                        &cond_info.condition_vars,
                        ssa,
                        block.id,
                        transfer.interner,
                    );
                }

                // Constraint refinement
                //
                // `lower_condition` returns a ConditionExpr that represents the
                // full semantic condition (it already applies `condition_negated`
                // internally). The true branch is where the condition holds
                // (polarity=true), the false branch is where it doesn't
                // (polarity=false). We do NOT reuse `effective_negated` here:
                // that variable incorporates `has_semantic_negation`, which is a
                // predicate-system concern, not a constraint-system concern.
                if true_state.path_env.is_some() || false_state.path_env.is_some() {
                    // Prefer pre-lowered structured condition from terminator;
                    // fall back to text-based lowering for backward compat.
                    let cond_expr = if let Some(pre_lowered) = condition {
                        (**pre_lowered).clone()
                    } else {
                        constraint::lower_condition(cond_info, ssa, block.id, transfer.const_values)
                    };
                    if !matches!(cond_expr, constraint::ConditionExpr::Unknown) {
                        if let Some(ref mut env) = true_state.path_env {
                            *env = constraint::refine_env(env, &cond_expr, true);
                            // Only logged here; the unsat env is handled by the
                            // contradiction-pruning step below.
                            if env.is_unsat() {
                                tracing::debug!(
                                    block = ?block.id,
                                    cond = cond_text,
                                    "constraint: pruned true branch (unsat)"
                                );
                            }
                        }
                        if let Some(ref mut env) = false_state.path_env {
                            *env = constraint::refine_env(env, &cond_expr, false);
                            if env.is_unsat() {
                                tracing::debug!(
                                    block = ?block.id,
                                    cond = cond_text,
                                    "constraint: pruned false branch (unsat)"
                                );
                            }
                        }
                    }
                }

                // Contradiction pruning.
                //
                // Two sources of contradiction:
                //   (a) `predicates`: a known_true and known_false bit
                //       set for the same predicate kind on the same
                //       symbol. This is genuine: prior branches asserted
                //       conflicting truth values about the same predicate,
                //       so the joined branch is unreachable. Reset the
                //       branch state to bot.
                //   (b) `path_env.is_unsat()`: the constraint solver's
                //       interval / nullability domain proved the branch
                //       infeasible. Empirically the constraint refinement
                //       can over-prune branches whose feasibility hinges
                //       on data introduced by writeback / container ops
                //       (`err` from `dec.Decode(body)` becoming
                //       constraint-bounded only after the writeback's
                //       caps land on the destination). In those cases
                //       resetting the data state to bot drops legitimate
                //       taint flow that travels through the surviving
                //       branch; see CVE-2024-31450's
                //       `if err := …Decode(emoji); err != nil { return }`
                //       shape.
                //
                // To preserve soundness without losing real flow, only
                // reset to bot when the contradiction is in `predicates`.
                // For path_env-only unsat, drop path_env (treat as Top
                // for downstream path-sensitive reasoning) and keep the
                // rest of the state: values, field_taint, heap,
                // predicates, validated_*, abstract_state.
                let true_pred_contra = true_state
                    .predicates
                    .iter()
                    .any(|(_, s)| s.has_contradiction());
                let false_pred_contra = false_state
                    .predicates
                    .iter()
                    .any(|(_, s)| s.has_contradiction());
                if true_pred_contra {
                    true_state = SsaTaintState::bot();
                } else if true_state.path_env.as_ref().is_some_and(|e| e.is_unsat()) {
                    true_state.path_env = None;
                }
                if false_pred_contra {
                    false_state = SsaTaintState::bot();
                } else if false_state.path_env.as_ref().is_some_and(|e| e.is_unsat()) {
                    false_state.path_env = None;
                }

                smallvec::smallvec![(*true_blk, true_state), (*false_blk, false_state),]
            } else {
                // Non-If condition or no condition vars: uniform propagation
                smallvec::smallvec![
                    (*true_blk, exit_state.clone()),
                    (*false_blk, exit_state.clone()),
                ]
            }
        }
        Terminator::Goto(_) => {
            // `block.succs` is authoritative.
The terminator target records // the single logical successor (or the first of a collapsed // ≥3-way fanout, see src/ssa/lower.rs `three_successor_collapse`). // Propagating only the terminator target would drop flow to the // other successors; iterate `succs` instead so every downstream // block receives the exit state. block .succs .iter() .map(|s| (*s, exit_state.clone())) .collect() } Terminator::Switch { .. } => { // Switch: all targets and default receive the same input state. // Per-target branch narrowing would require per-case literal // metadata on the terminator (a follow-up); for now, uniform // propagation across `block.succs` preserves soundness. block .succs .iter() .map(|s| (*s, exit_state.clone())) .collect() } Terminator::Return(_) | Terminator::Unreachable => { // `block.succs` is authoritative for analysis flow; the terminator // is advisory. Lowering records finally/cleanup continuation // edges on the try-body's succs even when the structured // terminator is `Return`/`Unreachable`. Propagate the exit state // across those edges (determinism: iterate in stored order) so // downstream analysis sees the flow. Empty `succs` preserves the // true-terminal fast path. block .succs .iter() .map(|s| (*s, exit_state.clone())) .collect() } } } /// Apply validation and predicate bits for a branch edge. fn apply_branch_predicates( state: &mut SsaTaintState, condition_vars: &[String], kind: PredicateKind, polarity: bool, interner: &SymbolInterner, ssa: &SsaBody, ) { // Validation-like predicates: mark condition vars as validated when polarity is true if matches!( kind, PredicateKind::ValidationCall | PredicateKind::AllowlistCheck | PredicateKind::TypeCheck ) && polarity { for var in condition_vars { if let Some(sym) = interner.get(var) { state.validated_may.insert(sym); state.validated_must.insert(sym); } } } // RelativeUrlValidated: TRUE branch is the validated path // (`x.startsWith("/")` succeeded → `x` cannot redirect off-host). 
// Cap-aware: clear `Cap::OPEN_REDIRECT` only; non-redirect sinks // (XSS / SQLi / FILE_IO) downstream still fire on residual taint. if kind == PredicateKind::RelativeUrlValidated && polarity { for var in condition_vars { let mut to_clear: SmallVec<[SsaValue; 4]> = SmallVec::new(); for (val, _) in state.values.iter() { if let Some(name) = ssa .value_defs .get(val.0 as usize) .and_then(|vd| vd.var_name.as_deref()) { if name == var { to_clear.push(*val); } } } for val in to_clear { if let Some(taint) = state.get(val).cloned() { let new_caps = taint.caps & !Cap::OPEN_REDIRECT; if new_caps.is_empty() { state.remove(val); } else { state.set( val, VarTaint { caps: new_caps, origins: taint.origins, uses_summary: taint.uses_summary, }, ); } } } } } // HostAllowlistValidated: TRUE branch is the validated path // (`new URL(x).host === ALLOWED` succeeded → `x` cannot redirect off-host). // Cap-aware: clear `Cap::OPEN_REDIRECT` only; non-redirect sinks downstream // still fire on the residual taint caps. Mirrors the // `RelativeUrlValidated` handler exactly, the only difference is the // recogniser shape (multi-statement parse + host comparison instead of // inline leading-slash check). if kind == PredicateKind::HostAllowlistValidated && polarity { for var in condition_vars { let mut to_clear: SmallVec<[SsaValue; 4]> = SmallVec::new(); for (val, _) in state.values.iter() { if let Some(name) = ssa .value_defs .get(val.0 as usize) .and_then(|vd| vd.var_name.as_deref()) { if name == var { to_clear.push(*val); } } } for val in to_clear { if let Some(taint) = state.get(val).cloned() { let new_caps = taint.caps & !Cap::OPEN_REDIRECT; if new_caps.is_empty() { state.remove(val); } else { state.set( val, VarTaint { caps: new_caps, origins: taint.origins, uses_summary: taint.uses_summary, }, ); } } } } } // ShellMetaValidated: inverted polarity, the FALSE branch (no metachar // found) is the validated path; the TRUE branch is the rejection path. 
// // Cap-aware: shell-metachar rejection only proves the value is safe for // shell-family sinks (it strips `;|&` etc.), not for SQL, path, code-exec, // SSRF, or other sink classes. Clear `Cap::SHELL_ESCAPE` from the var's // taint on the validated branch instead of marking it generically // validated, so non-shell sinks downstream still fire on the residual // taint caps. if kind == PredicateKind::ShellMetaValidated && !polarity { for var in condition_vars { let mut to_clear: SmallVec<[SsaValue; 4]> = SmallVec::new(); for (val, _) in state.values.iter() { if let Some(name) = ssa .value_defs .get(val.0 as usize) .and_then(|vd| vd.var_name.as_deref()) { if name == var { to_clear.push(*val); } } } for val in to_clear { if let Some(taint) = state.get(val).cloned() { let new_caps = taint.caps & !Cap::SHELL_ESCAPE; if new_caps.is_empty() { state.remove(val); } else { state.set( val, VarTaint { caps: new_caps, origins: taint.origins, uses_summary: taint.uses_summary, }, ); } } } } } // Whitelisted predicate kinds: update PredicateSummary bits if let Some(bit_idx) = predicate_kind_bit(kind) { for var in condition_vars { if let Some(sym) = interner.get(var) { let mut summary = state .predicates .binary_search_by_key(&sym, |(id, _)| *id) .ok() .map(|idx| state.predicates[idx].1) .unwrap_or_else(PredicateSummary::empty); if polarity { summary.known_true |= 1 << bit_idx; } else { summary.known_false |= 1 << bit_idx; } match state.predicates.binary_search_by_key(&sym, |(id, _)| *id) { Ok(idx) => state.predicates[idx].1 = summary, Err(idx) => state.predicates.insert(idx, (sym, summary)), } } } } } /// Mark the input arguments of a value-producing validator as validated /// on the success branch of a downstream `err`-check. 
/// /// Recognised idiom (most idiomatic in Go): /// /// ```text /// _, err := strconv.Atoi(input) /// if err != nil { return } /// // → input is provably a valid integer string on the surviving branch /// ``` /// /// Walks `cond_info.condition_vars` to locate the SSA value bound to the /// condition's `err`/result variable, finds the SsaInst that defined that /// value, and, if the defining op is a [`SsaOp::Call`] to a /// [`crate::ssa::type_facts::is_int_producing_callee`], copies the call's /// argument variable names into `validated_must` / `validated_may` on the /// `err == null` branch. /// /// The "success" branch direction is determined from `cond_text`: /// /// * `err == nil` / `err == None` / `error == nil` / `is_ok()` → TRUE branch /// * `err != nil` / `error != nil` / `is_err()` → FALSE branch /// /// Strict-additive: when the condition does not match the err-check shape, /// the defining op is not a Call, the callee is not recognised as a /// validator, or the arg has no SSA-level var_name to mark, the function /// is a no-op. fn apply_validation_err_check_narrowing( true_state: &mut SsaTaintState, false_state: &mut SsaTaintState, cond_text: &str, condition_vars: &[String], ssa: &SsaBody, block: BlockId, interner: &SymbolInterner, ) { if condition_vars.is_empty() { return; } // Determine which branch corresponds to "err is null / Ok / no error". // Defaults to FALSE for `err != nil`-style; flips to TRUE for // `err == nil`-style and `is_ok()`. let lower = cond_text.to_ascii_lowercase(); let success_branch_is_true = lower.contains("== nil") || lower.contains("== none") || lower.contains("is none") || lower.contains("is_ok") || lower.contains("=== null") || lower.contains("== null"); // Resolve `err`'s reaching SSA value (last def in this or earlier block). // We restrict to single-var conditions to avoid mis-attributing // validation when the condition mixes err and another variable // (e.g. `err != nil || other`). 
if condition_vars.len() != 1 { return; } let err_name = condition_vars[0].as_str(); let err_val = match resolve_var_to_ssa_value(err_name, ssa, block) { Some(v) => v, None => return, }; // Find the defining SsaInst. Search across blocks because the // assignment might have happened in a predecessor. let def_inst = ssa .blocks .iter() .flat_map(|b| b.body.iter()) .find(|i| i.value == err_val); let Some(def_inst) = def_inst else { return }; let SsaOp::Call { ref callee, ref args, .. } = def_inst.op else { return; }; if !crate::ssa::type_facts::is_int_producing_callee(callee) { return; } // Collect candidate input arg variable names: every SSA value across // every positional arg group, looked up by var_name. Conservative , // we mark *all* of them validated rather than guessing which arg the // validator narrows. The validators we recognise here // (`strconv.Atoi`, `parseInt`, `ParseFloat`, …) all take exactly one // primary string argument, so in practice this collects one name. let mut arg_names: SmallVec<[String; 2]> = SmallVec::new(); for arg_group in args { for &v in arg_group { if let Some(name) = ssa .value_defs .get(v.0 as usize) .and_then(|vd| vd.var_name.as_deref()) { if !arg_names.iter().any(|s: &String| s == name) { arg_names.push(name.to_string()); } } } } if arg_names.is_empty() { return; } let success_state = if success_branch_is_true { true_state } else { false_state }; for name in &arg_names { if let Some(sym) = interner.get(name) { success_state.validated_may.insert(sym); success_state.validated_must.insert(sym); } } } /// Mark the input arguments of a generic input-validator helper as /// validated on the success branch of a downstream truthiness check. 
/// /// Recognised idioms: /// /// ```text /// // ErrorReturning (Novu CVE GHSA-4x48-cgf9-q33f) /// const err = validateUrlSsrf(child.webhookUrl); /// if (err) throw …; /// // → child.webhookUrl is validated on the falsy (false) branch /// /// // BooleanTrueIsValid /// const ok = isValidPath(p); /// if (!ok) throw …; /// // → p is validated on the !ok==false (true value of ok) branch /// ``` /// /// Resolves `condition_vars[0]` to its reaching SSA def, checks that /// the def is a [`SsaOp::Call`] to a callee classified by /// [`classify_input_validator_callee`], and copies the call's input /// argument variable names into `validated_must`/`validated_may` on /// the branch the validator's polarity says succeeded. /// /// The branch direction starts from `cond_text` (uses the same /// `success_branch_is_true` heuristics as /// [`apply_validation_err_check_narrowing`]) and is then flipped for /// `BooleanTrueIsValid` validators (a truthy result means "valid", so /// the *true* branch carries the validation). /// /// Strict-additive: when no condition var matches, the def isn't a /// Call, the callee isn't a recognised validator, or no arg has an /// SSA-level var_name, the function is a no-op. fn apply_input_validator_branch_narrowing( true_state: &mut SsaTaintState, false_state: &mut SsaTaintState, cond_text: &str, condition_vars: &[String], ssa: &SsaBody, block: BlockId, interner: &SymbolInterner, ) { if condition_vars.len() != 1 { return; } let result_name = condition_vars[0].as_str(); let result_val = match resolve_var_to_ssa_value(result_name, ssa, block) { Some(v) => v, None => return, }; let def_inst = ssa .blocks .iter() .flat_map(|b| b.body.iter()) .find(|i| i.value == result_val); let Some(def_inst) = def_inst else { return }; let SsaOp::Call { ref callee, ref args, .. 
} = def_inst.op else { return; }; let polarity = match crate::ssa::type_facts::classify_input_validator_callee(callee.as_str()) { Some(p) => p, None => return, }; // Determine the success branch. // // Default: bare `if (X)` truthy-test → success is the FALSE branch // for ErrorReturning (X truthy means "error"), and the TRUE branch // for BooleanTrueIsValid (X truthy means "valid"). // // Equality checks (`X === null`, `X == null`, etc.) flip the // truthiness sense, match the same set of patterns // `apply_validation_err_check_narrowing` uses for the `err == nil` // family. let lower = cond_text.to_ascii_lowercase(); let cond_text_says_null_branch_is_true = lower.contains("== nil") || lower.contains("== none") || lower.contains("is none") || lower.contains("is_ok") || lower.contains("=== null") || lower.contains("== null"); let success_branch_is_true = match polarity { InputValidatorPolarity::ErrorReturning => cond_text_says_null_branch_is_true, InputValidatorPolarity::BooleanTrueIsValid => !cond_text_says_null_branch_is_true, }; // Collect candidate input-arg variable names. Conservative, every // SSA value across every positional arg group, looked up by // var_name, OR'd into validated_*. Validators usually take one // primary arg so this collects ≤ 1 name in practice. let mut arg_names: SmallVec<[String; 2]> = SmallVec::new(); for arg_group in args { for &v in arg_group { if let Some(name) = ssa .value_defs .get(v.0 as usize) .and_then(|vd| vd.var_name.as_deref()) { if !arg_names.iter().any(|s: &String| s == name) { arg_names.push(name.to_string()); } } } } if arg_names.is_empty() { return; } let success_state = if success_branch_is_true { true_state } else { false_state }; for name in &arg_names { if let Some(sym) = interner.get(name) { success_state.validated_may.insert(sym); success_state.validated_must.insert(sym); } } } /// JS/TS Array-method validator-callback narrowing. 
///
/// `arr.filter(isSafeIdentifier)`, `arr.find(isValidId)`, and the
/// `findLast` variant are gating array methods: whatever they return is
/// made up of elements that passed the callback. When the callback
/// argument resolves to a name that `classify_input_validator_callee`
/// tags as `BooleanTrueIsValid` (`isValid…`, `isSafe…`, `hasValid…` and
/// snake-case variants), every element of the result satisfies the
/// validator, so downstream sinks treat the call's result like
/// validated taint.
///
/// This is the call-site counterpart of the `if (isValidX(x)) use(x)`
/// narrowing in [`apply_input_validator_branch_narrowing`]: taint stops
/// at the filter/find gate instead of leaking through later
/// `Array[index]`/template/sink reads.
///
/// Strict-additive: if the callback's name does not match the
/// validator pattern (anonymous arrow, opaque identifier, etc.), the
/// helper returns `false`, changes nothing, and the default propagation
/// runs unchanged.
///
/// Motivated by CVE-2026-42353 (i18next-http-middleware path
/// traversal), whose patched fix is `languages.filter(utils.isSafeIdentifier)`
/// before forwarding `languages` into the backend connector, and by the
/// dual deferred TS-side gap CVE-2026-25544 (Payload sqli).
///
/// Returns `true` when the narrowing fired: `return_bits` and
/// `return_origins` were cleared and the result's variable name was marked
/// validated in `state`.
fn try_array_method_validator_callback_narrowing(
    inst: &SsaInst,
    info: &NodeInfo,
    callee: &str,
    args: &[SmallVec<[SsaValue; 2]>],
    return_bits: &mut Cap,
    return_origins: &mut SmallVec<[TaintOrigin; 2]>,
    state: &mut SsaTaintState,
    transfer: &SsaTaintTransfer,
    ssa: &SsaBody,
) -> bool {
    if !matches!(transfer.lang, Lang::JavaScript | Lang::TypeScript) {
        return false;
    }

    // Method-call shape: the callee text must contain a `.` and its last
    // segment must be one of the gating array methods. `findIndex` /
    // `every` / `some` are excluded because they return scalar shapes
    // (index, boolean) rather than a filtered collection, and
    // element-level validation does not apply to such a result.
    let Some((_, method)) = callee.rsplit_once('.') else {
        return false;
    };
    if !matches!(method, "filter" | "find" | "findLast") {
        return false;
    }

    // Name of the callable passed as the first positional argument.
    // Two channels, tried in order:
    //   1. `info.arg_callees`, populated by `extract_arg_callees`
    //      (`call_ident_of` walks call shapes inside the arg). Catches
    //      `arr.filter(cb())` and dotted-callback shapes where the
    //      tree-sitter node kind reaches `Kind::CallFn` or
    //      `Kind::CallMethod`.
    //   2. SSA `value_defs[v].var_name` of the arg's SSA values, for the
    //      bare-identifier shape (`arr.filter(cb)`): there the AST node
    //      is a plain identifier and `extract_arg_callees` pushes `None`
    //      because there is no call to recurse into. This is the shape
    //      every patched CVE fix uses, so it is the dominant source of
    //      validator callbacks in real code.
    let Some(first_arg) = args.first() else {
        return false;
    };
    let callback_name = info
        .arg_callees
        .first()
        .and_then(|slot| slot.as_deref())
        .or_else(|| {
            first_arg.iter().find_map(|&v| {
                ssa.value_defs
                    .get(v.0 as usize)
                    .and_then(|vd| vd.var_name.as_deref())
            })
        });
    let Some(callback_name) = callback_name else {
        return false;
    };

    let is_boolean_validator = crate::ssa::type_facts::classify_input_validator_callee(
        callback_name,
    ) == Some(InputValidatorPolarity::BooleanTrueIsValid);
    if !is_boolean_validator {
        return false;
    }

    // Strip every cap from the return value: the returned array (or the
    // single found element) consists only of elements the recognised
    // validator approved. All caps are dropped rather than a subset
    // because the validator's body is opaque to this layer; a future
    // extension could narrow caps by inspecting the body's rejection
    // patterns.
    *return_bits = Cap::empty();
    return_origins.clear();

    // Also mark the result's var_name as validated, mirroring the
    // [`apply_input_validator_branch_narrowing`] insertion. This helps
    // direct same-name reads of the rebound array (`arr =
    // arr.filter(p)` then `arr.length`) but does not propagate through
    // Assigns to differently-named bindings (`const lng = arr[0]`); the
    // `return_bits` strip above is what gates those downstream flows.
    let result_sym = ssa
        .value_defs
        .get(inst.value.0 as usize)
        .and_then(|vd| vd.var_name.as_deref())
        .and_then(|name| transfer.interner.get(name));
    if let Some(sym) = result_sym {
        state.validated_must.insert(sym);
        state.validated_may.insert(sym);
    }

    true
}

/// Find the latest reaching SSA definition for `var_name` at the end of
/// `block`.
///
/// The highest-index definition of `var_name` made inside `block` wins;
/// if `block` defines none, the highest-index definition from any other
/// block is returned. Mirrors `crate::constraint::lower::resolve_single_var`
/// but avoids the cross-module privacy leak: callers in this module need it
/// for branch narrowing on err-check shapes.
fn resolve_var_to_ssa_value(var_name: &str, ssa: &SsaBody, block: BlockId) -> Option<SsaValue> {
    let mut latest_in_block: Option<SsaValue> = None;
    let mut latest_elsewhere: Option<SsaValue> = None;
    // `value_defs` is walked in ascending index order, so overwriting on
    // every match leaves the highest-index definition in each slot.
    for (idx, vd) in ssa.value_defs.iter().enumerate() {
        if vd.var_name.as_deref() != Some(var_name) {
            continue;
        }
        let slot = if vd.block == block {
            &mut latest_in_block
        } else {
            &mut latest_elsewhere
        };
        *slot = Some(SsaValue(idx as u32));
    }
    latest_in_block.or(latest_elsewhere)
}

/// Apply Rust path-rejection / path-assertion branch narrowing to the
/// true/false branch states produced by `compute_succ_states`.
///
/// Looks up each SSA value in the per-branch `abstract_state` whose
/// `var_name` matches one of the `effective_vars` (the condition's target
/// variables) and updates its [`PathFact`] according to the classified
/// rejection / assertion idiom.
/// /// `negated` reflects the effective negation of `cond_text`: when true, /// the condition's surface form is `!` (or `not `) /// and the True/False successor states correspond to the *rejection* / /// *surviving* arms inverted relative to the unwrapped condition. The /// narrowing functions are written against the unwrapped condition; this /// flag lets the caller route prefix-lock / rejection-axis narrowing to /// the arm where the unwrapped condition holds. #[cfg(test)] fn apply_path_fact_branch_narrowing( true_state: &mut SsaTaintState, false_state: &mut SsaTaintState, cond_text: &str, effective_vars: &[String], ssa: &SsaBody, ) { apply_path_fact_branch_narrowing_with_interner( true_state, false_state, cond_text, effective_vars, ssa, None, false, ); } fn apply_path_fact_branch_narrowing_with_interner( true_state: &mut SsaTaintState, false_state: &mut SsaTaintState, cond_text: &str, effective_vars: &[String], ssa: &SsaBody, interner: Option<&SymbolInterner>, negated: bool, ) { use crate::abstract_interp::PathFact; use crate::abstract_interp::path_domain::{ PathAssertion, PathRejection, classify_path_assertion, classify_path_rejection_axes, cond_has_pre_negated_islocal_clause, }; let rejection_axes = classify_path_rejection_axes(cond_text); let assertion = classify_path_assertion(cond_text); if rejection_axes.is_empty() && matches!(assertion, PathAssertion::None) { return; } // Resolve the "safe arm" for the rejection axes. // // `classify_path_rejection_axes` reports axes that hold on the FALSE // branch of `cond_text` AS WRITTEN, with one exception: the // `!filepath.IsLocal(...)` Go idiom is matched at the clause level // and the classifier consumes the leading `!` itself (the safe arm // remains the FALSE branch of the whole condition). // // For polarity-blind atoms like `!path.contains("..")`, the // classifier ignores the leading `!` and still extracts `..`. 
In // that shape, AST detects the unary `!` and sets // `condition_negated = true`, but the rejection axis's *true* safe // arm is the TRUE branch of the whole condition. So when // `negated == true` AND no clause is the pre-negated IsLocal idiom, // flip the narrow target. let rejection_pre_negated = cond_has_pre_negated_islocal_clause(cond_text); let rejection_safe_is_true = negated && !rejection_pre_negated; // Mark validated_may on the safe arm when a path-rejection // pattern fires. Mirrors the AllowlistCheck quirk that already // marks validated on the rejection-arm via `apply_branch_predicates` // for languages whose `.contains(...)` / membership idiom hits the // AllowlistCheck classifier, but normalises behaviour for shapes // like C `strstr(path, "..") != NULL` that hit the NullCheck arm // first and never get a chance to mark validation through the // allowlist path. if !rejection_axes.is_empty() && let Some(intern) = interner { let safe_state: &mut SsaTaintState = if rejection_safe_is_true { &mut *true_state } else { &mut *false_state }; for var in effective_vars { if let Some(sym) = intern.get(var) { safe_state.validated_may.insert(sym); safe_state.validated_must.insert(sym); } } } // Collect SSA values whose `var_name` appears in `effective_vars`. We // pick the *highest-index* matching value (latest definition by SSA // ordering, closest to the current program point). Absent an // explicit name table, iterating `ssa.value_defs` is the only way to // recover the mapping from name → SsaValue. 
let mut targets: smallvec::SmallVec<[SsaValue; 2]> = smallvec::SmallVec::new(); for var_name in effective_vars { let mut latest: Option = None; for (idx, vd) in ssa.value_defs.iter().enumerate() { if vd.var_name.as_deref() == Some(var_name.as_str()) { latest = Some(SsaValue(idx as u32)); } } if let Some(v) = latest { targets.push(v); } } if targets.is_empty() { return; } // Apply rejection: true branch = reject (widen to Top / leave alone), // false branch = narrow the axis. The plan's polarity rule about // whether the enclosing block inherits the narrowing when the true // branch terminates is enforced by the existing CFG successor graph , // when the true branch returns/panics, only the false state reaches // subsequent blocks and the narrowed fact propagates naturally. let narrow_false = |fact: &mut PathFact| { for axis in rejection_axes.iter() { match axis { PathRejection::DotDot => { fact.dotdot = crate::abstract_interp::Tri::No; } PathRejection::AbsoluteSlash | PathRejection::IsAbsolute => { fact.absolute = crate::abstract_interp::Tri::No; } PathRejection::None => {} } } }; // Apply assertion (positive): true branch narrows prefix_lock. let narrow_true = |fact: &mut PathFact| { if let PathAssertion::PrefixLock(ref root) = assertion { let updated = fact.clone().with_prefix_lock(root); *fact = updated; } }; // Apply rejection axes to the safe arm. The rejection classifier // (`has_negated_filepath_is_local` + `classify_path_rejection_atom`) // reports axes that hold on the FALSE branch of `cond_text` AS // WRITTEN, with one exception: the `!filepath.IsLocal(...)` Go idiom // is matched at the clause level and the classifier consumes the // leading `!` itself (safe arm remains the FALSE branch). // // For polarity-blind atoms like `!path.contains("..")` the classifier // ignores the leading `!` but AST-level negation flips the safe arm // to TRUE. Use the same `rejection_safe_is_true` resolution as the // validated-marker block above so soundness is consistent. 
let rejection_state: &mut SsaTaintState = if rejection_safe_is_true { &mut *true_state } else { &mut *false_state }; for v in &targets { if let Some(ref mut abs) = rejection_state.abstract_state { let mut av = abs.get(*v); narrow_false(&mut av.path); if !av.is_top() { abs.set(*v, av); } } } // Apply prefix-lock assertion to the cond-holds branch. Unlike the // rejection classifier, `classify_path_assertion` is naive about // leading negation — it just searches cond_text for a // `starts_with`-like substring. When `condition_negated` is true // (e.g. `if !target.startsWith(ROOT) { return; }`) the assertion // actually holds on the *false* CFG edge, where the sink is reached. // Flip the destination state in that case so the lock attaches to // the surviving block. let assertion_state = if negated { &mut *false_state } else { &mut *true_state }; for v in &targets { if let Some(ref mut abs) = assertion_state.abstract_state { let mut av = abs.get(*v); narrow_true(&mut av.path); if !av.is_top() { abs.set(*v, av); } } } } // ── Context-Sensitive Inline Analysis Functions ─────────────────────── /// Build a compact taint signature from the actual argument taint at a call site. fn build_arg_taint_sig( args: &[SmallVec<[SsaValue; 2]>], receiver: &Option, state: &SsaTaintState, ) -> ArgTaintSig { let mut sig = SmallVec::new(); // Receiver taint at position usize::MAX (sentinel) if let Some(rv) = receiver { if let Some(taint) = state.get(*rv) { sig.push((usize::MAX, taint.caps.bits())); } } // Per-argument-position taint for (i, arg_vals) in args.iter().enumerate() { let mut caps = Cap::empty(); for v in arg_vals { if let Some(taint) = state.get(*v) { caps |= taint.caps; } } if !caps.is_empty() { sig.push((i, caps.bits())); } } sig.sort_by_key(|(idx, _)| *idx); ArgTaintSig(sig) } /// Attempt context-sensitive inline analysis of a callee at a specific call site. 
///
/// Returns `Some(InlineResult)` if inline analysis succeeded. Returns `None`
/// when:
///
/// * the caller is already at the k=1 depth limit
///   (`transfer.context_depth >= 1`);
/// * no inline cache is attached to `transfer`;
/// * the callee cannot be resolved to a canonical [`FuncKey`] with an
///   available body (intra-file or cross-file);
/// * a cross-file body has no `body_graph`;
/// * the callee body has more than [`MAX_INLINE_BLOCKS`] blocks.
///
/// Resolution ordering for the callee body:
///
/// 1. **Intra-file** (`transfer.callee_bodies`): resolve the callee via
///    [`resolve_local_func_key`] against this file's local summaries and
///    look up the body by canonical [`FuncKey`].  This is the intra-file
///    context-sensitive path.
/// 2. **Cross-file**: if (1) misses but
///    [`GlobalSummaries::resolve_callee`] resolves the call site to a
///    cross-file [`FuncKey`], look up the body in
///    `transfer.cross_file_bodies`.  Both in-memory and indexed-scan
///    bodies are usable here: the former arrives with `body_graph`
///    already set (pass 1), the latter has it rehydrated from
///    `node_meta` via [`rebuild_body_graph`] at load time.
///
/// The cache ([`InlineCache`]) is keyed by `(FuncKey, ArgTaintSig)`.
/// `FuncKey` carries the callee's namespace, so cross-file and intra-file
/// entries never collide even when two files define same-leaf helpers.
fn inline_analyse_callee(
    callee: &str,
    args: &[SmallVec<[SsaValue; 2]>],
    receiver: &Option<SsaValue>,
    state: &SsaTaintState,
    transfer: &SsaTaintTransfer,
    cfg: &Cfg,
    caller_ssa: &SsaBody,
    call_inst: &SsaInst,
) -> Option<InlineResult> {
    // Enforce k=1 depth limit
    if transfer.context_depth >= 1 {
        return None;
    }

    let cache_ref = transfer.inline_cache?;

    // Resolve the call site to a canonical FuncKey and the body to inline.
    // Step 1: intra-file.  Step 2: cross-file.
    //
    // Without a resolved key we cannot inline safely: bare-name lookup could
    // pick the wrong same-name sibling (e.g. `A::process/1` vs `B::process/1`).
    let normalized = callee_leaf_name(callee);
    let container_raw = callee_container_hint(callee);
    let container_hint = if container_raw.is_empty() {
        None
    } else {
        Some(container_raw)
    };
    let intra_key = transfer.callee_bodies.and_then(|_| {
        resolve_local_func_key(
            transfer.local_summaries,
            transfer.lang,
            transfer.namespace,
            normalized,
            container_hint,
        )
    });
    let intra_body = intra_key
        .as_ref()
        .and_then(|k| transfer.callee_bodies.and_then(|cb| cb.get(k)));

    let (callee_key, callee_body) = if let (Some(k), Some(b)) = (intra_key, intra_body) {
        (k, b)
    } else if let Some(gs) = transfer.global_summaries {
        // Cross-file fallback.  Build a structured query mirroring
        // resolve_callee_full (qualifier/receiver_var/caller_container) so that
        // qualified-first policy is preserved.
        let (namespace_qualifier, receiver_var) = split_qualifier(callee);
        // Name of the enclosing function, taken from the first caller
        // instruction whose CFG node records one ("" when none does).
        let caller_func = caller_ssa
            .blocks
            .iter()
            .flat_map(|b| b.phis.iter().chain(b.body.iter()))
            .find_map(|inst| {
                cfg.node_weight(inst.cfg_node)
                    .and_then(|info| info.ast.enclosing_func.as_deref())
            })
            .unwrap_or("");
        let caller_container_opt = caller_container_for(transfer, caller_func);
        let caller_container: Option<&str> = caller_container_opt.as_deref();
        let receiver_type = receiver_type_prefix(transfer, *receiver);
        let arity_hint = Some(args.len());
        let query = CalleeQuery {
            name: normalized,
            caller_lang: transfer.lang,
            caller_namespace: transfer.namespace,
            caller_container,
            receiver_type,
            namespace_qualifier,
            receiver_var,
            arity: arity_hint,
        };
        match gs.resolve_callee(&query) {
            CalleeResolution::Resolved(key) => {
                let xfile_bodies = transfer.cross_file_bodies?;
                let body = xfile_bodies.get(&key)?;
                // Indexed-scan bodies deserialized from SQLite
                // arrive with `body_graph: None`, but the load path
                // ([`rebuild_body_graph`] in `load_all_ssa_bodies`)
                // synthesizes a proxy `Cfg` from `node_meta` so the taint
                // engine can index `cfg[inst.cfg_node]` uniformly.  A
                // body that still has neither a real graph nor any
                // rehydrated metadata is structurally unusable; skip it.
                if body.body_graph.is_none() {
                    tracing::debug!(
                        callee = %normalized,
                        "cross-file inline miss: body has no body_graph and no node_meta"
                    );
                    return None;
                }
                tracing::debug!(
                    callee = %normalized,
                    namespace = %key.namespace,
                    "cross-file inline hit: using GlobalSummaries.bodies_by_key"
                );
                (key, body)
            }
            _ => return None,
        }
    } else {
        return None;
    };

    // Skip very large function bodies
    if callee_body.ssa.blocks.len() > MAX_INLINE_BLOCKS {
        tracing::debug!(
            callee = %callee_key.name,
            namespace = %callee_key.namespace,
            blocks = callee_body.ssa.blocks.len(),
            max = MAX_INLINE_BLOCKS,
            "inline miss: body too large (budget-exceeded)"
        );
        return None;
    }

    // Build cache key from actual argument taint
    let sig = build_arg_taint_sig(args, receiver, state);

    // Check cache (keyed by FuncKey + arg signature).  The cached value
    // is a structural shape; re-attribute origins to the current call
    // site before returning so two callers with matching caps but
    // different origins see their own source chains.
    //
    // NOTE(review): the key does not include `callback_bindings`, which
    // the child transfer below does consume.  Two call sites with equal
    // caps but different callback arguments would share one cached shape;
    // confirm this is intended.
    {
        let cache = cache_ref.borrow();
        if let Some(cached) = cache.get(&(callee_key.clone(), sig.clone())) {
            record_engine_note(crate::engine_notes::EngineNote::InlineCacheReused);
            return Some(apply_cached_shape(
                cached,
                args,
                receiver,
                state,
                call_inst.cfg_node,
            ));
        }
    }

    // Build per-call-site seed from actual argument taint, indexed by the
    // callee's formal parameter position (not by name).  A caller with N
    // arguments produces an N-entry `Vec<Option<VarTaint>>`; the callee's
    // `Param { index }` read picks up slot `index` directly via
    // `SsaTaintTransfer::param_seed`.  Receiver taint is carried on a
    // separate channel (`SsaTaintTransfer::receiver_seed`) consumed by
    // `SelfParam`.  Name-based keying is not needed here: the callee
    // analysis is scoped to this one call site and cannot merge with
    // another callee's param seed.
    //
    // Cross-file note: `populate_span` lazily fills `source_span` from
    // the *caller's* CFG before the origin crosses into the callee.  The
    // Param-op branch of `transfer_inst` remaps `node` to the callee's
    // own `cfg_node` and preserves only `source_span`, so without this
    // pre-fill cross-file inline would lose the caller's source line
    // entirely (finding emission in `ast.rs` uses `source_span` first,
    // falls back to indexing the caller's CFG at `node`, which is now
    // the callee's NodeIndex and resolves to a wrong or missing span).
    let populate_span = |mut o: TaintOrigin| -> TaintOrigin {
        if o.source_span.is_none() {
            if let Some(info) = cfg.node_weight(o.node) {
                o.source_span = Some(info.classification_span());
            }
        }
        o
    };
    let combine_taint = |arg_vals: &SmallVec<[SsaValue; 2]>| -> Option<VarTaint> {
        let mut combined_caps = Cap::empty();
        let mut combined_origins: SmallVec<[TaintOrigin; 2]> = SmallVec::new();
        for v in arg_vals {
            if let Some(taint) = state.get(*v) {
                combined_caps |= taint.caps;
                for orig in &taint.origins {
                    push_origin_bounded(&mut combined_origins, populate_span(*orig));
                }
            }
        }
        if combined_caps.is_empty() {
            None
        } else {
            Some(VarTaint {
                caps: combined_caps,
                origins: combined_origins,
                uses_summary: false,
            })
        }
    };
    let param_seed: Vec<Option<VarTaint>> = args.iter().map(combine_taint).collect();
    let receiver_seed: Option<VarTaint> = receiver.and_then(|rv| {
        state.get(rv).map(|taint| VarTaint {
            caps: taint.caps,
            origins: taint.origins.iter().map(|o| populate_span(*o)).collect(),
            uses_summary: false,
        })
    });

    // Detect callback arguments: when a call argument refers to a known function
    // name (resolvable to a FuncKey in the local summaries index), record the
    // mapping so the callee's analysis can resolve calls through the parameter.
    //
    // The binding value is a full `FuncKey` rather than a leaf string so the
    // child transfer can look up `callee_bodies` / `ssa_summaries` / local
    // summaries by canonical identity.
    let mut callback_bindings: HashMap<String, FuncKey> = HashMap::new();
    for block in &callee_body.ssa.blocks {
        for inst in block.phis.iter().chain(block.body.iter()) {
            let SsaOp::Param { index } = &inst.op else {
                continue;
            };
            let Some(param_name) = inst.var_name.as_ref() else {
                continue;
            };
            // Formal parameters beyond the caller's argument count have
            // nothing to bind.
            let Some(arg_group) = args.get(*index) else {
                continue;
            };
            for v in arg_group {
                let Some(arg_var_name) = caller_ssa
                    .value_defs
                    .get(v.0 as usize)
                    .and_then(|vd| vd.var_name.as_deref())
                else {
                    continue;
                };
                let norm = callee_leaf_name(arg_var_name);
                let hint_raw = callee_container_hint(arg_var_name);
                let hint = if hint_raw.is_empty() {
                    None
                } else {
                    Some(hint_raw)
                };
                if let Some(target_key) = resolve_local_func_key(
                    transfer.local_summaries,
                    transfer.lang,
                    transfer.namespace,
                    norm,
                    hint,
                ) {
                    // Only bind callbacks whose body is actually available.
                    if transfer
                        .callee_bodies
                        .is_some_and(|cb| cb.contains_key(&target_key))
                    {
                        callback_bindings.insert(param_name.clone(), target_key);
                    }
                }
            }
        }
    }
    let cb_ref = if callback_bindings.is_empty() {
        None
    } else {
        Some(&callback_bindings)
    };

    let param_seed_slice: Option<&[Option<VarTaint>]> = if param_seed.is_empty() {
        None
    } else {
        Some(param_seed.as_slice())
    };

    // INVARIANT (`inline_cache` correctness across JS/TS pass-2 rounds):
    // `global_seed` MUST remain `None` on the child transfer.  The
    // per-file [`InlineCache`] is reused across all iterations of the
    // pass-2 convergence loop in `taint::mod::analyse_multi_body`; the
    // cache is keyed by `(FuncKey, ArgTaintSig)` only, so if the
    // inlined callee could read from a caller's `global_seed`, which
    // is refined each round, the same cache key could map to two
    // different return shapes across rounds, producing a
    // non-reproducible fixed point.
    //
    // Today the invariant is preserved here (global_seed: None) so
    // cache reuse is safe without calling
    // [`inline_cache_clear_epoch`].  If a future refactor threads
    // `global_seed` into inline analysis, it MUST also clear the
    // inline cache at pass-2 round boundaries.  The test
    // `inline_analyse_callee_does_not_thread_global_seed` in
    // `ssa_transfer/tests.rs` fails loudly if this invariant is
    // broken.
    let child_transfer = SsaTaintTransfer {
        lang: transfer.lang,
        namespace: transfer.namespace,
        interner: transfer.interner,
        local_summaries: transfer.local_summaries,
        global_summaries: transfer.global_summaries,
        interop_edges: transfer.interop_edges,
        owner_body_id: BodyId(0),
        parent_body_id: None,
        global_seed: None,
        param_seed: param_seed_slice,
        receiver_seed: receiver_seed.as_ref(),
        const_values: Some(&callee_body.opt.const_values),
        type_facts: Some(&callee_body.opt.type_facts),
        xml_parser_config: Some(&callee_body.opt.xml_parser_config),
        xpath_config: Some(&callee_body.opt.xpath_config),
        ssa_summaries: transfer.ssa_summaries,
        extra_labels: transfer.extra_labels,
        base_aliases: Some(&callee_body.opt.alias_result),
        callee_bodies: None, // no recursion into further inline analysis
        inline_cache: None,
        context_depth: transfer.context_depth + 1,
        callback_bindings: cb_ref,
        points_to: Some(&callee_body.opt.points_to),
        dynamic_pts: None, // no inter-procedural container propagation at k>1
        import_bindings: transfer.import_bindings,
        promisify_aliases: transfer.promisify_aliases,
        module_aliases: None, // callee body has its own const_values; module aliases not propagated
        static_map: None, // static-map seeding is caller-body local, not propagated to inlined callees
        auto_seed_handler_params: transfer.auto_seed_handler_params,
        cross_file_bodies: transfer.cross_file_bodies,
        // Inline analysis re-lowers the callee in its own body-local
        // location space; pointer facts are body-relative, so we don't
        // forward the caller's facts.  `PointsToSummary` is the
        // cross-call substitute.
        pointer_facts: None,
    };

    // Use the callee's own body graph for inline analysis (per-body CFGs
    // have body-local NodeIndex spaces, so the caller's graph is wrong).
    let callee_cfg = callee_body.body_graph.as_ref().unwrap_or(cfg);
    let (_, callee_block_states, callee_block_exit_states) =
        run_ssa_taint_full_with_exits(&callee_body.ssa, callee_cfg, &child_transfer);

    // Extract the structural return shape from return-block exit states.
    // `block_exit_states` lets the extractor consult each return-block
    // predecessor's own exit state, which is needed to recover PathFacts
    // that would otherwise be diluted by the return-block entry join
    // (see the "merged return block" pattern the Rust SSA lowering
    // produces for `if cond { return X } Y`).
    let empty_induction = HashSet::new();
    let shape = extract_inline_return_taint(
        &callee_body.ssa,
        callee_cfg,
        &child_transfer,
        &callee_block_states,
        &callee_block_exit_states,
        &empty_induction,
    );

    // Cache the structural shape under the canonical FuncKey, then
    // re-attribute to this call site's actual arg/receiver origins.
    {
        let mut cache = cache_ref.borrow_mut();
        cache.insert((callee_key, sig), shape.clone());
    }

    Some(apply_cached_shape(
        &shape,
        args,
        receiver,
        state,
        call_inst.cfg_node,
    ))
}

/// Per-NodeIndex provenance bits for the callee's Param/SelfParam ops.
///
/// Multiple synthetic `Param` ops can share the same `cfg_node` (the
/// lowering emits them all at the function entry; see
/// [`crate::ssa::lower::reorder_external_vars`]).  When that happens, an
/// origin whose `node` points at the shared entry cannot be attributed to
/// a single param position from node identity alone.  This struct unions
/// the provenance of every Param/SelfParam sitting on the same node.
///
/// Over-attribution is safe: at apply time, set-bit indices beyond the
/// caller's actual argument count are skipped, and set bits whose param
/// contributed no taint union an empty set of caller origins.
#[derive(Copy, Clone, Debug, Default)]
struct CalleeParamNodeBits {
    /// Bit i = a `Param { index: i }` op sits on this node.
    params: u64,
    /// At least one `SelfParam` op sits on this node.
receiver: bool,
}

/// Extract the structural shape of the return value taint from an
/// inline-analyzed callee.
///
/// Replays `transfer_block` on converged return-block states and classifies
/// each contributing origin as either **callee-internal** (originated from a
/// `Source`/`CatchParam` op inside the callee body) or **caller-seeded**
/// (propagated through a `Param`/`SelfParam` op; its `node` points at the
/// callee's Param NodeIndex).
///
/// Caller-seeded origins are *not* baked into the cached shape: their
/// identity depends on the caller's argument chain, which varies across call
/// sites with matching cap signatures.  Instead, the origin position is
/// recorded as a bit in [`ReturnShape::param_provenance`] (or the
/// `receiver_provenance` flag), and the actual caller origins are re-unioned
/// in by [`apply_cached_shape`] on each cache hit.
///
/// Callee-internal origins *are* baked in: they carry `source_span` from the
/// callee CFG (stable across callers) and a placeholder `node` that the
/// applying caller overwrites with its own call-site NodeIndex.
///
/// Return blocks whose index has no entry in `block_states` (missing slot or
/// `None`) are skipped.  Returns `CachedInlineShape(None)` when no caps,
/// provenance, internal origins, or non-Top path facts were observed.
fn extract_inline_return_taint(
    ssa: &SsaBody,
    cfg: &Cfg,
    transfer: &SsaTaintTransfer,
    block_states: &[Option<SsaTaintState>],
    block_exit_states: &[Option<SsaTaintState>],
    induction_vars: &HashSet<SsaValue>,
) -> CachedInlineShape {
    // Collect all param SSA values to separate from derived values
    let param_values: HashSet<SsaValue> = ssa
        .blocks
        .iter()
        .flat_map(|b| b.phis.iter().chain(b.body.iter()))
        .filter(|i| matches!(i.op, SsaOp::Param { .. }))
        .map(|i| i.value)
        .collect();

    // Map callee Param/SelfParam NodeIndex → union of provenance bits so
    // we can identify caller-seeded origins by inspecting `orig.node`
    // (which was rewritten to the Param's cfg_node in
    // `transfer_inst::SsaOp::Param`).  Multiple Param ops may share a
    // cfg_node (synthetic external-var params emitted at the entry), so
    // a HashMap keyed to a single index would lose information; we
    // union provenance bits per node instead.
    let mut param_node_map: HashMap<NodeIndex, CalleeParamNodeBits> = HashMap::new();
    for block in &ssa.blocks {
        for inst in block.phis.iter().chain(block.body.iter()) {
            match &inst.op {
                SsaOp::Param { index } => {
                    let entry = param_node_map.entry(inst.cfg_node).or_default();
                    // Indices >= 64 do not fit the u64 bitset; the node is
                    // still registered so its origins count as caller-seeded.
                    if *index < 64 {
                        entry.params |= 1u64 << *index;
                    }
                }
                SsaOp::SelfParam => {
                    let entry = param_node_map.entry(inst.cfg_node).or_default();
                    entry.receiver = true;
                }
                _ => {}
            }
        }
    }

    // Callee-internal origins carry their span from the callee CFG (lazily
    // filled when missing) but have `node` set to a placeholder; the
    // applying call site fills in its own call-site NodeIndex via
    // `apply_cached_shape`.
    //
    // `node` is initialized to `NodeIndex::end()` (the max-u32 sentinel) so
    // a forgotten override is loud (indexing it later panics) rather than
    // silently rendering wrong spans.
    let placeholder_node = NodeIndex::end();
    let prep_internal = |o: &TaintOrigin| -> TaintOrigin {
        let mut out = *o;
        if out.source_span.is_none() {
            if let Some(info) = cfg.node_weight(o.node) {
                out.source_span = Some(info.classification_span());
            }
        }
        out.node = placeholder_node;
        out
    };

    // Internal origins all share `placeholder_node`, so the standard
    // [`push_origin_bounded`] (which dedups by node) would collapse them
    // to one entry.  Dedup by `(source_span, source_kind)` here and
    // account for truncation explicitly so the engine-note signal
    // matches the rest of the pipeline.
    let push_internal = |target: &mut SmallVec<[TaintOrigin; 2]>, orig: &TaintOrigin| {
        let new_orig = prep_internal(orig);
        if target
            .iter()
            .any(|o| o.source_span == new_orig.source_span && o.source_kind == new_orig.source_kind)
        {
            return;
        }
        if target.len() < effective_max_origins() {
            target.push(new_orig);
        } else {
            ORIGINS_TRUNCATION_COUNT.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
            record_engine_note(crate::engine_notes::EngineNote::OriginsTruncated { dropped: 1 });
        }
    };

    let mut derived_caps = Cap::empty();
    let mut derived_internal: SmallVec<[TaintOrigin; 2]> = SmallVec::new();
    let mut derived_params: u64 = 0;
    let mut derived_receiver: bool = false;

    let mut param_caps = Cap::empty();
    let mut param_internal: SmallVec<[TaintOrigin; 2]> = SmallVec::new();
    let mut param_params: u64 = 0;
    let mut param_receiver: bool = false;

    // Join of the return value's [`PathFact`] across every return block.
    // Seeded with `None` (no observation) and widened conservatively to
    // [`PathFact::top`] if any return block gives Top or the value is
    // unobservable.  Only set to a non-Top fact when every observed return
    // path proves it.
    let mut return_path_fact_acc: Option<crate::abstract_interp::PathFact> = None;
    // Per-return-block PathFact observations.  Each entry records one
    // return block's PathFact under its own predicate gate so match-arm-
    // sensitive callers can pick the arm-specific fact.  The joined
    // fallback in `return_path_fact_acc` stays the default for callers
    // that cannot distinguish paths.
    let mut per_return_path_entries: SmallVec<
        [crate::summary::ssa_summary::PathFactReturnEntry; 2],
    > = SmallVec::new();

    // Route one origin either to the provenance bits (caller-seeded: its
    // node is a Param/SelfParam node) or to the internal-origin list.
    let classify_and_push = |orig: &TaintOrigin,
                             internal: &mut SmallVec<[TaintOrigin; 2]>,
                             provenance: &mut u64,
                             receiver_prov: &mut bool| {
        match param_node_map.get(&orig.node) {
            Some(bits) => {
                *provenance |= bits.params;
                if bits.receiver {
                    *receiver_prov = true;
                }
            }
            None => {
                push_internal(internal, orig);
            }
        }
    };

    for (bid, block) in ssa.blocks.iter().enumerate() {
        let ret_val = match &block.terminator {
            Terminator::Return(rv) => rv.as_ref().copied(),
            _ => continue,
        };
        // Checked lookup: a `block_states` slice shorter than the block
        // list is treated like an unreached block instead of panicking.
        if let Some(entry_state) = block_states.get(bid).and_then(Option::as_ref) {
            let exit = transfer_block(
                block,
                cfg,
                ssa,
                transfer,
                entry_state.clone(),
                induction_vars,
                None,
            );

            if let Some(rv) = ret_val {
                // Explicit return value: use ONLY its taint.
                if let Some(taint) = exit.get(rv) {
                    if param_values.contains(&rv) {
                        param_caps |= taint.caps;
                        for orig in &taint.origins {
                            classify_and_push(
                                orig,
                                &mut param_internal,
                                &mut param_params,
                                &mut param_receiver,
                            );
                        }
                    } else {
                        derived_caps |= taint.caps;
                        for orig in &taint.origins {
                            classify_and_push(
                                orig,
                                &mut derived_internal,
                                &mut derived_params,
                                &mut derived_receiver,
                            );
                        }
                    }
                }
                // Collect the return value's PathFact.  For return blocks
                // with a single predecessor (or no predecessors) the
                // replayed `exit` state is sufficient.  For multi-predecessor
                // return blocks the entry state's AbstractState has already
                // been diluted by the join, so we additionally replay
                // `transfer_block` once per predecessor seeded from that
                // predecessor's `block_exit_states` entry, yielding a
                // predecessor-specific exit whose PathFact on `rv` still
                // carries that path's narrowing.  The per-predecessor facts
                // are then joined to describe the callee-intrinsic
                // (Top-seeded) return narrowing.
                //
                // Additionally collect per-block observations
                // (`block_fact`, `variant_inner_fact`) so the cached
                // shape's `return_path_facts` lets match-arm-sensitive
                // callers pick one path's fact without going through the
                // dilutive join.
                let single_pred = block.preds.len() <= 1;
                let mut block_outer_fact: Option<crate::abstract_interp::PathFact> = None;
                let mut block_variant_inner: Option<crate::abstract_interp::PathFact> = None;
                if single_pred {
                    if let Some(ref abs) = exit.abstract_state {
                        let fact = abs.get(rv).path;
                        block_outer_fact = Some(fact);
                        block_variant_inner = detect_variant_inner_fact(rv, ssa, &exit);
                    }
                } else {
                    for pred in &block.preds {
                        let pred_idx = pred.0 as usize;
                        if let Some(pred_exit) =
                            block_exit_states.get(pred_idx).and_then(|o| o.as_ref())
                        {
                            let per_pred_exit = transfer_block(
                                block,
                                cfg,
                                ssa,
                                transfer,
                                pred_exit.clone(),
                                induction_vars,
                                None,
                            );
                            if let Some(ref abs) = per_pred_exit.abstract_state {
                                let fact = abs.get(rv).path;
                                block_outer_fact = Some(match block_outer_fact {
                                    None => fact,
                                    Some(prev) => prev.join(&fact),
                                });
                                let inner_this =
                                    detect_variant_inner_fact(rv, ssa, &per_pred_exit);
                                block_variant_inner = match (block_variant_inner, inner_this) {
                                    (Some(a), Some(b)) => Some(a.join(&b)),
                                    (Some(a), None) => Some(a),
                                    (None, Some(b)) => Some(b),
                                    (None, None) => None,
                                };
                            }
                        }
                    }
                }

                // Pick this block's contribution to the joined
                // `return_path_fact`.  When the rv is a one-arg variant
                // constructor (structurally: upper-camel-case leaf, 1 arg,
                // no receiver), the *inner* fact is what a destructuring
                // caller would see on the match-bound variable; the outer
                // variant-wrapper fact is semantically irrelevant because
                // `Option` / `Result` / `Box`
                // values are not themselves path values.  Summary-level
                // unwrapping keeps the joined fact precise for the common
                // "`sanitize(...) -> Option`; `let safe = match …
                // { Some(s) => s, None => return }`" idiom without
                // teaching the CFG/SSA layer about per-arm path
                // narrowing.
                //
                // Additionally, a return path whose rv carries no data
                // (nullary variant like `None`, or a constant `null` /
                // `nil`) is skipped from the joined fact: a
                // destructuring caller cannot extract a path value
                // from that path, so it is semantically unreachable at
                // any path-typed sink.  Skipping avoids diluting an
                // otherwise proven narrowing on the data-producing
                // arms.
                let rv_carries_no_data = is_non_data_return(rv, ssa);
                let block_contribution = if rv_carries_no_data {
                    None
                } else {
                    block_variant_inner
                        .clone()
                        .or_else(|| block_outer_fact.clone())
                };
                if let Some(fact) = block_contribution {
                    return_path_fact_acc = Some(match return_path_fact_acc.clone() {
                        None => fact,
                        Some(prev) => prev.join(&fact),
                    });
                }

                // Emit a per-return-path entry when we have a fact for
                // this block.  The predicate hash and known-true/false
                // come from the *entry* predicate gate (the exit
                // replay's predicates describe the gate under which
                // this return is reached).  Per-path entries carry both
                // the outer `path_fact` and the optional
                // `variant_inner_fact` so match-arm-sensitive callers
                // can distinguish the two.
                if let Some(outer) = block_outer_fact {
                    let (predicate_hash, known_true, known_false) =
                        summary_extract::summarise_return_predicates(&exit);
                    let entry = crate::summary::ssa_summary::PathFactReturnEntry {
                        predicate_hash,
                        known_true,
                        known_false,
                        path_fact: outer,
                        variant_inner_fact: block_variant_inner,
                    };
                    crate::summary::ssa_summary::merge_path_fact_return_paths(
                        &mut per_return_path_entries,
                        &[entry],
                    );
                }
            } else {
                // Return(None): implicit return / empty body.
                // Fall back to collecting all live values.
                for (val, taint) in &exit.values {
                    if param_values.contains(val) {
                        param_caps |= taint.caps;
                        for orig in &taint.origins {
                            classify_and_push(
                                orig,
                                &mut param_internal,
                                &mut param_params,
                                &mut param_receiver,
                            );
                        }
                    } else {
                        derived_caps |= taint.caps;
                        for orig in &taint.origins {
                            classify_and_push(
                                orig,
                                &mut derived_internal,
                                &mut derived_params,
                                &mut derived_receiver,
                            );
                        }
                    }
                }
            }
        }
    }

    // Prefer derived caps; fall back to param-return caps for passthrough functions.
    let (final_caps, final_internal, final_params, final_receiver) = if !derived_caps.is_empty() {
        (
            derived_caps,
            derived_internal,
            derived_params,
            derived_receiver,
        )
    } else {
        (param_caps, param_internal, param_params, param_receiver)
    };

    let return_path_fact =
        return_path_fact_acc.unwrap_or_else(crate::abstract_interp::PathFact::top);

    // Only keep per-return-path entries when at least one entry carries
    // meaningful signal (non-Top path_fact or a variant_inner_fact).  A
    // list of all-Top entries adds bytes on disk without helping a
    // caller pick a path.  Additionally require ≥2 distinct entries:
    // a single-entry list is no finer than the joined `return_path_fact`.
    let return_path_facts = if per_return_path_entries.len() >= 2
        && per_return_path_entries
            .iter()
            .any(|e| !e.path_fact.is_top() || e.variant_inner_fact.is_some())
    {
        per_return_path_entries
    } else {
        SmallVec::new()
    };

    // Even when the callee produces no return taint and no param/receiver
    // provenance, a non-Top PathFact on the return is still meaningful
    // (it tells callers "this helper's return is sanitised along a path
    // axis").  Keep the shape when *any* of these signals is present.
    if final_caps.is_empty()
        && final_params == 0
        && !final_receiver
        && final_internal.is_empty()
        && return_path_fact.is_top()
        && return_path_facts.is_empty()
    {
        return CachedInlineShape(None);
    }

    CachedInlineShape(Some(ReturnShape {
        caps: final_caps,
        internal_origins: final_internal,
        param_provenance: final_params,
        receiver_provenance: final_receiver,
        uses_summary: true, // inline analysis is a form of summary
        return_path_fact,
        return_path_facts,
    }))
}

/// Structural predicate: does `rv` represent a "non-data" return,
/// a value that cannot carry path-typed content on this return path?
///
/// Looks up the first instruction defining `rv` and recognises the
/// common failure-arm idioms:
/// * [`SsaOp::Const`] whose trimmed text is one of the nullary
///   sentinels `None`, `NONE`, `null`, `NULL`, `nil`, `undefined`,
///   `Nothing`, `()`, or the empty string.  tree-sitter-rust emits
///   `None` as a constant path identifier rather than a call; across
///   other languages `null` / `nil` cover the equivalents.
/// * [`SsaOp::Call`] with *zero* arguments and no receiver whose
///   callee passes
///   [`crate::abstract_interp::path_domain::is_structural_variant_ctor`]
///   (a variant / struct constructor shape).  This covers user-defined
///   nullary variants without naming them.  Zero-arg constructors carry
///   no attacker-controlled content by definition, so they are provably
///   not a path-typed payload.
///
/// Returns `false` for everything else (calls with arguments, other
/// constants that could be path literals, any other op, or an `rv` with
/// no defining instruction); skipping those would lose soundness of
/// path-safety narrowing.
fn is_non_data_return(rv: SsaValue, ssa: &SsaBody) -> bool {
    for block in &ssa.blocks {
        for inst in block.phis.iter().chain(block.body.iter()) {
            if inst.value != rv {
                continue;
            }
            match &inst.op {
                SsaOp::Const(Some(text)) => {
                    // Match the nullary sentinels used across the
                    // supported languages.  Intentionally narrow:
                    // any non-sentinel constant may be a path
                    // literal that must participate in the join.
                    let trimmed = text.trim();
                    return matches!(
                        trimmed,
                        "None"
                            | "NONE"
                            | "null"
                            | "NULL"
                            | "nil"
                            | "undefined"
                            | "Nothing"
                            | "()"
                            | ""
                    );
                }
                SsaOp::Call {
                    callee,
                    args,
                    receiver,
                    ..
                } => {
                    return receiver.is_none()
                        && args.is_empty()
                        && crate::abstract_interp::path_domain::is_structural_variant_ctor(
                            callee,
                        );
                }
                _ => return false,
            }
        }
    }
    false
}

/// Structural detector for "return value is a variant constructor
/// wrapping an inner value" at the callee's exit.
///
/// Returns `Some(inner_fact)` when:
/// * `rv` is defined by [`SsaOp::Call`] in `ssa`;
/// * the call's callee is accepted by
///   [`crate::abstract_interp::path_domain::is_structural_variant_ctor`]
///   (a variant / type constructor shape);
/// * the call has no receiver and its *first* argument group is
///   non-empty (later groups are ignored, see the comment in the body);
/// * `exit` carries an abstract state and at least one value in that
///   first group has a non-Top [`PathFact`];
///
/// where `inner_fact` is the join of the non-Top [`PathFact`]s of the
/// first group's SSA values at the callee's exit state.  Name-agnostic:
/// `Some`, `Ok`, `Err`, `Box::new`, and any user-defined single-field
/// enum variant or tuple struct constructor all participate on the same
/// footing.
pub(super) fn detect_variant_inner_fact(
    rv: SsaValue,
    ssa: &SsaBody,
    exit: &state::SsaTaintState,
) -> Option<crate::abstract_interp::PathFact> {
    for block in &ssa.blocks {
        for inst in block.phis.iter().chain(block.body.iter()) {
            if inst.value != rv {
                continue;
            }
            let SsaOp::Call {
                callee,
                args,
                receiver,
                ..
            } = &inst.op
            else {
                return None;
            };
            if receiver.is_some() {
                return None;
            }
            if !crate::abstract_interp::path_domain::is_structural_variant_ctor(callee) {
                return None;
            }
            // Single positional argument in the first group.  SSA
            // lowering appends an implicit chained-call uses group
            // after the positional ones, so we cannot read positional
            // arity from `args.len()` alone; however the *first*
            // group still captures the positional arg 0's contributing
            // SsaValues.  Join PathFacts across every value in that
            // group so chained inner calls (`Some(s.to_string())`
            // surfaces both `s` and `s.to_string`'s result) contribute
            // their most precise narrowing.
            let group = args.first()?;
            if group.is_empty() {
                return None;
            }
            let abs = exit.abstract_state.as_ref()?;
            let mut joined: Option<crate::abstract_interp::PathFact> = None;
            for &v in group {
                let fact = abs.get(v).path;
                // Top facts carry no narrowing and are left out of the join.
                if fact.is_top() {
                    continue;
                }
                joined = Some(match joined {
                    None => fact,
                    Some(prev) => prev.join(&fact),
                });
            }
            return joined;
        }
    }
    None
}

/// Re-attribute a [`CachedInlineShape`] to a specific call site.
///
/// Called on every inline-analysis return (both cache miss and cache hit) so
/// that `InlineResult.return_taint.origins` always reflect the *current*
/// caller's argument chain.  See the module-level note on cache-vs-origin
/// attribution.
///
/// # Attribution rules
///
/// * **Internal origins** (recorded by the callee's `Source` ops): cloned
///   with `node` overwritten to `call_site_node`; `source_span` preserved
///   from the callee CFG.
/// * **Param-provenance bits**: for each set bit `i`, union caller's arg
///   origins at position `i` into the result.  Receiver provenance does the
///   same for `receiver`.
/// * **Truncation**: the combined origin set is capped at
///   [`effective_max_origins`]; when any origins are dropped,
///   [`EngineNote::OriginsTruncated`] is recorded via
///   [`record_engine_note`].
fn apply_cached_shape( shape: &CachedInlineShape, args: &[SmallVec<[SsaValue; 2]>], receiver: &Option, state: &SsaTaintState, call_site_node: NodeIndex, ) -> InlineResult { let Some(ret) = shape.0.as_ref() else { return InlineResult { return_taint: None, return_path_fact: crate::abstract_interp::PathFact::top(), return_path_facts: SmallVec::new(), }; }; let cap = effective_max_origins(); let mut origins: SmallVec<[TaintOrigin; 2]> = SmallVec::new(); let mut dropped: u32 = 0; let push = |origins: &mut SmallVec<[TaintOrigin; 2]>, dropped: &mut u32, new_orig: TaintOrigin| { if origins.iter().any(|o| { o.node == new_orig.node && o.source_span == new_orig.source_span && o.source_kind == new_orig.source_kind }) { return; } if origins.len() < cap { origins.push(new_orig); } else { *dropped += 1; } }; // 1. Callee-internal origins: rewrite `node` to the current call site. for orig in &ret.internal_origins { let mut o = *orig; o.node = call_site_node; push(&mut origins, &mut dropped, o); } // 2. Caller-attributed origins from param-provenance bits. let mut bits = ret.param_provenance; while bits != 0 { let idx = bits.trailing_zeros() as usize; bits &= bits - 1; if let Some(arg_vals) = args.get(idx) { for v in arg_vals { if let Some(taint) = state.get(*v) { for orig in &taint.origins { push(&mut origins, &mut dropped, *orig); } } } } } // 3. Receiver-attributed origins (SelfParam provenance). if ret.receiver_provenance { if let Some(rv) = receiver { if let Some(taint) = state.get(*rv) { for orig in &taint.origins { push(&mut origins, &mut dropped, *orig); } } } } if dropped > 0 { // Mirror the counter increment the shared helper does, so the // global `origins_truncation_count()` observability hook covers // this site too. 
ORIGINS_TRUNCATION_COUNT.fetch_add(dropped as usize, std::sync::atomic::Ordering::Relaxed); record_engine_note(crate::engine_notes::EngineNote::OriginsTruncated { dropped }); } // If the return taint is empty (no caps, no origins) we still need to // surface the PathFact contribution; represent "no return taint" with // `None` to preserve the existing InlineResult invariant while letting // callers apply the path fact regardless. let return_taint = if ret.caps.is_empty() && origins.is_empty() { None } else { Some(VarTaint { caps: ret.caps, origins, uses_summary: ret.uses_summary, }) }; InlineResult { return_taint, return_path_fact: ret.return_path_fact.clone(), return_path_facts: ret.return_path_facts.clone(), } } /// Apply a callee's [`FieldPointsToSummary`] field writes at a caller /// call site. /// /// For each `(param_idx, field_names)` in /// [`FieldPointsToSummary::param_field_writes`], substitute the callee /// `Param(callee, i)` with the caller's `pt(arg_i)` and union the /// argument's taint into each `(loc, field_id)` cell on the caller's /// `field_taint`. /// /// * `param_idx == u32::MAX` is the receiver sentinel, resolve via /// the call's `receiver` SsaValue rather than positional args. /// * `field_name == ""` translates to [`FieldId::ELEM`] without /// going through the caller's interner, matches the wire-format /// convention from /// [`crate::pointer::extract_field_points_to`]. /// * Any other field name is *looked up* (read-only) in the caller's /// [`FieldInterner`]. Names the caller never referenced are skipped /// , no FieldProj read in the caller could observe such a cell. /// * `pt(arg)` saturated to `{Top}` is conservatively skipped (matches /// the W1/W2 hooks' over-approximation policy). 
/// /// Strict-additive: when [`FieldPointsToSummary::overflow`] is `true` /// the helper does nothing, the conservative interpretation is "every /// param touches every field on every other param", which would /// require a body-wide field cell flood the lattice cannot /// efficiently represent. The bit is informational; consumers /// already fall back to today's pre-W3 behaviour. fn apply_field_points_to_writes( summary: &crate::summary::points_to::FieldPointsToSummary, args: &[SmallVec<[SsaValue; 2]>], receiver: &Option, state: &mut SsaTaintState, ssa: &SsaBody, pf: &crate::pointer::PointsToFacts, interner: &crate::state::symbol::SymbolInterner, ) { if summary.is_empty() || summary.overflow { return; } for (param_idx, field_names) in &summary.param_field_writes { // Resolve the caller-side SSA values for the arg position. let caller_vals: SmallVec<[SsaValue; 2]> = if *param_idx == u32::MAX { match receiver { Some(rv) => smallvec::smallvec![*rv], None => continue, } } else { let idx = *param_idx as usize; match args.get(idx) { Some(group) if !group.is_empty() => group.clone(), _ => continue, } }; // Compute combined arg taint from every contributing SSA value. let mut combined_caps = crate::labels::Cap::empty(); let mut combined_origins: SmallVec<[TaintOrigin; 2]> = SmallVec::new(); let mut combined_summary = false; // W4: combine validation channels across the caller-side SSA // values for this argument position. Vacuous AND for empty // values is `true`, but `caller_vals` here is non-empty (we // filtered above), so the AND fold is meaningful. 
let mut combined_must = true; let mut combined_may = false; for &v in &caller_vals { if let Some(t) = state.get(v) { combined_caps |= t.caps; combined_summary |= t.uses_summary; for o in &t.origins { push_origin_bounded(&mut combined_origins, *o); } } let (am, av) = ssa_value_validated_bits(v, ssa, interner, state); combined_must &= am; combined_may |= av; } if combined_caps.is_empty() { continue; } let cell_taint = VarTaint { caps: combined_caps, origins: combined_origins, uses_summary: combined_summary, }; // For each field name, intern through the caller's FieldInterner // (read-only) and apply to every caller pt(arg_v) loc. for name in field_names { let fid = if name == "" { crate::ssa::ir::FieldId::ELEM } else { match ssa.field_interner.lookup(name) { Some(id) => id, None => continue, } }; for &v in &caller_vals { let pt = pf.pt(v); if pt.is_empty() || pt.is_top() { continue; } for loc in pt.iter() { let key = crate::taint::ssa_transfer::state::FieldTaintKey { loc, field: fid }; state.add_field(key, cell_taint.clone(), combined_must, combined_may); } } } } } /// W4: container ELEM read counterpart. When the call is a /// recognised container read, walks `pt(receiver)`'s `(loc, ELEM)` /// cells and: /// /// * Unions their `taint.caps` into the call result's value taint /// (additive, preserves any caps already set by upstream /// `try_container_propagation` / heap analysis). /// * AND-intersects the cells' `validated_must`; OR-unions /// `validated_may`; seeds the call result's symbol-level bits /// accordingly. /// /// Strict-additive: skips when no cell exists, when `pt(receiver)` /// saturates / is empty, or when no contributing cell is found. fn apply_container_elem_read_w4( inst: &SsaInst, ssa: &SsaBody, transfer: &SsaTaintTransfer, state: &mut SsaTaintState, ) { let SsaOp::Call { callee, receiver, .. 
} = &inst.op else { return; }; let (Some(pf), Some(rcv)) = (transfer.pointer_facts, *receiver) else { return; }; if !crate::pointer::is_container_read_callee_pub(callee) { return; } let pt = pf.pt(rcv); if pt.is_empty() || pt.is_top() { return; } let mut elem_caps = Cap::empty(); let mut elem_origins: SmallVec<[TaintOrigin; 2]> = SmallVec::new(); let mut elem_summary = false; let mut cell_must_all: Option = None; let mut cell_may_any = false; for loc in pt.iter() { let key = crate::taint::ssa_transfer::state::FieldTaintKey { loc, field: crate::ssa::ir::FieldId::ELEM, }; if let Some(cell) = state.get_field(key) { elem_caps |= cell.taint.caps; elem_summary |= cell.taint.uses_summary; for o in &cell.taint.origins { push_origin_bounded(&mut elem_origins, *o); } cell_must_all = Some(match cell_must_all { Some(prev) => prev && cell.validated_must, None => cell.validated_must, }); cell_may_any |= cell.validated_may; } } if cell_must_all.is_none() { return; } if !elem_caps.is_empty() { let cur = state.get(inst.value).cloned(); let merged = match cur { Some(mut acc) => { acc.caps |= elem_caps; acc.uses_summary |= elem_summary; for o in &elem_origins { push_origin_bounded(&mut acc.origins, *o); } acc } None => VarTaint { caps: elem_caps, origins: elem_origins, uses_summary: elem_summary, }, }; state.set(inst.value, merged); } if let Some(name) = ssa .value_defs .get(inst.value.0 as usize) .and_then(|vd| vd.var_name.as_deref()) { if let Some(sym) = transfer.interner.get(name) { if cell_must_all == Some(true) { state.validated_must.insert(sym); } if cell_may_any { state.validated_may.insert(sym); } } } } /// W4: look up the symbol-keyed `validated_must` / `validated_may` /// flags for an SSA value via its `var_name`. Returns `(false, /// false)` when the value has no name, when the name isn't interned, /// or when the symbol bits aren't set. 
fn ssa_value_validated_bits( v: SsaValue, ssa: &SsaBody, interner: &crate::state::symbol::SymbolInterner, state: &SsaTaintState, ) -> (bool, bool) { let name = match ssa .value_defs .get(v.0 as usize) .and_then(|vd| vd.var_name.as_deref()) { Some(n) => n, None => return (false, false), }; match interner.get(name) { Some(sym) => ( state.validated_must.contains(sym), state.validated_may.contains(sym), ), None => (false, false), } } /// Transfer a single SSA instruction. pub(super) fn transfer_inst( inst: &SsaInst, cfg: &Cfg, ssa: &SsaBody, transfer: &SsaTaintTransfer, state: &mut SsaTaintState, ) { let info = &cfg[inst.cfg_node]; // Cross-file abstract return fact from callee resolution. // Set inside the Call arm, applied after transfer_abstract to override Top. let mut callee_return_abstract: Option = None; match &inst.op { SsaOp::Source => { // Apply source labels from NodeInfo let mut source_caps = Cap::empty(); for lbl in &info.taint.labels { if let DataLabel::Source(bits) = lbl { source_caps |= *bits; } } if !source_caps.is_empty() { let callee = info.call.callee.as_deref().unwrap_or(""); let source_kind = crate::labels::infer_source_kind(source_caps, callee); let origin = TaintOrigin { node: inst.cfg_node, source_kind, source_span: None, }; state.set( inst.value, VarTaint { caps: source_caps, origins: SmallVec::from_elem(origin, 1), uses_summary: false, }, ); } } SsaOp::CatchParam => { let origin = TaintOrigin { node: inst.cfg_node, source_kind: SourceKind::CaughtException, source_span: None, }; state.set( inst.value, VarTaint { caps: Cap::all(), origins: SmallVec::from_elem(origin, 1), uses_summary: false, }, ); } SsaOp::Call { callee, args, receiver, .. } => { // Excluded callees (e.g. router.get, app.post) should not propagate // taint through their return value, they are framework scaffolding, // not data-flow operations. if crate::labels::is_excluded(transfer.lang.as_str(), callee.as_bytes()) { return; } // Chain-wrapper sanitiser detection. 
Computed up-front so // both the container-element-write hook and the outer- // callee taint suppression block below can consult it. // Walks `info.arg_callees` for the chain shape // `outer(... wrapper() ...)`, collecting any // sanitiser caps the wrapper's summary or label exposes. // The set is empty when there is no chain wrapper or when // none of the wrappers expose sanitisation. // // Argument attribution: when `find_classifiable_inner_call` // overrode the callee to an inner Source, the source can be // either (a) a direct argument call (`outer(escape(x), // source())`) or (b) nested inside one wrapper // (`outer(escape(source(x)))`). Crediting any wrapper's // sanitizer caps when the source sits in a different argument // position would suppress real taint flow. // // * `source_arg_pos = Some(N)` — the source call is the // immediate callee of arg N (`arg_callees[N] == callee`). // No other-arg wrapper can sanitize it. Credit nothing. // * `source_arg_pos = None` — the source is nested inside // some arg's wrapper. Credit only when exactly one arg // has a sanitizing wrapper, since that one must be the // parent of the nested source. Multiple sanitizing // wrappers across different positions is ambiguous; stay // conservative and credit nothing. 
let caller_func_for_chain = info.ast.enclosing_func.as_deref().unwrap_or(""); let mut chain_wrapper_sanitizer_caps = Cap::empty(); if !info.arg_callees.is_empty() { let source_arg_pos = info .arg_callees .iter() .position(|c| c.as_deref() == Some(callee.as_str())); let mut per_arg_sanitizer_caps: SmallVec<[Cap; 4]> = SmallVec::new(); for (idx, maybe_callee) in info.arg_callees.iter().enumerate() { if Some(idx) == source_arg_pos { continue; } let Some(wrap_callee) = maybe_callee else { continue; }; if Some(wrap_callee.as_str()) == info.call.outer_callee.as_deref() { continue; } let mut caps_here = Cap::empty(); if let Some(resolved) = resolve_callee_hinted( transfer, wrap_callee, caller_func_for_chain, info.call.call_ordinal, None, ) { caps_here |= resolved.sanitizer_caps; } else { let labels = crate::labels::classify_all( transfer.lang.as_str(), wrap_callee, transfer.extra_labels, ); for lbl in &labels { if let DataLabel::Sanitizer(bits) = lbl { caps_here |= *bits; } } } if !caps_here.is_empty() { per_arg_sanitizer_caps.push(caps_here); } } if source_arg_pos.is_none() && per_arg_sanitizer_caps.len() == 1 { chain_wrapper_sanitizer_caps = per_arg_sanitizer_caps[0]; } } // Container element-write hook. Runs before other Call-arm // processing so `try_container_propagation`'s early-return // can't bypass us. Writes only into `(loc, ELEM)` cells on // `field_taint`, strictly additive. // // Each pushed value's `validated_must`/`validated_may` flow // through: cell `must = AND` over args (every writer must be // must-validated), `may = OR` over args. Anonymous SSA temps // contribute `false/false` and break the `must` invariant. // // Two callee shapes: // * Method-style write (`receiver.push(val)`) — `receiver` // channel resolves the container, value args start at // position 0. // * Go `append` builtin (or chain shape with // `outer_callee == "append"`) — no receiver channel, // `args[0]` is the slice itself, value args start at // position 1. 
if let Some(pf) = transfer.pointer_facts { let go_append_chain = transfer.lang == Lang::Go && receiver.is_none() && (callee == "append" || info.call.outer_callee.as_deref() == Some("append")); // For Go append, args[0] is the input slice whose // points-to set may be empty when the slice was just // initialised with a composite literal (`cmds := // []string{}`). The call result (inst.value) carries // the fresh allocation site that pointer analysis // attaches to every Call op, and downstream uses of // the slice flow through that result, so it is the // authoritative container identity. Fall back to // args[0] when the result has no pt set yet. let resolved_recv: Option = if let Some(rcv) = *receiver { Some(rcv) } else if go_append_chain { let result_v = inst.value; let result_pt = pf.pt(result_v); if !result_pt.is_empty() && !result_pt.is_top() { Some(result_v) } else { args.first().and_then(|a| a.first().copied()) } } else { None }; let value_arg_start = if go_append_chain { 1 } else { 0 }; let write_callee_match = if go_append_chain { true } else { crate::pointer::is_container_write_callee(callee) }; if let (Some(rcv), true) = (resolved_recv, write_callee_match) { let pt = pf.pt(rcv); if !pt.is_empty() && !pt.is_top() { let mut elem_caps = Cap::empty(); let mut elem_origins: SmallVec<[TaintOrigin; 2]> = SmallVec::new(); let mut elem_summary = false; let mut elem_must_all = true; // AND over args (vacuously true for empty args) let mut elem_may_any = false; // OR over args let mut saw_any_arg = false; for arg_group in args.iter().skip(value_arg_start) { for &arg_v in arg_group { saw_any_arg = true; if let Some(t) = state.get(arg_v) { elem_caps |= t.caps; elem_summary |= t.uses_summary; for o in &t.origins { push_origin_bounded(&mut elem_origins, *o); } } let (am, av) = ssa_value_validated_bits(arg_v, ssa, transfer.interner, state); elem_must_all &= am; elem_may_any |= av; } } // Chain-shape Go append: the inner Source label // fires on this same call 
instruction, so its // caps are not yet on any positional arg's SSA // value at this point. Pull them in directly // from the source labels so the W4 cell sees // the real source caps; without this the cell // is empty for the chain shape and the index- // read taint flow appears clean for the wrong // reason. if go_append_chain { for lbl in &info.taint.labels { if let DataLabel::Source(bits) = lbl { elem_caps |= *bits; saw_any_arg = true; } } // A chain-shape sanitising wrapper around the // source counts as the validation that the // ELEM cell needs. Each entry in // `info.arg_callees` whose summary or label // exposes non-empty `sanitizer_caps` // contributes to validation, the cell's // must/may bits flip on so the index-read // counterpart sees the value as validated. if !chain_wrapper_sanitizer_caps.is_empty() { elem_must_all = true; elem_may_any = true; } } // Vacuous AND: a zero-arg container write supplies // no validation source, so coerce must to false. if !saw_any_arg { elem_must_all = false; } if !elem_caps.is_empty() { let cell = VarTaint { caps: elem_caps, origins: elem_origins, uses_summary: elem_summary, }; for loc in pt.iter() { let key = crate::taint::ssa_transfer::state::FieldTaintKey { loc, field: crate::ssa::ir::FieldId::ELEM, }; state.add_field(key, cell.clone(), elem_must_all, elem_may_any); } } } } } // Check for source labels first let mut return_bits = Cap::empty(); let mut return_origins: SmallVec<[TaintOrigin; 2]> = SmallVec::new(); // Network-fetch source suppression: a Call that carries BOTH // a Source label and a Sink(SSRF) label is a network-fetch // primitive (e.g. PHP `file_get_contents`, `curl_exec`, // Python `requests.get`, JS `axios.get`). 
When invoked with // a hardcoded URL whose prefix passes `is_string_safe_for_ssrf` // (a fully-formed `scheme://host/path`), the developer has // explicitly bound the endpoint at compile time, the SSRF // sink suppression already trusts this prefix-lock to // silence the SSRF concern, and the same trust applies on // the source side: the response body is developer-chosen, // not attacker-chosen. Suppressing the Source label here // mirrors the existing sink suppression so a single // hardcoded-URL fetch does not create a phantom // `taint-unsanitised-flow` finding when its result is // echoed/printed later in the same scope. let is_network_fetch_source = info .taint .labels .iter() .any(|l| matches!(l, DataLabel::Source(_))) && info .taint .labels .iter() .any(|l| matches!(l, DataLabel::Sink(c) if c.contains(Cap::SSRF))); // Detect a hardcoded URL via three channels: // 1. `info.string_prefix`, populated by the JS/TS template- // literal extractor and inline call shapes. // 2. AbstractState `StringFact` on the first positional arg , // populated by const propagation for plain string literals. // 3. As a last resort when `info.call.first_arg_text` is // populated with a hardcoded literal, extracted at CFG // construction time for network-fetch primitive callees. 
let url_prefix_safe_via_node = info .string_prefix .as_deref() .map(|p| { let synthetic = crate::abstract_interp::StringFact::from_prefix(p); is_string_safe_for_ssrf(&synthetic) }) .unwrap_or(false); let url_prefix_safe_via_abs = state.abstract_state.as_ref().is_some_and(|abs| { args.first().is_some_and(|first_arg| { !first_arg.is_empty() && first_arg .iter() .all(|v| is_string_safe_for_ssrf(&abs.get(*v).string)) }) }); let url_prefix_safe_via_first_arg_text = is_network_fetch_source && info .call .arg_string_literals .first() .and_then(|v| v.as_deref()) .map(|s| { let synthetic = crate::abstract_interp::StringFact::from_prefix(s); is_string_safe_for_ssrf(&synthetic) }) .unwrap_or(false); let url_is_hardcoded_safe = is_network_fetch_source && (url_prefix_safe_via_node || url_prefix_safe_via_abs || url_prefix_safe_via_first_arg_text); for lbl in &info.taint.labels { if let DataLabel::Source(bits) = lbl { if url_is_hardcoded_safe { // Skip Source propagation, see network-fetch // source suppression rationale above. continue; } return_bits |= *bits; let callee_str = info.call.callee.as_deref().unwrap_or(""); let source_kind = crate::labels::infer_source_kind(*bits, callee_str); let origin = TaintOrigin { node: inst.cfg_node, source_kind, source_span: None, }; if !return_origins.iter().any(|o| o.node == inst.cfg_node) { return_origins.push(origin); } } } // Output-parameter source tainting (C/C++): for known APIs that // write to a buffer argument (fgets, getline, recv, etc.), taint // the argument SSA values at the registered output positions. 
if !return_bits.is_empty() { if let Some(positions) = crate::labels::output_param_source_positions(transfer.lang.as_str(), callee) { for &pos in positions { if let Some(arg_group) = args.get(pos) { for &arg_v in arg_group { state.set( arg_v, VarTaint { caps: return_bits, origins: return_origins.clone(), uses_summary: false, }, ); } } } } } // Check for sanitizer labels let mut sanitizer_bits = Cap::empty(); for lbl in &info.taint.labels { if let DataLabel::Sanitizer(bits) = lbl { sanitizer_bits |= *bits; } } // Call-site replace sanitizer detection. Recognises // `s.replace*(pat, rep)` / `strings.ReplaceAll(s, pat, rep)` / // `str_replace($pat, $rep, $s)` shapes whose pattern is a // concrete shell/HTML/SQL escape literal and treats the call // as a sanitizer for the corresponding caps. Mirrors the // semantics that label-rule sanitizers already provide. if let Some(extra) = crate::symex::strings::detect_call_site_replace_sanitizer( callee, transfer.lang, &info.call.arg_string_literals, ) { sanitizer_bits |= extra; } // Resolve callee summary, always attempt, even when explicit // labels are present. Labels take precedence for source caps, but // summary propagation and sanitizer behaviour must still apply // (matches legacy `apply_call()` semantics). let caller_func = info.ast.enclosing_func.as_deref().unwrap_or(""); let has_source_label = info .taint .labels .iter() .any(|l| matches!(l, DataLabel::Source(_))); let mut resolved_callee = false; // Context-sensitive inline analysis: attempt before summary fallback. // Only for intra-file calls when context sensitivity is enabled. // Only claims resolution when the inline result produces non-empty // return taint, otherwise falls through to summary for cases like // receiver-only method calls where summary propagation is needed. 
if transfer.inline_cache.is_some() && transfer.context_depth < 1 { if let Some(result) = inline_analyse_callee(callee, args, receiver, state, transfer, cfg, ssa, inst) { if let Some(ref ret) = result.return_taint { resolved_callee = true; return_bits |= ret.caps; for orig in &ret.origins { push_origin_bounded(&mut return_origins, *orig); } } // PathFact propagation from inline analysis: when the // callee's body narrowed its return value's [`PathFact`] // (e.g. a `sanitize(s) -> Option` helper whose // `Some` arm is gated by `s.contains("..")` rejection), // meet that fact into the call-result's abstract state // so downstream FILE_IO sinks see the sanitised axis. // // Uses meet rather than set so any caller-side narrowing // from constant propagation or local transfer wins over // callee-derived Top axes. if !result.return_path_fact.is_top() { if let Some(ref mut abs) = state.abstract_state { let mut av = abs.get(inst.value); av.path = ::meet( &av.path, &result.return_path_fact, ); if !av.is_top() { abs.set(inst.value, av); } } } } } // Inter-procedural container fields: populated from resolve_callee // even when inline analysis already handled return taint, since inline // analysis doesn't model cross-parameter container stores. let mut resolved_container_to_return: Vec = Vec::new(); let mut resolved_container_store: Vec<(usize, usize)> = Vec::new(); // Captured alongside container fields because the // callee_summary gets moved when the main taint branch takes it // below. We only need the points_to summary itself, clone it // out before the move so application can still read it. let mut resolved_points_to: crate::summary::points_to::PointsToSummary = crate::summary::points_to::PointsToSummary::empty(); // Resolve callee summary (used for both taint propagation and container fields) // Pass arity (positional-arg count) so same-name/different-arity // overloads are not conflated during cross-file resolution. 
// // Use `info.call.arg_uses.len()` rather than `args.len()`: `args` // may include an extra "implicit" trailing group built by SSA // lowering to surface chained-call taint (see `build_call_args` in // `ssa/lower.rs`), which inflates `args.len()` beyond the real // positional arity. The CFG's `arg_uses` is the authoritative // positional-arg list. // // Fallback: certain TypeScript call shapes — notably calls // inside template-string substitutions (`${fn(arg)}`) — get // their `arg_uses` dropped by CFG lowering even though the // call's positional `args` are intact. When that happens // the strict `Some(0)` arity hint silently fails to match // any callee that takes ≥1 arg, swallowing summary // resolution. Detect the asymmetry and pass `None` so // `resolve_local_func_key_query`'s unique-name fallback // can still pick up the lone candidate. let arity_hint = if info.call.arg_uses.is_empty() && !args.is_empty() { None } else { Some(info.call.arg_uses.len()) }; // Type-aware resolution: when the SSA receiver value has a // known abstract type (HttpClient, URL, …), feed that into // the resolver as an authoritative `receiver_type`. This // causes qualified-first resolution to prefer // `{Type}::{name}` over any same-leaf collision in the // global summary table. 
let callee_summary = resolve_callee_typed( transfer, callee, caller_func, info.call.call_ordinal, arity_hint, *receiver, ); // Capture container fields and return type regardless of whether // inline analysis handled the call if let Some(ref resolved) = callee_summary { resolved_container_to_return = resolved.param_container_to_return.clone(); resolved_container_store = resolved.param_to_container_store.clone(); resolved_points_to = resolved.points_to.clone(); // Cross-call field-points-to application: walk the // callee's `field_points_to.param_field_writes`; for // each `(param_idx, field_names)` substitute the // callee's param with the caller's `pt(arg_i)` and // union the caller's argument taint into each // `(loc, field_id)` cell on `field_taint`. // // Receiver flow uses sentinel `param_idx == u32::MAX`. // Field names are looked up in the *caller's* // `field_interner`, names the caller never referenced // are skipped. The `""` sentinel translates to // [`FieldId::ELEM`]. if let Some(pf) = transfer.pointer_facts { apply_field_points_to_writes( &resolved.field_points_to, args, receiver, state, ssa, pf, transfer.interner, ); } // Capture abstract return for post-transfer injection callee_return_abstract = resolved.return_abstract.clone(); // Apply per-parameter abstract transfers. // // For each (param_idx, transfer) in the callee's summary, // apply the transfer to the caller's current abstract value // of the argument at that position. Join the per-parameter // contributions (disjunctive: any transfer's output is a // valid over-approximation of the return), then `meet` with // the baseline `return_abstract` (both facts must hold). // // Runs regardless of whether inline analysis already // resolved the call: inline re-analyses taint only; abstract // values are not threaded into or out of the callee body on // that path, so abstract transfer remains the summary-level // channel for propagating intervals / string prefixes across // a cross-file call. 
if !resolved.abstract_transfer.is_empty() { let mut synthesised: Option = None; for (idx, transfer) in &resolved.abstract_transfer { if transfer.is_top() { continue; } let arg_abs = if let Some(group) = args.get(*idx) { let mut joined: Option = None; for &v in group { let av = state .abstract_state .as_ref() .map(|a| a.get(v)) .unwrap_or_else(crate::abstract_interp::AbstractValue::top); joined = Some(match joined { None => av, Some(prev) => prev.join(&av), }); } joined.unwrap_or_else(crate::abstract_interp::AbstractValue::top) } else { crate::abstract_interp::AbstractValue::top() }; let applied = transfer.apply(&arg_abs); if applied.is_top() { continue; } synthesised = Some(match synthesised { None => applied, Some(prev) => prev.join(&applied), }); } if let Some(synth) = synthesised { callee_return_abstract = match callee_return_abstract.take() { Some(base) => { let m = base.meet(&synth); // Fall back to whichever side is non-bottom // (meet can contradict when the callee's // baseline and the caller-side transfer // describe disjoint facts, rare, but sound // to widen back to the less restrictive). if m.is_bottom() { Some(synth.join(&base)) } else { Some(m) } } None => Some(synth), }; } } // Cross-file type propagation: if the callee has a known return // type (from SSA summary), inject it into the caller's path env // so downstream type-qualified resolution can use it. if let Some(ref rtype) = resolved.return_type { if let Some(ref mut env) = state.path_env { use crate::constraint::domain::{TypeSet, ValueFact}; let mut fact = ValueFact::top(); fact.types = TypeSet::singleton(rtype); env.refine(inst.value, &fact); } } // Validated-flow propagation through callee summaries. 
// // Runs regardless of whether inline analysis already // resolved the call: inline analysis re-runs the // callee's taint with caller-side seeds but does not // surface the callee's symbol-keyed // `validated_must` / `validated_may` state into the // caller, so the summary-level signal is the only // channel for propagating helper-validation across // a function boundary. // // When the callee's body validates a parameter on // every return path that carries the param's caps // (regex allowlist, type check, validation call, …), // a normal-returning call site is the validating arm // by construction: control could not reach the // post-call instruction unless the helper's // predicate(s) accepted the argument. Mark each // tainted argument's `var_name` and the call's // result `var_name` in the caller's // `validated_must` / `validated_may` sets so // subsequent sinks observe `all_validated = true`, // the same way an inline `if (!regex.test(x)) throw` // validates the surviving branch. Closes the // helper-validator propagation gap surfaced by // CVE-2026-25544 (Payload `sanitizeValue` SQLi). if !resolved.validated_params_to_return.is_empty() { propagate_validated_params_to_return( inst, args, ssa, transfer.interner, state, &resolved.validated_params_to_return, ); } } // When find_classifiable_inner_call overrides the callee (e.g. // `storeInto(req.query.input, items)` → callee="req.query.input"), // the outer_callee preserves the original. Resolve it too for // container fields that depend on the wrapping function's summary. 
if resolved_container_store.is_empty() { if let Some(ref oc) = info.call.outer_callee { if let Some(ref resolved) = resolve_callee_hinted( transfer, oc, caller_func, info.call.call_ordinal, arity_hint, ) { if resolved_container_to_return.is_empty() { resolved_container_to_return = resolved.param_container_to_return.clone(); } resolved_container_store = resolved.param_to_container_store.clone(); } } } if !resolved_callee && let Some(resolved) = callee_summary { resolved_callee = true; // Source caps from summary: only when no explicit Source label if !has_source_label && !resolved.source_caps.is_empty() { return_bits |= resolved.source_caps; let source_kind = crate::labels::infer_source_kind(resolved.source_caps, callee); let origin = TaintOrigin { node: inst.cfg_node, source_kind, source_span: None, }; if !return_origins.iter().any(|o| o.node == inst.cfg_node) { return_origins.push(origin); } } // Per-parameter predicate-consistent transforms. // // When the summary carries `param_return_paths`, apply a // per-parameter effective sanitizer narrowed by the caller's // current predicate state. This recovers callee-internal // path splits that the coarse `resolved.sanitizer_caps` // union would erase (`if validated { return sanitised } // else { return raw }` can be resolved to "strip all // sanitised bits" when the caller validated the input). // // Falls back to the aggregate path when: // * `param_return_paths` is empty (single-path callee or // non-SSA resolution); // * the parameter has no entry (no per-path decomposition // was recorded for this param); // * no paths are predicate-compatible (conservative: keep // the aggregate sanitizer bits). 
let mut aggregate_sanitizer_applied = false; // Propagation: ALWAYS apply if resolved.propagates_taint { // Only use positional filtering when original arg_uses is populated let effective_params = if info.call.arg_uses.is_empty() { &[] as &[usize] } else { &resolved.propagating_params }; if !resolved.param_return_paths.is_empty() && !effective_params.is_empty() { // Per-parameter application: each propagating param // contributes taint narrowed by its own per-path // sanitizer. Origins are still aggregated across // params, they name source anchors, not transforms. let mut any_origin_added = false; for ¶m_idx in effective_params { let arg_caps_origins = collect_args_taint(args, receiver, state, &[param_idx]); let arg_caps = arg_caps_origins.0; let arg_origins = arg_caps_origins.1; let param_sanitizer = effective_param_sanitizer(&resolved, param_idx, state); return_bits |= arg_caps & !param_sanitizer; for orig in &arg_origins { if push_origin_bounded(&mut return_origins, *orig) { any_origin_added = true; } } } aggregate_sanitizer_applied = true; // Sentinel reference to silence unused on cold paths. let _ = any_origin_added; } else { let (prop_caps, prop_origins) = collect_args_taint(args, receiver, state, effective_params); return_bits |= prop_caps; for orig in &prop_origins { push_origin_bounded(&mut return_origins, *orig); } } } // Summary sanitizer: apply the aggregate only when per-param // path narrowing above did not already strip per-argument. if !aggregate_sanitizer_applied { return_bits &= !resolved.sanitizer_caps; } // Validated-flow propagation through callee summaries. // // When the callee's body validates a parameter on every // return path (regex allowlist, type check, validation // call, etc. 
— see // [`crate::summary::ssa_summary::SsaFuncSummary::validated_params_to_return`]), // a normal-returning call site is the validating arm by // construction: control could not reach the post-call // instruction unless the helper's predicate(s) accepted // the argument. Mark each tainted argument's `var_name` // and the call's result `var_name` in the caller's // `validated_must` / `validated_may` sets so subsequent // sinks observe `all_validated = true`, the same way an // inline `if (!regex.test(x)) throw` validates the // surviving branch. Closes the helper-validator // propagation gap surfaced by CVE-2026-25544 (Payload // `sanitizeValue` SQLi). } // Type-qualified receiver resolution: when normal callee resolution // failed and explicit labels are absent, try constructing a type-qualified // callee name from the receiver's inferred type (e.g., client.send → // HttpClient.send when client is typed as HttpClient). if !resolved_callee && info.taint.labels.is_empty() { if let Some(rv) = receiver { if transfer.type_facts.is_some() || state.path_env.is_some() { let tq_labels = resolve_type_qualified_labels( callee, *rv, transfer.type_facts, state.path_env.as_ref(), transfer.lang, transfer.extra_labels, Some(ssa), ); for lbl in &tq_labels { match lbl { DataLabel::Source(bits) if !has_source_label => { return_bits |= *bits; let source_kind = crate::labels::infer_source_kind(*bits, callee); let origin = TaintOrigin { node: inst.cfg_node, source_kind, source_span: None, }; if !return_origins.iter().any(|o| o.node == inst.cfg_node) { return_origins.push(origin); } } DataLabel::Sanitizer(bits) => { sanitizer_bits |= *bits; } DataLabel::Sink(_) => { // Sink detection is handled separately in // collect_block_events via resolve_sink_caps_typed } _ => {} } } } } } // Apply explicit sanitizer labels. 
When a callee summary has // already resolved the call, `return_bits` reflects the summary's // precise propagation + sanitization; re-unioning `use_caps` here // would restore taint the summary already stripped and clobber // any cross-procedural sanitization (e.g. an interprocedural // path-traversal sanitizer whose caller also carries a label-only // sanitizer matching on callee name). Only collect `use_caps` // when no summary applied, that is the original pure-label // sanitizer-wrapper code path. if !sanitizer_bits.is_empty() { if !resolved_callee { let (use_caps, use_origins) = collect_args_taint(args, receiver, state, &[]); return_bits |= use_caps; for orig in &use_origins { push_origin_bounded(&mut return_origins, *orig); } } return_bits &= !sanitizer_bits; // UNAUTHORIZED_ID models a caller-supplied id that must // clear an ownership/membership guard. Sanitizers for // this cap don't pass inputs through a return value , // the ownership proof is the side effect. Strip the bit // from each argument's SSA value so downstream uses see // it cleared. Isolated to UNAUTHORIZED_ID; other caps // keep return-only sanitizer semantics. if sanitizer_bits.contains(Cap::UNAUTHORIZED_ID) { strip_cap_from_call_args(args, receiver, state, Cap::UNAUTHORIZED_ID); } } else if !resolved_callee { // Container operation propagation (push/pop/get/set/etc.) // Try the primary callee first, then fall back to outer_callee // (set when find_classifiable_inner_call overrides the callee, // e.g. `parts.add(req.getParameter("input"))`, callee is // "req.getParameter" but outer_callee is "parts.add"). let mut container_handled = try_container_propagation( inst, info, args, receiver, state, transfer, callee, ssa, ); if !container_handled { if let Some(ref oc) = info.call.outer_callee { container_handled = try_container_propagation( inst, info, args, receiver, state, transfer, oc, ssa, ); } } if container_handled { // When this call node is also a Source (e.g. 
items.push(req.query.item) // where req.query.item triggers a Source label on the call), merge // the source taint into the container receiver too. if !return_bits.is_empty() { let recv_callee = info.call.outer_callee.as_deref().unwrap_or(callee); if let Some(container_val) = find_container_receiver(recv_callee, receiver, args, ssa, transfer.lang) { // Also store into heap objects when available if let Some(pts) = lookup_pts(transfer, container_val) { state.heap.store_set( &pts, HeapSlot::Elements, return_bits, &return_origins, ); } merge_taint_into(state, container_val, return_bits, &return_origins); } } // Fall through to write return_bits to inst.value if non-empty if return_bits.is_empty() { // Container ELEM read counterpart fires for // container_handled calls with no source label // (e.g. `cmd := arr.shift()`) whose taint comes // from the cell rather than an inline source. apply_container_elem_read_w4(inst, ssa, transfer, state); return; } } else { // Curl special case: propagate URL taint to handle if try_curl_url_propagation(inst, info, args, state) { return; } // Arg-to-arg propagation for known C/C++ functions (e.g., // inet_pton). When an input arg is tainted, propagate to // all SSA values in the output arg positions. 
if let Some(prop) = crate::labels::arg_propagation(transfer.lang.as_str(), callee) { let mut input_caps = Cap::empty(); let mut input_origins: SmallVec<[TaintOrigin; 2]> = SmallVec::new(); for &from_pos in prop.from_args { if let Some(arg_group) = args.get(from_pos) { for &v in arg_group { if let Some(taint) = state.get(v) { input_caps |= taint.caps; for orig in &taint.origins { push_origin_bounded(&mut input_origins, *orig); } } } } } if !input_caps.is_empty() { for &to_pos in prop.to_args { if let Some(arg_group) = args.get(to_pos) { for &arg_v in arg_group { state.set( arg_v, VarTaint { caps: input_caps, origins: input_origins.clone(), uses_summary: false, }, ); } } } } } // No labels and no summary, default propagation (gen/kill) let (use_caps, use_origins) = collect_args_taint(args, receiver, state, &[]); if return_bits.is_empty() { return_bits = use_caps; return_origins = use_origins; } // Validated-flow propagation through unresolved external // calls. When every tainted argument's symbol is already // in `validated_must` at the call site, the call result // is derived solely from validated values, so its symbol // inherits the same `validated_must` / `validated_may` // status. Without this, helper-validated taint that // crosses an external boundary (`db.execute(sanitisedSql)`, // `fetch(safeUrl)`, …) re-emerges as unvalidated taint at // the next sink (`res.json(result)`), reproducing the // residual finding in the patched fixture for // CVE-2026-25544 even though the SQL injection itself is // suppressed. if !return_bits.is_empty() { let mut all_args_validated = true; let mut any_tainted_arg = false; let check_value = |v: SsaValue, state: &SsaTaintState| -> Option { // Returns Some(true) if validated_must, Some(false) // if tainted-but-not-validated, None if not tainted. 
let taint = state.get(v)?; if taint.caps.is_empty() { return None; } let name = ssa .value_defs .get(v.0 as usize) .and_then(|vd| vd.var_name.as_deref())?; let sym = transfer.interner.get(name)?; Some(state.validated_must.contains(sym)) }; for arg_group in args { for &v in arg_group { if let Some(is_validated) = check_value(v, state) { any_tainted_arg = true; if !is_validated { all_args_validated = false; break; } } } if !all_args_validated { break; } } if all_args_validated { if let Some(rv) = receiver { if let Some(is_validated) = check_value(*rv, state) { any_tainted_arg = true; if !is_validated { all_args_validated = false; } } } } if any_tainted_arg && all_args_validated { if let Some(name) = ssa .value_defs .get(inst.value.0 as usize) .and_then(|vd| vd.var_name.as_deref()) { if let Some(sym) = transfer.interner.get(name) { state.validated_must.insert(sym); state.validated_may.insert(sym); } } } } } } // Receiver-side validator strip. Some method-call validators // raise on failure rather than transforming a return value, // so the canonical `Sanitizer` mechanism (which clears the // return) is the wrong shape. After the call returns, the // *receiver* (and any args carrying the same equivalence // class) is proven to satisfy the validated property. Strip // the registered cap from receiver+args here so that // `path.relative_to(base)` clears `Cap::FILE_IO` from // `path` for downstream uses. Motivated by CVE-2024-23334 // (aiohttp StaticResource symlink-bypass): the patched code // calls `filepath.relative_to(self._directory)` inside a // try/except and serves `filepath` afterwards. 
if let Some(cap) = crate::labels::lookup_receiver_validator(transfer.lang.as_str(), callee) { strip_cap_from_call_args(args, receiver, state, cap); } // Alias-aware sanitization: propagate through must-aliased field paths if !sanitizer_bits.is_empty() { if let Some(aliases) = transfer.base_aliases { if !aliases.is_empty() { propagate_sanitization_to_aliases( inst, state, sanitizer_bits, aliases, ssa, ); } } } // Inter-procedural container identity propagation: // If callee returns the same container it received, propagate // the caller's points-to set for that argument to the call result. // Uses precise positional matching: param indices correspond to // call-site argument positions (ensured by lower_to_ssa_with_params). if !resolved_container_to_return.is_empty() { if let Some(dyn_ref) = transfer.dynamic_pts { let mut container_pts_list: SmallVec<[PointsToSet; 2]> = SmallVec::new(); for ¶m_idx in &resolved_container_to_return { if let Some(arg_group) = args.get(param_idx) { for &arg_v in arg_group { if let Some(pts) = lookup_pts(transfer, arg_v) { container_pts_list.push(pts); } } } } if !container_pts_list.is_empty() { let mut dyn_pts = dyn_ref.borrow_mut(); for pts in &container_pts_list { match dyn_pts.get(&inst.value) { Some(existing) => { let merged = existing.union(pts); dyn_pts.insert(inst.value, merged); } None => { dyn_pts.insert(inst.value, pts.clone()); } } } } } } // Inter-procedural container store propagation: // If callee stores src_param taint into container_param's container, // use precise positional matching: param indices correspond to // call-site argument positions (ensured by lower_to_ssa_with_params). 
if !resolved_container_store.is_empty() { for &(src_param, container_param) in &resolved_container_store { // Collect container pts at the specific arg position let mut container_pts: SmallVec<[PointsToSet; 2]> = SmallVec::new(); if let Some(arg_group) = args.get(container_param) { for &v in arg_group { if let Some(pts) = lookup_pts(transfer, v) { container_pts.push(pts); } } } if container_pts.is_empty() { continue; } // Collect source taint at the specific arg position let mut src_caps = Cap::empty(); let mut src_origins: SmallVec<[TaintOrigin; 2]> = SmallVec::new(); if let Some(arg_group) = args.get(src_param) { for &v in arg_group { if let Some(taint) = state.get(v) { src_caps |= taint.caps; for orig in &taint.origins { push_origin_bounded(&mut src_origins, *orig); } } } } // When the primary callee is a Source (e.g. req.query.input // overrode storeInto as the callee), the source taint is // produced as the call's return, not yet in args. Use // return_bits as the source taint for the container store. if src_caps.is_empty() && !return_bits.is_empty() { src_caps = return_bits; src_origins = return_origins.clone(); } // Store source taint into container's heap objects if !src_caps.is_empty() { for pts in &container_pts { state .heap .store_set(pts, HeapSlot::Elements, src_caps, &src_origins); } } } } // Parameter-granularity points-to summary application. // // Extends the container-store channel above (which catches // `arr.push(v)` / `map.set(k, v)`) to direct field writes like // `obj.x = val` that `classify_container_op` does not recognise. // The callee's `PointsToSummary` records May-alias edges between // parameter positions and the return; at the call site we replay // each edge against the caller's taint state. // // * `Param(src) → Param(dst)`, union caller-arg[src]'s taint // into caller-arg[dst]'s heap slot. 
Sound because the // callee *may* have stored data derived from arg[src] into // an alias of arg[dst]; the caller must assume any later // read from arg[dst] could surface that taint. // * `Param(src) → Return`, union caller-arg[src]'s points-to // set into the call's return value, giving the result the // same heap identity as its input argument. Overlaps with // `param_container_to_return`; both channels are idempotent // so re-propagation is safe. // // Fresh-container factory synthesis: when the callee's // `PointsToSummary` marks a return path as a fresh allocation // (container literal or known constructor not tracing to any // parameter), synthesise a `HeapObjectId` keyed on the call's // SSA value and seed it into `dynamic_pts`. This closes the // factory-pattern cross-file gap, `const bag = makeBag()` // gives `bag` a stable heap identity so subsequent // `fillBag(bag, …)` / `bag[0]` operations have a heap cell // to store into or read from. // // Strictly additive: the existing `Param(i) → Return` edge // handling below joins the caller's argument pts when the // function also returns a parameter on some path, so a mixed // factory (`if (x) return []; else return arg`) carries both // the synthetic fresh cell and the aliased argument cells. if resolved_points_to.returns_fresh_alloc && let Some(dyn_ref) = transfer.dynamic_pts { let fresh = PointsToSet::singleton(HeapObjectId(inst.value)); let mut dyn_pts = dyn_ref.borrow_mut(); match dyn_pts.get(&inst.value) { Some(existing) => { let merged = existing.union(&fresh); dyn_pts.insert(inst.value, merged); } None => { dyn_pts.insert(inst.value, fresh); } } } // Overflow (the callee's alias graph exceeded // `MAX_ALIAS_EDGES`): conservatively treat *every* parameter // as aliasing every other parameter and the return. 
if resolved_points_to.overflow || !resolved_points_to.edges.is_empty() { use crate::summary::points_to::AliasPosition; // Effective edge set: when overflow is signalled, synthesise // the conservative all-pairs graph instead of reading the // possibly-truncated edge vector. type ParamToParamEdges = SmallVec<[(usize, usize); 8]>; type ParamToReturnEdges = SmallVec<[usize; 4]>; let (param_to_param_edges, param_to_return_edges): ( ParamToParamEdges, ParamToReturnEdges, ) = if resolved_points_to.overflow { let n = args.len(); let mut p2p: SmallVec<[(usize, usize); 8]> = SmallVec::new(); let mut p2r: SmallVec<[usize; 4]> = SmallVec::new(); for i in 0..n { p2r.push(i); for j in 0..n { if i != j { p2p.push((i, j)); } } } (p2p, p2r) } else { let mut p2p: SmallVec<[(usize, usize); 8]> = SmallVec::new(); let mut p2r: SmallVec<[usize; 4]> = SmallVec::new(); for edge in &resolved_points_to.edges { match (edge.source, edge.target) { (AliasPosition::Param(s), AliasPosition::Param(t)) => { p2p.push((s as usize, t as usize)); } (AliasPosition::Param(s), AliasPosition::Return) => { p2r.push(s as usize); } // Return → Param / Return → Return are not emitted // by the points-to analysis; ignore defensively. _ => {} } } (p2p, p2r) }; // Apply Param → Param edges: caller-arg[src] taint into // caller-arg[dst]'s heap objects *and* directly onto the // destination SSA value. Store-into-heap handles later // container-style reads from `dst`'s pts set; the direct // taint ensures field reads expressed as `Assign uses=[dst]` // (the common case when the caller's heap analysis did // not register an allocation site for `dst`) still surface // the aliased taint. // // The loop must borrow `state` mutably (for the heap // store and the direct taint merge), so it is written // inline instead of split across helper closures. for (src, dst) in ¶m_to_param_edges { // Collect src arg taint. 
let mut src_caps = Cap::empty(); let mut src_origins: SmallVec<[TaintOrigin; 2]> = SmallVec::new(); if let Some(arg_vals) = args.get(*src) { for &v in arg_vals { if let Some(taint) = state.get(v) { src_caps |= taint.caps; for orig in &taint.origins { push_origin_bounded(&mut src_origins, *orig); } } } } if src_caps.is_empty() { continue; } // Collect dst arg points-to for heap-level // propagation (cloned out so the mutable // `state.heap` borrow below is independent of the // immutable PTS lookup). let mut dst_pts: SmallVec<[PointsToSet; 2]> = SmallVec::new(); let mut dst_ssa_vals: SmallVec<[SsaValue; 2]> = SmallVec::new(); if let Some(arg_vals) = args.get(*dst) { for &v in arg_vals { dst_ssa_vals.push(v); if let Some(pts) = lookup_pts(transfer, v) { dst_pts.push(pts); } } } for pts in &dst_pts { state .heap .store_set(pts, HeapSlot::Elements, src_caps, &src_origins); } // Direct-taint the dst SSA value(s). Required when // the caller's heap analysis has no allocation site // for `dst` (common for plain class constructors in // Python / JS / Java without fine-grained // points-to). Without this, later reads expressed // as Assigns over `dst` would see no taint. for dv in &dst_ssa_vals { merge_taint_into(state, *dv, src_caps, &src_origins); } } // Apply Param → Return edges: the call result inherits the // source argument's points-to set. Re-runs the same // channel `resolved_container_to_return` drives a few // lines above, safe (idempotent union), and catches // cases where the callee returned a param through a // non-identity chain (e.g. `return Box::new(x)`). 
if !param_to_return_edges.is_empty() && let Some(dyn_ref) = transfer.dynamic_pts { for src in ¶m_to_return_edges { let mut src_pts: SmallVec<[PointsToSet; 2]> = SmallVec::new(); if let Some(arg_vals) = args.get(*src) { for &v in arg_vals { if let Some(pts) = lookup_pts(transfer, v) { src_pts.push(pts); } } } if src_pts.is_empty() { continue; } let mut dyn_pts = dyn_ref.borrow_mut(); for pts in &src_pts { match dyn_pts.get(&inst.value) { Some(existing) => { let merged = existing.union(pts); dyn_pts.insert(inst.value, merged); } None => { dyn_pts.insert(inst.value, pts.clone()); } } } } } } // Alias-aware taint propagation: when a.field becomes tainted and // a/b are base aliases, b.field should also be tainted. if !return_bits.is_empty() { if let Some(aliases) = transfer.base_aliases { if !aliases.is_empty() { propagate_taint_to_aliases( inst, state, return_bits, &return_origins, aliases, ssa, ); } } } // Outer-callee taint suppression: when find_classifiable_inner_call // overrode the callee (e.g. transform(req.query.data) → callee becomes // "req.query.data" Source, outer_callee="transform"), the Source label // produces return_bits. Check if the wrapper function blocks taint: // if its SSA summary shows no propagation, no source_caps, and no // container identity return, the return value is independent of its // arguments, clear return_bits. Additionally apply the wrapper's // sanitizer caps (StripBits transforms) so a sanitising wrapper // like `validate()` clears the relevant cap bits even // when the wrapper still propagates other taint. if !return_bits.is_empty() && has_source_label { if let Some(ref oc) = info.call.outer_callee { if let Some(ref oc_sum) = resolve_callee_hinted( transfer, oc, caller_func, info.call.call_ordinal, arity_hint, ) { if !oc_sum.propagates_taint && oc_sum.source_caps.is_empty() { // Outer callee blocks taint: no param→return flow, // no internal sources reaching return. 
return_bits = Cap::empty(); return_origins.clear(); } else if !oc_sum.sanitizer_caps.is_empty() { return_bits &= !oc_sum.sanitizer_caps; } } } } // Chain-wrapper sanitizer suppression: when the chain shape // `outer(... wrapper() ...)` puts a sanitising wrapper // function between the inner Source and the outer call, // mark the call result's symbol as validated so any // downstream sink event over the same value fires with // `all_validated = true`, suppressing the taint finding and // (via [`record_path_safe_suppressed_span`]) the // `state-unauthed-access` finding on the same span. // `chain_wrapper_sanitizer_caps` is computed up-front above // so the container-element-write hook can also consult it. if has_source_label && !chain_wrapper_sanitizer_caps.is_empty() { if let Some(name) = ssa .value_defs .get(inst.value.0 as usize) .and_then(|vd| vd.var_name.as_deref()) { if let Some(sym) = transfer.interner.get(name) { state.validated_must.insert(sym); state.validated_may.insert(sym); } } } // JS/TS array-method validator-callback narrowing. When a // call shape matches `.filter()` // (or `find` / `findLast`), strip the caps that flowed into // `return_bits` from the receiver — the result holds only // elements the validator approved. Strict-additive: the // helper is a no-op when the callback name does not match // the BooleanTrueIsValid bucket, leaving the default // propagation result unchanged. See // [`try_array_method_validator_callback_narrowing`] for the // motivating CVE pair. try_array_method_validator_callback_narrowing( inst, info, callee, args, &mut return_bits, &mut return_origins, state, transfer, ssa, ); // Constructor cap narrowing: a `new X(...)` call returns an object // instance, not a string. Caps that name a string-shaped sink // pattern (path argument, format string, URL component, JSON // input) cannot fire on a wrapper object, so they must not // survive the construction. 
Without this narrowing, a tainted // argument to `new SdkClient(secret)` propagates `Cap::all()` // into the wrapper, every method call on the wrapper inherits // those bits via receiver propagation, and any downstream // `fs.write*` / `printf` / `JSON.parse` on a string property // returned by an SDK method (e.g. `client.create().id`) flags // a phantom flow that has no real path-traversal etc. payload. // // Caps preserved (legitimately travel through wrappers): // - SHELL_ESCAPE / SQL_QUERY / CODE_EXEC / DESERIALIZE: a // wrapper that captures a tainted command/query string can // replay it via methods, the bit must survive the wrap. // - SSRF / DATA_EXFIL: URL/payload concerns persist on URL or // content-bearing objects. // - UNAUTHORIZED_ID: ownership obligation persists on a // wrapper that carries a request-bound identifier. // - ENV_VAR: provenance marker, never a sink trigger by // itself. // - HTML_ESCAPE: kept for safety, conservative dual concern // (a wrapper used as a string in template rendering). // - CRYPTO: kept conservatively. // // Caps stripped on construction: // - FILE_IO: path strings only. // - FMT_STRING: printf-style format args only. // - URL_ENCODE: URL components only. // - JSON_PARSE: parser inputs only. if info.call.is_constructor && !return_bits.is_empty() { let strip = Cap::FILE_IO | Cap::FMT_STRING | Cap::URL_ENCODE | Cap::JSON_PARSE; return_bits &= !strip; if return_bits.is_empty() { return_origins.clear(); } } // Write result if return_bits.is_empty() { state.remove(inst.value); } else { state.set( inst.value, VarTaint { caps: return_bits, origins: return_origins, uses_summary: resolved_callee, }, ); } } SsaOp::Assign(uses) => { // Check for sanitizer labels let mut sanitizer_bits = Cap::empty(); for lbl in &info.taint.labels { if let DataLabel::Sanitizer(bits) = lbl { sanitizer_bits |= *bits; } } // Collect taint from operands. 
Equality-with-constant comparisons // (`x === 'literal'`) produce a boolean result that carries no // attacker-controlled data, so skip unioning operand caps into the // result. Source/sanitizer labels on this same node still apply // normally below. let mut combined_caps = Cap::empty(); let mut combined_origins: SmallVec<[TaintOrigin; 2]> = SmallVec::new(); let mut inherited_summary = false; if !info.is_eq_with_const { for &use_val in uses { if let Some(taint) = state.get(use_val) { combined_caps |= taint.caps; inherited_summary |= taint.uses_summary; for orig in &taint.origins { push_origin_bounded(&mut combined_origins, *orig); } } } } // Synthetic field-write inheritance. When SSA lowering emits // `u_new = Assign(rhs)` to model `u.f = rhs` (an obj-update // synth), `u_new` represents the same logical object after the // field write, it retains every other field's taint. The // base-only Assign uses include only the rhs, so without this // step a clean rhs (`u.Path = "/foo"`) would zero out every // tainted field on the prior `u`. Owncast CVE-2023-3188 hit // this: `requestURL.Path = "/.well-known/webfinger"` killed the // tainted host carried by `requestURL` from `url.Parse(tainted)`. 
if let Some((receiver, _fid)) = ssa.field_writes.get(&inst.value).copied() { if let Some(taint) = state.get(receiver) { combined_caps |= taint.caps; inherited_summary |= taint.uses_summary; for orig in &taint.origins { push_origin_bounded(&mut combined_origins, *orig); } } } // Apply sanitizer combined_caps &= !sanitizer_bits; // Alias-aware sanitization: propagate through must-aliased field paths if !sanitizer_bits.is_empty() { if let Some(aliases) = transfer.base_aliases { if !aliases.is_empty() { propagate_sanitization_to_aliases( inst, state, sanitizer_bits, aliases, ssa, ); } } } // Check for source labels for lbl in &info.taint.labels { if let DataLabel::Source(bits) = lbl { combined_caps |= *bits; let callee_str = info.call.callee.as_deref().unwrap_or(""); let source_kind = crate::labels::infer_source_kind(*bits, callee_str); let origin = TaintOrigin { node: inst.cfg_node, source_kind, source_span: None, }; push_origin_bounded(&mut combined_origins, origin); } } // Alias-aware taint propagation if !combined_caps.is_empty() { if let Some(aliases) = transfer.base_aliases { if !aliases.is_empty() { propagate_taint_to_aliases( inst, state, combined_caps, &combined_origins, aliases, ssa, ); } } } if combined_caps.is_empty() { state.remove(inst.value); } else { state.set( inst.value, VarTaint { caps: combined_caps, origins: combined_origins.clone(), uses_summary: inherited_summary, }, ); } // Synthetic base-update Assign emitted by SSA lowering for // `obj.f = rhs`. The side-table maps this synth assign's // value → (prior_receiver, FieldId), so we lift it into a // field WRITE: union rhs taint into every `(loc, field)` // cell for non-Top `loc ∈ pt(prior_receiver)`. 
if let Some(pf) = transfer.pointer_facts { if let Some((receiver, fid)) = ssa.field_writes.get(&inst.value).copied() { let pt = pf.pt(receiver); if !pt.is_empty() && !pt.is_top() && !combined_caps.is_empty() { let rhs_taint = VarTaint { caps: combined_caps, origins: combined_origins.clone(), uses_summary: inherited_summary, }; // W4: validation channels lift from the rhs's // symbol-level bits. An anonymous SSA temp on // the rhs has no name → contributes (false, // false), matching "no validation". An Assign // with multiple operands ANDs `must` (every // operand must be must-validated for the cell) // and ORs `may`. let mut must_all = true; let mut may_any = false; let mut saw_use = false; if let SsaOp::Assign(uses) = &inst.op { for &u in uses { saw_use = true; let (am, av) = ssa_value_validated_bits(u, ssa, transfer.interner, state); must_all &= am; may_any |= av; } } if !saw_use { must_all = false; } for loc in pt.iter() { let key = crate::taint::ssa_transfer::state::FieldTaintKey { loc, field: fid, }; state.add_field(key, rhs_taint.clone(), must_all, may_any); } } } } } SsaOp::Const(_) | SsaOp::Nop => { // No taint, this is the kill mechanism for `x = "literal"` after // `x = source()`. The fresh SsaValue carries zero caps. } SsaOp::Param { .. } | SsaOp::SelfParam => { // Seeding order for inbound taint on this body's param: // 1. Per-call-site seed (inline analysis only). // `param_seed[index]` for `Param { index }`, or // `receiver_seed` for `SelfParam`. Takes precedence // because it reflects the exact caller argument taint // for this specific call. // 2. Lexical-scope seed (`global_seed`), read in ancestor // order: parent body first, then the top-level scope // (`BodyId(0)`) to pick up re-keyed JS/TS combined_exit // entries (see `filter_seed_to_toplevel`). // // `SelfParam` receives the same treatment as positional `Param`: // both represent inbound values whose taint comes from the // surrounding scope. 
let mut seeded_from_scope = false; // Step 1: per-call-site seed for inline analysis. let per_call_taint: Option<&VarTaint> = match &inst.op { SsaOp::Param { index } => transfer .param_seed .and_then(|ps| ps.get(*index)) .and_then(|slot| slot.as_ref()), SsaOp::SelfParam => transfer.receiver_seed, _ => None, }; if let Some(taint) = per_call_taint { let remapped_origins: SmallVec<[TaintOrigin; 2]> = taint .origins .iter() .map(|o| TaintOrigin { node: inst.cfg_node, source_kind: o.source_kind, source_span: o.source_span, }) .collect(); state.set( inst.value, VarTaint { caps: taint.caps, origins: remapped_origins, uses_summary: true, }, ); seeded_from_scope = true; } // Step 2: lexical-scope seed via ancestor-chain lookup. if !seeded_from_scope { if let Some(seed) = &transfer.global_seed { if let Some(var_name) = ssa .value_defs .get(inst.value.0 as usize) .and_then(|vd| vd.var_name.as_deref()) { // Ancestor chain: parent body first (for direct // lexical captures), then BodyId(0) (for JS/TS // pass-2 re-keyed entries). Deduplicated so a // body whose parent is already the top-level // only looks up once. let mut ancestors: SmallVec<[BodyId; 2]> = SmallVec::new(); if let Some(pid) = transfer.parent_body_id { ancestors.push(pid); } if !ancestors.contains(&BodyId(0)) { ancestors.push(BodyId(0)); } for body_id in ancestors { let key = BindingKey::new(var_name, body_id); if let Some(taint) = seed_lookup(seed, &key) { // Remap origins to this body's Param cfg_node: // the meaningful anchor where taint enters // this body. Preserve source_span for // diagnostics (captured in // extract_ssa_exit_state). 
let remapped_origins: SmallVec<[TaintOrigin; 2]> = taint .origins .iter() .map(|o| TaintOrigin { node: inst.cfg_node, source_kind: o.source_kind, source_span: o.source_span, }) .collect(); state.set( inst.value, VarTaint { caps: taint.caps, origins: remapped_origins, uses_summary: true, }, ); seeded_from_scope = true; break; } } } } } // Handler-param auto-seed: formal parameters whose names imply // user input (e.g. `userInput`, `payload`, `cmd`) start tainted // so downstream sinks still fire when a function has no // registered caller (typical for controller methods, handler // dispatch functions, and stream lambda bodies). Skipped in // summary-extraction mode so baseline probes keep their // intrinsic-source contract. Gate is set by the caller, e.g. // always-on for JS/TS, only AnonymousFunction bodies for Java. // // The `Param` branch fires for both real formal parameters and // synthetic externals injected by lowering for free / closure- // captured variables (`SsaBody.synthetic_externals`). Only real // formals should receive the heuristic seed: a closure capturing // an out-of-scope `userId` / `cmd` / `payload` is NOT a handler // entry point — the variable is supplied by the enclosing scope // and seeding it here produces phantom sources anchored to the // function's declaration line. if transfer.auto_seed_handler_params && !seeded_from_scope && matches!(&inst.op, SsaOp::Param { .. }) && !ssa.synthetic_externals.contains(&inst.value) { if let Some(var_name) = ssa .value_defs .get(inst.value.0 as usize) .and_then(|vd| vd.var_name.as_deref()) { // Direct match: the Param's name itself is a handler // identifier (e.g. `input`, `cmd`, `userId`). // // Root-prefix match: dotted-path Params produced by // lowering for member-expression uses inside the body // (`input.cmd` — an unbacked phantom Param) inherit the // seed when their *root* is a handler-param formal. 
// Without this, the field-aware suppression downstream // sees `input.cmd` as a "clean field" and strips // `input`'s taint, even though `input.cmd` is just a // structural projection of the auto-seeded formal. let root_is_handler = var_name .split_once('.') .map(|(root, _)| crate::labels::is_js_ts_handler_param_name(root)) .unwrap_or(false); if crate::labels::is_js_ts_handler_param_name(var_name) || root_is_handler { let origin = TaintOrigin { node: inst.cfg_node, source_kind: SourceKind::UserInput, source_span: None, }; state.set( inst.value, VarTaint { caps: Cap::all(), origins: SmallVec::from_elem(origin, 1), uses_summary: false, }, ); } } } } SsaOp::Phi(_) => { // Phis processed separately above, shouldn't appear in body } SsaOp::Undef => { // Undef is a phi-operand sentinel that lives in block 0's // body so it has a valid `value_defs` entry. It contributes // no taint: leave `state` unchanged so the phi operand // lookup (`state.get(operand_val)`) returns `None` for // predecessors whose incoming edge carries no definition. } SsaOp::FieldProj { receiver, field, .. } => { // Field projection: pass the receiver's full taint record // through to the projected value. Untainted receiver → // untainted projection (no entry inserted). let mut combined: Option = state.get(*receiver).cloned(); // W4: collect cell validation channels alongside taint. // `must` AND-intersects across the contributing cells; a // single un-validated cell wins because it represents a // path on which the projection isn't validated. `may` // OR-unions. let mut cell_must_all: Option = None; let mut cell_may_any = false; // When per-body PointsToFacts are available, also union // taint from each `(loc, field)` cell for `loc ∈ pt(receiver)`. // Carries cross-method field flow within a single body. 
if let Some(pf) = transfer.pointer_facts { let pt = pf.pt(*receiver); if !pt.is_empty() && !pt.is_top() { for loc in pt.iter() { // Read the specific `(loc, *field)` cell first // (per-field-name flow from cross-call writes). // When it's absent, fall back to the // `(loc, ANY_FIELD)` wildcard, populated by the // [`ContainerOp::Writeback`] handler for sinks // like `json.NewDecoder(r.Body).Decode(&dest)` // that taint every field of the destination // wholesale. The fallback is gated on // specific-field absence so existing field-cell // semantics are bit-identical when the writer // used a named field. ANY_FIELD is distinct // from `ELEM` (container-element wildcard) to // avoid a struct-with-`length`-field reading // taint from a sibling array's `push` writes. let mut hit_specific = false; for field_id in [*field, crate::ssa::ir::FieldId::ANY_FIELD].iter().copied() { if field_id == crate::ssa::ir::FieldId::ANY_FIELD && hit_specific { break; } if field_id == crate::ssa::ir::FieldId::ANY_FIELD && *field == crate::ssa::ir::FieldId::ANY_FIELD { continue; } let key = crate::taint::ssa_transfer::state::FieldTaintKey { loc, field: field_id, }; if let Some(cell) = state.get_field(key) { if field_id == *field { hit_specific = true; } let t = cell.taint.clone(); cell_must_all = Some(match cell_must_all { Some(prev) => prev && cell.validated_must, None => cell.validated_must, }); cell_may_any |= cell.validated_may; combined = Some(match combined { Some(mut acc) => { acc.caps |= t.caps; acc.uses_summary |= t.uses_summary; // A7 audit: route the cell's origins // through `push_origin_bounded` so the // cap-driven survivor selection (sorted // by `origin_sort_key`, deterministic // truncation when over cap, observability // counter increments) applies the same // way as the per-SSA-value lattice. The // pre-A7 inline walk only deduped by // node and silently grew past // `effective_max_origins` when the cell // had a wider origin set than the cap. 
for o in &t.origins { push_origin_bounded(&mut acc.origins, *o); } acc } None => { // First contribution: still apply the // bounded-push so a cell built up // above-cap upstream gets re-bounded // here at read time. `push_origin_bounded` // dedups by node, sorts deterministically. let mut bounded: SmallVec<[TaintOrigin; 2]> = SmallVec::new(); for o in &t.origins { push_origin_bounded(&mut bounded, *o); } VarTaint { caps: t.caps, origins: bounded, uses_summary: t.uses_summary, } } }); } } } } } if let Some(t) = combined { state.set(inst.value, t); } // W4: seed the projected value's symbol-level validation // bits from the cells that fed it. This is the read-side // counterpart to the W1 / W2 / W3 cell-write hooks: if // every cell that contributed to this projection was // must-validated, the projected value is must-validated; // any may-validated cell sets may. Skipped when no cell // contributed (`cell_must_all == None`). if let Some(must_all) = cell_must_all { if let Some(name) = ssa .value_defs .get(inst.value.0 as usize) .and_then(|vd| vd.var_name.as_deref()) { if let Some(sym) = transfer.interner.get(name) { if must_all { state.validated_must.insert(sym); } if cell_may_any { state.validated_may.insert(sym); } } } } } } // Container read counterpart, post-match. Also invoked inline // before container-handled early-returns inside the Call arm. if matches!(&inst.op, SsaOp::Call { .. }) { apply_container_elem_read_w4(inst, ssa, transfer, state); } // Constraint propagation through instructions if let Some(ref mut env) = state.path_env { match &inst.op { SsaOp::Assign(uses) if uses.len() == 1 => { // Copy: propagate facts from source to destination let src_fact = env.get(uses[0]); if !src_fact.is_top() { env.refine(inst.value, &src_fact); env.assert_equal(inst.value, uses[0]); } // Cast/assertion type narrowing. // // If this Assign's CFG node is a cast/type-assertion expression, // narrow the destination value's type in PathEnv. 
// // Semantics vary by language: // - Java casts: runtime-checked, type is reliably narrowed // - TypeScript `as`: compile-time assertion only, not runtime proof // - Go type assertions: runtime-checked (direct form) // // In ALL cases: taint is preserved. Narrowing the type does NOT // erase taint, a tainted value cast to String is still tainted. let node_info = &cfg[inst.cfg_node]; if let Some(ref cast_type) = node_info.cast_target_type { if let Some(kind) = crate::constraint::solver::parse_type_name(cast_type) { let mut fact = constraint::ValueFact::top(); fact.types = constraint::TypeSet::singleton(&kind); fact.null = constraint::Nullability::NonNull; env.refine(inst.value, &fact); } } } SsaOp::Const(Some(text)) => { // Constant: seed fact from literal value if let Some(cv) = constraint::ConstValue::parse_literal(text) { let mut fact = constraint::ValueFact::top(); fact.exact = Some(cv.clone()); match &cv { constraint::ConstValue::Int(i) => { fact.lo = Some(*i); fact.hi = Some(*i); fact.types = constraint::TypeSet::singleton( &crate::ssa::type_facts::TypeKind::Int, ); fact.null = constraint::Nullability::NonNull; } constraint::ConstValue::Bool(b) => { fact.bool_state = if *b { constraint::BoolState::True } else { constraint::BoolState::False }; fact.types = constraint::TypeSet::singleton( &crate::ssa::type_facts::TypeKind::Bool, ); fact.null = constraint::Nullability::NonNull; } constraint::ConstValue::Null => { fact.null = constraint::Nullability::Null; fact.types = constraint::TypeSet::singleton( &crate::ssa::type_facts::TypeKind::Null, ); } constraint::ConstValue::Str(_) => { fact.types = constraint::TypeSet::singleton( &crate::ssa::type_facts::TypeKind::String, ); fact.null = constraint::Nullability::NonNull; } } env.refine(inst.value, &fact); } } _ => { // All other ops: no constraint propagation (conservative) } } } // Forward abstract value transfer if let Some(ref mut abs) = state.abstract_state { transfer_abstract(inst, cfg, abs, Some(transfer.lang)); 
} // Cross-file abstract return injection. // Applied after transfer_abstract so summary-provided facts override the // default Top that transfer_abstract assigns to unknown callees. if let Some(ref abs_val) = callee_return_abstract { if let Some(ref mut abs) = state.abstract_state { abs.set(inst.value, abs_val.clone()); } } } /// Compute abstract values for an SSA instruction. /// /// Propagates interval and string domain facts forward through constants, /// copies, binary arithmetic, and concatenation. Conservative (Top) for /// unknown operations (calls, sources, params). /// /// `lang` is consulted only for language-specific transfer rules (currently /// Rust path primitives, `fs::canonicalize`, `.starts_with`, etc.); `None` /// disables them and matches the pre-PathFact behaviour exactly. fn transfer_abstract(inst: &SsaInst, cfg: &Cfg, abs: &mut AbstractState, lang: Option) { use crate::abstract_interp::{AbstractValue, BitFact, IntervalFact, PathFact, StringFact}; use crate::cfg::BinOp; let info = &cfg[inst.cfg_node]; match &inst.op { SsaOp::Const(Some(text)) => { let trimmed = text.trim(); // Try integer if let Ok(n) = trimmed.parse::() { abs.set( inst.value, AbstractValue { interval: IntervalFact::exact(n), string: StringFact::top(), bits: BitFact::from_const(n), path: PathFact::top(), }, ); } else if is_string_const(trimmed) { let s = strip_string_quotes(trimmed); // String literal: derive PathFact axes from the *literal* // content. An empty string has no `..` segment and no // absolute root, both axes proven safe, so a Const `""` // (Python / JS / TS / Java rejection-arm sentinel) carries a // path-safe fact even without a per-language allocator // recogniser like Rust's `String::new()`. Non-empty // literals also surface their own dotdot/absolute axes // when the literal text proves them. 
let mut pf = PathFact::top(); if !s.contains("..") { pf = pf.with_dotdot_cleared(); } if !(s.starts_with('/') || s.starts_with('\\')) { pf = pf.with_absolute_cleared(); } abs.set( inst.value, AbstractValue { interval: IntervalFact::top(), string: StringFact::exact(&s), bits: BitFact::top(), path: pf, }, ); } // Bool/Null/other: leave as Top } // Template-literal / string-prefix override: when the RHS is // `\`scheme://host/…${x}\`` or `"scheme://host/" + x`, seed the // result's StringFact prefix regardless of interpolation arity. Taint // still flows through the normal taint lattice; the prefix is only // consumed by `is_string_safe_for_ssrf` to suppress SSRF sinks on // fixed-host URLs. Placed before the arithmetic/copy arms so it wins // over the default Top StringFact. SsaOp::Assign(_) if info.string_prefix.is_some() => { let prefix = info.string_prefix.as_deref().unwrap(); abs.set( inst.value, AbstractValue { interval: IntervalFact::top(), string: StringFact::from_prefix(prefix), bits: BitFact::top(), path: PathFact::top(), }, ); } // Same prefix-from-CFG override for Call instructions whose result is // the variable binding (e.g. `url = wrapper('lit' + userPath)`). The // CFG node carries `string_prefix` extracted from the call's first // positional argument; without this arm the Call result's StringFact // is Top and downstream SSRF suppression (`is_call_abstract_safe` // looking at `axios.get(url)`'s own first arg) cannot read the lock. // Mirrors the same passthrough-heuristic that the // `is_call_abstract_safe` node-attached check at the sink site // already relies on. SsaOp::Call { .. } if info.string_prefix.is_some() => { let prefix = info.string_prefix.as_deref().unwrap(); abs.set( inst.value, AbstractValue { interval: IntervalFact::top(), string: StringFact::from_prefix(prefix), bits: BitFact::top(), path: PathFact::top(), }, ); } SsaOp::Assign(uses) if uses.len() == 1 => { // Single-use Assign with bin_op + literal operand. 
// When a binary expression like `x & 0x07` has one identifier use // and one numeric literal, the SSA sees only the identifier (1 use). // Use bin_op_const from the CFG node to reconstruct the full binary // operation for abstract transfer. if let (Some(bin_op), Some(const_val)) = (info.bin_op, info.bin_op_const) { let var_abs = abs.get(uses[0]); let const_abs = AbstractValue { interval: IntervalFact::exact(const_val), string: StringFact::top(), bits: BitFact::from_const(const_val), path: PathFact::top(), }; let result_interval = match bin_op { BinOp::Add => var_abs.interval.add(&const_abs.interval), BinOp::Sub => var_abs.interval.sub(&const_abs.interval), BinOp::Mul => var_abs.interval.mul(&const_abs.interval), BinOp::Div => var_abs.interval.div(&const_abs.interval), BinOp::Mod => var_abs.interval.modulo(&const_abs.interval), BinOp::BitAnd => var_abs.interval.bit_and(&const_abs.interval), BinOp::BitOr => var_abs.interval.bit_or(&const_abs.interval), BinOp::BitXor => var_abs.interval.bit_xor(&const_abs.interval), BinOp::LeftShift => var_abs.interval.left_shift(&const_abs.interval), BinOp::RightShift => var_abs.interval.right_shift(&const_abs.interval), BinOp::Eq | BinOp::NotEq | BinOp::Lt | BinOp::LtEq | BinOp::Gt | BinOp::GtEq => IntervalFact { lo: Some(0), hi: Some(1), }, }; let result_bits = match bin_op { BinOp::BitAnd => var_abs.bits.bit_and(&const_abs.bits), BinOp::BitOr => var_abs.bits.bit_or(&const_abs.bits), BinOp::BitXor => var_abs.bits.bit_xor(&const_abs.bits), BinOp::LeftShift => var_abs.bits.left_shift(&const_abs.interval), BinOp::RightShift => var_abs.bits.right_shift(&const_abs.interval), _ => BitFact::top(), }; let val = AbstractValue { interval: result_interval, string: StringFact::top(), bits: result_bits, path: PathFact::top(), }; if !val.is_top() { abs.set(inst.value, val); } } else { // Copy: propagate abstract value (including bits) let src = abs.get(uses[0]); if !src.is_top() { abs.set(inst.value, src); } } } SsaOp::Assign(uses) if 
uses.len() == 2 => { let lhs_abs = abs.get(uses[0]); let rhs_abs = abs.get(uses[1]); if let Some(bin_op) = info.bin_op { // Known operator → apply interval transfer let result_interval = match bin_op { BinOp::Add => lhs_abs.interval.add(&rhs_abs.interval), BinOp::Sub => lhs_abs.interval.sub(&rhs_abs.interval), BinOp::Mul => lhs_abs.interval.mul(&rhs_abs.interval), BinOp::Div => lhs_abs.interval.div(&rhs_abs.interval), BinOp::Mod => lhs_abs.interval.modulo(&rhs_abs.interval), BinOp::BitAnd => lhs_abs.interval.bit_and(&rhs_abs.interval), BinOp::BitOr => lhs_abs.interval.bit_or(&rhs_abs.interval), BinOp::BitXor => lhs_abs.interval.bit_xor(&rhs_abs.interval), BinOp::LeftShift => lhs_abs.interval.left_shift(&rhs_abs.interval), BinOp::RightShift => lhs_abs.interval.right_shift(&rhs_abs.interval), // Comparisons produce boolean 0/1 BinOp::Eq | BinOp::NotEq | BinOp::Lt | BinOp::LtEq | BinOp::Gt | BinOp::GtEq => IntervalFact { lo: Some(0), hi: Some(1), }, }; // For Add: also handle string concatenation (+ is overloaded) let result_string = if bin_op == BinOp::Add { lhs_abs.string.concat(&rhs_abs.string) } else { StringFact::top() }; // Bitwise transfer via BitFact subdomain let result_bits = match bin_op { BinOp::BitAnd => lhs_abs.bits.bit_and(&rhs_abs.bits), BinOp::BitOr => lhs_abs.bits.bit_or(&rhs_abs.bits), BinOp::BitXor => lhs_abs.bits.bit_xor(&rhs_abs.bits), BinOp::LeftShift => lhs_abs.bits.left_shift(&rhs_abs.interval), BinOp::RightShift => lhs_abs.bits.right_shift(&rhs_abs.interval), _ => BitFact::top(), }; let val = AbstractValue { interval: result_interval, string: result_string, bits: result_bits, path: PathFact::top(), }; if !val.is_top() { abs.set(inst.value, val); } } else { // Unknown operator: conservative for interval and bits, // but still propagate string concat (prefix from LHS, suffix from RHS) let string_result = lhs_abs.string.concat(&rhs_abs.string); if !string_result.is_top() { abs.set( inst.value, AbstractValue { interval: IntervalFact::top(), 
string: string_result, bits: BitFact::top(), path: PathFact::top(), }, ); } } } // Known integer-producing calls get a bounded interval so downstream // arithmetic transfer produces useful facts (e.g. parseInt(x) * 10). // Unknown calls: implicit Top (don't store). SsaOp::Call { callee, .. } if is_int_producing_callee(callee) => { abs.set( inst.value, AbstractValue { interval: IntervalFact { lo: Some(i32::MIN as i64), hi: Some(i32::MAX as i64), }, string: StringFact::top(), bits: BitFact::top(), path: PathFact::top(), }, ); } // Path-primitive calls, per-language classifiers map known stdlib // sanitisers (`fs::canonicalize`, `os.path.normpath`, // `path.normalize`, `filepath.Clean`, `Path.normalize()`, // `File.expand_path`, `realpath`, `std::filesystem::canonical`) // onto a PathFact effect on the result. See // `crate::abstract_interp::path_domain::classify_path_primitive_for_lang`. // // Rust-only (gated by inner `matches!(lang, Some(Lang::Rust))` checks): // * `s.replace("..", "")` clears the `..` axis. // * Structural variant-wrapper transparency (`Some(s)` / `Ok(s)`). // * Zero-arg fresh-allocator constructor (`String::new()`). // // Other supported languages get the path-primitive transfer only; // their grammar-specific extensions would slot in here behind a // similar inner gate. SsaOp::Call { callee, args, receiver, .. } if lang.is_some() => { // Determine the "input" SSA value: receiver for method calls, // first positional arg for free-function calls. let input_val = receiver .as_ref() .copied() .or_else(|| args.first().and_then(|g| g.first().copied())); let input_fact = input_val .map(|v| abs.get(v).path) .unwrap_or_else(PathFact::top); // Primary path-producing primitives, per-language dispatch. 
let lang_unwrapped = lang.expect("guard ensures lang.is_some()"); if let Some(pf) = crate::abstract_interp::path_domain::classify_path_primitive_for_lang( lang_unwrapped, callee, &input_fact, ) { abs.set(inst.value, AbstractValue::with_path_fact(pf)); } else if matches!(lang, Some(Lang::Rust)) { // Rust-specific: `.replace(...)` sanitiser, variant-wrapper // transparency, and zero-arg fresh-allocator transfer. // These rely on Rust grammar conventions (scoped `Type::method`, // upper-camel-case variant ctor) that don't generalise. // // `.replace(...)` sanitiser on a string receiver. The Call // result re-binds the sanitised string; downstream // `Path::new` / `PathBuf::from` carries the cleared axis. // The literal needle is read from the first argument's // StringFact (exact value), which `SsaOp::Const` seeds for // string literals during the same pass. let leaf = crate::callgraph::callee_leaf_name(callee); let mut handled = false; if leaf == "replace" { if let Some(first_arg) = args.first().and_then(|g| g.first()) { let arg_string = abs.get(*first_arg).string; let needle = arg_string .domain .as_ref() .and_then(|d| (d.len() == 1).then(|| d[0].clone())); if let Some(needle) = needle { let mut new_fact = input_fact.clone(); let mut narrowed = false; if needle == ".." { new_fact = new_fact.with_dotdot_cleared(); narrowed = true; } else if needle == "/" || needle == "\\" { new_fact = new_fact.with_absolute_cleared(); narrowed = true; } if narrowed { abs.set(inst.value, AbstractValue::with_path_fact(new_fact)); handled = true; } } } } // Structural variant-wrapper transparency. When a call is // a one-positional-argument variant / type constructor // (receiver-less; callee leaf begins with ASCII upper-case //, the // [`crate::abstract_interp::path_domain::is_structural_variant_ctor`] // gate), its result inherits the joined PathFact of every // SSA value the lowering recorded for that single // positional argument. 
Covers `Some(s)`, `Ok(s)`, // `Err(s)`, `Box::new(s)`, and user-defined single-field // variants / tuple structs alike, the classification is // deliberately name-agnostic, so a freshly introduced // wrapper variant participates without code change. // // Positional arity is read from the CFG's // `info.call.arg_uses` (the authoritative list), not // from `args.len()`: SSA lowering appends an implicit // group of chained-call uses after the positional // groups, so `args.len()` over-counts. For the // positional group itself we join the PathFacts across // all contributing SsaValues, chained calls inside the // argument (`Some(s.to_string())`) surface every uses' // value; the join picks the most precise axis each // value proves. if !handled && receiver.is_none() && info.call.arg_uses.len() == 1 && crate::abstract_interp::path_domain::is_structural_variant_ctor(callee) { if let Some(group) = args.first() { let mut joined_inner: Option = None; for &v in group { let f = abs.get(v).path; if f.is_top() { continue; } joined_inner = Some(match joined_inner { None => f, Some(prev) => prev.join(&f), }); } if let Some(inner_fact) = joined_inner { abs.set(inst.value, AbstractValue::with_path_fact(inner_fact)); handled = true; } } } // Structural zero-argument allocator / constructor. // Callee is a Rust scoped identifier (contains `::`) whose // parent segment (e.g. `String` in `String::new`) begins // with ASCII upper-case, the call has no receiver and no // arguments, and the node carries no Source label , // i.e. the helper is a fresh-allocation entry point, not // an external-input read. Zero inputs ⇒ the result // carries no attacker-controlled path content and is // provably free of `..` components and absolute roots. // This closes the early-return path of sanitisers whose // rejection returns `String::new()` / `PathBuf::new()` / // etc., without a hard-coded allocator name list. 
if !handled && receiver.is_none() && args.is_empty() && has_typeprefix_upper_scoped(callee) && !has_source_label_on_node(info) { let fact = PathFact::top() .with_dotdot_cleared() .with_absolute_cleared(); abs.set(inst.value, AbstractValue::with_path_fact(fact)); } } } SsaOp::Source | SsaOp::CatchParam | SsaOp::Param { .. } => { // Untrusted / unknown: Top (no abstract knowledge) } _ => {} } } /// Re-export from type_facts for use in transfer_abstract. fn is_int_producing_callee(callee: &str) -> bool { crate::ssa::type_facts::is_int_producing_callee(callee) } /// Structural check: does `callee` look like a Rust scoped identifier /// whose parent segment is a type (upper-camel-case)? /// /// Used by the zero-argument-allocator arm of `transfer_abstract` to /// recognise `Type::new` / `Type::default` / `Type::with_capacity` / /// `Type::empty`, and any user-defined associated allocator, as a /// fresh-allocation site without hard-coding the leaf name. The check /// is deliberately conservative: /// /// * Must contain at least one `::` separator. /// * The segment *before* the final leaf must start with an ASCII /// upper-case letter and contain only ASCII alphanumeric / `_` /// characters, Rust's grammar for type identifiers. (Module-only /// paths like `std::env` don't qualify; the gate fires only on /// type paths like `String::new`.) /// /// Returns `false` on empty input or bare function calls. fn has_typeprefix_upper_scoped(callee: &str) -> bool { // `peel_identity_suffix` strips trailing `.unwrap()` etc. so // `String::new.unwrap` normalises to `String::new`. Fallback to the // raw callee when peeling produces an empty string. 
/// Returns the text between a matching pair of surrounding quotes.
///
/// A literal counts as quoted when it starts and ends with the same quote
/// character (`"` or `'`) and those are two distinct characters, so a lone
/// `"` or `'` is not a quoted literal. Returns `None` for anything else,
/// including mixed quotes such as `"abc'`.
fn string_literal_body(text: &str) -> Option<&str> {
    ['"', '\'']
        .iter()
        .find_map(|&quote| text.strip_prefix(quote)?.strip_suffix(quote))
}

/// Check if a constant text is a string literal (quoted with `"` or `'`).
fn is_string_const(text: &str) -> bool {
    string_literal_body(text).is_some()
}

/// Strip the surrounding quotes from a string literal.
///
/// Text that is not a quoted literal is returned unchanged.
fn strip_string_quotes(text: &str) -> String {
    string_literal_body(text).unwrap_or(text).to_string()
}
fn collect_block_events( block: &SsaBlock, cfg: &Cfg, ssa: &SsaBody, transfer: &SsaTaintTransfer, mut state: SsaTaintState, events: &mut Vec, induction_vars: &HashSet, pred_states: Option<&PredStates>, ) { // Replay phis to get accurate state (mirrors transfer_block phi handling) let block_idx = block.id.0 as usize; for phi in &block.phis { if let SsaOp::Phi(ref operands) = phi.op { let is_induction = induction_vars.contains(&phi.value); let mut combined_caps = Cap::empty(); let mut combined_origins: SmallVec<[TaintOrigin; 2]> = SmallVec::new(); let mut all_tainted_validated = true; let mut any_tainted = false; for &(pred_blk, operand_val) in operands { // Skip back-edge operands for induction vars if is_induction && pred_blk.0 >= block.id.0 { continue; } // Use predecessor-specific state when available let operand_taint = if let Some(ps) = pred_states { ps.get(&(block_idx, pred_blk.0 as usize)) .and_then(|pred_st| pred_st.get(operand_val)) } else { None }; let operand_taint = operand_taint.or_else(|| state.get(operand_val)); if let Some(taint) = operand_taint { any_tainted = true; combined_caps |= taint.caps; for orig in &taint.origins { push_origin_bounded(&mut combined_origins, *orig); } // Path sensitivity: check if this operand is validated in predecessor if let Some(ps) = pred_states { if let Some(pred_st) = ps.get(&(block_idx, pred_blk.0 as usize)) { let var_name = ssa .value_defs .get(operand_val.0 as usize) .and_then(|vd| vd.var_name.as_deref()); if let Some(name) = var_name { if let Some(sym) = transfer.interner.get(name) { if !pred_st.validated_must.contains(sym) { all_tainted_validated = false; } } else { all_tainted_validated = false; } } else { all_tainted_validated = false; } } else { all_tainted_validated = false; } } else { all_tainted_validated = false; } } } if combined_caps.is_empty() { state.remove(phi.value); } else { state.set( phi.value, VarTaint { caps: combined_caps, origins: combined_origins, uses_summary: false, }, ); // Path 
sensitivity: if all tainted predecessors validated, propagate if any_tainted && all_tainted_validated { if let Some(name) = ssa .value_defs .get(phi.value.0 as usize) .and_then(|vd| vd.var_name.as_deref()) { if let Some(sym) = transfer.interner.get(name) { state.validated_may.insert(sym); state.validated_must.insert(sym); } } } } } } // Replay abstract value phi join (from predecessor exit states). // Mirrors the same logic in transfer_block(), without this, abstract // values for phi-defined SSA values would be stale during sink suppression. if state.abstract_state.is_some() { for phi in &block.phis { if let SsaOp::Phi(ref operands) = phi.op { use crate::abstract_interp::AbstractValue; let is_induction = induction_vars.contains(&phi.value); let mut joined = AbstractValue::bottom(); let mut any_operand = false; for &(pred_blk, operand_val) in operands { if is_induction && pred_blk.0 >= block.id.0 { continue; } // Skip infeasible predecessors if let Some(ps) = pred_states { if let Some(pred_st) = ps.get(&(block_idx, pred_blk.0 as usize)) { if pred_st.path_env.as_ref().is_some_and(|e| e.is_unsat()) { continue; } } } // Look up operand abstract value from predecessor exit state let pred_abs = pred_states .and_then(|ps| ps.get(&(block_idx, pred_blk.0 as usize))) .and_then(|s| s.abstract_state.as_ref()) .map(|a| a.get(operand_val)) .unwrap_or_else(AbstractValue::top); joined = joined.join(&pred_abs); any_operand = true; } if any_operand { if let Some(ref mut abs) = state.abstract_state { abs.set(phi.value, joined); } } } } } // Process body with sink detection for inst in &block.body { transfer_inst(inst, cfg, ssa, transfer, &mut state); // Check for sink let info = &cfg[inst.cfg_node]; if info.all_args_literal { continue; } // Parameterized SQL queries are safe, skip sink detection. if info.parameterized_query { continue; } let sink_info = resolve_sink_info(info, transfer); let mut sink_caps = sink_info.caps; // [detectors.data_exfil] enabled toggle. 
When the detector class is // disabled per-project, strip Cap::DATA_EXFIL from sink_caps so no // taint-data-exfiltration event is emitted regardless of which gate // would have fired. Strict-additive: defaults to enabled, no effect // for projects that don't opt in. if !crate::utils::detector_options::current().data_exfil.enabled { sink_caps &= !Cap::DATA_EXFIL; } // Receiver-type-incompatibility stripping. When the receiver's type // proves a structurally-attached cap cannot apply (e.g. an // `LdapClient` receiver carrying an `HTML_ESCAPE` Sink label that was // attached to the CFG node by a `*.send`/`*.json`-style suffix // matcher), drop the offending bits *before* the type-qualified- // resolution branch below, so that branch is reachable on the // remaining empty `sink_caps` and can re-anchor a precise sink class // (`LdapClient.search` → `Cap::LDAP_INJECTION`). Both the // flow-sensitive type from `path_env` and the static type from // `type_facts` are consulted; the static path is what enables // closure-captured receivers (parent body → child body via // [`crate::taint::inject_external_type_facts`]) to participate. if let SsaOp::Call { receiver: Some(rv), .. } = &inst.op { if let Some(ref env) = state.path_env { if let Some(kind) = env.get(*rv).types.as_singleton() { sink_caps &= !receiver_incompatible_sink_caps(&kind, sink_caps); } } if let Some(tf) = transfer.type_facts { if let Some(kind) = tf.get_type(*rv) { sink_caps &= !receiver_incompatible_sink_caps(kind, sink_caps); } } } // Type-qualified sink resolution: when normal sink resolution found nothing, // try using the receiver's inferred type to construct a qualified callee name. if sink_caps.is_empty() { if let SsaOp::Call { callee, receiver: Some(rv), .. 
} = &inst.op { if transfer.type_facts.is_some() || state.path_env.is_some() { let tq_labels = resolve_type_qualified_labels( callee, *rv, transfer.type_facts, state.path_env.as_ref(), transfer.lang, transfer.extra_labels, Some(ssa), ); for lbl in &tq_labels { if let DataLabel::Sink(bits) = lbl { sink_caps |= *bits; } } } } } // Module alias resolution: when the receiver was assigned from require() // of a known module (e.g., `const lib = require("http")`), substitute // the module name into the callee for label matching. // Example: `lib.request(url)` with lib→"http" tries "http.request". if sink_caps.is_empty() { if let SsaOp::Call { callee, receiver: Some(rv), .. } = &inst.op { if let Some(aliases) = transfer.module_aliases { if let Some(module_names) = aliases.get(rv) { if let Some(dot_pos) = callee.find('.') { let method = &callee[dot_pos + 1..]; let lang_str = transfer.lang.as_str(); for module_name in module_names { let qualified = format!("{}.{}", module_name, method); let labels = crate::labels::classify_all( lang_str, &qualified, transfer.extra_labels, ); for lbl in &labels { if let DataLabel::Sink(bits) = lbl { sink_caps |= *bits; } } } } } } } } // ADD XXE on opt-in. When the receiver was constructed // with an explicit external-entity opt-in // (`new XMLParser({ processEntities: true })`, // `lxml.etree.XMLParser(resolve_entities=True)`), the subsequent // `parser.parse(xml)` is an XXE flow even though the callee // carries no flat XXE rule (fast-xml-parser and lxml are // XXE-safe by default). Runs BEFORE the empty check below so a // previously-empty sink_caps becomes non-empty and downstream // emission proceeds. The complementary `xxe_safe` suppress path // still runs after this; a call where the receiver was both // opt-in AND later hardened by a setter results in net-zero // (suppress strips what we added). if let SsaOp::Call { receiver: Some(rv), callee: callee_str, .. 
} = &inst.op { if let Some(xc) = transfer.xml_parser_config { if xc.is_unsafe_explicit(*rv) { let suffix = callee_str .rsplit(['.', ':']) .next() .unwrap_or(callee_str.as_str()); // `feed` covers Python lxml incremental parsing // (`parser.feed(body); parser.close()`). if matches!(suffix, "parse" | "parseString" | "parseFromString" | "feed") { sink_caps |= Cap::XXE; } } } } if sink_caps.is_empty() { // Callback pattern: check if callee has source_to_callback and the // actual callback argument has a matching param_to_sink. if let SsaOp::Call { callee, .. } = &inst.op { let caller_func = info.ast.enclosing_func.as_deref().unwrap_or(""); // Use arg_uses.len() for arity (see transfer_inst's Call arm). if let Some(resolved) = resolve_callee_hinted( transfer, callee, caller_func, info.call.call_ordinal, Some(info.call.arg_uses.len()), ) { for &(cb_idx, src_caps) in &resolved.source_to_callback { let cb_name = info.arg_callees.get(cb_idx).and_then(|ac| ac.as_ref()); if let Some(cb_callee) = cb_name { // First try the standard summary-based resolution // path (covers user-defined functions and built-ins // that landed in label-derived summaries upstream). // If that yields no matching sink caps, fall back // to gated-sink classification on the callback // callee's name — gated sinks (e.g. // `child_process.exec` post-fix) carry their // payload positions in the gate, not in any // summary, and the callback pipeline still needs // those positions to pair source caps against // param_to_sink. 
let cb_resolved = resolve_callee(transfer, cb_callee, caller_func, 0); let mut matching_sink_caps = Cap::empty(); let cb_param_to_sink_sites: Vec<(usize, SmallVec<[SinkSite; 1]>)> = if let Some(ref r) = cb_resolved { matching_sink_caps = r .param_to_sink .iter() .filter(|(_, caps)| !(src_caps & *caps).is_empty()) .fold(Cap::empty(), |acc, (_, c)| acc | *c); r.param_to_sink_sites.clone() } else { vec![] }; if matching_sink_caps.is_empty() { // Gate-fallback: classify_gated_sink yields the // callback callee's payload positions + sink // caps directly when the name matches a gated // sink rule. let lang_str = transfer.lang.as_str(); let gates = crate::labels::classify_gated_sink( lang_str, cb_callee, |_| None, |_| None, |_| false, ); for gm in gates.iter() { if let DataLabel::Sink(bits) = gm.label { if !(src_caps & bits).is_empty() { matching_sink_caps |= bits; } } } } if !matching_sink_caps.is_empty() { let source_kind = crate::labels::infer_source_kind(src_caps, callee); let origin = TaintOrigin { node: inst.cfg_node, source_kind, source_span: None, }; // Pick callback-path sink sites. // The callback callee's `param_to_sink_sites` // drives attribution when available; cap-only // fallback yields `primary_sink_site = None`. let cb_tainted: Vec<(SsaValue, Cap, SmallVec<[TaintOrigin; 2]>)> = vec![( inst.value, src_caps & matching_sink_caps, SmallVec::from_elem(origin, 1), )]; let cb_sites = pick_primary_sink_sites_from_resolved( matching_sink_caps, &cb_param_to_sink_sites, ); emit_ssa_taint_events( events, inst.cfg_node, cb_tainted, matching_sink_caps, false, None, true, cb_sites, ); } } } } } continue; } if sink_caps.is_empty() { continue; } // XXE config-fact suppression. A parse-class sink whose receiver // was provably hardened (`setFeature(FEATURE_SECURE_PROCESSING, // true)`, `setExpandEntityReferences(false)`, etc.) is not an XXE // flow. Drop the bit before downstream sink emission. 
Runs after // type-qualified resolution / module alias resolution so the XXE // bit added by `XmlParser.parse` resolution is visible here. if sink_caps.intersects(Cap::XXE) { if let SsaOp::Call { receiver: Some(rv), .. } = &inst.op { if let Some(xc) = transfer.xml_parser_config { if crate::ssa::xml_config::xxe_safe(Some(*rv), xc) { sink_caps &= !Cap::XXE; } } } } if sink_caps.is_empty() { continue; } // XPath resolver-binding suppression. An XPath `evaluate` / // `compile` sink whose receiver was provably bound to an // `XPathVariableResolver` is treated as parameterised and the // XPATH_INJECTION bit is stripped. Mirrors the XXE config-fact // shape above. Only fires when the receiver also carries // `TypeKind::XPathClient` (gates the suppression behind // type-fact disambiguation so a generic `obj.evaluate(...)` // matched as XPATH_INJECTION via name-only labelling does not // accidentally clear). if sink_caps.intersects(Cap::XPATH_INJECTION) { if let SsaOp::Call { receiver: Some(rv), .. } = &inst.op { if let Some(xpc) = transfer.xpath_config { let receiver_is_xpath = transfer .type_facts .and_then(|tf| tf.get_type(*rv)) .map(|kind| matches!(kind, crate::ssa::type_facts::TypeKind::XPathClient)) .unwrap_or(false); if receiver_is_xpath && crate::ssa::xpath_config::xpath_safe(Some(*rv), xpc) { sink_caps &= !Cap::XPATH_INJECTION; } } } } if sink_caps.is_empty() { continue; } // Prototype-pollution suppression (flow-sensitive). // `Object.create(null)` produces a `NullPrototypeObject`-typed // value; subscript writes to such an object cannot pollute // `Object.prototype` because there is no prototype chain. // Receiver SsaValue is read off the synthetic `__index_set__` // Call op; phi joins downgrade to `Unknown` via `TypeFact::meet` // so an if/else where only one branch initialises with // `Object.create(null)` keeps the PROTOTYPE_POLLUTION bit on // the unsafe path. 
if sink_caps.intersects(Cap::PROTOTYPE_POLLUTION) { if let SsaOp::Call { callee, receiver: Some(rv), .. } = &inst.op { if callee == "__index_set__" { let receiver_is_null_proto = transfer .type_facts .and_then(|tf| tf.get_type(*rv)) .map(|kind| { matches!(kind, crate::ssa::type_facts::TypeKind::NullPrototypeObject) }) .unwrap_or(false); if receiver_is_null_proto { sink_caps &= !Cap::PROTOTYPE_POLLUTION; } } } } if sink_caps.is_empty() { continue; } // Go interface satisfaction check. // For Go sinks that require http.ResponseWriter (e.g., fmt.Fprintf), // skip if the first argument's type is known to NOT satisfy the interface. if transfer.lang == Lang::Go { if let Some(ref env) = state.path_env { if let SsaOp::Call { args, .. } = &inst.op { if let Some(first_arg_vals) = args.first() { if let Some(&first_val) = first_arg_vals.first() { if let Some(kind) = env.get(first_val).types.as_singleton() { if crate::ssa::type_facts::GoInterfaceTable::definitely_not( &kind, "http.ResponseWriter", ) && sink_caps.intersects(Cap::HTML_ESCAPE) { sink_caps &= !Cap::HTML_ESCAPE; } } } } } } } if sink_caps.is_empty() { continue; } // Same-node Sanitizer subtraction. When the CFG node carries both // Sink and Sanitizer labels for overlapping caps, the shape-based // synthesis pattern used by Ruby AR safe-arg-0 detection // (`src/cfg/mod.rs`) and the Java JPA parameterised-execute chain , // the sanitizer reflexively dominates the sink and the cap should // not surface as a taint-flow finding. The SSA Call arm already // applies same-node sanitizer to the *return* value // (`return_bits &= !sanitizer_bits`); without this mirror at the // sink-detection site, the sink still fires on the call's own // arguments / receiver despite the sanitizer label. 
let same_node_sanitizer_caps = info.taint.labels.iter().fold(Cap::empty(), |acc, lbl| { if let DataLabel::Sanitizer(caps) = lbl { acc | *caps } else { acc } }); if !same_node_sanitizer_caps.is_empty() { sink_caps &= !same_node_sanitizer_caps; if sink_caps.is_empty() { continue; } } // Suppress known non-sink callees (e.g., System.out.println in Java) if let SsaOp::Call { callee, .. } = &inst.op { sink_caps = suppress_known_safe_callees(sink_caps, callee, transfer.lang); if sink_caps.is_empty() { continue; } } // Interprocedural sanitizer: subtract sanitizer caps from inner arg callees. // If an argument is wrapped in a call to a known sanitizer (e.g. // `os.system(sanitize(cmd))`), the sanitizer's caps reduce the effective // sink sensitivity so tainted data stripped by the inner call isn't flagged. for maybe_callee in &info.arg_callees { if let Some(inner_callee) = maybe_callee { let caller_func = info.ast.enclosing_func.as_deref().unwrap_or(""); if let Some(resolved) = resolve_callee(transfer, inner_callee, caller_func, 0) { sink_caps &= !resolved.sanitizer_caps; } else { // Fallback: check label classification (built-in + custom rules). // This handles sanitizers that have no function summary (e.g. // external libraries like `escapeHtml`, `DOMPurify.sanitize`). let lang_str = transfer.lang.as_str(); let labels = crate::labels::classify_all(lang_str, inner_callee, transfer.extra_labels); for lbl in &labels { if let DataLabel::Sanitizer(bits) = lbl { sink_caps &= !*bits; } } } } } if sink_caps.is_empty() { continue; } // SSA-level literal suppression: if all argument SSA values are known // constants (from const propagation), skip sink detection. // Only applies to non-Call instructions (Assign to a sink), for Call // instructions, the CFG-level `all_args_literal` check already handles // chained calls more accurately. if !matches!(inst.op, SsaOp::Call { .. 
}) { if let Some(const_values) = transfer.const_values { if all_args_const(inst, const_values) { continue; } } } // Type-aware sink filtering: suppress SQL injection for int-typed values. // Only applies to non-Call instructions to avoid interfering with // call-chain taint detection. if !matches!(inst.op, SsaOp::Call { .. }) { if let Some(type_facts) = transfer.type_facts { if is_type_safe_for_sink(inst, sink_caps, type_facts) { continue; } } } // Path-sensitive type-safe sink filtering. // Uses flow-sensitive type constraints from PathEnv (branch narrowing, // casts) to suppress sinks when all argument values are proven to have // non-injectable types (Int, Bool). if !matches!(inst.op, SsaOp::Call { .. }) { if let Some(ref env) = state.path_env { if is_path_type_safe_for_sink(inst, sink_caps, env) { continue; } } } // Abstract-domain-aware sink suppression. // Includes SSRF prefix locking and dual-gate (type + interval) for SQL/FILE_IO. if let Some(ref abs) = state.abstract_state { if is_abstract_safe_for_sink( inst, sink_caps, abs, transfer.type_facts, transfer.static_map, &state, ssa, cfg, ) { continue; } } // Call-site abstract suppression. if let SsaOp::Call { ref args, .. } = inst.op { if let Some(ref abs) = state.abstract_state { if is_call_abstract_safe( inst, args, sink_caps, abs, transfer.type_facts, transfer.static_map, &state, ssa, cfg, ) { continue; } } } // Per-gate-filter dispatch. When the call site carries multiple // gated-sink classes (e.g. `fetch` is both an SSRF gate on the URL // arg and a `DATA_EXFIL` gate on the body / headers / json arg), // each filter contributes its own sink-cap mask, payload positions, // and destination-uses. Iterating per-filter keeps cap attribution // exact: a body-only taint surfaces as a `DATA_EXFIL` event with no // SSRF bit, and vice versa. 
// // The single-filter / no-filter case takes one trip through the // loop with the legacy `(sink_caps, info.call.sink_payload_args, // info.call.destination_uses)` triple, preserving prior behavior // for every non-multi-gate site. // // Cross-file wrapper case: when the resolved callee summary carries // [`SinkInfo::param_to_gate_filters`] (the wrapper's body contains // an inner multi-gate sink whose per-position cap split was lifted // at extraction time), expand one filter pass per `(param_idx, // label_caps)` entry restricted to that single arg position. This // preserves SSRF-vs-DATA_EXFIL attribution across a // `fn forward(url, body) { fetch(url, {body}) }` wrapper that is // NOT itself a known gated sink. // // Params NOT covered by `param_to_gate_filters` retain coverage // via their `param_to_sink` entry, expanded per-position so the // emitted event's `sink_caps` reflects the param-specific cap // mask rather than the aggregate union. This matters for // wrappers that mix gated sinks with label-based sinks // (e.g. `fn dispatch(cmd, url) { execSync(cmd); fetch(url) }`), // where param 0 reaches a non-gated SHELL_ESCAPE sink and the // gate-filter list only carries the SSRF gate for param 1. let multi_gate = info.call.gate_filters.len() > 1; let summary_per_position = !multi_gate && !sink_info.param_to_gate_filters.is_empty(); type FilterEntry<'a> = (Cap, Option<&'a [usize]>, Option<&'a [String]>); // Per-position dispatch source for the summary-per-position branch. // First, every entry from `param_to_gate_filters` (cap-narrowed by // the inner gate); then, for any param_to_sink index NOT mentioned // in `param_to_gate_filters`, an entry using that param's // `param_to_sink` cap mask. 
struct PerPosEntry { idx: [usize; 1], caps: Cap, } let per_position_entries: Vec = if summary_per_position { let mut out: Vec = Vec::with_capacity(sink_info.param_to_gate_filters.len()); for (idx, caps) in &sink_info.param_to_gate_filters { out.push(PerPosEntry { idx: [*idx], caps: *caps, }); } for (idx, caps) in &sink_info.param_to_sink { if sink_info .param_to_gate_filters .iter() .any(|(i, _)| *i == *idx) { continue; } out.push(PerPosEntry { idx: [*idx], caps: *caps, }); } out } else { Vec::new() }; let filter_iter: smallvec::SmallVec<[FilterEntry<'_>; 2]> = if multi_gate { info.call .gate_filters .iter() .map(|f| { ( sink_caps & f.label_caps, Some(f.payload_args.as_slice()), f.destination_uses.as_deref(), ) }) .collect() } else if summary_per_position { per_position_entries .iter() .map(|e| (sink_caps & e.caps, Some(e.idx.as_slice()), None)) .collect() } else { smallvec::smallvec![(sink_caps, None, None)] }; for (filter_caps, positions_override, destination_override) in filter_iter { let mut filter_caps = filter_caps; // Per-filter destination allowlist for DATA_EXFIL. When this // filter would emit Cap::DATA_EXFIL and the call's destination // arg has a trusted static prefix (configured via // detectors.data_exfil.trusted_destinations), drop the bit // for this filter only. Other gates on the same call site // (notably SSRF) are unaffected. Mirrors the semantics of // is_call_data_exfil_destination_trusted but operates per-gate // so a multi-gate fetch site keeps SSRF attribution while // dropping DATA_EXFIL when the destination is trusted. if filter_caps.intersects(Cap::DATA_EXFIL) { if let SsaOp::Call { ref args, .. 
} = inst.op { if let Some(ref abs) = state.abstract_state { if is_call_data_exfil_destination_trusted(inst, args, abs, cfg) { filter_caps &= !Cap::DATA_EXFIL; } } } } if filter_caps.is_empty() { continue; } // Collect tainted SSA values that flow into this sink let tainted = collect_tainted_sink_values( inst, info, &state, filter_caps, ssa, transfer, &sink_info.param_to_sink, positions_override, destination_override, ); if tainted.is_empty() { continue; } // Compute all_validated: check if all tainted vars are validated let all_validated = tainted.iter().all(|(val, _, _)| { let var_name = ssa .value_defs .get(val.0 as usize) .and_then(|vd| vd.var_name.as_deref()); if let Some(name) = var_name { if let Some(sym) = transfer.interner.get(name) { return state.validated_may.contains(sym); } } false }); let guard_kind = if all_validated { Some(PredicateKind::ValidationCall) } else { None }; // Check if any tainted value's taint chain used summary resolution let any_uses_summary = tainted .iter() .any(|(val, _, _)| state.get(*val).is_some_and(|t| t.uses_summary)); // Pick primary sink sites (if any) from the resolved callee // summary. Multi-site cases emit one event per matching // [`SinkSite`] so each downstream Finding carries one attribution. let primary_sites = pick_primary_sink_sites( inst, &tainted, filter_caps, &sink_info.param_to_sink_sites, ); emit_ssa_taint_events( events, inst.cfg_node, tainted, filter_caps, all_validated, guard_kind, any_uses_summary, primary_sites, ); } } } // ── Primary sink-site attribution ─────────────────────────────────────── /// Pick primary [`SinkSite`]s for a summary-based sink event in the main /// sink-detection path. /// /// Filters `param_to_sink_sites` to entries whose: /// 1. `param_idx` appears in the call's positional `args` and contains one /// of the `tainted` SSA values (proves this site's parameter actually /// carried the tainted flow), AND /// 2. 
[`SinkSite`] carries resolved coordinates (`line != 0`, cap-only /// sites are ignored), AND /// 3. [`SinkSite::cap`] intersects `sink_caps` (the propagated cap mask). /// /// Returns the deduped list of matching sites (`dedup_key` identity). /// Empty ⇒ no primary attribution, caller emits a single event with /// `primary_sink_site = None`. fn pick_primary_sink_sites( inst: &SsaInst, tainted: &[(SsaValue, Cap, SmallVec<[TaintOrigin; 2]>)], sink_caps: Cap, param_to_sink_sites: &[(usize, SmallVec<[SinkSite; 1]>)], ) -> Vec { if param_to_sink_sites.is_empty() || tainted.is_empty() { return Vec::new(); } let SsaOp::Call { ref args, .. } = inst.op else { return Vec::new(); }; let mut out: Vec = Vec::new(); let mut seen: HashSet<(String, u32, u32, u32)> = HashSet::new(); for (param_idx, sites) in param_to_sink_sites { let Some(arg_vals) = args.get(*param_idx) else { continue; }; let carries_tainted = arg_vals .iter() .any(|v| tainted.iter().any(|(tv, _, _)| tv == v)); if !carries_tainted { continue; } for site in sites { if site.line == 0 { continue; } if (site.cap & sink_caps).is_empty() { continue; } let key = (site.file_rel.clone(), site.line, site.col, site.cap.bits()); if seen.insert(key) { out.push(site.clone()); } } } out } /// Pick primary [`SinkSite`]s for the callback-pattern path, where the /// tainted-arg positional mapping is not directly available (the callback /// callee is resolved separately from the outer call's `args`). Matches /// solely on cap intersection and coordinate resolution. 
///
/// Sites are returned in summary order, deduplicated on the
/// `(file_rel, line, col, cap bits)` tuple.  The parameter index of each
/// entry is ignored.  An empty result means no primary attribution.
fn pick_primary_sink_sites_from_resolved(
    sink_caps: Cap,
    param_to_sink_sites: &[(usize, SmallVec<[SinkSite; 1]>)],
) -> Vec<SinkSite> {
    let mut seen: HashSet<(String, u32, u32, u32)> = HashSet::new();
    param_to_sink_sites
        .iter()
        .flat_map(|(_, sites)| sites.iter())
        // Keep only sites with resolved coordinates whose cap overlaps the
        // propagated mask.
        .filter(|site| site.line != 0 && site.cap.intersects(sink_caps))
        // First occurrence wins; later duplicates are dropped.
        .filter(|site| {
            seen.insert((site.file_rel.clone(), site.line, site.col, site.cap.bits()))
        })
        .cloned()
        .collect()
}

/// Emit one or more [`SsaTaintEvent`]s for a sink hit.
///
/// Multi-primary collapse: when `primary_sites` contains more than one
/// entry, one event is emitted per site so downstream findings each carry
/// a single attribution.  When `primary_sites` is empty, a single event
/// is emitted with `primary_sink_site = None` (intra-procedural sinks,
/// cap-only callee summaries, or label-based sinks).
///
/// # Invariants enforced by debug_assert!
///
/// Every [`SinkSite`] in `primary_sites` must have been filtered at the
/// pick-site to satisfy:
/// * `site.line != 0`: cap-only sites carry no primary attribution and
///   must not reach the event stream.
/// * `(site.cap & sink_caps).is_empty() == false`: the site's cap
///   intersects the propagated cap mask (it's the dangerous-bit
///   justification for the finding).
///
/// Note: `uses_summary` intentionally does not gate `primary_sites` here.
/// The taint-chain `uses_summary` flag tracks whether a callee summary
/// propagated taint along the source→sink chain, whereas a primary
/// [`SinkSite`] only requires that the *sink* itself was resolved via a
/// callee summary.  An intra-file source can still reach a cross-file
/// sink, producing `uses_summary == false` alongside a populated primary.
fn emit_ssa_taint_events( events: &mut Vec, sink_node: NodeIndex, tainted_values: Vec<(SsaValue, Cap, SmallVec<[TaintOrigin; 2]>)>, sink_caps: Cap, all_validated: bool, guard_kind: Option, uses_summary: bool, primary_sites: Vec, ) { // Data-integrity invariant: every surviving primary site carries // resolved coordinates and a cap that intersects `sink_caps`. This is // the contract the pick functions enforce; the assertion defends // against a future caller that builds `primary_sites` by hand. debug_assert!( primary_sites .iter() .all(|s| s.line != 0 && !(s.cap & sink_caps).is_empty()), "primary_sites must all carry resolved coordinates and cap ∩ sink_caps ≠ ∅", ); if primary_sites.is_empty() { events.push(SsaTaintEvent { sink_node, tainted_values, sink_caps, all_validated, guard_kind, uses_summary, primary_sink_site: None, }); return; } for site in primary_sites { events.push(SsaTaintEvent { sink_node, tainted_values: tainted_values.clone(), sink_caps, all_validated, guard_kind, uses_summary, primary_sink_site: Some(site), }); } } /// Collect taint from call arguments. /// /// `args` contains **positional arguments only**, the receiver is a separate /// channel and is passed via `receiver`. `propagating_params` indexes directly /// into `args` using callee positional-parameter indices (no receiver offset). /// /// When `propagating_params` is empty, taint is collected from the receiver /// (if any) and from all positional args. 
fn collect_args_taint( args: &[SmallVec<[SsaValue; 2]>], receiver: &Option, state: &SsaTaintState, propagating_params: &[usize], ) -> (Cap, SmallVec<[TaintOrigin; 2]>) { let mut combined_caps = Cap::empty(); let mut combined_origins: SmallVec<[TaintOrigin; 2]> = SmallVec::new(); if propagating_params.is_empty() { // Collect from all args + receiver if let Some(rv) = receiver { if let Some(taint) = state.get(*rv) { combined_caps |= taint.caps; for orig in &taint.origins { push_origin_bounded(&mut combined_origins, *orig); } } } for arg_vals in args { for &v in arg_vals { if let Some(taint) = state.get(v) { combined_caps |= taint.caps; for orig in &taint.origins { push_origin_bounded(&mut combined_origins, *orig); } } } } } else { // Collect only from propagating param positions. Positional only , // receiver-to-return propagation is handled by `receiver_to_return` on // the summary, not by this path. for ¶m_idx in propagating_params { if let Some(arg_vals) = args.get(param_idx) { for &v in arg_vals { if let Some(taint) = state.get(v) { combined_caps |= taint.caps; for orig in &taint.origins { push_origin_bounded(&mut combined_origins, *orig); } } } } } } (combined_caps, combined_origins) } /// Strip a capability bit from every argument SSA value of a call. /// Used by the [`DataLabel::Sanitizer`] arm when the sanitizer covers /// [`Cap::UNAUTHORIZED_ID`], ownership/membership guards prove on /// inputs rather than the return value. Other caps and origins are /// untouched. /// Apply [`SsaFuncSummary::validated_params_to_return`] at a call site. /// /// For each parameter index `p` in `validated_params`, mark the /// `var_name` of every tainted SSA value at `args[p]` and the call's /// own result `inst.value` in the caller's `validated_must` / /// `validated_may` sets. Mirrors the symbol-keyed validation a direct /// `if (!regex.test(x)) throw` would set on the surviving branch. 
/// /// Sound because the callee summary records `validated_params_to_return` /// only when the param's `var_name` is in `validated_must` at *every* /// return block — a normal-returning call therefore proves the /// validating arm. No-op when no actual argument is tainted (avoids /// spuriously validating untouched names downstream). fn propagate_validated_params_to_return( inst: &SsaInst, args: &[SmallVec<[SsaValue; 2]>], ssa: &SsaBody, interner: &crate::state::symbol::SymbolInterner, state: &mut SsaTaintState, validated_params: &[usize], ) { let mark = |val: SsaValue, st: &mut SsaTaintState| { let Some(name) = ssa .value_defs .get(val.0 as usize) .and_then(|vd| vd.var_name.as_deref()) else { return; }; let Some(sym) = interner.get(name) else { return; }; st.validated_must.insert(sym); st.validated_may.insert(sym); }; let mut any_arg_tainted = false; for &p in validated_params { let Some(arg_vals) = args.get(p) else { continue; }; for &v in arg_vals { if state.get(v).is_some_and(|t| !t.caps.is_empty()) { any_arg_tainted = true; mark(v, state); } } } if any_arg_tainted { mark(inst.value, state); } } fn strip_cap_from_call_args( args: &[SmallVec<[SsaValue; 2]>], receiver: &Option, state: &mut SsaTaintState, cap: Cap, ) { let mut targets: SmallVec<[SsaValue; 8]> = SmallVec::new(); if let Some(rv) = receiver { targets.push(*rv); } for arg_vals in args { for &v in arg_vals { targets.push(v); } } for v in targets { if let Some(current) = state.get(v) { if !current.caps.contains(cap) { continue; } let mut updated = current.clone(); updated.caps &= !cap; state.set(v, updated); } } } /// Scoped libcurl special case: when `curl_easy_setopt(handle, CURLOPT_URL, value)` /// is called and `value` is tainted, propagate that taint to `handle`. /// /// Mirrors `TaintTransfer::try_curl_url_propagation` from `transfer.rs`. 
fn try_curl_url_propagation( inst: &SsaInst, info: &NodeInfo, args: &[SmallVec<[SsaValue; 2]>], state: &mut SsaTaintState, ) -> bool { if info.taint.defines.is_some() { return false; } let callee = match info.call.callee.as_deref() { Some(c) if c.ends_with("curl_easy_setopt") => c, _ => return false, }; if !info.taint.uses.iter().any(|u| u == "CURLOPT_URL") { return false; } // Identify handle and URL SSA values from args. // Layout: args[0]=handle, args[1]=CURLOPT_URL, args[2]=url_value // But the uses list determines which are which. We need handle = first use // that isn't the callee or CURLOPT_URL. // In SSA form, the args vec gives us positional access. // Handle is first arg, URL value is last arg (skip CURLOPT_URL constant). let handle_val = args.first().and_then(|a| a.first().copied()); let handle_val = match handle_val { Some(v) => v, None => return false, }; // Collect taint from all args except the handle (args[0]) let mut url_caps = Cap::empty(); let mut url_origins: SmallVec<[TaintOrigin; 2]> = SmallVec::new(); for arg_vals in args.iter().skip(1) { for &v in arg_vals { if let Some(taint) = state.get(v) { url_caps |= taint.caps; for orig in &taint.origins { push_origin_bounded(&mut url_origins, *orig); } } } } // Also check info.taint.uses for identifiers that aren't callee, handle, or CURLOPT_URL // in case arg_uses was empty and SSA lowering put all uses into a single group if url_caps.is_empty() { // Fallback: look at all used SSA values except handle let used = inst_use_values(inst); for v in used { if v == handle_val { continue; } if let Some(taint) = state.get(v) { url_caps |= taint.caps; for orig in &taint.origins { push_origin_bounded(&mut url_origins, *orig); } } } } if url_caps.is_empty() { return false; } // Merge URL taint into handle (monotone: caps OR, origins union) match state.get(handle_val) { Some(existing) => { let mut merged = existing.clone(); merged.caps |= url_caps; for orig in &url_origins { push_origin_bounded(&mut 
merged.origins, *orig); } state.set(handle_val, merged); } None => { state.set( handle_val, VarTaint { caps: url_caps, origins: url_origins, uses_summary: false, }, ); } } // Also write the inst's own value as non-tainted (no defines on this node) let _ = callee; true } /// Resolve a container index SSA operand to a `HeapSlot`. /// /// Uses the current function's `const_values` (from `SsaTaintTransfer`) to /// determine whether the index is a provably non-negative integer constant /// within `MAX_TRACKED_INDICES`. /// /// - Intraprocedural: guaranteed, each function's own const propagation /// results are used. /// - Inline callee analysis (k=1): guaranteed, `inline_analyse_callee()` /// sets `const_values: Some(&callee_body.opt.const_values)` on the child /// transfer, so callee-local constants are resolved. /// - Unknown / non-integer / out-of-bounds: falls back to `HeapSlot::Elements`. fn resolve_container_index(index_val: SsaValue, transfer: &SsaTaintTransfer) -> HeapSlot { use crate::ssa::heap::MAX_TRACKED_INDICES; if let Some(cv) = transfer.const_values { if let Some(crate::ssa::const_prop::ConstLattice::Int(n)) = cv.get(&index_val) { if *n >= 0 && (*n as u64) < MAX_TRACKED_INDICES as u64 { return HeapSlot::Index(*n as u64); } } } HeapSlot::Elements } /// Resolve the `HeapSlot` for a container operation given its `index_arg`. /// /// When `index_arg` is `Some(idx_pos)`, applies `arg_offset` and resolves /// the SSA value from `args`. Otherwise returns `HeapSlot::Elements`. fn resolve_op_slot( index_arg: Option, arg_offset: usize, args: &[SmallVec<[SsaValue; 2]>], transfer: &SsaTaintTransfer, ) -> HeapSlot { if let Some(idx_pos) = index_arg { let effective = idx_pos + arg_offset; if let Some(arg_vals) = args.get(effective) { if let Some(&v) = arg_vals.first() { return resolve_container_index(v, transfer); } } } HeapSlot::Elements } /// Handle container operations: propagate taint between receiver and arguments. 
/// /// **Store** operations (push, append, set, add, insert, etc.): /// Merge value-argument taint into receiver SSA value. /// /// **Load** operations (pop, get, join, shift, values, etc.): /// Propagate receiver taint to the instruction's result value. /// /// Returns `true` if the operation was handled and the caller should skip /// default propagation. fn try_container_propagation( inst: &SsaInst, _info: &NodeInfo, args: &[SmallVec<[SsaValue; 2]>], receiver: &Option, state: &mut SsaTaintState, transfer: &SsaTaintTransfer, callee: &str, ssa: &SsaBody, ) -> bool { let lang = transfer.lang; use crate::ssa::pointsto::{ContainerOp, classify_container_op}; let op = match classify_container_op(callee, lang) { Some(op) => op, None => return false, }; // Resolve the container SSA value. // Languages with `Kind::CallMethod` (Java, Ruby, PHP, Rust, etc.) set // `receiver` explicitly. For languages like JS/TS where method calls are // `Kind::CallFn`, the receiver is embedded in the args. We find it by // looking for an SSA value whose var_name matches the receiver portion // of the dotted callee (e.g. "items" from "items.push"). let resolve_container = |recv: &Option| -> Option { if let Some(v) = *recv { return Some(v); } // Go append: no receiver, arg 0 is the slice if lang == Lang::Go { return args.first().and_then(|a| a.first().copied()); } // For dotted callees like "items.push", find the SSA value for "items" let dot_pos = callee.rfind('.')?; let receiver_name = &callee[..dot_pos]; // Search all arg groups for an SSA value with matching var_name for arg_group in args { for &v in arg_group { if let Some(def) = ssa.value_defs.get(v.0 as usize) { if def.var_name.as_deref() == Some(receiver_name) { return Some(v); } } } } None }; match op { ContainerOp::Store { value_args, index_arg, } => { let container_val = match resolve_container(receiver) { Some(v) => v, None => return false, }; // For Go `append`, args[0] is the slice itself and value args // follow at index 1. 
For method-style container ops the receiver // is a separate channel on `SsaOp::Call.receiver`, so `args` // contains positional arguments only. let arg_offset = if lang == Lang::Go && receiver.is_none() { 1usize } else { 0 }; // Resolve index argument to HeapSlot (Index(n) or Elements). let slot = resolve_op_slot(index_arg, arg_offset, args, transfer); // Collect taint from value argument(s) let mut val_caps = Cap::empty(); let mut val_origins: SmallVec<[TaintOrigin; 2]> = SmallVec::new(); for &arg_idx in &value_args { let effective_idx = arg_idx + arg_offset; if let Some(arg_vals) = args.get(effective_idx) { for &v in arg_vals { if let Some(taint) = state.get(v) { val_caps |= taint.caps; for orig in &taint.origins { push_origin_bounded(&mut val_origins, *orig); } } } } } if val_caps.is_empty() { return true; // Container op handled, but no taint to propagate } // When points-to info available, store through heap objects if let Some(pts) = lookup_pts(transfer, container_val) { state.heap.store_set(&pts, slot, val_caps, &val_origins); // For Go append, result also points to same heap objects if lang == Lang::Go && receiver.is_none() { if let Some(ht) = state.heap.load_set(&pts, HeapSlot::Elements) { state.set( inst.value, VarTaint { caps: ht.caps, origins: ht.origins, uses_summary: false, }, ); } } return true; } // Fallback: direct SSA value taint (no pts info for this container) merge_taint_into(state, container_val, val_caps, &val_origins); // For Go append, the result is the new slice, propagate merged taint if lang == Lang::Go && receiver.is_none() { if let Some(merged) = state.get(container_val) { state.set(inst.value, merged.clone()); } } true } ContainerOp::Load { index_arg } => { let container_val = match resolve_container(receiver) { Some(v) => v, None => return false, }; // Resolve index argument to HeapSlot. // For Go container ops, args[0] is the container itself (value args // start at 1). 
For method-style calls the receiver is a separate // channel, so `args` holds positional arguments from index 0. let arg_offset = if lang == Lang::Go && receiver.is_none() { 1usize } else { 0 }; let slot = resolve_op_slot(index_arg, arg_offset, args, transfer); // When points-to info available, load from heap objects if let Some(pts) = lookup_pts(transfer, container_val) { if let Some(ht) = state.heap.load_set(&pts, slot) { state.set( inst.value, VarTaint { caps: ht.caps, origins: ht.origins, uses_summary: false, }, ); } return true; } // Fallback: direct SSA value taint if let Some(taint) = state.get(container_val) { state.set(inst.value, taint.clone()); } true } ContainerOp::Writeback { dest_arg } => { // Receiver carries the source taint (e.g. // `json.NewDecoder(r.Body).Decode(&dest)`, the decoder's // receiver chain is tainted by `r.Body`). Propagate that taint // into the call's destination argument so downstream sinks see // the flow through the decoded struct. // // Go method calls lower to `Kind::CallFn` with the receiver // implicit in the dotted callee text (`d.Decode`), there's no // explicit `receiver` channel and no slice-as-arg-0 convention // (unlike Go's `append`), so the existing `resolve_container` // helper either returns the wrong value or `None` here. Look // up the receiver SSA value by var-name from the callee prefix. // Detect a chained-call receiver shape (`a.b(c).d(e)`) where // the receiver of the writeback method is itself a call // expression, so its return value never gets a separate SSA // value and there is no `var_name` to look up. // // For `json.NewDecoder(r.Body).Decode(emoji)` the callee text // is `"json.NewDecoder(r.Body).Decode"`: parens appear inside // the dotted prefix. In that case we fall back to unioning // the taint of every implicit-arg group (the synth-source // bindings produced by `walk_chain_inner_call_args`) and // treating that as the receiver taint. 
let chain_shape = { let dot_pos = callee.rfind('.'); match dot_pos { Some(p) => callee[..p].contains('('), None => false, } }; let recv_val = if let Some(v) = *receiver { Some(v) } else if !chain_shape && let Some(dot_pos) = callee.rfind('.') { let recv_name = &callee[..dot_pos]; let mut found = None; 'outer: for arg_group in args { for &v in arg_group { if let Some(def) = ssa.value_defs.get(v.0 as usize) { if def.var_name.as_deref() == Some(recv_name) { found = Some(v); break 'outer; } } } } if found.is_none() { // Receiver isn't in args (Go CallFn). Search the SSA // body for a value whose var_name matches the receiver. for (idx, def) in ssa.value_defs.iter().enumerate() { if def.var_name.as_deref() == Some(recv_name) { found = Some(SsaValue(idx as u32)); break; } } } found } else { None }; let recv_taint = if let Some(v) = recv_val { if let Some(t) = state.get(v) { t.clone() } else if chain_shape { // Receiver SSA value found but carries no direct // taint, fall through to chain-shape arg union. let mut caps = Cap::empty(); let mut origins: SmallVec<[TaintOrigin; 2]> = SmallVec::new(); for (idx, arg_group) in args.iter().enumerate() { if idx == dest_arg { continue; } for &v in arg_group { if let Some(t) = state.get(v) { caps |= t.caps; for orig in &t.origins { push_origin_bounded(&mut origins, *orig); } } } } if caps.is_empty() { return true; } VarTaint { caps, origins, uses_summary: false, } } else { return true; // claimed but receiver carries no taint } } else if chain_shape { // Sum taint across every arg group except the destination // arg. In the chained shape, synth-source bindings // emitted by `walk_chain_inner_call_args` land in the // implicit-arg slot (`info.taint.uses` → `args[N]`); the // dest_arg itself is the writeback's destination, never // a source. 
let mut caps = Cap::empty(); let mut origins: SmallVec<[TaintOrigin; 2]> = SmallVec::new(); for (idx, arg_group) in args.iter().enumerate() { if idx == dest_arg { continue; } for &v in arg_group { if let Some(t) = state.get(v) { caps |= t.caps; for orig in &t.origins { push_origin_bounded(&mut origins, *orig); } } } } if caps.is_empty() { return true; } VarTaint { caps, origins, uses_summary: false, } } else { if std::env::var("NYX_DEBUG_WRITEBACK").is_ok() { eprintln!(" writeback: no receiver SSA value for callee {callee:?}"); } return false; }; // For method-call form, the receiver is implicit in the callee // string and `args` holds positional args starting at 0. Taint // the destination at three layers: (1) the SSA-value level so // downstream uses of the pointer itself see taint; (2) the // heap-Elements slot via the simpler-tier `lookup_pts` channel; // and (3) the field-cell channel via `pointer_facts.pt(v)` with // [`FieldId::ELEM`] as a tainted-at-all-fields wildcard so // subsequent `dest.Field` projections (which read through the // higher-tier `pointer_facts.pt(receiver)` channel, see the // `SsaOp::FieldProj` arm) inherit the taint. Without (3), CVE // shapes like `json.NewDecoder(r.Body).Decode(&dest)` followed // by `os.Remove(filepath.Join(_, dest.Name))` left the dest // field channel empty even though the heap was tainted; the // [`FieldId::ELEM`] wildcard bridges the two channels and is // read back by the `FieldProj` arm's ELEM fallback. 
if let Some(arg_vals) = args.get(dest_arg) {
    for &v in arg_vals {
        merge_taint_into(state, v, recv_taint.caps, &recv_taint.origins);
        if let Some(pts) = lookup_pts(transfer, v) {
            state.heap.store_set(
                &pts,
                crate::ssa::heap::HeapSlot::Elements,
                recv_taint.caps,
                &recv_taint.origins,
            );
        }
        if let Some(pf) = transfer.pointer_facts {
            let pt_arg = pf.pt(v);
            if !pt_arg.is_empty() && !pt_arg.is_top() {
                let cell_taint = recv_taint.clone();
                for loc in pt_arg.iter() {
                    let key = crate::taint::ssa_transfer::state::FieldTaintKey {
                        loc,
                        field: crate::ssa::ir::FieldId::ANY_FIELD,
                    };
                    state.add_field(key, cell_taint.clone(), false, false);
                }
            }
        }
    }
}
true
}
}
}

/// Find the container receiver SSA value for a container operation.
///
/// Resolution order:
/// 1. An explicit `receiver` channel, when present.
/// 2. For Go, the first value of the first argument group.
/// 3. Otherwise, the text before the last `.` in `callee` is taken as the
///    receiver name and matched against the `var_name` of every argument
///    value; the first match wins.
///
/// Returns `None` when no rule yields a value (including a non-Go callee
/// without a `.`). Intended to mirror the `resolve_container` logic used by
/// `try_container_propagation`.
fn find_container_receiver(
    callee: &str,
    receiver: &Option<SsaValue>,
    args: &[SmallVec<[SsaValue; 2]>],
    ssa: &SsaBody,
    lang: Lang,
) -> Option<SsaValue> {
    if let Some(v) = *receiver {
        return Some(v);
    }
    if lang == Lang::Go {
        return args.first().and_then(|a| a.first().copied());
    }
    let dot_pos = callee.rfind('.')?;
    let receiver_name = &callee[..dot_pos];
    args.iter().flatten().copied().find(|v| {
        ssa.value_defs
            .get(v.0 as usize)
            .is_some_and(|def| def.var_name.as_deref() == Some(receiver_name))
    })
}

/// Look up points-to set for an SSA value, checking both the static
/// pre-pass result and the dynamic inter-procedural set.
///
/// The static result (`transfer.points_to`) is consulted first; the dynamic
/// set (`transfer.dynamic_pts`) is only read when the static lookup misses.
/// The returned set is a clone, so the caller owns it.
fn lookup_pts(transfer: &SsaTaintTransfer, v: SsaValue) -> Option<PointsToSet> {
    if let Some(pts_result) = transfer.points_to {
        if let Some(pts) = pts_result.get(v) {
            return Some(pts.clone());
        }
    }
    if let Some(dyn_ref) = transfer.dynamic_pts {
        // The `borrow()` guard lives for the duration of this `if let`, so
        // the clone is taken while the map is still borrowed.
        if let Some(pts) = dyn_ref.borrow().get(&v) {
            return Some(pts.clone());
        }
    }
    None
}

/// Merge taint caps and origins into an existing SSA value's taint (monotone).
fn merge_taint_into( state: &mut SsaTaintState, target: SsaValue, caps: Cap, origins: &SmallVec<[TaintOrigin; 2]>, ) { match state.get(target) { Some(existing) => { let mut merged = existing.clone(); merged.caps |= caps; for orig in origins { push_origin_bounded(&mut merged.origins, *orig); } state.set(target, merged); } None => { state.set( target, VarTaint { caps, origins: origins.clone(), uses_summary: false, }, ); } } } /// Resolve sink caps from labels or callee summary. /// Resolved sink information: aggregate caps plus optional per-parameter detail. struct SinkInfo { caps: Cap, /// When non-empty, only these caller argument positions flow to sinks. /// Each entry is (param_index, per_param_sink_caps). /// Empty = check all arguments (label-based sinks, or no per-param info). param_to_sink: Vec<(usize, Cap)>, /// Per-parameter [`SinkSite`] records carried from the callee summary, /// mirroring `param_to_sink` by parameter index. Empty for label-based /// sinks and for cap-only summaries that do not retain source /// coordinates. Used to attribute findings to the dangerous /// callee-internal instruction. param_to_sink_sites: Vec<(usize, SmallVec<[SinkSite; 1]>)>, /// Per-parameter gate-filter cap masks lifted from the callee's /// inner multi-gate sink call sites. Mirrors /// [`crate::summary::ssa_summary::SsaFuncSummary::param_to_gate_filters`]. /// When non-empty, the dispatcher in [`collect_block_events`] /// expands one filter pass per `(param_idx, label_caps)` entry so /// a wrapper carrying multiple gate classes (e.g. SSRF on the URL /// arg + DATA_EXFIL on the body arg) attributes findings per cap /// instead of joining them. 
param_to_gate_filters: Vec<(usize, Cap)>, } fn resolve_sink_info(info: &NodeInfo, transfer: &SsaTaintTransfer) -> SinkInfo { let label_sink_caps = info.taint.labels.iter().fold(Cap::empty(), |acc, lbl| { if let DataLabel::Sink(caps) = lbl { acc | *caps } else { acc } }); if !label_sink_caps.is_empty() { return SinkInfo { caps: label_sink_caps, param_to_sink: vec![], param_to_sink_sites: vec![], param_to_gate_filters: vec![], }; } let caller_func = info.ast.enclosing_func.as_deref().unwrap_or(""); // The sink-label path needs an arity hint so we do not match a // same-name/different-arity overload in another namespace. // `arg_uses.len()` is the positional-argument count, the receiver is a // separate channel on `info.call.receiver`, not prepended to `arg_uses`. let arity_hint = if info.call.arg_uses.is_empty() { None } else { Some(info.call.arg_uses.len()) }; let primary = info.call.callee.as_ref().and_then(|c| { resolve_callee_hinted(transfer, c, caller_func, info.call.call_ordinal, arity_hint) }); if let Some(r) = primary.filter(|r| !r.sink_caps.is_empty()) { return SinkInfo { caps: r.sink_caps, param_to_sink: r.param_to_sink, param_to_sink_sites: r.param_to_sink_sites, param_to_gate_filters: r.param_to_gate_filters, }; } // Fallback: when first_member_label rebound `info.call.callee` to an // inner source path (e.g. `helper(req.body.uri)` → callee="req.body.uri", // outer_callee="helper"), the inner-name lookup misses the actual // wrapper's summary. Retry with `outer_callee` so the wrapper's // `param_to_sink` summary fires for cross-function sink propagation. // Strict-additive: only fires when the primary inner-callee resolution // produced no sink caps; any positive primary result wins. Motivated // by CVE-2025-64430. 
if let Some(oc) = info.call.outer_callee.as_ref() { if let Some(r) = resolve_callee_hinted( transfer, oc, caller_func, info.call.call_ordinal, arity_hint, ) .filter(|r| !r.sink_caps.is_empty()) { return SinkInfo { caps: r.sink_caps, param_to_sink: r.param_to_sink, param_to_sink_sites: r.param_to_sink_sites, param_to_gate_filters: r.param_to_gate_filters, }; } } SinkInfo { caps: Cap::empty(), param_to_sink: vec![], param_to_sink_sites: vec![], param_to_gate_filters: vec![], } } /// Collect tainted SSA values at a sink instruction. /// /// When `param_to_sink` is non-empty, only arguments at those positions are /// checked, enables per-parameter sink precision from cross-file summaries. /// /// `positions_override` and `destination_override`, when `Some`, supersede /// `info.call.sink_payload_args` and `info.call.destination_uses` for this /// call. Used by the multi-gate sink dispatch in [`collect_block_events`] /// to attribute taint per-cap when a callee carries several gates (e.g. /// `fetch` SSRF on the URL position vs `DATA_EXFIL` on the body position). #[allow(clippy::too_many_arguments)] fn collect_tainted_sink_values( inst: &SsaInst, info: &NodeInfo, state: &SsaTaintState, sink_caps: Cap, ssa: &SsaBody, transfer: &SsaTaintTransfer, param_to_sink: &[(usize, Cap)], positions_override: Option<&[usize]>, destination_override: Option<&[String]>, ) -> Vec<(SsaValue, Cap, SmallVec<[TaintOrigin; 2]>)> { let mut result = Vec::new(); // Helper: check heap taint for an SSA value that may point to container(s). // At sinks we use Elements to conservatively see all indexed taint. 
let check_heap_taint = |v: SsaValue, result: &mut Vec<(SsaValue, Cap, SmallVec<[TaintOrigin; 2]>)>| { if let Some(pts) = lookup_pts(transfer, v) { if let Some(ht) = state.heap.load_set(&pts, HeapSlot::Elements) { let effective = ht.caps & sink_caps; if !effective.is_empty() && !result.iter().any(|&(rv, _, _)| rv == v) { result.push((v, ht.caps, ht.origins)); } } } }; // Collect SSA values used by this instruction let used_values = inst_use_values(inst); // Priority 1: gated sink filtering (CFG-level sink_payload_args, or a // multi-gate per-filter override). The position list indexes into // positional args (no receiver offset); the receiver is a separate // channel via `SsaOp::Call.receiver`. // // Destination-aware narrowing: when a destination filter is set, // restrict sink-taint checks to SSA values whose `var_name` matches one // of the listed destination field identifiers. This silences // `fetch({url: fixed, body: tainted})` while still firing on // `fetch({url: tainted, body: fixed})`. let positions: Option<&[usize]> = positions_override.or(info.call.sink_payload_args.as_deref()); let destination_filter: Option<&[String]> = destination_override.or(info.call.destination_uses.as_deref()); if let Some(positions) = positions { if let SsaOp::Call { args, .. } = &inst.op { for &pos in positions { if let Some(arg_vals) = args.get(pos) { for &v in arg_vals { if let Some(names) = destination_filter { // Only proceed when this SSA value corresponds to // a declared destination field identifier. 
let var_name = ssa.def_of(v).var_name.as_deref(); let matches = var_name.is_some_and(|vn| names.iter().any(|n| n == vn)); if !matches { continue; } } if let Some(taint) = state.get(v) { if (taint.caps & sink_caps) != Cap::empty() { result.push((v, taint.caps, taint.origins.clone())); } } check_heap_taint(v, &mut result); } } } apply_field_aware_suppression(&mut result, inst, info, state, sink_caps, ssa); return result; } } // Priority 2: summary-based per-parameter sink filtering. // `param_to_sink` indices refer to the callee's positional parameter // positions and map directly onto `args`. The receiver channel is // handled via `receiver_to_sink` in the summary. if !param_to_sink.is_empty() { if let SsaOp::Call { args, .. } = &inst.op { for &(param_idx, per_param_caps) in param_to_sink { let effective_caps = per_param_caps & sink_caps; if effective_caps.is_empty() { continue; } if let Some(arg_vals) = args.get(param_idx) { for &v in arg_vals { if let Some(taint) = state.get(v) { if (taint.caps & effective_caps) != Cap::empty() && !result.iter().any(|&(rv, _, _)| rv == v) { result.push((v, taint.caps, taint.origins.clone())); } } check_heap_taint(v, &mut result); } } } apply_field_aware_suppression(&mut result, inst, info, state, sink_caps, ssa); return result; } } // Priority 3: aggregate fallback, check all used values for v in used_values { if let Some(taint) = state.get(v) { if (taint.caps & sink_caps) != Cap::empty() { result.push((v, taint.caps, taint.origins.clone())); } } check_heap_taint(v, &mut result); } apply_field_aware_suppression(&mut result, inst, info, state, sink_caps, ssa); result } /// Suppress plain-ident taint when a dotted-path field value used by the same /// instruction is untainted. Prevents false positives from base-ident bleed /// (e.g. `obj.safe = "const"; sink(obj.safe)` where `obj` is tainted). 
fn apply_field_aware_suppression( result: &mut Vec<(SsaValue, Cap, SmallVec<[TaintOrigin; 2]>)>, inst: &SsaInst, info: &NodeInfo, state: &SsaTaintState, sink_caps: Cap, ssa: &SsaBody, ) { if result.is_empty() { return; } let all_used = inst_use_values(inst); result.retain(|(v, _, _)| { let Some(base) = ssa.def_of(*v).var_name.as_deref() else { return true; }; // Only suppress plain idents (no dots) if base.contains('.') { return true; } let prefix = format!("{}.", base); // Collect callee-like names to exclude from field suppression. // Method call expressions like "items.join" (from inner calls within // this node's arguments) should NOT be treated as field accesses. let callee_name = match &inst.op { SsaOp::Call { callee, .. } => Some(callee.as_str()), _ => None, }; // Collect all field values matching "base.X" (excluding method-call // expressions and the callee itself). // // Phantom Param ops with dotted var_names (e.g. `u.String` for the // method ref in `u.String()`) represent free-identifier references // hoisted by SSA lowering, not real data field accesses. Owncast // CVE-2023-3188 hit this: `http.DefaultClient.Get(u.String())` // includes both `u` (tainted) and `u.String` (untainted phantom) // as uses; treating `u.String` as a clean field of `u` suppressed // the SSRF. But JS object-field FP guards (e.g. // `db.query(obj.safeField)` with `obj.unsafeField` tainted) need // the opposite, `obj.safeField` is a real field access and SHOULD // count as a clean field. The CFG distinguishes the two via // `arg_callees`: when an argument expression is itself a call, its // callee text is recorded; pure member-access args leave the slot // `None`. Skip phantoms whose var_name appears as an arg_callee // (the Go case), keep phantoms representing field reads (the JS // case) so suppression still fires. 
let field_values: SmallVec<[SsaValue; 4]> = all_used .iter() .copied() .filter(|&u| { if u == *v { return false; } let uname = match ssa.def_of(u).var_name.as_deref() { Some(n) => n, None => return false, }; if !uname.starts_with(&prefix) { return false; } if callee_name.is_some_and(|cn| uname == cn) { return false; } if is_likely_method_expression(uname) { return false; } if is_phantom_param_value(u, ssa) && info.arg_callees.iter().any(|c| c.as_deref() == Some(uname)) { return false; } true }) .collect(); // Suppress base only if there ARE field values AND ALL of them // are untainted for the relevant sink caps. let all_fields_clean = !field_values.is_empty() && field_values.iter().all(|&u| match state.get(u) { None => true, Some(t) => (t.caps & sink_caps).is_empty(), }); !all_fields_clean }); } /// Check whether an SSA value is defined by a phantom `Param` op (a free /// identifier like `u.String` hoisted by SSA lowering, not a real positional /// parameter). Used by field-aware suppression to skip method/function /// references that share a base name with a tainted variable. fn is_phantom_param_value(v: SsaValue, ssa: &SsaBody) -> bool { let def = ssa.def_of(v); let block = &ssa.blocks[def.block.0 as usize]; block .phis .iter() .chain(block.body.iter()) .find(|inst| inst.value == v) .is_some_and(|inst| matches!(inst.op, SsaOp::Param { .. } | SsaOp::SelfParam)) } /// Check if a dotted var_name looks like a method call expression rather than /// a field access. E.g., "items.join" where "join" is a method name, vs /// "obj.data" which is a field access. /// /// Used by field-aware suppression to avoid treating method call expressions /// as untainted field accesses (which would incorrectly suppress base-ident taint). fn is_likely_method_expression(name: &str) -> bool { // Check if the dotted name matches any Call callee in the SSA body, // or if its suffix is a known function/method name. 
let suffix = name.rsplit('.').next().unwrap_or(name); // Common method names that are unlikely to be data field names. // This is a heuristic; it doesn't need to be exhaustive because // false negatives just mean slightly more conservative (no suppression). matches!( suffix, "push" | "pop" | "shift" | "unshift" | "join" | "split" | "concat" | "slice" | "splice" | "map" | "filter" | "reduce" | "forEach" | "find" | "some" | "every" | "get" | "set" | "has" | "delete" | "add" | "remove" | "clear" | "keys" | "values" | "entries" | "toString" | "valueOf" | "send" | "write" | "end" | "render" | "redirect" | "append" | "extend" | "insert" | "update" | "items" | "call" | "apply" | "bind" | "then" | "catch" | "trim" | "replace" | "match" | "search" | "test" | "log" | "warn" | "error" | "info" | "debug" | "execute" | "query" | "fetch" | "request" ) } /// Get all SSA values used by an instruction. fn inst_use_values(inst: &SsaInst) -> Vec { match &inst.op { SsaOp::Phi(operands) => operands.iter().map(|(_, v)| *v).collect(), SsaOp::Assign(uses) => uses.to_vec(), SsaOp::Call { args, receiver, .. } => { let mut vals = Vec::new(); if let Some(rv) = receiver { vals.push(*rv); } for arg in args { vals.extend(arg.iter()); } vals } SsaOp::FieldProj { receiver, .. } => vec![*receiver], SsaOp::Source | SsaOp::Const(_) | SsaOp::Param { .. } | SsaOp::SelfParam | SsaOp::CatchParam | SsaOp::Nop | SsaOp::Undef => Vec::new(), } } // ── Alias-Aware Sanitization ──────────────────────────────────────────── /// After sanitizing `inst`, propagate the sanitization to must-aliased field paths. /// /// When `alias.data` is sanitized and `alias` and `obj` are base aliases (from /// copy propagation), this function also sanitizes `obj.data` in the taint state. /// For plain idents (no dot), sanitizing `alias` also sanitizes `obj`. 
fn propagate_sanitization_to_aliases( inst: &SsaInst, state: &mut SsaTaintState, sanitizer_bits: Cap, aliases: &crate::ssa::alias::BaseAliasResult, ssa: &SsaBody, ) { let var_name = match inst.var_name.as_deref() { Some(n) => n, None => return, }; // Split into base and suffix: "alias.data" → ("alias", ".data"); "alias" → ("alias", "") let (base, suffix) = match var_name.find('.') { Some(pos) => (&var_name[..pos], &var_name[pos..]), None => (var_name, ""), }; let alias_bases = match aliases.aliases_of(base) { Some(bases) => bases, None => return, }; // Collect SsaValues to sanitize (avoid borrowing state while iterating). let to_sanitize: SmallVec<[SsaValue; 8]> = state .values .iter() .filter_map(|&(v, ref t)| { if t.caps.is_empty() { return None; } let vdef_name = ssa.value_defs.get(v.0 as usize)?.var_name.as_deref()?; // For each alias base, check if the value's var_name matches // the aliased field path. for alias_base in alias_bases { if alias_base == base { continue; // skip self, already sanitized } let target = if suffix.is_empty() { // Plain ident: look for exact match on alias base alias_base.as_str() } else { // Can't construct target without allocation; check inline "" }; if suffix.is_empty() { if vdef_name == target { return Some(v); } } else { // Dotted path: check if vdef_name == "{alias_base}{suffix}" if vdef_name.len() == alias_base.len() + suffix.len() && vdef_name.starts_with(alias_base.as_str()) && vdef_name.ends_with(suffix) { return Some(v); } } } None }) .collect(); for v in to_sanitize { if let Some(taint) = state.get(v) { let new_caps = taint.caps & !sanitizer_bits; if new_caps.is_empty() { state.remove(v); } else { state.set( v, VarTaint { caps: new_caps, origins: taint.origins.clone(), uses_summary: taint.uses_summary, }, ); } } } } // ── Alias-Aware Taint Propagation ─────────────────────────────────────── /// After taint assignment to `inst`, propagate taint to must-aliased field paths. 
/// /// When `obj.data` receives taint and `obj` and `alias` are base aliases (from /// copy propagation), this function also taints `alias.data` in the taint state. /// For plain idents (no dot), tainting `obj` also taints `alias`. /// /// Uses only the existing `BaseAliasResult` alias groups, no new alias inference. fn propagate_taint_to_aliases( inst: &SsaInst, state: &mut SsaTaintState, taint_caps: Cap, taint_origins: &SmallVec<[TaintOrigin; 2]>, aliases: &crate::ssa::alias::BaseAliasResult, ssa: &SsaBody, ) { let var_name = match inst.var_name.as_deref() { Some(n) => n, None => return, }; // Split into base and suffix: "obj.data" → ("obj", ".data"); "obj" → ("obj", "") let (base, suffix) = match var_name.find('.') { Some(pos) => (&var_name[..pos], &var_name[pos..]), None => (var_name, ""), }; let alias_bases = match aliases.aliases_of(base) { Some(bases) => bases, None => return, }; // Collect SsaValues to taint. Iterate value_defs (not state.values) because // target alias values may not yet be in the taint state. 
let to_taint: SmallVec<[SsaValue; 8]> = ssa .value_defs .iter() .enumerate() .filter_map(|(idx, vdef)| { let vdef_name = vdef.var_name.as_deref()?; for alias_base in alias_bases { if alias_base == base { continue; // skip self, already tainted } if suffix.is_empty() { // Plain ident: look for exact match on alias base if vdef_name == alias_base.as_str() { return Some(SsaValue(idx as u32)); } } else { // Dotted path: check if vdef_name == "{alias_base}{suffix}" if vdef_name.len() == alias_base.len() + suffix.len() && vdef_name.starts_with(alias_base.as_str()) && vdef_name.ends_with(suffix) { return Some(SsaValue(idx as u32)); } } } None }) .collect(); for v in to_taint { if let Some(existing) = state.get(v) { // Union caps and origins into existing taint let merged_caps = existing.caps | taint_caps; let mut merged_origins = existing.origins.clone(); for orig in taint_origins { push_origin_bounded(&mut merged_origins, *orig); } state.set( v, VarTaint { caps: merged_caps, origins: merged_origins, uses_summary: existing.uses_summary, }, ); } else { // No existing taint, set fresh state.set( v, VarTaint { caps: taint_caps, origins: taint_origins.clone(), uses_summary: false, }, ); } } } // ── SSA-Level Precision Helpers ────────────────────────────────────────── /// Check if all argument SSA values of a call instruction are known constants. fn all_args_const( inst: &SsaInst, const_values: &HashMap, ) -> bool { let used = inst_use_values(inst); if used.is_empty() { return false; // no args → not a call or nothing to suppress } used.iter().all(|v| { matches!( const_values.get(v), Some( crate::ssa::const_prop::ConstLattice::Str(_) | crate::ssa::const_prop::ConstLattice::Int(_) | crate::ssa::const_prop::ConstLattice::Bool(_) | crate::ssa::const_prop::ConstLattice::Null ) ) }) } /// Try to resolve a callee using the receiver's inferred type. 
/// /// When the callee string is `"client.send"` and the receiver SSA value is typed /// as `HttpClient`, constructs `"HttpClient.send"` and checks label rules. /// Returns the matched labels (source/sanitizer/sink) if any. /// /// Resolution order: /// 1. Static type from [`TypeFactResult`] (constructor/const inference) /// 2. Flow-sensitive type from [`PathEnv`] (branch narrowing, casts) fn resolve_type_qualified_labels( callee: &str, receiver: SsaValue, type_facts: Option<&crate::ssa::type_facts::TypeFactResult>, path_env: Option<&constraint::PathEnv>, lang: Lang, extra_labels: Option<&[crate::labels::RuntimeLabelRule]>, ssa: Option<&SsaBody>, ) -> SmallVec<[DataLabel; 2]> { // Candidate method names: the last segment after `.`, plus segments peeled // back through trailing identity-preserving methods (`unwrap`, `expect`, // `await`, etc.). For chain text like `conn.execute(&sql, []).unwrap` the // direct last segment is `unwrap`; the real sink verb is `execute`. // `normalize_chained_call_for_classify` strips paren groups; the walk // peels back through identity methods. let method_candidates = method_candidates_from_chain(callee, lang); // Receiver candidates: the immediate SSA receiver, plus any ancestor // reached by walking back through intermediate `SsaOp::Call.receiver` // chains (Rust parses `conn.execute(&sql, []).unwrap()` as one outer // call whose receiver is another call, and so on). We stop once we find // a typed value or run out of receivers. let receiver_candidates = receiver_candidates_for_type_lookup(receiver, ssa, lang); // 1. 
Try static type first (existing behavior) if let Some(tf) = type_facts { for rv in &receiver_candidates { if let Some(receiver_type) = tf.get_type(*rv) { if let Some(prefix) = receiver_type.label_prefix() { for method in &method_candidates { let qualified = format!("{}.{}", prefix, method); let labels = crate::labels::classify_all(lang.as_str(), &qualified, extra_labels); if !labels.is_empty() { return labels; } } } } } } // 2. Try flow-sensitive type from PathEnv if let Some(env) = path_env { for rv in &receiver_candidates { let types = env.get(*rv).types; if let Some(kind) = types.as_singleton() { if let Some(prefix) = kind.label_prefix() { for method in &method_candidates { let qualified = format!("{}.{}", prefix, method); let labels = crate::labels::classify_all(lang.as_str(), &qualified, extra_labels); if !labels.is_empty() { return labels; } } } } } } SmallVec::new() } /// Walk back through `SsaOp::Call.receiver` and `SsaOp::FieldProj.receiver` /// chains to collect candidate SSA values for type-fact lookup. /// /// Two motivating shapes: /// - Rust chained methods: `conn.execute(x).unwrap()` is one outer call /// whose receiver is itself a call. The stable base identifier /// (`conn`) is several `Call.receiver` hops up. /// - `FieldProj` decomposition: `c.client.send(req)` lowers through /// `v_client = FieldProj(v_c, "client")`, so the typed root (`c`) /// sits one `FieldProj.receiver` hop above `v_client`. /// /// FieldProj walking runs for every language. Call-receiver walking is /// Rust-only, other languages handle method nesting at AST level. fn receiver_candidates_for_type_lookup( start: SsaValue, ssa: Option<&SsaBody>, lang: Lang, ) -> SmallVec<[SsaValue; 4]> { let mut out: SmallVec<[SsaValue; 4]> = SmallVec::new(); out.push(start); let Some(body) = ssa else { return out; }; let mut current = start; for _ in 0..8 { // Find the instruction defining `current`. 
let mut next_receiver: Option = None; 'scan: for block in &body.blocks { for inst in block.phis.iter().chain(block.body.iter()) { if inst.value == current { match &inst.op { // FieldProj receiver chain, universal. SsaOp::FieldProj { receiver, .. } => { next_receiver = Some(*receiver); } // Rust-only: chain through nested Call receivers // (`conn.execute(x).unwrap()` parsed as one outer call). SsaOp::Call { receiver: Some(rv), .. } if matches!(lang, Lang::Rust) => { next_receiver = Some(*rv); } _ => {} } break 'scan; } } } match next_receiver { Some(rv) if !out.contains(&rv) => { out.push(rv); current = rv; } _ => break, } } out } /// Extract candidate method names from a chained-call callee text. /// /// Tree-sitter constructs `a.foo(x).bar()` as nested method-call nodes. The /// CFG records the outermost callee text (here `a.foo(x).bar`), which means /// the last `.`-segment is the terminal method (`bar`). When the terminal /// is an identity-preserving method (`.unwrap()`, `.expect()`, `.await`, /// `.clone()`, etc.), the *real* sink verb is the preceding segment. This /// helper walks back through identity methods to return all plausible /// terminals in priority order (most-specific first). fn method_candidates_from_chain(callee: &str, lang: Lang) -> SmallVec<[String; 4]> { let mut out: SmallVec<[String; 4]> = SmallVec::new(); // Normalize: strip `(...)` groups so we index into `.`-segments directly. // Use the same normalization used for label classification so this mirrors // matcher behavior. let normalized = crate::labels::normalize_chained_call_for_classify(callee); let segments: Vec<&str> = normalized.split('.').collect(); if segments.is_empty() { return out; } // Walk from the end, peeling identity methods. 
let mut i = segments.len(); while i > 0 { let seg = segments[i - 1]; if !seg.is_empty() { out.push(seg.to_string()); } if matches!(lang, Lang::Rust) && crate::ssa::type_facts::is_identity_method(seg) { i -= 1; continue; } break; } out } /// Suppress sinks from known non-sink callees (e.g., `System.out.println` in Java). /// /// These are callees whose suffix matches a broad sink rule but whose /// receiver is known to be safe (console output, not HTTP response). fn suppress_known_safe_callees(sink_caps: Cap, callee: &str, lang: Lang) -> Cap { match lang { Lang::Java => { if callee.starts_with("System.out.") || callee.starts_with("System.err.") { sink_caps & !Cap::HTML_ESCAPE } else { sink_caps } } _ => sink_caps, } } /// Check if a sink is type-safe (e.g., SQL injection or path traversal with int-typed argument). /// /// Suppresses findings when all argument values are known to be integer-typed, /// since integer values cannot carry SQL injection or path traversal payloads. /// Delegates to the shared [`crate::ssa::type_facts::is_type_safe_for_sink`] /// helper so the structural `cfg-unguarded-sink` analysis agrees on the /// suppression rule. fn is_type_safe_for_sink( inst: &SsaInst, sink_caps: Cap, type_facts: &crate::ssa::type_facts::TypeFactResult, ) -> bool { let used = inst_use_values(inst); crate::ssa::type_facts::is_type_safe_for_sink(&used, sink_caps, type_facts) } // ── Centralized Type-Sink Compatibility Helpers ────────────────────────── /// Check if a [`TypeKind`] is safe for a given sink capability. /// /// Returns `true` if the type cannot carry the payload required by the sink. /// Policy: Int/Bool values cannot carry injection payloads (SQL, code, path). /// String-typed values CAN carry injection payloads, casts to String do NOT /// make a value safe. 
fn type_safe_for_taint_sink(kind: &crate::ssa::type_facts::TypeKind, cap: Cap) -> bool { use crate::ssa::type_facts::TypeKind; match kind { TypeKind::Int | TypeKind::Bool => { cap.intersects(Cap::SQL_QUERY | Cap::FILE_IO | Cap::CODE_EXEC | Cap::SHELL_ESCAPE) } _ => false, } } /// Check if a receiver type is incompatible with a sink label's requirements. /// /// Returns the Cap bits that should be REMOVED because the receiver type /// proves the sink doesn't apply. For example, `HTML_ESCAPE` sinks require /// an HTTP-response-like receiver, if the receiver is known to be /// Int/Bool/String, `HTML_ESCAPE` doesn't apply. fn receiver_incompatible_sink_caps(kind: &crate::ssa::type_facts::TypeKind, sink_caps: Cap) -> Cap { use crate::ssa::type_facts::TypeKind; let mut remove = Cap::empty(); // HTML_ESCAPE / OPEN_REDIRECT / HEADER_INJECTION all require an HTTP // response-like receiver: each is a write-side rule that fires when // attacker data is rendered into / written onto the response stream // (`*.send` / `*.redirect` / `*.setHeader` / etc.). Receivers proven // to be a different class — directory-service connections (LDAP), // database connections, file handles, in-memory collections, query- // builder objects, URL values, HTTP clients (request-side), and so on // — cannot host these sinks even when a same-named matcher // (`*.send`, `*.set`, `*.append`) attaches the label by suffix. let response_like_caps = Cap::HTML_ESCAPE | Cap::OPEN_REDIRECT | Cap::HEADER_INJECTION; if sink_caps.intersects(response_like_caps) { match kind { TypeKind::HttpResponse => {} // compatible TypeKind::Unknown | TypeKind::Object => {} // could be response _ => { remove |= sink_caps & response_like_caps; } } } // LDAP_INJECTION strictly requires a directory-service receiver. // Non-LdapClient receivers carrying the cap by accident (e.g. a // generic `*.search` suffix matcher firing on a Vec/HashMap) get the // bit stripped. 
Unknown/Object stay untouched so type-fact gaps // don't silently drop real sinks. if sink_caps.intersects(Cap::LDAP_INJECTION) { match kind { TypeKind::LdapClient => {} // compatible TypeKind::Unknown | TypeKind::Object => {} // could be ldap _ => { remove |= Cap::LDAP_INJECTION; } } } // Injection sinks require string-like payload if type_safe_for_taint_sink(kind, sink_caps) { remove |= sink_caps & (Cap::SQL_QUERY | Cap::FILE_IO | Cap::CODE_EXEC); } remove } /// Check if all argument values of an instruction have types that are safe /// for the given sink (path-sensitive, via [`PathEnv`]). fn is_path_type_safe_for_sink(inst: &SsaInst, sink_caps: Cap, env: &constraint::PathEnv) -> bool { let type_suppressible = Cap::SQL_QUERY | Cap::FILE_IO | Cap::CODE_EXEC; if !sink_caps.intersects(type_suppressible) { return false; } let used = inst_use_values(inst); if used.is_empty() { return false; } used.iter().all(|v| match env.get(*v).types.as_singleton() { Some(ref kind) => type_safe_for_taint_sink(kind, sink_caps), None => false, // Multiple possible types → not safe }) } // ── Abstract-Domain Sink Suppression ──────────────────────────────────── /// Check if abstract domain facts prove a sink is safe. /// /// SSRF: string prefix with locked host. /// SQL_QUERY / FILE_IO: dual gate, type-proven Int AND bounded interval on all /// tainted leaf values. Traces back through Assign chains to find original /// tainted data (e.g., `parseInt(x)` inside `"SELECT ..." + parseInt(x) * 10`). /// /// NOTE: FILE_IO string prefix suppression intentionally omitted. /// A prefix like "/app/static/" does not prevent path traversal /// (e.g., "/app/static/../../etc/passwd"). The string domain cannot /// prove absence of "../" in the attacker-controlled suffix. 
/// Check if abstract domain facts prove a (non-Call) sink is safe.
///
/// Returns `false` immediately when `inst` uses no SSA values. Otherwise the
/// gates below are tried in order and the first one that fires returns
/// `true` for the whole sink:
///
/// 1. SSRF: string prefix with locked host (node-attached template prefix,
///    or the abstract string fact of every used value).
/// 2. DATA_EXFIL: destination matches a configured trusted prefix.
/// 3. SHELL_ESCAPE: every used value is a static-map finite set of
///    shell-safe literals.
/// 4. HTML_ESCAPE / FILE_IO: type-only gate, every tainted leaf is Int.
/// 5. SQL_QUERY / SHELL_ESCAPE: dual gate, every tainted leaf is Int AND has
///    a proven-bounded interval.
/// 6. FILE_IO: every tainted leaf's PathFact is path-traversal safe.
///
/// Gates 4-6 trace back through Assign chains to find the original tainted
/// data (e.g., `parseInt(x)` inside `"SELECT ..." + parseInt(x) * 10`).
///
/// NOTE: FILE_IO string prefix suppression intentionally omitted.
/// A prefix like "/app/static/" does not prevent path traversal
/// (e.g., "/app/static/../../etc/passwd"). The string domain cannot
/// prove absence of "../" in the attacker-controlled suffix.
fn is_abstract_safe_for_sink(
    inst: &SsaInst,
    sink_caps: Cap,
    abs: &AbstractState,
    type_facts: Option<&crate::ssa::type_facts::TypeFactResult>,
    static_map: Option<&crate::ssa::static_map::StaticMapResult>,
    state: &SsaTaintState,
    ssa: &SsaBody,
    cfg: &Cfg,
) -> bool {
    let used = inst_use_values(inst);
    if used.is_empty() {
        return false;
    }

    // SSRF, string prefix with locked host
    if sink_caps.intersects(Cap::SSRF) {
        // Inline template-literal prefix attached to the CFG node directly
        // (covers sinks whose URL is a template literal argument without an
        // intermediate Assign to seed the abstract domain).
        let node_info = &cfg[inst.cfg_node];
        if let Some(prefix) = node_info.string_prefix.as_deref() {
            let synthetic = crate::abstract_interp::StringFact::from_prefix(prefix);
            if is_string_safe_for_ssrf(&synthetic) {
                return true;
            }
        }
        if used
            .iter()
            .all(|v| is_string_safe_for_ssrf(&abs.get(*v).string))
        {
            return true;
        }
    }

    // DATA_EXFIL, destination allowlist via configured trusted prefixes.
    // Mirrors the SSRF prefix-lock above but consults the user-configured
    // [detectors.data_exfil] table's trusted_destinations key. Strict-
    // additive: when no destinations are configured this is a no-op.
    if sink_caps.intersects(Cap::DATA_EXFIL)
        && is_inst_data_exfil_destination_trusted(inst, abs, cfg)
    {
        return true;
    }

    // SHELL_ESCAPE, static-map finite-domain safety. When every tainted
    // payload value is proved by the static-HashMap-lookup analysis to come
    // from a bounded set of metacharacter-free literals, the call cannot
    // carry shell injection regardless of how the attacker influenced the
    // lookup key. Only fires when the value appears in
    // `static_map.finite_string_values`, not for arbitrary single-literal
    // exact facts, those already have their own constant-argument
    // suppression path and we must not over-apply shell-safety to unrelated
    // const-prop bare-string artefacts (e.g. Python `commands = []`).
    if sink_caps.intersects(Cap::SHELL_ESCAPE) && is_static_map_shell_safe(&used, static_map) {
        return true;
    }

    // HTML_ESCAPE / FILE_IO type-only gate: an integer's decimal
    // representation is always digits (with optional leading `-`), which
    // never contain HTML metacharacters (`<`, `>`, `"`, `'`, `&`, `/`,
    // `:`) nor path metacharacters (`/`, `\`, `.`). Magnitude is
    // irrelevant — a large value doesn't introduce metachars, so both
    // sink classes use a type-only leaf check rather than the SQL/SHELL
    // dual gate below. Closes the sudo-rs RUSTSEC-2023-0069 patched FP
    // where `let uid: u32 = user.parse()?; path.push(uid.to_string())`
    // was flagged as a path-traversal FILE_IO sink despite the SSA
    // value being unambiguously typed as a numeric uid.
    if sink_caps.intersects(Cap::HTML_ESCAPE | Cap::FILE_IO) {
        if let Some(tf) = type_facts {
            let leaves = trace_tainted_leaf_values(inst, state, ssa, cfg);
            if !leaves.is_empty() && leaves.iter().all(|v| tf.is_int(*v)) {
                return true;
            }
        }
    }

    // Dual gate: SQL_QUERY / SHELL_ESCAPE with proven Int type AND bounded
    // interval. Both conditions required: type proves the value IS an
    // integer (not a string that happened to parse), interval proves it's
    // bounded (not arbitrary). Traces through Assign chains so
    // "const_string + tainted_int" is caught. SQL_QUERY keeps the bound
    // requirement because RUSTSEC-2024-0363-style binary-protocol overflow
    // requires a 4 GiB+ payload; SHELL_ESCAPE keeps it because a
    // multi-line decimal can still trip newline-sensitive shell parsing.
    if sink_caps.intersects(Cap::SQL_QUERY | Cap::SHELL_ESCAPE) {
        if let Some(tf) = type_facts {
            let leaves = trace_tainted_leaf_values(inst, state, ssa, cfg);
            if !leaves.is_empty()
                && leaves
                    .iter()
                    .all(|v| tf.is_int(*v) && abs.get(*v).interval.is_proven_bounded())
            {
                return true;
            }
        }
    }

    // PathFact gate: FILE_IO with every tainted leaf's PathFact
    // proving `dotdot = No && absolute = No`. Sanitisers documented in
    // rs-safe-0** (Rust `.contains("..")` rejection, `fs::canonicalize`
    // + `starts_with` guard, `Component::Normal` iterator filter) flow
    // through to the leaf values via PathFact; this check is the single
    // point at which the axis conjunction suppresses the sink.
    if sink_caps.intersects(Cap::FILE_IO) && is_path_safe_for_sink(inst, state, ssa, cfg, abs) {
        return true;
    }

    false
}

/// Check every tainted leaf flowing into `inst`'s used values carries a
/// PathFact proving it cannot perform path traversal.
///
/// Core gate for the rs-safe-0** FP closure plus the canonicalised+rooted
/// shape (see [`PathFact::is_path_traversal_safe`]). Traces through
/// Assign chains so `Path::new(sanitised)` still resolves to the
/// sanitised string's fact.
///
/// Returns `false` when no tainted leaf is found. As a side effect, a
/// successful check records the sink's AST span via
/// `record_path_safe_suppressed_span`.
fn is_path_safe_for_sink(
    inst: &SsaInst,
    state: &SsaTaintState,
    ssa: &SsaBody,
    cfg: &Cfg,
    abs: &AbstractState,
) -> bool {
    let leaves = trace_tainted_leaf_values(inst, state, ssa, cfg);
    if leaves.is_empty() {
        return false;
    }
    let safe = leaves
        .iter()
        .all(|v| abs.get(*v).path.is_path_traversal_safe());
    if safe {
        // Publish the suppression to the file-level set so the
        // state-analysis pass can suppress `state-unauthed-access` on
        // the same sink, once the taint engine has proved the
        // user-controlled input cannot escape into a privileged
        // location, the auth concern is structurally reduced.
        let span = cfg[inst.cfg_node].ast.span;
        crate::taint::ssa_transfer::state::record_path_safe_suppressed_span(span);
    }
    safe
}

/// Check if call arguments prove a sink is safe via abstract domain.
///
/// Call-path counterpart of [`is_abstract_safe_for_sink`]: the same gates in
/// the same order, except that the SSRF and DATA_EXFIL gates look at the
/// first positional argument group and the SHELL_ESCAPE gate looks at the
/// values of all argument groups flattened together.
fn is_call_abstract_safe(
    inst: &SsaInst,
    args: &[SmallVec<[SsaValue; 2]>],
    sink_caps: Cap,
    abs: &AbstractState,
    type_facts: Option<&crate::ssa::type_facts::TypeFactResult>,
    static_map: Option<&crate::ssa::static_map::StaticMapResult>,
    state: &SsaTaintState,
    ssa: &SsaBody,
    cfg: &Cfg,
) -> bool {
    // SSRF, check if the URL argument (first arg) has a safe prefix.
    if sink_caps.intersects(Cap::SSRF) {
        // Inline template-literal prefix from the call AST itself
        // (e.g. `axios.get(\`https://host/…${x}\`)` has no intermediate Assign
        // to seed a StringFact, check the node-attached prefix directly).
        let node_info = &cfg[inst.cfg_node];
        if let Some(prefix) = node_info.string_prefix.as_deref() {
            let synthetic = crate::abstract_interp::StringFact::from_prefix(prefix);
            if is_string_safe_for_ssrf(&synthetic) {
                return true;
            }
        }
        if let Some(first_arg) = args.first() {
            if !first_arg.is_empty()
                && first_arg
                    .iter()
                    .all(|v| is_string_safe_for_ssrf(&abs.get(*v).string))
            {
                return true;
            }
        }
    }

    // DATA_EXFIL, destination-allowlist match. Mirrors the SSRF arm above
    // for the Call path. Strict-additive: a no-op when
    // detectors.data_exfil.trusted_destinations is empty.
    if sink_caps.intersects(Cap::DATA_EXFIL)
        && is_call_data_exfil_destination_trusted(inst, args, abs, cfg)
    {
        return true;
    }

    // SHELL_ESCAPE, static-map finite-domain safety on every non-empty arg
    // group. Mirrors the non-Call path so suppression fires regardless of
    // which branch the sink detector took.
    if sink_caps.intersects(Cap::SHELL_ESCAPE) && !args.is_empty() {
        let all_values: Vec = args.iter().flat_map(|g| g.iter().copied()).collect();
        if !all_values.is_empty() && is_static_map_shell_safe(&all_values, static_map) {
            return true;
        }
    }

    // HTML_ESCAPE / FILE_IO type-only gate (same as non-Call path): digits
    // never contain HTML metacharacters or path-traversal metacharacters
    // regardless of magnitude, so an integer payload is safe for these
    // sink classes without requiring a bounded interval. Closes the
    // RUSTSEC-2023-0069 patched FP for cross-function summary-resolved
    // path sinks like `open_for_user(uid)`.
    if sink_caps.intersects(Cap::HTML_ESCAPE | Cap::FILE_IO) {
        if let Some(tf) = type_facts {
            let leaves = trace_tainted_leaf_values(inst, state, ssa, cfg);
            if !leaves.is_empty() && leaves.iter().all(|v| tf.is_int(*v)) {
                return true;
            }
        }
    }

    // Dual gate for Call sinks: SQL_QUERY / SHELL_ESCAPE keep the bounded-
    // interval requirement (see is_abstract_safe_for_sink for the
    // rationale).
    if sink_caps.intersects(Cap::SQL_QUERY | Cap::SHELL_ESCAPE) {
        if let Some(tf) = type_facts {
            let leaves = trace_tainted_leaf_values(inst, state, ssa, cfg);
            if !leaves.is_empty()
                && leaves
                    .iter()
                    .all(|v| tf.is_int(*v) && abs.get(*v).interval.is_proven_bounded())
            {
                return true;
            }
        }
    }

    // PathFact gate (Call path): mirrors non-Call suppression so the gate
    // fires regardless of which sink-detection branch produces the event.
    if sink_caps.intersects(Cap::FILE_IO) && is_path_safe_for_sink(inst, state, ssa, cfg, abs) {
        return true;
    }

    false
}

/// Maximum backwards trace depth through Assign chains.
const MAX_TRACE_DEPTH: usize = 8;

/// Trace backwards through Assign chains to find the leaf tainted SSA values.
///
/// When a tainted value is a binary operation (e.g., string concatenation of
/// `"SELECT ..." + offset`), the concat result is String-typed but the tainted
/// operand (`offset`) may be Int-typed and bounded. This function finds those
/// leaf tainted values so dual-gate suppression can check them directly.
///
/// Only used values that have an entry in `state` are traced; the result is
/// empty when none of `inst`'s used values has one.
fn trace_tainted_leaf_values(
    inst: &SsaInst,
    state: &SsaTaintState,
    ssa: &SsaBody,
    cfg: &Cfg,
) -> SmallVec<[SsaValue; 4]> {
    let mut leaves = SmallVec::new();
    let used = inst_use_values(inst);
    for &v in &used {
        if state.get(v).is_some() {
            trace_single_leaf(v, state, ssa, cfg, &mut leaves, 0);
        }
    }
    leaves
}

/// Recursive worker for [`trace_tainted_leaf_values`]: append the leaf
/// value(s) that `v` derives from to `leaves`.
///
/// `v` itself is pushed as a leaf when the recursion reaches
/// [`MAX_TRACE_DEPTH`], when `leaves` already holds 16 or more entries, when
/// no defining instruction is found in the body of `v`'s block (phis are not
/// searched), or when the defining instruction is one of the stop points
/// described inline. Otherwise the trace continues through those operands
/// or arguments that have an entry in `state`.
fn trace_single_leaf(
    v: SsaValue,
    state: &SsaTaintState,
    ssa: &SsaBody,
    cfg: &Cfg,
    leaves: &mut SmallVec<[SsaValue; 4]>,
    depth: usize,
) {
    // Budget exhausted: stop here and report the current value.
    if depth >= MAX_TRACE_DEPTH || leaves.len() >= 16 {
        leaves.push(v);
        return;
    }

    // Find the instruction defining v by scanning its block.
    let vd = &ssa.value_defs[v.0 as usize];
    let block = &ssa.blocks[vd.block.0 as usize];
    let inst = match block.body.iter().find(|i| i.value == v) {
        Some(i) => i,
        None => {
            // Phi or not found in body, treat as leaf
            leaves.push(v);
            return;
        }
    };

    // Numeric-length reads (`arr.length`, `map.size`, `vec.len()`, ...) yield
    // an integer whose decimal representation cannot contain injection
    // metacharacters. Treat the result as a leaf so the dual-gate / HTML-
    // escape type check sees the Int-typed length value rather than tracing
    // through to the underlying container (which is typically String-typed
    // and would defeat suppression).
    if cfg
        .node_weight(inst.cfg_node)
        .is_some_and(|ni| ni.is_numeric_length_access)
    {
        leaves.push(v);
        return;
    }

    match &inst.op {
        SsaOp::Assign(uses) if uses.len() >= 2 => {
            // Numeric binary operations (bitwise, arithmetic except Add, comparisons)
            // always produce integers, treat the result as a leaf rather than tracing
            // through to the string-typed operands. Add is excluded because it may be
            // string concatenation.
            let bin_op = cfg.node_weight(inst.cfg_node).and_then(|ni| ni.bin_op);
            let is_numeric_op = matches!(
                bin_op,
                Some(
                    crate::cfg::BinOp::Sub
                        | crate::cfg::BinOp::Mul
                        | crate::cfg::BinOp::Div
                        | crate::cfg::BinOp::Mod
                        | crate::cfg::BinOp::BitAnd
                        | crate::cfg::BinOp::BitOr
                        | crate::cfg::BinOp::BitXor
                        | crate::cfg::BinOp::LeftShift
                        | crate::cfg::BinOp::RightShift
                        | crate::cfg::BinOp::Eq
                        | crate::cfg::BinOp::NotEq
                        | crate::cfg::BinOp::Lt
                        | crate::cfg::BinOp::LtEq
                        | crate::cfg::BinOp::Gt
                        | crate::cfg::BinOp::GtEq
                )
            );
            if is_numeric_op {
                leaves.push(v);
                return;
            }
            // Recurse into every operand that has a taint entry; if none
            // does, the Assign result itself is the leaf.
            let mut found = false;
            for &u in uses {
                if state.get(u).is_some() {
                    trace_single_leaf(u, state, ssa, cfg, leaves, depth + 1);
                    found = true;
                }
            }
            if !found {
                leaves.push(v);
            }
        }
        SsaOp::Call { callee, args, .. } if is_stringify_callee(callee) => {
            // String-producing conversion of already-bounded values. Trace
            // through the arguments so the dual-gate check sees the upstream
            // Int/bounded leaves. Examples: `x.to_string()`, `format!(...)`.
            let mut found = false;
            for arg in args {
                for &u in arg {
                    if state.get(u).is_some() {
                        trace_single_leaf(u, state, ssa, cfg, leaves, depth + 1);
                        found = true;
                    }
                }
            }
            if !found {
                leaves.push(v);
            }
        }
        SsaOp::Call { callee, .. }
            if crate::ssa::type_facts::is_int_producing_callee(callee) =>
        {
            // Int-producing conversion (`str.parse::<T>()`, `Atoi`,
            // `parseInt`, ...). Tracing past the Call would land on the
            // String-typed source and defeat the type-only HTML/FILE_IO
            // suppression — but the Call's *result* is unambiguously
            // numeric, so the value itself is the right leaf. Mirrors the
            // is_numeric_length_access stop-leaf at the top of this fn.
            leaves.push(v);
        }
        SsaOp::Call { args, .. } => {
            // For a Call whose node is not itself a Source (so the Call
            // introduces no fresh attacker-controlled taint), trace through
            // the arguments to find the upstream tainted leaves. The Call's
            // return taint is a function of its args under this
            // classification, so the leaves are the Call's inputs. Source-
            // labeled Calls keep the default leaf behavior, tracing past
            // them would erase the Source and over-suppress.
            let is_source = cfg
                .node_weight(inst.cfg_node)
                .map(|ni| {
                    ni.taint
                        .labels
                        .iter()
                        .any(|l| matches!(l, crate::labels::DataLabel::Source(_)))
                })
                .unwrap_or(false);
            // PathFact-proven sanitisation: when the abstract state has
            // recorded a non-Top [`PathFact`] on this Call's result,
            // typically because cross-function inline analysis narrowed
            // the return path's `dotdot` / `absolute` axis, the Call
            // is the *proof point*. Tracing past it would land on the
            // upstream source (whose PathFact is still Top) and defeat
            // the narrowing. Push the Call result as a leaf so
            // `is_path_safe_for_sink` reads the proven fact directly.
            //
            // Strictly additive, only fires when the abstract domain
            // proves a non-Top fact, so source-labeled Calls (already
            // caught above) and unrelated calls fall back to the
            // existing trace-through-args behaviour.
            let proves_path_safe = state.abstract_state.as_ref().is_some_and(|abs_state| {
                let f = abs_state.get(v).path;
                !f.is_top() && f.is_path_traversal_safe()
            });
            if is_source || proves_path_safe {
                leaves.push(v);
            } else {
                let mut found = false;
                for arg in args {
                    for &u in arg {
                        if state.get(u).is_some() {
                            trace_single_leaf(u, state, ssa, cfg, leaves, depth + 1);
                            found = true;
                        }
                    }
                }
                if !found {
                    leaves.push(v);
                }
            }
        }
        SsaOp::Assign(uses) if uses.len() == 1 => {
            // Single-use Assign: pass through to the source value's leaf.
            // Covers the common pattern where SSA lowering emits both a Call
            // form carrying a sink expression and an outer Assign that binds
            // the Call's value to the defined variable, without this, the
            // Assign's tracing stops at the wrapped Call (String-typed by
            // default) and loses the Int / bounded leaf already known through
            // the Call's args.
            let u = uses[0];
            if state.get(u).is_some() {
                trace_single_leaf(u, state, ssa, cfg, leaves, depth + 1);
            } else {
                leaves.push(v);
            }
        }
        _ => {
            // Any other op (including an Assign with no operands) is a leaf.
            leaves.push(v);
        }
    }
}
fn is_stringify_callee(callee: &str) -> bool { let base = crate::ssa::type_facts::peel_identity_suffix(callee); let suffix = base.rsplit(['.', ':']).next().unwrap_or(&base); matches!( suffix, "to_string" | "to_owned" | "format" | "String" | "str" ) } /// Return `true` when every value in `values` was proved by the static-map /// analysis to be drawn from a finite set of metacharacter-free literals. /// Returns `false` when `static_map` is `None`, when any value is missing, /// or when any value's bounded set contains a shell metacharacter, the /// predicate is conservative, so a missing entry never suppresses. fn is_static_map_shell_safe( values: &[SsaValue], static_map: Option<&crate::ssa::static_map::StaticMapResult>, ) -> bool { let Some(sm) = static_map else { return false; }; if values.is_empty() { return false; } values.iter().all(|v| match sm.finite_string_values.get(v) { Some(set) if !set.is_empty() => set .iter() .all(|s| crate::abstract_interp::string_domain::is_shell_safe_literal(s)), _ => false, }) } /// `DATA_EXFIL` destination-allowlist match. /// /// Returns `true` when `prefix` (the proven static prefix of an outbound /// destination URL, sourced from either the abstract string domain or an /// inline literal seen by CFG) starts with one of the user-configured /// trusted destinations. Used by the abstract sink-suppression code to /// drop the [`Cap::DATA_EXFIL`] bit on legitimate forwarding pipelines /// (telemetry, internal APIs, analytics) without affecting other caps on /// the same call. /// /// Match semantics: a trusted destination entry is treated as a string /// prefix. An empty entry never matches (empty prefix would match /// every URL, which is never a useful allowlist). Entries should be /// origin-pinned (e.g. `https://api.internal/`) so partial-host /// collisions cannot occur. 
/// `DATA_EXFIL` destination-allowlist match.
///
/// `prefix` is the proven static prefix of an outbound destination URL and
/// `trusted` the configured allowlist. The result is `true` when `prefix`
/// is non-empty and starts with at least one non-empty allowlist entry.
///
/// Each entry is compared as a plain string prefix. Empty entries are
/// skipped, because an empty prefix would match every URL. Entries should be
/// origin-pinned (e.g. `https://api.internal/`): an entry without the
/// trailing `/` also matches hosts that merely begin with the same text.
fn is_string_prefix_trusted_destination(prefix: &str, trusted: &[String]) -> bool {
    if prefix.is_empty() {
        return false;
    }
    for entry in trusted {
        if entry.is_empty() {
            continue;
        }
        if prefix.starts_with(entry.as_str()) {
            return true;
        }
    }
    false
}
fn is_call_data_exfil_destination_trusted( inst: &SsaInst, args: &[SmallVec<[SsaValue; 2]>], abs: &AbstractState, cfg: &Cfg, ) -> bool { let opts = crate::utils::detector_options::current(); let trusted = &opts.data_exfil.trusted_destinations; if trusted.is_empty() { return false; } let node_info = &cfg[inst.cfg_node]; if let Some(Some(lit)) = node_info.call.arg_string_literals.first() { if is_string_prefix_trusted_destination(lit, trusted) { return true; } } if let Some(prefix) = node_info.string_prefix.as_deref() { if is_string_prefix_trusted_destination(prefix, trusted) { return true; } } if let Some(first_arg) = args.first() { if !first_arg.is_empty() && first_arg.iter().all(|v| { abs.get(*v) .string .prefix .as_deref() .is_some_and(|p| is_string_prefix_trusted_destination(p, trusted)) }) { return true; } } false } /// Non-Call variant of [`is_call_data_exfil_destination_trusted`]: used by /// [`is_abstract_safe_for_sink`] where the destination is read off the /// instruction's own used SSA values rather than a positional Call arg /// list. Falls back to the node-attached `string_prefix` when no abstract /// fact is available. fn is_inst_data_exfil_destination_trusted(inst: &SsaInst, abs: &AbstractState, cfg: &Cfg) -> bool { let opts = crate::utils::detector_options::current(); let trusted = &opts.data_exfil.trusted_destinations; if trusted.is_empty() { return false; } let node_info = &cfg[inst.cfg_node]; if let Some(prefix) = node_info.string_prefix.as_deref() { if is_string_prefix_trusted_destination(prefix, trusted) { return true; } } let used = inst_use_values(inst); if used.is_empty() { return false; } used.iter().all(|v| { abs.get(*v) .string .prefix .as_deref() .is_some_and(|p| is_string_prefix_trusted_destination(p, trusted)) }) } /// SSRF safety: prefix includes scheme + full host + path separator. /// /// Soundness: if the prefix contains `scheme://host/`, the attacker cannot /// control the destination host. 
They can only influence the path/query, /// which is not SSRF. fn is_string_safe_for_ssrf(sf: &crate::abstract_interp::StringFact) -> bool { let prefix = match &sf.prefix { Some(p) => p.as_str(), None => return false, }; // Absolute-path prefix (e.g. "/projects/..."), internal redirect, not open redirect. // The leading "/" locks the path to the same origin; the attacker cannot control the scheme // or host, so this is not an SSRF vector. if prefix.starts_with('/') { return true; } if let Some(after_scheme) = prefix.find("://") { let host_and_rest = &prefix[after_scheme + 3..]; if let Some(slash_pos) = host_and_rest.find('/') { return slash_pos > 0; // non-empty host + path separator } } false } /// Resolve a bare or qualified callee string to a local [`FuncKey`] by /// scanning `local_summaries` (already FuncKey-keyed). /// /// Resolution is deliberately identity-aware: /// /// 1. Filter by `(lang, namespace, name)`, these always participate in the /// identity hash, so the candidate set is guaranteed to be the /// same-file same-leaf-name definitions. /// 2. If `container_hint` is supplied (e.g. the `obj` in `obj.method`), /// narrow to candidates whose [`FuncKey::container`] matches. /// 3. If exactly one candidate remains, return its key. /// /// Returns `None` when zero or multiple candidates remain, callers should /// then fall through to their own ambiguity policy instead of accidentally /// picking an arbitrary definition. /// Split a raw callee string into a `(namespace_qualifier, receiver_var)` /// pair. /// /// * `"env::var"` → `(Some("env"), None)` /// * `"std::io::File::open"` → `(Some("File"), None)`, leaf's immediate /// container is kept so qualified lookup can match /// `File::open`. Deeper module prefixes are discarded here; the call /// graph's Rust-specific resolver handles full paths via the use map. /// * `"obj.method"` → `(None, Some("obj"))` /// * `"a.b.method"` → `(None, Some("b"))`, immediate object hop. 
/// * `"foo"` → `(None, None)` /// /// `::` is treated as a namespace separator and produces a /// `namespace_qualifier`; `.` is treated as a method receiver and /// produces a `receiver_var`. When both separators appear, the /// last-used one wins, matching the leaf-extraction rule in /// [`callee_leaf_name`]. fn split_qualifier(raw: &str) -> (Option<&str>, Option<&str>) { if let Some(pos) = raw.rfind("::") { let prefix = &raw[..pos]; let last = prefix.rsplit("::").next().unwrap_or(prefix); return (if last.is_empty() { None } else { Some(last) }, None); } if let Some(pos) = raw.rfind('.') { let prefix = &raw[..pos]; let last = prefix.rsplit('.').next().unwrap_or(prefix); return (None, if last.is_empty() { None } else { Some(last) }); } (None, None) } /// Look up the caller's own container by matching its name in /// `local_summaries`. Used so bare self-calls (`foo()` inside a class /// method) prefer same-class candidates over free functions. fn caller_container_for(transfer: &SsaTaintTransfer, caller_func: &str) -> Option { if caller_func.is_empty() { return None; } let mut containers: Vec<&str> = transfer .local_summaries .keys() .filter(|k| k.lang == transfer.lang && k.name == caller_func) .map(|k| k.container.as_str()) .filter(|c| !c.is_empty()) .collect(); containers.sort(); containers.dedup(); if containers.len() == 1 { Some(containers[0].to_string()) } else { None } } /// Query-based equivalent of [`resolve_local_func_key`]. /// /// Prefers `receiver_type` → `namespace_qualifier` → `caller_container` /// in that order before falling back to a uniqueness check on the leaf /// name. Keeps behaviour parity with the top-level resolver so /// intra-file lookups apply the same qualified-first policy. 
/// Query-based equivalent of [`resolve_local_func_key`].
///
/// Candidates are the keys of `local_summaries` whose name and language
/// match the query. Resolution then proceeds in this order, each container
/// step also applying the query's arity (when present) and succeeding only
/// if exactly one candidate remains:
///
/// 1. `receiver_type`: authoritative; a miss returns `None` without trying
///    anything else.
/// 2. `namespace_qualifier`.
/// 3. `caller_container`.
/// 4. Uniqueness of the arity-matched leaf name across all containers.
/// 5. `receiver_var` used as a container name.
/// 6. For syntactically bare calls only: the single arity-matched candidate
///    with an empty container (free function).
///
/// Returns `None` when no step yields exactly one candidate.
pub(crate) fn resolve_local_func_key_query(
    local_summaries: &FuncSummaries,
    q: &CalleeQuery<'_>,
) -> Option {
    let all: Vec<&FuncKey> = local_summaries
        .keys()
        .filter(|k| k.name == q.name && k.lang == q.caller_lang)
        .collect();
    if all.is_empty() {
        return None;
    }
    // With no arity in the query every candidate passes.
    let arity_matches = |k: &FuncKey| match q.arity {
        Some(a) => k.arity == Some(a),
        None => true,
    };
    // Narrow to one container (plus arity); only a unique hit counts.
    let pick_with_container = |container: &str| -> Option {
        if container.is_empty() {
            return None;
        }
        let narrowed: Vec<&FuncKey> = all
            .iter()
            .copied()
            .filter(|k| k.container == container)
            .filter(|k| arity_matches(k))
            .collect();
        if narrowed.len() == 1 {
            Some(narrowed[0].clone())
        } else {
            None
        }
    };
    if let Some(rt) = q.receiver_type {
        if let Some(k) = pick_with_container(rt) {
            return Some(k);
        }
        // Authoritative miss, do not silently pick a different container.
        return None;
    }
    if let Some(nq) = q.namespace_qualifier {
        if let Some(k) = pick_with_container(nq) {
            return Some(k);
        }
    }
    if let Some(cc) = q.caller_container {
        if let Some(k) = pick_with_container(cc) {
            return Some(k);
        }
    }
    let arity_filtered: Vec<&FuncKey> = all.iter().copied().filter(|k| arity_matches(k)).collect();
    if arity_filtered.len() == 1 {
        return Some(arity_filtered[0].clone());
    }
    if let Some(rv) = q.receiver_var {
        if let Some(k) = pick_with_container(rv) {
            return Some(k);
        }
    }
    // Bare-call free-function preference, mirrors
    // `GlobalSummaries::resolve_callee` step 5.5. When the call is
    // syntactically bare (no receiver, no namespace qualifier, no
    // authoritative receiver type) and exactly one arity-matched local
    // candidate is a free function (empty container), it is the
    // unambiguous target: class methods cannot be invoked with
    // bare-call syntax from outside their own class (self-calls are
    // handled by the `caller_container` branch above).
    if q.receiver_type.is_none() && q.namespace_qualifier.is_none() && q.receiver_var.is_none() {
        let empty: Vec<&FuncKey> = arity_filtered
            .iter()
            .copied()
            .filter(|k| k.container.is_empty())
            .collect();
        if empty.len() == 1 {
            return Some(empty[0].clone());
        }
    }
    None
}

/// Resolve a leaf function name to a local [`FuncKey`] by scanning the keys
/// of `local_summaries`.
///
/// Candidates are the keys matching `leaf_name` and `lang`; `_namespace` is
/// accepted but not used for filtering. A single candidate is returned as
/// is, without consulting `container_hint`. With several candidates and a
/// `container_hint`, the set is narrowed to keys whose container equals the
/// hint and the result is returned only if exactly one remains.
///
/// Returns `None` when zero or multiple candidates remain, so callers can
/// apply their own ambiguity policy instead of receiving an arbitrary
/// definition.
pub(crate) fn resolve_local_func_key(
    local_summaries: &FuncSummaries,
    lang: Lang,
    _namespace: &str,
    leaf_name: &str,
    container_hint: Option<&str>,
) -> Option {
    // `local_summaries` is file-local; every entry shares the same namespace
    // (raw file path from `build_cfg`). We do not filter by namespace here so
    // callers can pass whichever form they have (raw or normalized).
    let mut candidates: Vec<&FuncKey> = local_summaries
        .keys()
        .filter(|k| k.name == leaf_name && k.lang == lang)
        .collect();
    if candidates.is_empty() {
        return None;
    }
    if candidates.len() > 1 {
        if let Some(container) = container_hint {
            let narrowed: Vec<&FuncKey> = candidates
                .iter()
                .copied()
                .filter(|k| k.container == container)
                .collect();
            if narrowed.len() == 1 {
                return Some(narrowed[0].clone());
            }
            candidates = narrowed;
        }
    }
    if candidates.len() == 1 {
        Some(candidates[0].clone())
    } else {
        None
    }
}

// ── Callee Resolution (mirrors TaintTransfer::resolve_callee) ───────────

/// Caller-side view of a resolved callee: the taint-relevant facts a call
/// site needs, gathered from whichever resolution path produced them.
/// Several fields are only populated by the SSA-summary path, as noted on
/// each field.
struct ResolvedSummary {
    source_caps: Cap,
    sanitizer_caps: Cap,
    sink_caps: Cap,
    /// Per-parameter sink caps: (param_index, caps). When non-empty, only
    /// arguments at these positions flow to internal sinks, enables positional
    /// and capability-aware filtering instead of aggregate-only detection.
    param_to_sink: Vec<(usize, Cap)>,
    /// Per-parameter [`SinkSite`] records mirroring `param_to_sink` by index.
    /// Populated when the underlying summary carried source-coordinate
    /// context (SSA and global `FuncSummary` paths). Empty for label,
    /// local-summary, and interop paths where no [`SinkSite`] was
    /// retained; in that case `param_to_sink` alone still drives sink
    /// detection.
    param_to_sink_sites: Vec<(usize, SmallVec<[SinkSite; 1]>)>,
    /// Per-parameter gate-filter cap masks lifted from the callee's
    /// inner multi-gate sink call sites. Mirrors
    /// [`crate::summary::ssa_summary::SsaFuncSummary::param_to_gate_filters`].
    ///
    /// Each `(param_idx, label_caps)` entry says "this caller-side
    /// parameter flows to a callee-internal gated sink whose narrowed
    /// caps are `label_caps`". When non-empty, the multi-gate dispatch
    /// in [`collect_block_events`] expands one filter pass per entry so
    /// the emitted event's `sink_caps` reflect the gate-specific cap
    /// rather than the aggregate union, preserving SSRF-vs-DATA_EXFIL
    /// (and similar) attribution through wrapper functions.
    ///
    /// Empty for label, local-summary, FuncSummary, and interop paths,
    /// these forms do not retain per-gate cap detail.
    param_to_gate_filters: Vec<(usize, Cap)>,
    propagates_taint: bool,
    propagating_params: Vec,
    /// Parameter indices whose container identity flows to return value.
    param_container_to_return: Vec,
    /// (src_param, container_param) pairs: src taint stored into container.
    param_to_container_store: Vec<(usize, usize)>,
    /// Inferred return type from cross-file SSA summary.
    return_type: Option,
    /// Abstract domain fact for the return value.
    return_abstract: Option,
    /// Internal source taint flows to a call of parameter N with these caps.
    source_to_callback: Vec<(usize, Cap)>,
    /// How receiver (`self`/`this`) taint flows to the return value.
    /// Matches `SsaFuncSummary::receiver_to_return` semantics.
    #[allow(dead_code)]
    receiver_to_return: Option,
    /// Caps that receiver taint reaches at internal sinks.
    #[allow(dead_code)]
    receiver_to_sink: Cap,
    /// Per-parameter abstract-domain transfer channels.
    ///
    /// Populated only when the callee was resolved via an SSA summary
    /// (`convert_ssa_to_resolved`). The label, local-summary, interop
    /// and coarse `FuncSummary` paths carry `Vec::new()` because those
    /// forms do not record abstract-domain behaviour. Applied at the
    /// call site to synthesise an abstract return value from the
    /// caller's knowledge of each argument.
    abstract_transfer: Vec<(usize, crate::abstract_interp::AbstractTransfer)>,
    /// Per-parameter return-path decomposition.
    ///
    /// Populated only when the callee was resolved via an SSA summary
    /// and the summary carries ≥2 distinct return-path predicate gates.
    /// When present, summary application at the call site consults the
    /// caller's [`SsaTaintState::predicates`] and applies only entries
    /// whose predicate gate is consistent with the caller's validated
    /// set, recovering callee-internal path splits that the aggregate
    /// [`Self::sanitizer_caps`] / [`Self::propagating_params`] view
    /// otherwise erases. Empty for non-SSA resolution paths.
    param_return_paths: Vec<(
        usize,
        smallvec::SmallVec<[crate::summary::ssa_summary::ReturnPathTransform; 2]>,
    )>,
    /// Parameter-granularity points-to summary.
    ///
    /// Populated only via `convert_ssa_to_resolved`; other resolution
    /// paths leave it empty (they do not derive alias edges). Empty /
    /// default means "no aliasing beyond what param_to_container_store
    /// already captures", the caller treats the call as a pure
    /// taint-through-signature edge.
    points_to: crate::summary::points_to::PointsToSummary,
    /// Field-granularity per-parameter points-to summary. Populated
    /// only via `convert_ssa_to_resolved` when the SSA summary carries
    /// `field_points_to` records. Applied at the caller call site by
    /// `apply_field_points_to_writes`.
    field_points_to: crate::summary::points_to::FieldPointsToSummary,
    /// Parameter indices whose taint flow to the return is fully
    /// validated by a dominating predicate inside the callee on every
    /// return path. Mirrors
    /// [`crate::summary::ssa_summary::SsaFuncSummary::validated_params_to_return`].
    /// Populated only via `convert_ssa_to_resolved`; other resolution
    /// paths leave it empty (label / coarse-FuncSummary forms cannot
    /// express per-path predicate validation).
    validated_params_to_return: Vec,
}

/// Resolve `callee` with no arity hint and no receiver type; a thin wrapper
/// over [`resolve_callee_hinted`] that passes `None` for the hint.
fn resolve_callee(
    transfer: &SsaTaintTransfer,
    callee: &str,
    caller_func: &str,
    call_ordinal: u32,
) -> Option {
    resolve_callee_hinted(transfer, callee, caller_func, call_ordinal, None)
}

/// Like [`resolve_callee`] but accepts an `arity_hint` that narrows the
/// candidate set to functions with a matching parameter count.
///
/// Used by the call-graph / SSA-transfer paths when the caller knows the
/// number of positional arguments at this site, this eliminates false
/// resolution to same-name siblings with different arities (e.g.
/// `encode(x)` vs `encode(x, opts)` in the same namespace).
///
/// Forwards to `resolve_callee_full` with no receiver type.
fn resolve_callee_hinted(
    transfer: &SsaTaintTransfer,
    callee: &str,
    caller_func: &str,
    call_ordinal: u32,
    arity_hint: Option,
) -> Option {
    resolve_callee_full(
        transfer,
        callee,
        caller_func,
        call_ordinal,
        arity_hint,
        None,
    )
}

/// Like [`resolve_callee_hinted`] but accepts an authoritative
/// `receiver_type` (class/impl name) derived from the SSA receiver
/// value's [`TypeKind::label_prefix`]. When supplied, qualified
/// lookup uses this name first and refuses to fall through to a
/// leaf-name collision on miss (see
/// [`GlobalSummaries::resolve_callee`] step 1).
///
/// The receiver type name is obtained from `receiver_type_prefix` and then
/// forwarded to `resolve_callee_full`.
fn resolve_callee_typed(
    transfer: &SsaTaintTransfer,
    callee: &str,
    caller_func: &str,
    call_ordinal: u32,
    arity_hint: Option,
    receiver: Option,
) -> Option {
    let receiver_type = receiver_type_prefix(transfer, receiver);
    resolve_callee_full(
        transfer,
        callee,
        caller_func,
        call_ordinal,
        arity_hint,
        receiver_type,
    )
}
fn receiver_type_prefix( transfer: &SsaTaintTransfer, receiver: Option, ) -> Option<&'static str> { let v = receiver?; let tf = transfer.type_facts?; let kind = tf.get_type(v)?; kind.label_prefix() } fn resolve_callee_full( transfer: &SsaTaintTransfer, callee: &str, caller_func: &str, call_ordinal: u32, arity_hint: Option, receiver_type: Option<&str>, ) -> Option { // Use leaf name for map/index lookups (FuncKey.name is always leaf). let normalized = callee_leaf_name(callee); // Split the raw callee into structured qualifier hints. A `::` // prefix is a namespace qualifier (authoritative-ish); a `.` // prefix is the syntactic receiver variable, which we treat as a // soft hint. let (namespace_qualifier, receiver_var) = split_qualifier(callee); // -2) Import alias resolution: if the callee matches an aliased import // (e.g. `fetchUserCmd` → `getInput` from `./source`), resolve using the // original exported name instead. This fires before all other resolution // so that downstream steps see the canonical symbol name. if let Some(bindings) = transfer.import_bindings { if let Some(binding) = bindings.get(normalized) { // Recursively resolve using the original name, preserving the // arity hint (the import alias does not change call arity). return resolve_callee_hinted( transfer, &binding.original, caller_func, call_ordinal, arity_hint, ); } } // -1) Callback resolution: if the callee name matches a parameter that was // bound to a specific function at the call site, resolve that function instead. 
if let Some(cb) = transfer.callback_bindings { if let Some(real_key) = cb.get(normalized) { // Try to resolve the actual function via FuncKey-keyed SSA summaries if let Some(ssa_sums) = transfer.ssa_summaries { if let Some(ssa_sum) = ssa_sums.get(real_key) { return Some(convert_ssa_to_resolved_for_caller( ssa_sum, Some(transfer.namespace), )); } } // Try local summaries (already FuncKey-keyed) if let Some(ls) = transfer.local_summaries.get(real_key) { return Some(ResolvedSummary { source_caps: ls.source_caps, sanitizer_caps: ls.sanitizer_caps, sink_caps: ls.sink_caps, param_to_sink: ls .tainted_sink_params .iter() .map(|&i| (i, ls.sink_caps)) .collect(), param_to_sink_sites: vec![], propagates_taint: !ls.propagating_params.is_empty(), propagating_params: ls.propagating_params.clone(), param_container_to_return: vec![], param_to_container_store: vec![], return_type: None, return_abstract: None, source_to_callback: vec![], receiver_to_return: None, receiver_to_sink: Cap::empty(), abstract_transfer: vec![], param_return_paths: vec![], points_to: Default::default(), field_points_to: Default::default(), param_to_gate_filters: vec![], validated_params_to_return: vec![], }); } // Try label classification for the bound function (by leaf name). // Consult both flat rules (`classify_all`) and gated sinks: a // callback bound to a gated sink (e.g. passing // `child_process.exec` directly as the callback) still needs to // surface its `Sink` capability so the source/callback pairing // logic can match `param_to_sink` against the caller's source. // The gate's `payload_args` translate directly into // `param_to_sink` index entries. 
let labels = crate::labels::classify_all( transfer.lang.as_str(), &real_key.name, transfer.extra_labels, ); let gate_matches = crate::labels::classify_gated_sink( transfer.lang.as_str(), &real_key.name, |_| None, |_| None, |_| false, ); if !labels.is_empty() || !gate_matches.is_empty() { let mut source_caps = Cap::empty(); let mut sanitizer_caps = Cap::empty(); let mut sink_caps = Cap::empty(); let mut param_to_sink: Vec<(usize, Cap)> = vec![]; for lbl in &labels { match lbl { DataLabel::Source(bits) => source_caps |= *bits, DataLabel::Sanitizer(bits) => sanitizer_caps |= *bits, DataLabel::Sink(bits) => sink_caps |= *bits, } } for gm in gate_matches.iter() { if let DataLabel::Sink(bits) = gm.label { sink_caps |= bits; // Map the gate's payload_args to per-param sink entries // so source-to-callback pairing can match by index. // Skip the dynamic-activation sentinel — without a // concrete arity we can't enumerate positions here. if gm.payload_args != crate::labels::ALL_ARGS_PAYLOAD { for &idx in gm.payload_args { param_to_sink.push((idx, bits)); } } } } return Some(ResolvedSummary { source_caps, sanitizer_caps, sink_caps, param_to_sink, param_to_sink_sites: vec![], propagates_taint: false, propagating_params: vec![], param_container_to_return: vec![], param_to_container_store: vec![], return_type: None, return_abstract: None, source_to_callback: vec![], receiver_to_return: None, receiver_to_sink: Cap::empty(), abstract_transfer: vec![], param_return_paths: vec![], points_to: Default::default(), field_points_to: Default::default(), param_to_gate_filters: vec![], validated_params_to_return: vec![], }); } } } // Caller-container hint: when the caller lives inside a class/impl, // its own container resolves bare self-calls correctly instead of // collapsing into an unrelated same-leaf definition. 
let caller_container_opt = caller_container_for(transfer, caller_func); let caller_container: Option<&str> = caller_container_opt.as_deref(); // Build the structured query once and reuse across the same-language // resolution steps (0.5 and 2). let build_query = || CalleeQuery { name: normalized, caller_lang: transfer.lang, caller_namespace: transfer.namespace, caller_container, receiver_type, namespace_qualifier, receiver_var, arity: arity_hint, }; // 0) Precise SSA summaries (intra-file, per-parameter transforms). // // Resolve the callee string to a local `FuncKey` via the already- // FuncKey-keyed `local_summaries` index, then consult `ssa_summaries` by // the same key. This preserves container/arity/disambig identity so two // same-name definitions in the same file never share an SSA summary. // // Namespace fallback: `lower_all_functions_from_bodies` rewrites // every SSA summary key's `namespace` to the caller-relative // namespace (for cross-file consistency in `GlobalSummaries`), // while `local_summaries` keys keep the raw file path that // `build_cfg` wrote. When the exact-key lookup misses, fall back // to a namespace-tolerant scan that matches every other FuncKey // field (lang/container/name/arity/disambig/kind), this recovers // intra-file SSA summary lookups in single-file or non-indexed // scans where the two namespaces disagree by construction. if let Some(ssa_sums) = transfer.ssa_summaries { if let Some(key) = resolve_local_func_key_query(transfer.local_summaries, &build_query()) { if let Some(ssa_sum) = ssa_sums.get(&key) { return Some(convert_ssa_to_resolved(ssa_sum)); } if let Some((_, ssa_sum)) = ssa_sums.iter().find(|(k, _)| { k.lang == key.lang && k.container == key.container && k.name == key.name && k.arity == key.arity && k.disambig == key.disambig && k.kind == key.kind }) { return Some(convert_ssa_to_resolved(ssa_sum)); } } } // 0.5) Cross-file SSA summaries (GlobalSummaries.ssa_by_key) with // optional class-hierarchy fan-out. 
// // When the call has an authoritative receiver type AND // `GlobalSummaries::install_hierarchy` has been called AND the // type has recorded sub-types whose `method` overrides exist, the // taint engine sees ALL implementers, not just the super-type's // own definition. This is the runtime counterpart of the // call-graph builder's `resolve_with_hierarchy` step, without // it, virtual dispatch through a super-type silently lost // sub-type sources / sinks. if let Some(gs) = transfer.global_summaries { let widened = gs.resolve_callee_widened(&build_query()); match widened.len() { 0 => {} 1 => { if let Some(ssa_sum) = gs.get_ssa(&widened[0]) { return Some(convert_ssa_to_resolved_for_caller( ssa_sum, Some(transfer.namespace), )); } } _ => { // Hierarchy fan-out: union per-implementer SSA // summaries with "any-impl" semantics so the caller // sees taint from every reachable concrete target. let mut accum: Option = None; let mut covered: usize = 0; for key in &widened { if let Some(ssa_sum) = gs.get_ssa(key) { let r = convert_ssa_to_resolved_for_caller(ssa_sum, Some(transfer.namespace)); accum = Some(match accum { None => r, Some(a) => merge_resolved_summaries_fanout(a, r), }); covered += 1; } } if covered > 0 { tracing::debug!( callee = %callee, impls = covered, widened_total = widened.len(), "hierarchy fan-out: SSA summaries unioned at call site" ); return accum; } // None of the widened keys had SSA summaries, fall // through to step 2 (FuncSummary path) which may have // hierarchy-widened FuncSummary entries. } } } // 1) Local (same-file), lookup via canonical FuncKey using the // same qualified-first policy as the global resolver. 
if let Some(key) = resolve_local_func_key_query(transfer.local_summaries, &build_query()) { if let Some(ls) = transfer.local_summaries.get(&key) { return Some(ResolvedSummary { source_caps: ls.source_caps, sanitizer_caps: ls.sanitizer_caps, sink_caps: ls.sink_caps, param_to_sink: ls .tainted_sink_params .iter() .map(|&i| (i, ls.sink_caps)) .collect(), param_to_sink_sites: vec![], propagates_taint: !ls.propagating_params.is_empty(), propagating_params: ls.propagating_params.clone(), param_container_to_return: vec![], param_to_container_store: vec![], return_type: None, return_abstract: None, source_to_callback: vec![], receiver_to_return: None, receiver_to_sink: Cap::empty(), abstract_transfer: vec![], param_return_paths: vec![], points_to: Default::default(), field_points_to: Default::default(), param_to_gate_filters: vec![], validated_params_to_return: vec![], }); } } else { // Multiple same-name local candidates with no disambiguating // container hint: refuse to pick one rather than fall through to a // less precise global summary that might be the wrong definition. let ambiguous_local = transfer .local_summaries .keys() .filter(|k| k.name == normalized && k.lang == transfer.lang) .count() > 1; if ambiguous_local { return None; } } // 2) Global same-language (FuncSummary path) with class-hierarchy // fan-out. Same semantics as step 0.5 but on coarse FuncSummary // entries, the SSA path missed because no implementer had an SSA // summary, so we widen the FuncSummary lookup symmetrically. 
if let Some(gs) = transfer.global_summaries { let widened = gs.resolve_callee_widened(&build_query()); let convert = |fs: &crate::summary::FuncSummary| ResolvedSummary { source_caps: fs.source_caps(), sanitizer_caps: fs.sanitizer_caps(), sink_caps: fs.sink_caps(), param_to_sink: fs .tainted_sink_params .iter() .map(|&i| (i, fs.sink_caps())) .collect(), // Carry [`SinkSite`]s from the global FuncSummary // so cross-file findings can attribute to the // callee-internal dangerous instruction. param_to_sink_sites: fs.param_to_sink.clone(), propagates_taint: fs.propagates_any(), propagating_params: fs.propagating_params.clone(), param_container_to_return: vec![], param_to_container_store: vec![], return_type: None, return_abstract: None, source_to_callback: vec![], receiver_to_return: None, receiver_to_sink: Cap::empty(), abstract_transfer: vec![], param_return_paths: vec![], points_to: Default::default(), field_points_to: Default::default(), param_to_gate_filters: vec![], validated_params_to_return: vec![], }; match widened.len() { 0 => {} 1 => { if let Some(fs) = gs.get(&widened[0]) { return Some(convert(fs)); } } _ => { let mut accum: Option = None; let mut covered: usize = 0; for key in &widened { if let Some(fs) = gs.get(key) { let r = convert(fs); accum = Some(match accum { None => r, Some(a) => merge_resolved_summaries_fanout(a, r), }); covered += 1; } } if covered > 0 { tracing::debug!( callee = %callee, impls = covered, widened_total = widened.len(), "hierarchy fan-out: FuncSummaries unioned at call site" ); return accum; } } } } // 3) Interop edges for edge in transfer.interop_edges { if edge.from.caller_lang == transfer.lang && edge.from.caller_namespace == transfer.namespace && edge.from.callee_symbol == callee && (edge.from.caller_func.is_empty() || edge.from.caller_func == caller_func) && (edge.from.ordinal == 0 || edge.from.ordinal == call_ordinal) && let Some(gs) = transfer.global_summaries && let Some(fs) = gs.get_for_interop(&edge.to) { return 
Some(ResolvedSummary { source_caps: fs.source_caps(), sanitizer_caps: fs.sanitizer_caps(), sink_caps: fs.sink_caps(), param_to_sink: fs .tainted_sink_params .iter() .map(|&i| (i, fs.sink_caps())) .collect(), param_to_sink_sites: fs.param_to_sink.clone(), propagates_taint: fs.propagates_any(), propagating_params: fs.propagating_params.clone(), param_container_to_return: vec![], param_to_container_store: vec![], return_type: None, return_abstract: None, source_to_callback: vec![], receiver_to_return: None, receiver_to_sink: Cap::empty(), abstract_transfer: vec![], param_return_paths: vec![], points_to: Default::default(), field_points_to: Default::default(), param_to_gate_filters: vec![], validated_params_to_return: vec![], }); } } None } /// Compute the effective sanitizer bits that apply at the call site for a /// specific parameter, narrowed by the caller's predicate state. /// /// When the resolved summary carries `param_return_paths` for `param_idx`: /// filter the entries by predicate consistency with the caller's current /// `SsaTaintState` (`validated_must` + `predicates`). Compatible entries /// are joined with the **intersection-of-strip-bits** rule: the caller does /// not know which return path the callee took, so only bits stripped on /// EVERY compatible path can be considered cleared. /// /// Falls back to `resolved.sanitizer_caps` (the aggregate) when: /// * the summary has no per-path data for this parameter; /// * every path is predicate-compatible (the narrowing adds no information); /// * no path is predicate-compatible (conservative: keep aggregate). 
fn effective_param_sanitizer( resolved: &ResolvedSummary, param_idx: usize, state: &SsaTaintState, ) -> Cap { use crate::summary::ssa_summary::TaintTransform; let paths = match resolved .param_return_paths .iter() .find(|(i, _)| *i == param_idx) { Some((_, p)) => p, None => return resolved.sanitizer_caps, }; // Caller-side predicate envelope: union of known_true / known_false bits // observed across the caller's tracked variables. A path is // compatible if its required bits (known_true / known_false) do not // contradict this envelope. let mut caller_kt: u8 = 0; let mut caller_kf: u8 = 0; for (_, pred) in &state.predicates { caller_kt |= pred.known_true; caller_kf |= pred.known_false; } let mut compatible: smallvec::SmallVec<[&_; 2]> = smallvec::SmallVec::new(); for path in paths { // Contradiction tests: // * path demands bit B true while caller has evidence B is false // * path demands bit B false while caller has evidence B is true // In either case the caller cannot possibly be on this return path. if path.known_true & caller_kf != 0 { continue; } if path.known_false & caller_kt != 0 { continue; } compatible.push(path); } if compatible.is_empty() { // No path applies, the caller's predicate state contradicts every // recorded return. Fall back to the aggregate rather than // synthesise a sanitiser from zero data. return resolved.sanitizer_caps; } // Intersection of strip-bits across compatible paths. Identity // contributes the empty set (nothing stripped); AddBits contributes // nothing to the sanitiser either. let mut common = Cap::all(); let mut saw_any = false; for path in &compatible { match &path.transform { TaintTransform::StripBits(bits) => { common &= *bits; saw_any = true; } TaintTransform::Identity => { common = Cap::empty(); saw_any = true; } TaintTransform::AddBits(_) => { // AddBits doesn't contribute to sanitation; the intersection // is still taken over zero strip contribution. 
common = Cap::empty(); saw_any = true; } } } if !saw_any { resolved.sanitizer_caps } else { common } } /// Convert an `SsaFuncSummary` to the existing `ResolvedSummary` format. fn convert_ssa_to_resolved( ssa_sum: &crate::summary::ssa_summary::SsaFuncSummary, ) -> ResolvedSummary { convert_ssa_to_resolved_for_caller(ssa_sum, None) } fn convert_ssa_to_resolved_for_caller( ssa_sum: &crate::summary::ssa_summary::SsaFuncSummary, caller_namespace: Option<&str>, ) -> ResolvedSummary { use crate::summary::ssa_summary::TaintTransform; let propagating_params: Vec = ssa_sum .param_to_return .iter() .map(|(idx, _)| *idx) .collect(); // Compute effective sanitizer caps: union of StripBits across all params let mut sanitizer_caps = Cap::empty(); for (_, transform) in &ssa_sum.param_to_return { if let TaintTransform::StripBits(bits) = transform { sanitizer_caps |= *bits; } } // Compute effective sink caps: union across all params let sink_caps = ssa_sum.total_param_sink_caps(); let param_to_sink = ssa_sum.param_to_sink_caps(); // Carry the full SinkSite lists through so the taint engine can // attribute cross-file findings to the callee-internal sink. Sites // with coordinates of `(0, 0)` (cap-only, no tree/bytes context at // extraction time) remain in the list but contribute no primary // location, the emission site filters by `SinkSite::line != 0`. // // Strip same-file sites when `caller_namespace` is supplied: the // caller's own taint analysis already produces a finding at the // callee's internal sink (e.g. closure body's `eval(q)` finding at // pass-1 lexical containment), so promoting `primary_location` at // the call site to the same line collides with that finding under // [`crate::commands::scan::deduplicate_taint_flows`] and silently // drops the call-site finding. Cross-file sites are preserved // (the other file's analysis can't be deduped against this one). 
let param_to_sink_sites = if let Some(caller_ns) = caller_namespace { ssa_sum .param_to_sink .iter() .map(|(idx, sites)| { let filtered: SmallVec<[crate::summary::SinkSite; 1]> = sites .iter() .filter(|s| s.file_rel.is_empty() || s.file_rel != caller_ns) .cloned() .collect(); (*idx, filtered) }) .filter(|(_, sites)| !sites.is_empty()) .collect() } else { ssa_sum.param_to_sink.clone() }; ResolvedSummary { source_caps: ssa_sum.source_caps, sanitizer_caps, sink_caps, param_to_sink, param_to_sink_sites, propagates_taint: !propagating_params.is_empty(), propagating_params, param_container_to_return: ssa_sum.param_container_to_return.clone(), param_to_container_store: ssa_sum.param_to_container_store.clone(), return_type: ssa_sum.return_type.clone(), return_abstract: ssa_sum.return_abstract.clone(), source_to_callback: ssa_sum.source_to_callback.clone(), receiver_to_return: ssa_sum.receiver_to_return.clone(), receiver_to_sink: ssa_sum.receiver_to_sink, abstract_transfer: ssa_sum.abstract_transfer.clone(), param_return_paths: ssa_sum.param_return_paths.clone(), points_to: ssa_sum.points_to.clone(), field_points_to: ssa_sum.field_points_to.clone(), param_to_gate_filters: ssa_sum.param_to_gate_filters.clone(), validated_params_to_return: ssa_sum.validated_params_to_return.to_vec(), } } /// Merge two [`ResolvedSummary`] values into a single "any-implementer" /// summary suitable for use at a virtual-dispatch call site whose /// receiver static type fans out to multiple concrete implementers via /// [`crate::callgraph::TypeHierarchyIndex`]. /// /// Semantics, designed to keep the engine sound under fan-out: /// /// * **Caps that *grow* the taint signal** /// (`source_caps`, `sink_caps`, `receiver_to_sink`, /// `propagates_taint`), **OR**. Any implementer that introduces /// the cap is a valid runtime target, so the union conservatively /// covers every dispatch outcome. /// * **`sanitizer_caps`**, **AND**. 
Only bits sanitized by *every* /// implementer can be considered cleared at the call site, since /// the dispatch could land on the implementer that doesn't /// sanitize. /// * **Per-parameter vectors** (`param_to_sink`, `propagating_params`, /// `param_container_to_return`, `param_to_container_store`, /// `source_to_callback`), **union**. An impl that contributes a /// propagation/sink at parameter N is a valid runtime path; missing /// impls do not subtract. /// * **`param_to_sink_sites`**, concatenated per-parameter (dedup /// on `SinkSite::PartialEq`). Each site is independently /// emittable; the dedup avoids reporting the same callee-internal /// sink twice. /// * **SSA-precision fields** (`return_type`, `return_abstract`, /// `receiver_to_return`, `abstract_transfer`, `param_return_paths`, /// `points_to`), **drop on disagreement**. These describe the /// precise behavior of *one* function body; merging two /// incompatible bodies yields a meaningless composite. Identity /// is preserved when both sides agree exactly (string equality or /// PartialEq), keeping single-impl cases lossless. fn merge_resolved_summaries_fanout( mut acc: ResolvedSummary, r: ResolvedSummary, ) -> ResolvedSummary { // Caps + booleans acc.source_caps |= r.source_caps; acc.sanitizer_caps &= r.sanitizer_caps; acc.sink_caps |= r.sink_caps; acc.propagates_taint |= r.propagates_taint; acc.receiver_to_sink |= r.receiver_to_sink; // param_to_sink: union per-parameter caps for (idx, caps) in r.param_to_sink { if let Some(slot) = acc.param_to_sink.iter_mut().find(|(i, _)| *i == idx) { slot.1 |= caps; } else { acc.param_to_sink.push((idx, caps)); } } // param_to_sink_sites: union per-parameter site lists with PartialEq // dedup, so the same callee-internal sink isn't reported twice when // multiple impls share an inherited definition. 
for (idx, sites) in r.param_to_sink_sites { if let Some(slot) = acc.param_to_sink_sites.iter_mut().find(|(i, _)| *i == idx) { for site in sites { if !slot.1.iter().any(|s| s == &site) { slot.1.push(site); } } } else { acc.param_to_sink_sites.push((idx, sites)); } } // propagating_params: union (any propagator wins) for p in r.propagating_params { if !acc.propagating_params.contains(&p) { acc.propagating_params.push(p); } } for p in r.param_container_to_return { if !acc.param_container_to_return.contains(&p) { acc.param_container_to_return.push(p); } } for pair in r.param_to_container_store { if !acc.param_to_container_store.contains(&pair) { acc.param_to_container_store.push(pair); } } // source_to_callback: union per-parameter caps (mirrors param_to_sink) for (idx, caps) in r.source_to_callback { if let Some(slot) = acc.source_to_callback.iter_mut().find(|(i, _)| *i == idx) { slot.1 |= caps; } else { acc.source_to_callback.push((idx, caps)); } } // param_to_gate_filters: dedup-union (idx, caps) pairs. Each // implementer may carry its own per-position cap split; the union // preserves cap attribution from any implementer reachable via // virtual dispatch. for (idx, caps) in r.param_to_gate_filters { if !acc .param_to_gate_filters .iter() .any(|&(i, c)| i == idx && c == caps) { acc.param_to_gate_filters.push((idx, caps)); } } // SSA-precision fields: drop on any disagreement. if acc.return_type != r.return_type { acc.return_type = None; } if acc.return_abstract != r.return_abstract { acc.return_abstract = None; } if acc.receiver_to_return != r.receiver_to_return { acc.receiver_to_return = None; } if acc.abstract_transfer != r.abstract_transfer { acc.abstract_transfer = Vec::new(); } if acc.param_return_paths != r.param_return_paths { acc.param_return_paths = Vec::new(); } if acc.points_to != r.points_to { acc.points_to = Default::default(); } acc }