//! SSA function-summary and container-flow extraction. //! //! Extracted from the monolithic `ssa_transfer.rs`. Contains: //! * [`extract_ssa_func_summary`], runs per-parameter taint probes and //! synthesises an [`crate::summary::ssa_summary::SsaFuncSummary`] with //! source caps, return transforms, per-path transforms, and sink site //! attribution. //! * [`extract_container_flow_summary`], structural scan for //! `param_container_to_return` + `param_to_container_store` pairs. //! * Private helpers for predicate-hash summarisation, abstract-transfer //! derivation, callback source detection, and return-type inference. use super::events::extract_sink_arg_positions; use super::state::{BindingKey, SsaTaintState}; use super::{ SsaTaintEvent, SsaTaintTransfer, detect_variant_inner_fact, run_ssa_taint_full, transfer_block, transfer_inst, }; use crate::cfg::{BodyId, Cfg, FuncSummaries}; use crate::labels::{Cap, SourceKind}; use crate::ssa::ir::{SsaBody, SsaOp, SsaValue, Terminator}; use crate::summary::GlobalSummaries; use crate::symbol::Lang; use crate::taint::domain::{TaintOrigin, VarTaint}; use petgraph::graph::NodeIndex; use smallvec::SmallVec; use std::collections::{HashMap, HashSet}; /// Maximum number of parameters to probe for summary extraction. /// Functions with more params fall back to legacy `FuncSummary`. const MAX_PROBE_PARAMS: usize = 8; /// Extract a precise per-parameter `SsaFuncSummary` from an already-lowered SSA body. /// /// For each parameter (up to `MAX_PROBE_PARAMS`), runs a taint probe by seeding /// that parameter with `Cap::all()` via `global_seed` and observing what caps /// survive to return positions and which sinks fire. A final probe with no params /// tainted detects intrinsic source caps. 
#[allow(clippy::too_many_arguments)]
pub fn extract_ssa_func_summary(
    ssa: &SsaBody,
    cfg: &Cfg,
    local_summaries: &FuncSummaries,
    global_summaries: Option<&GlobalSummaries>,
    lang: Lang,
    namespace: &str,
    interner: &crate::state::symbol::SymbolInterner,
    param_count: usize,
    module_aliases: Option<&HashMap>>,
    locator: Option<&crate::summary::SinkSiteLocator<'_>>,
    formal_param_names: Option<&[String]>,
    formal_destructured_fields: Option<&[Vec]>,
) -> crate::summary::ssa_summary::SsaFuncSummary {
    // Thin wrapper: every argument is forwarded unchanged and `ssa_summaries`
    // (the twelfth positional argument of the `_full` variant) is `None`.
    //
    // NOTE(review): `Option<&HashMap>>` and `Option<&[Vec]>` above look like
    // they lost their generic arguments; left untouched, confirm against the
    // canonical file.
    extract_ssa_func_summary_full(
        ssa,
        cfg,
        local_summaries,
        global_summaries,
        lang,
        namespace,
        interner,
        param_count,
        module_aliases,
        locator,
        formal_param_names,
        None,
        formal_destructured_fields,
    )
}

/// Like [`extract_ssa_func_summary`] but allows passing an in-progress
/// `ssa_summaries` map so the per-parameter probes can resolve callee
/// SSA summaries via step 0 of `resolve_callee_full`.
///
/// This enables transitive cross-function summary propagation: when a
/// caller's body references a callee whose summary was just augmented
/// by the closure-capture lift pass, the caller's probe sees the
/// augmented `param_to_sink` and can propagate it onto the caller's
/// own summary. Used by `lower_all_functions_from_bodies`'s second
/// extraction pass after `augment_summaries_with_child_sinks`.
///
/// Order of work in the body:
/// 1. collect `(index, var_name, value)` for every `SsaOp::Param` below the
///    probe limit and the indices of blocks ending in `Terminator::Return`;
/// 2. run a baseline probe with an empty seed (source caps, return abstract
///    value, per-return path facts);
/// 3. run one seeded probe per collected parameter (return transforms,
///    validated-parameter detection, per-path transforms, sink attribution);
/// 4. add the structural summaries (container flow, points-to, return type,
///    source-to-callback, abstract transfer) and assemble the result.
///
/// NOTE(review): several type annotations in this function appear to have
/// lost their generic arguments (e.g. `Vec = ssa…`, `HashMap`, `Option`,
/// `.collect::>()`), and `¶m_info` looks like a mis-decoded `&param_info`.
/// They are reproduced as found; confirm against the canonical file.
#[allow(clippy::too_many_arguments)]
pub fn extract_ssa_func_summary_full(
    ssa: &SsaBody,
    cfg: &Cfg,
    local_summaries: &FuncSummaries,
    global_summaries: Option<&GlobalSummaries>,
    lang: Lang,
    namespace: &str,
    interner: &crate::state::symbol::SymbolInterner,
    param_count: usize,
    module_aliases: Option<&HashMap>>,
    locator: Option<&crate::summary::SinkSiteLocator<'_>>,
    formal_param_names: Option<&[String]>,
    ssa_summaries: Option<
        &HashMap,
    >,
    // Per-parameter destructured-binding sibling names. Entry `i` is
    // the list of field names destructured by the same call-site arg
    // slot as the primary `formal_param_names[i]`, excluding the
    // primary name. Empty vec for non-destructured params; `None` for
    // callers that don't carry destructure info (legacy / test paths).
    // Drives the destructured-arg expansion in the per-param probe so
    // taint flow through sibling bindings is visible to summary
    // extraction (CVE-2026-25544 / @payloadcms/drizzle SQLi).
    formal_destructured_fields: Option<&[Vec]>,
) -> crate::summary::ssa_summary::SsaFuncSummary {
    use crate::summary::SinkSite;
    use crate::summary::ssa_summary::{SsaFuncSummary, TaintTransform};

    let effective_params = param_count.min(MAX_PROBE_PARAMS);

    // Collect (param_index, var_name, ssa_value) from the SSA body.
    // Params without a `var_name` are skipped because the seed is keyed by name.
    let mut param_info: Vec<(usize, String, SsaValue)> = Vec::new();
    for block in &ssa.blocks {
        for inst in block.phis.iter().chain(block.body.iter()) {
            if let SsaOp::Param { index } = &inst.op {
                if *index < effective_params {
                    if let Some(name) = inst.var_name.as_ref() {
                        param_info.push((*index, name.clone(), inst.value));
                    }
                }
            }
        }
    }

    // Identify return-reaching blocks
    let return_blocks: Vec = ssa
        .blocks
        .iter()
        .enumerate()
        .filter(|(_, b)| matches!(b.terminator, Terminator::Return(_)))
        .map(|(i, _)| i)
        .collect();

    // Collect all param SSA values to exclude from return cap collection.
    // Param values persist with their seeded taint throughout the function;
    // we only want caps on derived values (call results, assigns) at return.
    let all_param_values: std::collections::HashSet =
        param_info.iter().map(|(_, _, v)| *v).collect();

    // Per-return-block observation captured alongside the aggregate return
    // caps. Each entry records one return block's exit state, caps
    // contributed on that path, path-predicate hash, known_true/false bits,
    // and the return SSA value's abstract fact, so the per-param loop can
    // emit one [`ReturnPathTransform`] per distinct predicate gate.
    struct ReturnBlockObs {
        /// Caps at the return SSA value (or joined live values for
        /// implicit returns) on this block's exit.
        derived_caps: Cap,
        /// Caps collected from parameter values reaching this return
        /// (passthrough fallback).
        param_caps: Cap,
        /// Deterministic hash of the predicate gate at this return.
        /// `0` means "no predicate gate", an unguarded return.
        predicate_hash: u64,
        /// `PredicateSummary::known_true` bits intersected across all
        /// tracked variables at this return. Encoded via
        /// [`crate::taint::domain::predicate_kind_bit`].
        known_true: u8,
        /// `PredicateSummary::known_false` bits at this return.
        known_false: u8,
        /// Abstract fact on the return SSA value at this return (None
        /// when Top or abstract interp disabled).
        abstract_value: Option,
        /// [`crate::abstract_interp::PathFact`] on the return SSA value
        /// at this block's exit. Top when abstract interp is disabled
        /// or no narrowing was proved on this path.
        path_fact: crate::abstract_interp::PathFact,
        /// Inner [`PathFact`] when the rv on this path is a one-arg
        /// variant constructor; [`None`] otherwise.
        variant_inner_fact: Option,
        /// `true` when the per-param probe's seeded parameter var_name
        /// is in this return block's exit `validated_must`. `false`
        /// for the baseline (no-seed) probe and for params not
        /// validated on this path. Drives
        /// `validated_params_to_return` summary extraction.
        param_validated_must: bool,
    }

    // Helper: run a taint probe with a given global_seed and return
    // the aggregate return caps, sink events, joined return abstract,
    // and the per-return-block observation list used to derive
    // per-return-path transforms.
    //
    // `probe_param_names` lists the seeded parameter's `var_name`
    // plus any destructured-binding siblings sharing the slot
    // (`None` for the baseline source-caps probe). When non-empty,
    // each return-block observation records whether ANY of those
    // names is in the exit state's `validated_must`, which feeds
    // `validated_params_to_return` summary extraction below. The
    // any-name semantics matches the slot-wide model: a destructured
    // formal `({ a, b, c })` represents one call-site slot, and any
    // sibling reaching `validated_must` proves the slot's caps were
    // narrowed before reaching the return.
    let run_probe = |seed: HashMap,
                     probe_param_names: Option<&[&str]>|
     -> (
        Cap,
        Vec,
        Option,
        Vec,
    ) {
        // An empty seed map is passed to the transfer as "no seed at all".
        let seed_ref = if seed.is_empty() { None } else { Some(&seed) };
        let transfer = SsaTaintTransfer {
            lang,
            namespace,
            interner,
            local_summaries,
            global_summaries,
            interop_edges: &[],
            owner_body_id: BodyId(0),
            parent_body_id: None,
            global_seed: seed_ref,
            param_seed: None,
            receiver_seed: None,
            const_values: None,
            type_facts: None,
            ssa_summaries,
            extra_labels: None,
            base_aliases: None,
            callee_bodies: None,
            inline_cache: None,
            context_depth: 0,
            callback_bindings: None,
            points_to: None,
            dynamic_pts: None,
            import_bindings: None,
            promisify_aliases: None,
            module_aliases,
            static_map: None,
            auto_seed_handler_params: false,
            cross_file_bodies: None,
            pointer_facts: None,
        };

        let (events, block_states) = run_ssa_taint_full(ssa, cfg, &transfer);

        // Collect surviving caps at return blocks.
        // Separate param values from derived values: derived values give
        // more precise transforms (they reflect function-internal sanitization).
        // If only param values reach return → pure passthrough (Identity).
        let mut total_derived_caps = Cap::empty();
        let mut total_param_caps = Cap::empty();

        // Extract abstract value of the return SSA value.
        let mut return_abstract: Option = None;

        // Per-return-block observations for per-path transforms.
        let mut per_return: Vec = Vec::with_capacity(return_blocks.len());

        for &bid in &return_blocks {
            // Return blocks without a recorded entry state are skipped entirely.
            if let Some(entry) = &block_states[bid] {
                // Replay the block from its entry state to obtain the exit state.
                let empty_induction = HashSet::new();
                let exit = transfer_block(
                    &ssa.blocks[bid],
                    cfg,
                    ssa,
                    &transfer,
                    entry.clone(),
                    &empty_induction,
                    None,
                );

                let ret_val = match &ssa.blocks[bid].terminator {
                    Terminator::Return(rv) => rv.as_ref().copied(),
                    _ => None,
                };

                let mut block_derived_caps = Cap::empty();
                let mut block_param_caps = Cap::empty();

                if let Some(rv) = ret_val {
                    // Explicit return value: use only its taint for derived_caps.
                    // If rv has no taint entry, this block contributes no derived caps.
                    if let Some(taint) = exit.get(rv) {
                        if all_param_values.contains(&rv) {
                            block_param_caps |= taint.caps;
                        } else {
                            block_derived_caps |= taint.caps;
                        }
                    }
                    // When rv is not a param value, also collect param taint as a
                    // fallback. The SSA terminator's rv may point to the last body
                    // instruction (e.g. push/append result) rather than the actual
                    // return expression (the container parameter itself). This fires
                    // both when rv is tainted (derived) and when rv is untainted
                    // (the push result may have no taint but the param does).
                    // Skip when rv IS a param (already handled above) or when rv is
                    // a Const (provably untainted constant return).
                    //
                    // Only instructions of this return block are searched for the
                    // `Const` definition of rv.
                    let rv_is_const = ssa.blocks[bid]
                        .body
                        .iter()
                        .chain(ssa.blocks[bid].phis.iter())
                        .any(|inst| inst.value == rv && matches!(inst.op, SsaOp::Const(_)));
                    if !all_param_values.contains(&rv) && !rv_is_const {
                        for (val, taint) in &exit.values {
                            if all_param_values.contains(val) {
                                block_param_caps |= taint.caps;
                            }
                        }
                    }
                } else {
                    // Return(None): implicit return, fall back to all live values.
                    for (val, taint) in &exit.values {
                        if all_param_values.contains(val) {
                            block_param_caps |= taint.caps;
                        } else {
                            block_derived_caps |= taint.caps;
                        }
                    }
                }

                total_derived_caps |= block_derived_caps;
                total_param_caps |= block_param_caps;

                // Abstract return: use terminator's return value when available,
                // fall back to last instruction heuristic for Return(None).
                let mut block_abs: Option = None;
                let mut block_path_fact = crate::abstract_interp::PathFact::top();
                let mut block_variant_inner: Option = None;
                if let Some(ref abs) = exit.abstract_state {
                    let abs_rv = ret_val.or_else(|| {
                        ssa.blocks[bid]
                            .body
                            .last()
                            .or_else(|| ssa.blocks[bid].phis.last())
                            .map(|inst| inst.value)
                    });
                    if let Some(rv) = abs_rv {
                        let av = abs.get(rv);
                        block_path_fact = av.path.clone();
                        if !av.is_top() {
                            block_abs = Some(av.clone());
                            // Join this block's value into the function-wide return abstract.
                            return_abstract = Some(match return_abstract {
                                None => av,
                                Some(prev) => prev.join(&av),
                            });
                        }
                        block_variant_inner = detect_variant_inner_fact(rv, ssa, &exit);
                    }
                }

                // Derive a predicate hash + known-true/false
                // intersection across tracked variables at this return.
                // The hash is stable across runs for a given predicate
                // shape so call sites can compare paths deterministically.
                let (predicate_hash, known_true, known_false) =
                    summarise_return_predicates(&exit);

                // A name that the interner does not know counts as "not validated".
                let param_validated_must = match probe_param_names {
                    Some(names) => names.iter().any(|name| match interner.get(name) {
                        Some(sym) => exit.validated_must.contains(sym),
                        None => false,
                    }),
                    None => false,
                };

                per_return.push(ReturnBlockObs {
                    derived_caps: block_derived_caps,
                    param_caps: block_param_caps,
                    predicate_hash,
                    known_true,
                    known_false,
                    abstract_value: block_abs,
                    path_fact: block_path_fact,
                    variant_inner_fact: block_variant_inner,
                    param_validated_must,
                });
            }
        }

        // Prefer derived caps; fall back to param caps for passthrough functions
        let return_caps = if !total_derived_caps.is_empty() {
            total_derived_caps
        } else {
            total_param_caps
        };

        // Drop return_abstract if it joined to Top
        let return_abstract = return_abstract.filter(|v| !v.is_top());

        (return_caps, events, return_abstract, per_return)
    };

    // Probe with no params tainted → detect source_caps + return abstract.
    // Abstract values don't depend on taint seeding, so the baseline probe
    // captures the function's intrinsic abstract return value.
    let (baseline_return_caps, _baseline_events, return_abstract, baseline_obs) =
        run_probe(HashMap::new(), None);
    let source_caps = baseline_return_caps;

    // Per-return-path PathFact decomposition derived from the baseline
    // probe (no seeded taint). Abstract facts on the return rv are
    // independent of taint seeding, they describe the function's
    // intrinsic narrowing, so the baseline run captures them without
    // per-param noise.
    //
    // Emitted only when ≥2 return-block entries have distinct predicate
    // hashes *and* at least one entry carries non-Top signal (fact or
    // variant_inner_fact). A uniform all-Top list adds bytes without
    // helping any caller.
    let mut return_path_facts: SmallVec<[crate::summary::ssa_summary::PathFactReturnEntry; 2]> =
        SmallVec::new();
    if baseline_obs.len() >= 2 {
        let mut merged: SmallVec<[crate::summary::ssa_summary::PathFactReturnEntry; 2]> =
            SmallVec::new();
        for obs in &baseline_obs {
            let entry = crate::summary::ssa_summary::PathFactReturnEntry {
                predicate_hash: obs.predicate_hash,
                known_true: obs.known_true,
                known_false: obs.known_false,
                path_fact: obs.path_fact.clone(),
                variant_inner_fact: obs.variant_inner_fact.clone(),
            };
            crate::summary::ssa_summary::merge_path_fact_return_paths(&mut merged, &[entry]);
        }
        let distinct_hashes = merged
            .iter()
            .map(|e| e.predicate_hash)
            .collect::>();
        let has_signal = merged
            .iter()
            .any(|e| !e.path_fact.is_top() || e.variant_inner_fact.is_some());
        if distinct_hashes.len() >= 2 && has_signal {
            return_path_facts = merged;
        }
    }

    // Probe each param
    let mut param_to_return = Vec::new();
    let mut param_to_sink: Vec<(usize, SmallVec<[SinkSite; 1]>)> = Vec::new();
    let mut param_to_sink_param = Vec::new();
    // Per-param gate-filter cap masks lifted from inner multi-gate sink calls.
    // Populated when the per-param probe reaches a sink whose CFG node carries
    // [`crate::cfg::CallMeta::gate_filters`] with more than one entry, the
    // multi-gate dispatch in `collect_block_events` has already cap-narrowed
    // `event.sink_caps` to the matching gate's `label_caps`, so we record the
    // pair as-is. Cross-file callers consume this list to preserve per-position
    // cap attribution through wrapper functions like
    // `fn forward(url, body) { fetch(url, {body}) }`.
    let mut param_to_gate_filters: Vec<(usize, Cap)> = Vec::new();
    // Per-param return-path decomposition. Populated only when the param
    // has ≥2 distinct return-block predicate hashes, a single-return-path
    // callee is already precise via `param_to_return`.
    let mut param_return_paths: Vec<(
        usize,
        SmallVec<[crate::summary::ssa_summary::ReturnPathTransform; 2]>,
    )> = Vec::new();
    // Parameter indices whose taint flow to the return is fully
    // validated by a dominating predicate on every return path.
    // Populated below by checking each per-param probe's return-block
    // exit states for `validated_must` containing the param's
    // var_name. Empty when no parameter is validated.
    let mut validated_params_to_return: SmallVec<[usize; 2]> = SmallVec::new();

    for &(idx, ref var_name, _ssa_val) in ¶m_info {
        let mut seed = HashMap::new();
        let origin = TaintOrigin {
            node: NodeIndex::new(0), // synthetic origin for probing
            source_kind: SourceKind::UserInput,
            source_span: None,
        };
        let probe_taint = VarTaint {
            caps: Cap::all(),
            origins: SmallVec::from_elem(origin, 1),
            uses_summary: false,
        };
        seed.insert(
            BindingKey::new(var_name.as_str(), BodyId(0)),
            probe_taint.clone(),
        );

        // Destructured-arg sibling expansion. When the formal at slot
        // `idx` destructures an object pattern (`({ column, operator,
        // value })`), the SSA body emits a separate [`SsaOp::Param`]
        // for every destructured binding (sequential indices > slot
        // count, since the closure-capture pass treats them as
        // free-identifier reads). The call-site only passes ONE arg
        // for the slot, so the engine never seeds the sibling Param
        // ops at runtime — but the per-parameter SUMMARY probe must
        // model "if this slot is tainted then every binding it
        // produced is tainted too". Seed each sibling's `var_name`
        // with the same caps the primary received. The probe-level
        // `validated_must` check below treats the slot as validated
        // when ANY sibling lands in `validated_must` on a return path.
        //
        // Closes the residual gap behind CVE-2026-25544 (PayloadCMS
        // `@payloadcms/drizzle` SQLi via `createJSONQuery({ value })`):
        // the validator helper `sanitizeValue(value, operator)` lives
        // inside the body and the probe needs to see `value` flow
        // through the `validated_params_to_return` channel before
        // suppressing the caller's sink.
        let slot_siblings: &[String] = formal_destructured_fields
            .and_then(|d| d.get(idx))
            .map(|v| v.as_slice())
            .unwrap_or(&[]);
        for sib in slot_siblings {
            seed.insert(
                BindingKey::new(sib.as_str(), BodyId(0)),
                probe_taint.clone(),
            );
        }

        // Phantom-Param prefix seeding. SSA lowering of arrow / nested
        // function bodies often exposes free-identifier member-access
        // expressions (e.g. `file._source.uri`) as their own
        // [`SsaOp::Param`] ops with composite `var_name`s like
        // `"file._source.uri"`. These phantom Params are the values
        // actually used as call arguments, not the formal-param SSA
        // value the seed targets. Without this, the per-param probe
        // misses cross-call sinks because the call's arg SSA value is
        // a phantom Param with no seed entry, so `transfer_inst::Param`
        // leaves it untainted and `collect_tainted_sink_values`
        // observes empty caps despite the formal param being seeded.
        //
        // Seed every phantom Param whose `var_name` begins with
        // `formal_var_name + "."` with the same caps the formal param
        // received: semantically "if `file` is tainted, then every
        // observable field path on `file` is tainted too". Bounded
        // by SSA size; cap-equivalent to direct seeding. Mirror this
        // for each destructured sibling (`value.foo` / `column.name`
        // member-projections inside the body).
        let prefixes: Vec = std::iter::once(var_name.clone())
            .chain(slot_siblings.iter().cloned())
            .map(|n| format!("{}.", n))
            .collect();
        for block in &ssa.blocks {
            for inst in block.phis.iter().chain(block.body.iter()) {
                if let SsaOp::Param { .. } = &inst.op {
                    if let Some(name) = inst.var_name.as_ref() {
                        if prefixes.iter().any(|p| name.starts_with(p)) {
                            seed.insert(
                                BindingKey::new(name.as_str(), BodyId(0)),
                                probe_taint.clone(),
                            );
                        }
                    }
                }
            }
        }

        // Build slot-wide name list for the validated_must check:
        // primary first, then siblings. The entries are borrowed `&str`s
        // (no copies are made); `run_probe` only reads them while it runs.
        let mut slot_names: Vec<&str> = Vec::with_capacity(1 + slot_siblings.len());
        slot_names.push(var_name.as_str());
        for sib in slot_siblings {
            slot_names.push(sib.as_str());
        }
        let (return_caps, events, _, per_return_obs) =
            run_probe(seed, Some(slot_names.as_slice()));

        // Subtract baseline source_caps, we only want param-contributed caps
        let param_return_caps = return_caps & !source_caps;

        if !param_return_caps.is_empty() {
            // `stripped` is the set of cap bits that did NOT survive to the return.
            let stripped = Cap::all() & !param_return_caps;
            let transform = if stripped.is_empty() {
                TaintTransform::Identity
            } else {
                TaintTransform::StripBits(stripped)
            };
            param_to_return.push((idx, transform));
        }

        // Validated-param-to-return detection.
        //
        // When the per-param probe shows that the parameter's
        // `var_name` is in `validated_must` on every return path that
        // *carries the parameter's contributed caps*, record the
        // parameter as validated. The caller will mark each tainted
        // argument passed to this position — and the call's own
        // return value — as `validated_must` / `validated_may`, the
        // same way an inline `if (!regex.test(x)) throw` would
        // validate the surviving branch.
        //
        // Conservative gating:
        // * Skip when the param contributes no caps to the return,
        //   a degenerate "validated but irrelevant" record.
        // * Skip when no return block was observed (probes that
        //   diverged or hit `MAX_PROBE_PARAMS`).
        // * Require validation on every return path that *carries
        //   param caps to the return*. Branches that return
        //   constants (e.g. `if (x === null) return 'NULL'`) carry
        //   no param taint and don't need a validation predicate.
        // * Require ≥1 path that actually validates the param.
        if !param_return_caps.is_empty() && !per_return_obs.is_empty() {
            // Set as a side effect of the `all` closure below; `all` stops at
            // the first carrying path that is not validated.
            let mut any_carrying_path = false;
            let all_carrying_validated = per_return_obs.iter().all(|obs| {
                let carries = !(obs.derived_caps & !source_caps).is_empty()
                    || !(obs.param_caps & !source_caps).is_empty();
                if carries {
                    any_carrying_path = true;
                    obs.param_validated_must
                } else {
                    true
                }
            });
            if any_carrying_path && all_carrying_validated {
                validated_params_to_return.push(idx);
            }
        }

        // Derive per-return-path decomposition. For each
        // observed return block, derive a `ReturnPathTransform` mirroring
        // the aggregate logic (prefer derived caps, fall back to param
        // caps, strip baseline source caps). Only emit when ≥2 distinct
        // predicate hashes are present, a single-hash summary adds no
        // signal over the aggregate `param_to_return`.
        if per_return_obs.len() >= 2 {
            let mut per_path: SmallVec<[crate::summary::ssa_summary::ReturnPathTransform; 2]> =
                SmallVec::new();
            for obs in &per_return_obs {
                let block_return_caps = if !obs.derived_caps.is_empty() {
                    obs.derived_caps
                } else {
                    obs.param_caps
                };
                let block_contributed = block_return_caps & !source_caps;
                let transform_kind = if block_contributed.is_empty() {
                    // No caps on this path, param does not reach return
                    // under this predicate. A `StripBits(all)` records
                    // "all bits cleared" so downstream join preserves the
                    // disparity with other paths.
                    TaintTransform::StripBits(Cap::all())
                } else {
                    let stripped = Cap::all() & !block_contributed;
                    if stripped.is_empty() {
                        TaintTransform::Identity
                    } else {
                        TaintTransform::StripBits(stripped)
                    }
                };
                crate::summary::ssa_summary::merge_return_paths(
                    &mut per_path,
                    &[crate::summary::ssa_summary::ReturnPathTransform {
                        transform: transform_kind,
                        path_predicate_hash: obs.predicate_hash,
                        known_true: obs.known_true,
                        known_false: obs.known_false,
                        abstract_contribution: obs.abstract_value.clone(),
                    }],
                );
            }
            // Only record when ≥2 distinct predicate gates survived
            // the dedup (a single-entry vector is no finer than the
            // aggregate `param_to_return` and wastes bytes on disk).
            let distinct_hashes = per_path
                .iter()
                .map(|e| e.path_predicate_hash)
                .collect::>();
            if distinct_hashes.len() >= 2 {
                param_return_paths.push((idx, per_path));
            }
        }

        // Collect sink caps + primary-location sites from events + per-arg-position detail.
        //
        // Skip events flagged `all_validated`: every tainted SSA value
        // that reached the sink was already proved validated by a
        // dominating predicate (AllowlistCheck / TypeCheck /
        // ValidationCall, including the indirect-validator branch
        // narrowing for `validate*` / `is_valid*` callees). Those
        // events would have been dropped by `ssa_events_to_findings` at
        // the per-file finding step; carrying them into
        // `param_to_sink` / `param_to_sink_param` re-publishes a sink
        // attribution callers can no longer suppress, because the
        // caller can't see the validator that lives inside the
        // callee body.
        //
        // Strict-additive: `all_validated` is set only when every
        // tainted operand at the sink has its `var_name` in
        // `state.validated_may`, single-path single-validator helpers
        // cleanly skip; mixed-tainted-with-some-unvalidated events
        // still propagate. Closes the helper-summary precision gap
        // surfaced by Novu CVE GHSA-4x48-cgf9-q33f.
        let mut param_sites: SmallVec<[SinkSite; 1]> = SmallVec::new();
        for event in &events {
            if event.all_validated {
                continue;
            }
            // Positions are recorded even when `event.sink_caps` is empty;
            // the emptiness check only happens further down.
            for pos in extract_sink_arg_positions(event, ssa) {
                param_to_sink_param.push((idx, pos, event.sink_caps));
            }
            // Per-position gate-filter cap lifting.
            //
            // When the sink callee carries multiple gate filters (e.g. `fetch`
            // is both an SSRF gate on the URL arg and a `DATA_EXFIL` gate on
            // the body arg), the multi-gate dispatch has already filtered
            // `event.sink_caps` down to the specific gate's `label_caps` for
            // this probe. Recording `(idx, event.sink_caps)` preserves that
            // narrowing across the function-summary boundary so a caller of
            // the wrapper splits SSRF from DATA_EXFIL findings instead of
            // joining them under a single union.
            //
            // Single-gate / no-gate sinks are skipped, the existing
            // `param_to_sink` machinery already records those without
            // per-position cap conflict.
            if !event.sink_caps.is_empty()
                && cfg[event.sink_node].call.gate_filters.len() > 1
                && !param_to_gate_filters
                    .iter()
                    .any(|&(i, c)| i == idx && c == event.sink_caps)
            {
                param_to_gate_filters.push((idx, event.sink_caps));
            }
            if event.sink_caps.is_empty() {
                continue;
            }
            // Without a locator only the caps are recorded for the site.
            let site = match locator {
                Some(loc) => {
                    loc.site_for_span(cfg[event.sink_node].classification_span(), event.sink_caps)
                }
                None => SinkSite::cap_only(event.sink_caps),
            };
            let key = site.dedup_key();
            if !param_sites.iter().any(|s| s.dedup_key() == key) {
                param_sites.push(site);
            }
        }
        if !param_sites.is_empty() {
            param_to_sink.push((idx, param_sites));
        }
    }

    let (param_container_to_return, param_to_container_store) =
        extract_container_flow_summary(ssa, lang, effective_params);

    // Parameter-granularity points-to summary.
    let points_to = crate::ssa::param_points_to::analyse_param_points_to(
        ssa,
        ¶m_info,
        effective_params,
        formal_param_names,
        Some(lang),
    );

    // Infer return type: scan return-reaching blocks for constructor calls.
    let return_type = infer_summary_return_type(ssa, lang);

    // Detect source_to_callback: internal source taint flowing to calls of
    // parameter functions (e.g., `fn apply(f) { let x = source(); f(x); }`).
    // Re-runs the baseline probe internally to get accurate taint state.
    //
    // NOTE(review): this transfer sets `module_aliases: None`, whereas the
    // transfer built in `run_probe` forwards the caller's `module_aliases`.
    // Every other field matches the unseeded probe; confirm the difference
    // is intentional.
    let source_to_callback = if !source_caps.is_empty() && !param_info.is_empty() {
        let baseline_transfer = SsaTaintTransfer {
            lang,
            namespace,
            interner,
            local_summaries,
            global_summaries,
            interop_edges: &[],
            owner_body_id: BodyId(0),
            parent_body_id: None,
            global_seed: None,
            param_seed: None,
            receiver_seed: None,
            const_values: None,
            type_facts: None,
            ssa_summaries,
            extra_labels: None,
            base_aliases: None,
            callee_bodies: None,
            inline_cache: None,
            context_depth: 0,
            callback_bindings: None,
            points_to: None,
            dynamic_pts: None,
            import_bindings: None,
            promisify_aliases: None,
            module_aliases: None,
            static_map: None,
            auto_seed_handler_params: false,
            cross_file_bodies: None,
            pointer_facts: None,
        };
        detect_source_to_callback_from_states(
            ssa,
            cfg,
            source_caps,
            ¶m_info,
            &baseline_transfer,
        )
    } else {
        vec![]
    };

    // Per-parameter abstract-domain transfers.
    //
    // Derived structurally from the SSA body, no additional taint probes.
    // Three-step inference per parameter:
    //   1. Identity: return SSA value at every return block traces back to
    //      this parameter (possibly through assigns / phi merges all feeding
    //      from the same param).
    //   2. Callee-intrinsic bound: baseline `return_abstract` carries a
    //      concrete fact (bounded interval or known prefix) that holds
    //      regardless of caller input, record it once per parameter as
    //      `Clamped` / `LiteralPrefix` so the caller sees the bound even
    //      when it has no abstract info on its own argument.
    //   3. Top: default; the entry is omitted (empty transfer is meaningless).
    let abstract_transfer = derive_abstract_transfer(ssa, ¶m_info, return_abstract.as_ref());

    SsaFuncSummary {
        param_to_return,
        param_to_sink,
        source_caps,
        param_to_sink_param,
        param_to_gate_filters,
        param_container_to_return,
        param_to_container_store,
        return_type,
        return_abstract,
        source_to_callback,
        receiver_to_return: None,
        receiver_to_sink: Cap::empty(),
        abstract_transfer,
        param_return_paths,
        return_path_facts,
        points_to,
        // extension, empty until the field-granularity
        // extractor is wired (`NYX_POINTER_ANALYSIS=1` only). Default
        // path stays bit-identical to today.
        field_points_to: crate::summary::points_to::FieldPointsToSummary::empty(),
        // Populated post-extraction in
        // `taint::lower_all_functions_from_bodies` once SSA optimisation
        // has computed `opt.type_facts`. Empty here means the
        // extractor itself doesn't carry receiver-type info, the
        // caller patches it in.
        typed_call_receivers: Vec::new(),
        validated_params_to_return,
    }
}

/// Derive a deterministic predicate-hash + known-true/false intersection
/// for a return-block exit state.
///
/// The hash combines the sorted `(SymbolId, known_true, known_false)` tuples
/// from the state's `predicates` list with the validated_must bitmask. Two
/// return blocks whose predicate gates are observationally identical produce
/// the same hash; the intersection of known_true/false gives the bits that
/// hold on every path into each return block.
///
/// Returns `(0, 0, 0)` for a Top state (no predicates tracked and an empty
/// `validated_must`). For every other state the returned hash is non-zero.
///
/// NOTE(review): the hash comes from `std`'s `DefaultHasher`, whose algorithm
/// is documented as unspecified and subject to change between Rust releases;
/// confirm that is acceptable if these hashes are persisted or compared
/// across toolchains.
pub(super) fn summarise_return_predicates(state: &SsaTaintState) -> (u64, u8, u8) {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    if state.predicates.is_empty() && state.validated_must.is_empty() {
        return (0, 0, 0);
    }

    let mut h = DefaultHasher::new();
    // Validated-must contributes deterministically via bits().
    state.validated_must.bits().hash(&mut h);
    // Sort by SymbolId (predicates list is already sorted by SsaTaintState
    // invariants, but hash-input stability matters here).
    let mut sorted: smallvec::SmallVec<[(u32, u8, u8); 4]> = state
        .predicates
        .iter()
        .map(|(id, s)| (id.0, s.known_true, s.known_false))
        .collect();
    sorted.sort_by_key(|(id, _, _)| *id);
    for (id, kt, kf) in &sorted {
        id.hash(&mut h);
        kt.hash(&mut h);
        kf.hash(&mut h);
    }
    let hash = h.finish();

    // Intersect known_true / known_false across all tracked variables:
    // the bits that hold for EVERY predicate-tracked var at this return.
    //
    // NOTE(review): when `predicates` is empty but `validated_must` is not,
    // `sorted` is empty and both folds return their seed, `u8::MAX` (all bits
    // set). Confirm that consumers expect that value rather than `0` here.
    let known_true = sorted
        .iter()
        .map(|(_, kt, _)| *kt)
        .fold(u8::MAX, |a, b| a & b);
    let known_false = sorted
        .iter()
        .map(|(_, _, kf)| *kf)
        .fold(u8::MAX, |a, b| a & b);

    // Remap a computed hash of `0` to `1` so a guarded state never collides
    // with the unguarded sentinel (0) returned by the early exit above.
    let hash = if hash == 0 { 1 } else { hash };
    (hash, known_true, known_false)
}

/// Derive per-parameter [`AbstractTransfer`] entries for a function's SSA
/// body.
///
/// `return_abstract` is the callee's intrinsic baseline (from the no-seed
/// probe). When present, it describes a fact that holds for the return
/// regardless of parameter input, so it can be attached as a
/// `Clamped` / `LiteralPrefix` transform to every parameter that flows to
/// the return.
///
/// Identity detection is structural: walk the return values back through
/// [`SsaOp::Assign`] / [`SsaOp::Phi`] chains (bounded) and check whether
/// every leaf resolves to the same [`SsaOp::Param`]. The trace is cheap
/// and can only produce `Identity` for passthrough callees, anything
/// more complex degrades to the baseline fact or `Top`.
fn derive_abstract_transfer( ssa: &SsaBody, param_info: &[(usize, String, SsaValue)], return_abstract: Option<&crate::abstract_interp::AbstractValue>, ) -> Vec<(usize, crate::abstract_interp::AbstractTransfer)> { use crate::abstract_interp::{AbstractTransfer, IntervalTransfer, StringTransfer}; if param_info.is_empty() { return Vec::new(); } // Build a lookup from SsaValue → defining op by scanning the body once. let mut defs: HashMap = HashMap::new(); for block in &ssa.blocks { for inst in block.phis.iter().chain(block.body.iter()) { defs.insert(inst.value, &inst.op); } } // Trace an SSA value backwards to the single source parameter index it // resolves to, if any. Returns `None` when the trace diverges, hits a // non-pass-through op, or exceeds the depth bound. fn trace_to_param( v: SsaValue, defs: &HashMap, depth: usize, ) -> Option { const MAX_DEPTH: usize = 8; if depth > MAX_DEPTH { return None; } match defs.get(&v)? { SsaOp::Param { index } => Some(*index), SsaOp::Assign(ops) if ops.len() == 1 => trace_to_param(ops[0], defs, depth + 1), SsaOp::Phi(preds) => { let mut result: Option = None; for (_, pv) in preds { let p = trace_to_param(*pv, defs, depth + 1)?; match result { None => result = Some(p), Some(existing) if existing == p => {} Some(_) => return None, } } result } _ => None, } } // For every return block, trace its return value and record which // parameter (if any) it resolves to. If all return blocks agree on the // same parameter index, that parameter has `Identity`. If they disagree // (or some don't resolve), no parameter gets `Identity` and we fall // back to baseline-derived forms. 
let mut identity_param: Option = None; let mut identity_consistent = true; for block in &ssa.blocks { if let Terminator::Return(Some(rv)) = &block.terminator { let traced = trace_to_param(*rv, &defs, 0); match (identity_param, traced) { (None, Some(p)) => identity_param = Some(p), (Some(existing), Some(p)) if existing == p => {} _ => { identity_consistent = false; break; } } } } // Derive a baseline-invariant transform from `return_abstract`. This is // the "callee intrinsic" fact that always holds, each parameter that // flows to the return gets it attached as the conservative transfer. let baseline_invariant: Option = return_abstract.map(|av| { let interval = match (av.interval.lo, av.interval.hi) { (Some(lo), Some(hi)) if lo <= hi => IntervalTransfer::Clamped { lo, hi }, _ => IntervalTransfer::Top, }; let string = match &av.string.prefix { Some(p) if !p.is_empty() => StringTransfer::literal_prefix(p), _ => StringTransfer::Unknown, }; AbstractTransfer { interval, string } }); let mut result: Vec<(usize, AbstractTransfer)> = Vec::new(); for (idx, _, _) in param_info { let mut transfer = AbstractTransfer::top(); if identity_consistent && identity_param == Some(*idx) { transfer.interval = IntervalTransfer::Identity; transfer.string = StringTransfer::Identity; } else if let Some(base) = baseline_invariant.as_ref() { // Baseline intrinsic bound applies to every parameter that could // reach the return. We conservatively attach it to all params //, at apply time the caller meets it with the real return // abstract (also from this same summary), so double-counting // would collapse to the tighter of the two. transfer = base.clone(); } if !transfer.is_top() { result.push((*idx, transfer)); } } result } /// Detect callback patterns where internal source taint flows to a call of a /// parameter function. Re-runs the baseline probe internally to get accurate /// taint state at each instruction point. /// /// Returns `(param_index_of_callee, source_caps)` pairs. 
fn detect_source_to_callback_from_states( ssa: &SsaBody, cfg: &Cfg, source_caps: Cap, param_info: &[(usize, String, SsaValue)], transfer: &SsaTaintTransfer, ) -> Vec<(usize, Cap)> { use crate::ssa::ir::SsaOp; // Map param var_name → param_index let param_name_to_index: HashMap<&str, usize> = param_info .iter() .map(|(idx, name, _)| (name.as_str(), *idx)) .collect(); // Run taint analysis to get converged block states let (_events, block_states) = run_ssa_taint_full(ssa, cfg, transfer); let mut result: Vec<(usize, Cap)> = vec![]; for (bid, block) in ssa.blocks.iter().enumerate() { let Some(entry_state) = &block_states[bid] else { continue; }; // Replay block transfer to get accurate taint state at each instruction let mut state = entry_state.clone(); for inst in &block.body { // Apply transfer for this instruction to advance state transfer_inst(inst, cfg, ssa, transfer, &mut state); // After transfer: check if this is a call to a param with tainted args if let SsaOp::Call { callee, args, .. } = &inst.op { if let Some(¶m_idx) = param_name_to_index.get(callee.as_str()) { let any_arg_tainted = args.iter().any(|arg_vals| { arg_vals .iter() .any(|v| state.get(*v).is_some_and(|t| !t.caps.is_empty())) }); if any_arg_tainted && !result.iter().any(|(idx, _)| *idx == param_idx) { result.push((param_idx, source_caps)); } } } } } result } /// Infer the return type of a function from its SSA body by checking whether /// return-reaching blocks produce values from known constructor/factory calls. fn infer_summary_return_type( ssa: &SsaBody, lang: Lang, ) -> Option { // Find blocks with Return terminators, then look at the last defined value // in those blocks, if it's a Call with a known constructor, that's our type. for block in &ssa.blocks { if !matches!(block.terminator, Terminator::Return(_)) { continue; } // Only inspect the very last instruction in the returning block. if let Some(inst) = block.body.last() && let SsaOp::Call { callee, .. 
} = &inst.op && let Some(ty) = crate::ssa::type_facts::constructor_type(lang, callee) { return Some(ty); } } None } // ── Inter-procedural container flow detection (structural SSA analysis) ── /// Build a map from SsaValue to its defining instruction. fn build_inst_map(ssa: &SsaBody) -> HashMap)> { let mut map = HashMap::new(); for block in &ssa.blocks { for inst in block.phis.iter().chain(block.body.iter()) { // Store the op and optionally the receiver for calls map.insert(inst.value, (inst.op.clone(), None)); } } map } /// Trace an SSA value back through Assign/Phi chains to find if it originates /// from a `Param { index }`. Returns `Some(index)` if a param is found. /// Does NOT trace through Call, Const, Source, or other non-identity ops. fn trace_to_param( v: SsaValue, ssa: &SsaBody, inst_map: &HashMap)>, visited: &mut HashSet, ) -> Option { if !visited.insert(v) { return None; } let (op, _) = inst_map.get(&v)?; match op { SsaOp::Param { index } => Some(*index), SsaOp::Assign(uses) => { for u in uses { if let Some(idx) = trace_to_param(*u, ssa, inst_map, visited) { return Some(idx); } } None } SsaOp::Phi(operands) => { for (_, op_val) in operands { if let Some(idx) = trace_to_param(*op_val, ssa, inst_map, visited) { return Some(idx); } } None } // Don't trace through Call (new identity), Const, Source, Nop, CatchParam _ => None, } } /// Detect inter-procedural container flow patterns from SSA structure: /// - `param_container_to_return`: params whose container identity flows to return /// - `param_to_container_store`: (src_param, container_param) pairs where src taint /// is stored into container_param's contents pub(crate) fn extract_container_flow_summary( ssa: &SsaBody, lang: Lang, formal_param_count: usize, ) -> (Vec, Vec<(usize, usize)>) { use crate::ssa::pointsto::{ContainerOp, classify_container_op}; let inst_map = build_inst_map(ssa); let mut container_to_return: HashSet = HashSet::new(); let mut container_store: Vec<(usize, usize)> = Vec::new(); // 
1. param_container_to_return: trace Assign/Phi ops in return blocks to params. // // `trace_to_param` will happily return any `SsaOp::Param { index }`, but // scoped lowering synthesises `Param` ops for external captures (module // imports, free identifiers) at indices beyond the formal parameter count. // Those must not enter the summary, the key's arity only covers formal // params, and an out-of-range index trips `ssa_summary_fits_arity`, forcing // the reconciliation probe to generate a synthetic disambiguator that no // caller will ever look up. for block in &ssa.blocks { if !matches!(block.terminator, Terminator::Return(_)) { continue; } for inst in block.phis.iter().chain(block.body.iter()) { match &inst.op { // Only trace identity-preserving ops (Assign, Phi). // Skip Param (would cause false positives in single-block functions), // Call (new identity), Const, Source, Nop, CatchParam. SsaOp::Assign(_) | SsaOp::Phi(_) => { if let Some(idx) = trace_to_param(inst.value, ssa, &inst_map, &mut HashSet::new()) && idx < formal_param_count { container_to_return.insert(idx); } } _ => {} } } } // 2. param_to_container_store: find container Store calls, trace args to params for block in &ssa.blocks { for inst in block.body.iter() { if let SsaOp::Call { callee, args, receiver, .. } = &inst.op { let op = match classify_container_op(callee, lang) { Some(ContainerOp::Store { value_args, .. }) => value_args, _ => continue, }; // Resolve container SSA value. With the new call ABI, the // receiver is a separate channel and `args` contains only // positional arguments. For Go, container ops are plain // function calls (no receiver), so args[0] is the container. 
let container_val = if let Some(v) = *receiver { Some(v) } else if lang == Lang::Go { args.first().and_then(|a| a.first().copied()) } else if let Some(dot_pos) = callee.rfind('.') { let receiver_name = &callee[..dot_pos]; args.iter() .flat_map(|a| a.iter()) .find(|&&v| { ssa.value_defs .get(v.0 as usize) .and_then(|d| d.var_name.as_deref()) == Some(receiver_name) }) .copied() } else { None }; let container_val = match container_val { Some(v) => v, None => continue, }; // Trace container to positional param (SelfParam → None, so // when the container is the receiver we skip, the caller // tracks that via `receiver_to_container_store` if needed). // Same arity filter as above: reject synthetic Param ops that // were injected for free captures. let container_param = match trace_to_param(container_val, ssa, &inst_map, &mut HashSet::new()) { Some(idx) if idx < formal_param_count => idx, _ => continue, }; // Go container ops are plain function calls with the container // at args[0]; value args start at args[1]. Other languages // place the container on the receiver channel so args holds // only value args starting at index 0. let arg_offset = if lang == Lang::Go && receiver.is_none() { 1usize } else { 0 }; // Trace each value arg to param (same arity filter as above). for &va_idx in &op { let effective_idx = va_idx + arg_offset; if let Some(arg_vals) = args.get(effective_idx) { for &av in arg_vals { if let Some(src_param) = trace_to_param(av, ssa, &inst_map, &mut HashSet::new()) && src_param < formal_param_count && src_param != container_param && !container_store.contains(&(src_param, container_param)) { container_store.push((src_param, container_param)); } } } } } } } let mut ctr: Vec = container_to_return.into_iter().collect(); ctr.sort(); container_store.sort(); (ctr, container_store) }