mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-09 19:45:13 +02:00
* docs: Enhance module documentation across various files for clarity and completeness * fix: Remove unnecessary blank line in build.rs for cleaner code * docs: Update documentation to improve clarity and consistency in code comments
1263 lines
54 KiB
Rust
1263 lines
54 KiB
Rust
//! SSA function-summary and container-flow extraction.
|
|
//!
|
|
//! Extracted from the monolithic `ssa_transfer.rs`. Contains:
|
|
//! * [`extract_ssa_func_summary`]: runs per-parameter taint probes and
|
|
//! synthesises an [`crate::summary::ssa_summary::SsaFuncSummary`] with
|
|
//! source caps, return transforms, per-path transforms, and sink site
|
|
//! attribution.
|
|
//! * [`extract_container_flow_summary`]: structural scan for
|
|
//! `param_container_to_return` + `param_to_container_store` pairs.
|
|
//! * Private helpers for predicate-hash summarisation, abstract-transfer
|
|
//! derivation, callback source detection, and return-type inference.
|
|
|
|
use super::events::extract_sink_arg_positions;
|
|
use super::state::{BindingKey, SsaTaintState};
|
|
use super::{
|
|
SsaTaintEvent, SsaTaintTransfer, detect_variant_inner_fact, run_ssa_taint_full, transfer_block,
|
|
transfer_inst,
|
|
};
|
|
|
|
use crate::cfg::{BodyId, Cfg, FuncSummaries};
|
|
use crate::labels::{Cap, SourceKind};
|
|
use crate::ssa::ir::{SsaBody, SsaOp, SsaValue, Terminator};
|
|
use crate::summary::GlobalSummaries;
|
|
use crate::symbol::Lang;
|
|
use crate::taint::domain::{TaintOrigin, VarTaint};
|
|
use petgraph::graph::NodeIndex;
|
|
use smallvec::SmallVec;
|
|
use std::collections::{HashMap, HashSet};
|
|
|
|
/// Maximum number of parameters to probe for summary extraction.
///
/// `extract_ssa_func_summary_full` clamps `param_count` to this value, so only
/// `SsaOp::Param` ops whose `index` is below it receive a per-parameter taint
/// probe; higher-indexed parameters are not probed by this module.
///
/// Functions with more params fall back to legacy `FuncSummary`.
/// NOTE(review): that fallback is not implemented in this part of the file;
/// confirm where the legacy summary is consulted for the unprobed parameters.
const MAX_PROBE_PARAMS: usize = 8;
|
|
|
|
/// Extract a precise per-parameter `SsaFuncSummary` from an already-lowered SSA body.
|
|
///
|
|
/// For each parameter (up to `MAX_PROBE_PARAMS`), runs a taint probe by seeding
|
|
/// that parameter with `Cap::all()` via `global_seed` and observing what caps
|
|
/// survive to return positions and which sinks fire. A final probe with no params
|
|
/// tainted detects intrinsic source caps.
|
|
#[allow(clippy::too_many_arguments)]
|
|
pub fn extract_ssa_func_summary(
|
|
ssa: &SsaBody,
|
|
cfg: &Cfg,
|
|
local_summaries: &FuncSummaries,
|
|
global_summaries: Option<&GlobalSummaries>,
|
|
lang: Lang,
|
|
namespace: &str,
|
|
interner: &crate::state::symbol::SymbolInterner,
|
|
param_count: usize,
|
|
module_aliases: Option<&HashMap<SsaValue, SmallVec<[String; 2]>>>,
|
|
locator: Option<&crate::summary::SinkSiteLocator<'_>>,
|
|
formal_param_names: Option<&[String]>,
|
|
formal_destructured_fields: Option<&[Vec<String>]>,
|
|
) -> crate::summary::ssa_summary::SsaFuncSummary {
|
|
extract_ssa_func_summary_full(
|
|
ssa,
|
|
cfg,
|
|
local_summaries,
|
|
global_summaries,
|
|
lang,
|
|
namespace,
|
|
interner,
|
|
param_count,
|
|
module_aliases,
|
|
locator,
|
|
formal_param_names,
|
|
None,
|
|
formal_destructured_fields,
|
|
)
|
|
}
|
|
|
|
/// Like [`extract_ssa_func_summary`] but allows passing an in-progress
|
|
/// `ssa_summaries` map so the per-parameter probes can resolve callee
|
|
/// SSA summaries via step 0 of `resolve_callee_full`.
|
|
///
|
|
/// This enables transitive cross-function summary propagation: when a
|
|
/// caller's body references a callee whose summary was just augmented
|
|
/// by the closure-capture lift pass, the caller's probe sees the
|
|
/// augmented `param_to_sink` and can propagate it onto the caller's
|
|
/// own summary. Used by `lower_all_functions_from_bodies`'s second
|
|
/// extraction pass after `augment_summaries_with_child_sinks`.
|
|
#[allow(clippy::too_many_arguments)]
|
|
pub fn extract_ssa_func_summary_full(
|
|
ssa: &SsaBody,
|
|
cfg: &Cfg,
|
|
local_summaries: &FuncSummaries,
|
|
global_summaries: Option<&GlobalSummaries>,
|
|
lang: Lang,
|
|
namespace: &str,
|
|
interner: &crate::state::symbol::SymbolInterner,
|
|
param_count: usize,
|
|
module_aliases: Option<&HashMap<SsaValue, SmallVec<[String; 2]>>>,
|
|
locator: Option<&crate::summary::SinkSiteLocator<'_>>,
|
|
formal_param_names: Option<&[String]>,
|
|
ssa_summaries: Option<
|
|
&HashMap<crate::symbol::FuncKey, crate::summary::ssa_summary::SsaFuncSummary>,
|
|
>,
|
|
// Per-parameter destructured-binding sibling names. Entry `i` is
|
|
// the list of field names destructured by the same call-site arg
|
|
// slot as the primary `formal_param_names[i]`, excluding the
|
|
// primary name. Empty vec for non-destructured params; `None` for
|
|
// callers that don't carry destructure info (legacy / test paths).
|
|
// Drives the destructured-arg expansion in the per-param probe so
|
|
// taint flow through sibling bindings is visible to summary
|
|
// extraction (CVE-2026-25544 / @payloadcms/drizzle SQLi).
|
|
formal_destructured_fields: Option<&[Vec<String>]>,
|
|
) -> crate::summary::ssa_summary::SsaFuncSummary {
|
|
use crate::summary::SinkSite;
|
|
use crate::summary::ssa_summary::{SsaFuncSummary, TaintTransform};
|
|
|
|
let effective_params = param_count.min(MAX_PROBE_PARAMS);
|
|
|
|
// Collect (param_index, var_name, ssa_value) from the SSA body
|
|
let mut param_info: Vec<(usize, String, SsaValue)> = Vec::new();
|
|
for block in &ssa.blocks {
|
|
for inst in block.phis.iter().chain(block.body.iter()) {
|
|
if let SsaOp::Param { index } = &inst.op {
|
|
if *index < effective_params {
|
|
if let Some(name) = inst.var_name.as_ref() {
|
|
param_info.push((*index, name.clone(), inst.value));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Identify return-reaching blocks
|
|
let return_blocks: Vec<usize> = ssa
|
|
.blocks
|
|
.iter()
|
|
.enumerate()
|
|
.filter(|(_, b)| matches!(b.terminator, Terminator::Return(_)))
|
|
.map(|(i, _)| i)
|
|
.collect();
|
|
|
|
// Collect all param SSA values to exclude from return cap collection.
|
|
// Param values persist with their seeded taint throughout the function ,
|
|
// we only want caps on derived values (call results, assigns) at return.
|
|
let all_param_values: std::collections::HashSet<SsaValue> =
|
|
param_info.iter().map(|(_, _, v)| *v).collect();
|
|
|
|
// Per-return-block observation captured alongside the aggregate return
|
|
// caps. Each entry records one return block's exit state, caps
|
|
// contributed on that path, path-predicate hash, known_true/false bits,
|
|
// and the return SSA value's abstract fact, so the per-param loop can
|
|
// emit one [`ReturnPathTransform`] per distinct predicate gate.
|
|
struct ReturnBlockObs {
|
|
/// Caps at the return SSA value (or joined live values for
|
|
/// implicit returns) on this block's exit.
|
|
derived_caps: Cap,
|
|
/// Caps collected from parameter values reaching this return
|
|
/// (passthrough fallback).
|
|
param_caps: Cap,
|
|
/// Deterministic hash of the predicate gate at this return.
|
|
/// `0` means "no predicate gate", an unguarded return.
|
|
predicate_hash: u64,
|
|
/// `PredicateSummary::known_true` bits intersected across all
|
|
/// tracked variables at this return. Encoded via
|
|
/// [`crate::taint::domain::predicate_kind_bit`].
|
|
known_true: u8,
|
|
/// `PredicateSummary::known_false` bits at this return.
|
|
known_false: u8,
|
|
/// Abstract fact on the return SSA value at this return (None
|
|
/// when Top or abstract interp disabled).
|
|
abstract_value: Option<crate::abstract_interp::AbstractValue>,
|
|
/// [`crate::abstract_interp::PathFact`] on the return SSA value
|
|
/// at this block's exit. Top when abstract interp is disabled
|
|
/// or no narrowing was proved on this path.
|
|
path_fact: crate::abstract_interp::PathFact,
|
|
/// Inner [`PathFact`] when the rv on this path is a one-arg
|
|
/// variant constructor; [`None`] otherwise.
|
|
variant_inner_fact: Option<crate::abstract_interp::PathFact>,
|
|
/// `true` when the per-param probe's seeded parameter var_name
|
|
/// is in this return block's exit `validated_must`. `false`
|
|
/// for the baseline (no-seed) probe and for params not
|
|
/// validated on this path. Drives
|
|
/// `validated_params_to_return` summary extraction.
|
|
param_validated_must: bool,
|
|
}
|
|
|
|
// Helper: run a taint probe with a given global_seed and return
|
|
// the aggregate return caps, sink events, joined return abstract,
|
|
// and the per-return-block observation list used to derive
|
|
// per-return-path transforms.
|
|
//
|
|
// `probe_param_names` lists the seeded parameter's `var_name`
|
|
// plus any destructured-binding siblings sharing the slot
|
|
// (`None` for the baseline source-caps probe). When non-empty,
|
|
// each return-block observation records whether ANY of those
|
|
// names is in the exit state's `validated_must`, which feeds
|
|
// `validated_params_to_return` summary extraction below. The
|
|
// any-name semantics matches the slot-wide model: a destructured
|
|
// formal `({ a, b, c })` represents one call-site slot, and any
|
|
// sibling reaching `validated_must` proves the slot's caps were
|
|
// narrowed before reaching the return.
|
|
let run_probe = |seed: HashMap<BindingKey, VarTaint>,
|
|
probe_param_names: Option<&[&str]>|
|
|
-> (
|
|
Cap,
|
|
Vec<SsaTaintEvent>,
|
|
Option<crate::abstract_interp::AbstractValue>,
|
|
Vec<ReturnBlockObs>,
|
|
) {
|
|
let seed_ref = if seed.is_empty() { None } else { Some(&seed) };
|
|
let transfer = SsaTaintTransfer {
|
|
lang,
|
|
namespace,
|
|
interner,
|
|
local_summaries,
|
|
global_summaries,
|
|
interop_edges: &[],
|
|
owner_body_id: BodyId(0),
|
|
parent_body_id: None,
|
|
global_seed: seed_ref,
|
|
param_seed: None,
|
|
receiver_seed: None,
|
|
const_values: None,
|
|
type_facts: None,
|
|
ssa_summaries,
|
|
extra_labels: None,
|
|
base_aliases: None,
|
|
callee_bodies: None,
|
|
inline_cache: None,
|
|
context_depth: 0,
|
|
callback_bindings: None,
|
|
points_to: None,
|
|
dynamic_pts: None,
|
|
import_bindings: None,
|
|
promisify_aliases: None,
|
|
module_aliases,
|
|
static_map: None,
|
|
auto_seed_handler_params: false,
|
|
cross_file_bodies: None,
|
|
pointer_facts: None,
|
|
};
|
|
|
|
let (events, block_states) = run_ssa_taint_full(ssa, cfg, &transfer);
|
|
|
|
// Collect surviving caps at return blocks.
|
|
// Separate param values from derived values: derived values give
|
|
// more precise transforms (they reflect function-internal sanitization).
|
|
// If only param values reach return → pure passthrough (Identity).
|
|
let mut total_derived_caps = Cap::empty();
|
|
let mut total_param_caps = Cap::empty();
|
|
// Extract abstract value of the return SSA value.
|
|
let mut return_abstract: Option<crate::abstract_interp::AbstractValue> = None;
|
|
// Per-return-block observations for per-path transforms.
|
|
let mut per_return: Vec<ReturnBlockObs> = Vec::with_capacity(return_blocks.len());
|
|
for &bid in &return_blocks {
|
|
if let Some(entry) = &block_states[bid] {
|
|
let empty_induction = HashSet::new();
|
|
let exit = transfer_block(
|
|
&ssa.blocks[bid],
|
|
cfg,
|
|
ssa,
|
|
&transfer,
|
|
entry.clone(),
|
|
&empty_induction,
|
|
None,
|
|
);
|
|
|
|
let ret_val = match &ssa.blocks[bid].terminator {
|
|
Terminator::Return(rv) => rv.as_ref().copied(),
|
|
_ => None,
|
|
};
|
|
|
|
let mut block_derived_caps = Cap::empty();
|
|
let mut block_param_caps = Cap::empty();
|
|
|
|
if let Some(rv) = ret_val {
|
|
// Explicit return value: use only its taint for derived_caps.
|
|
// If rv has no taint entry, this block contributes no derived caps.
|
|
if let Some(taint) = exit.get(rv) {
|
|
if all_param_values.contains(&rv) {
|
|
block_param_caps |= taint.caps;
|
|
} else {
|
|
block_derived_caps |= taint.caps;
|
|
}
|
|
}
|
|
// When rv is not a param value, also collect param taint as a
|
|
// fallback. The SSA terminator's rv may point to the last body
|
|
// instruction (e.g. push/append result) rather than the actual
|
|
// return expression (the container parameter itself). This fires
|
|
// both when rv is tainted (derived) and when rv is untainted
|
|
// (the push result may have no taint but the param does).
|
|
// Skip when rv IS a param (already handled above) or when rv is
|
|
// a Const (provably untainted constant return).
|
|
let rv_is_const = ssa.blocks[bid]
|
|
.body
|
|
.iter()
|
|
.chain(ssa.blocks[bid].phis.iter())
|
|
.any(|inst| inst.value == rv && matches!(inst.op, SsaOp::Const(_)));
|
|
if !all_param_values.contains(&rv) && !rv_is_const {
|
|
for (val, taint) in &exit.values {
|
|
if all_param_values.contains(val) {
|
|
block_param_caps |= taint.caps;
|
|
}
|
|
}
|
|
}
|
|
} else {
|
|
// Return(None): implicit return, fall back to all live values.
|
|
for (val, taint) in &exit.values {
|
|
if all_param_values.contains(val) {
|
|
block_param_caps |= taint.caps;
|
|
} else {
|
|
block_derived_caps |= taint.caps;
|
|
}
|
|
}
|
|
}
|
|
|
|
total_derived_caps |= block_derived_caps;
|
|
total_param_caps |= block_param_caps;
|
|
|
|
// Abstract return: use terminator's return value when available,
|
|
// fall back to last instruction heuristic for Return(None).
|
|
let mut block_abs: Option<crate::abstract_interp::AbstractValue> = None;
|
|
let mut block_path_fact = crate::abstract_interp::PathFact::top();
|
|
let mut block_variant_inner: Option<crate::abstract_interp::PathFact> = None;
|
|
if let Some(ref abs) = exit.abstract_state {
|
|
let abs_rv = ret_val.or_else(|| {
|
|
ssa.blocks[bid]
|
|
.body
|
|
.last()
|
|
.or_else(|| ssa.blocks[bid].phis.last())
|
|
.map(|inst| inst.value)
|
|
});
|
|
if let Some(rv) = abs_rv {
|
|
let av = abs.get(rv);
|
|
block_path_fact = av.path.clone();
|
|
if !av.is_top() {
|
|
block_abs = Some(av.clone());
|
|
return_abstract = Some(match return_abstract {
|
|
None => av,
|
|
Some(prev) => prev.join(&av),
|
|
});
|
|
}
|
|
block_variant_inner = detect_variant_inner_fact(rv, ssa, &exit);
|
|
}
|
|
}
|
|
|
|
// Derive a predicate hash + known-true/false
|
|
// intersection across tracked variables at this return.
|
|
// The hash is stable across runs for a given predicate
|
|
// shape so call sites can compare paths deterministically.
|
|
let (predicate_hash, known_true, known_false) = summarise_return_predicates(&exit);
|
|
let param_validated_must = match probe_param_names {
|
|
Some(names) => names.iter().any(|name| match interner.get(name) {
|
|
Some(sym) => exit.validated_must.contains(sym),
|
|
None => false,
|
|
}),
|
|
None => false,
|
|
};
|
|
per_return.push(ReturnBlockObs {
|
|
derived_caps: block_derived_caps,
|
|
param_caps: block_param_caps,
|
|
predicate_hash,
|
|
known_true,
|
|
known_false,
|
|
abstract_value: block_abs,
|
|
path_fact: block_path_fact,
|
|
variant_inner_fact: block_variant_inner,
|
|
param_validated_must,
|
|
});
|
|
}
|
|
}
|
|
|
|
// Prefer derived caps; fall back to param caps for passthrough functions
|
|
let return_caps = if !total_derived_caps.is_empty() {
|
|
total_derived_caps
|
|
} else {
|
|
total_param_caps
|
|
};
|
|
|
|
// Drop return_abstract if it joined to Top
|
|
let return_abstract = return_abstract.filter(|v| !v.is_top());
|
|
|
|
(return_caps, events, return_abstract, per_return)
|
|
};
|
|
|
|
// Probe with no params tainted → detect source_caps + return abstract.
|
|
// Abstract values don't depend on taint seeding, so the baseline probe
|
|
// captures the function's intrinsic abstract return value.
|
|
let (baseline_return_caps, _baseline_events, return_abstract, baseline_obs) =
|
|
run_probe(HashMap::new(), None);
|
|
let source_caps = baseline_return_caps;
|
|
|
|
// Per-return-path PathFact decomposition derived from the baseline
|
|
// probe (no seeded taint). Abstract facts on the return rv are
|
|
// independent of taint seeding, they describe the function's
|
|
// intrinsic narrowing, so the baseline run captures them without
|
|
// per-param noise.
|
|
//
|
|
// Emitted only when ≥2 return-block entries have distinct predicate
|
|
// hashes *and* at least one entry carries non-Top signal (fact or
|
|
// variant_inner_fact). A uniform all-Top list adds bytes without
|
|
// helping any caller.
|
|
let mut return_path_facts: SmallVec<[crate::summary::ssa_summary::PathFactReturnEntry; 2]> =
|
|
SmallVec::new();
|
|
if baseline_obs.len() >= 2 {
|
|
let mut merged: SmallVec<[crate::summary::ssa_summary::PathFactReturnEntry; 2]> =
|
|
SmallVec::new();
|
|
for obs in &baseline_obs {
|
|
let entry = crate::summary::ssa_summary::PathFactReturnEntry {
|
|
predicate_hash: obs.predicate_hash,
|
|
known_true: obs.known_true,
|
|
known_false: obs.known_false,
|
|
path_fact: obs.path_fact.clone(),
|
|
variant_inner_fact: obs.variant_inner_fact.clone(),
|
|
};
|
|
crate::summary::ssa_summary::merge_path_fact_return_paths(&mut merged, &[entry]);
|
|
}
|
|
let distinct_hashes = merged
|
|
.iter()
|
|
.map(|e| e.predicate_hash)
|
|
.collect::<std::collections::HashSet<_>>();
|
|
let has_signal = merged
|
|
.iter()
|
|
.any(|e| !e.path_fact.is_top() || e.variant_inner_fact.is_some());
|
|
if distinct_hashes.len() >= 2 && has_signal {
|
|
return_path_facts = merged;
|
|
}
|
|
}
|
|
|
|
// Probe each param
|
|
let mut param_to_return = Vec::new();
|
|
let mut param_to_sink: Vec<(usize, SmallVec<[SinkSite; 1]>)> = Vec::new();
|
|
let mut param_to_sink_param = Vec::new();
|
|
// Per-param gate-filter cap masks lifted from inner multi-gate sink calls.
|
|
// Populated when the per-param probe reaches a sink whose CFG node carries
|
|
// [`crate::cfg::CallMeta::gate_filters`] with more than one entry, the
|
|
// multi-gate dispatch in `collect_block_events` has already cap-narrowed
|
|
// `event.sink_caps` to the matching gate's `label_caps`, so we record the
|
|
// pair as-is. Cross-file callers consume this list to preserve per-position
|
|
// cap attribution through wrapper functions like
|
|
// `fn forward(url, body) { fetch(url, {body}) }`.
|
|
let mut param_to_gate_filters: Vec<(usize, Cap)> = Vec::new();
|
|
// Per-param return-path decomposition. Populated only when the param
|
|
// has ≥2 distinct return-block predicate hashes, a single-return-path
|
|
// callee is already precise via `param_to_return`.
|
|
let mut param_return_paths: Vec<(
|
|
usize,
|
|
SmallVec<[crate::summary::ssa_summary::ReturnPathTransform; 2]>,
|
|
)> = Vec::new();
|
|
// Parameter indices whose taint flow to the return is fully
|
|
// validated by a dominating predicate on every return path.
|
|
// Populated below by checking each per-param probe's return-block
|
|
// exit states for `validated_must` containing the param's
|
|
// var_name. Empty when no parameter is validated.
|
|
let mut validated_params_to_return: SmallVec<[usize; 2]> = SmallVec::new();
|
|
|
|
for &(idx, ref var_name, _ssa_val) in ¶m_info {
|
|
let mut seed = HashMap::new();
|
|
let origin = TaintOrigin {
|
|
node: NodeIndex::new(0), // synthetic origin for probing
|
|
source_kind: SourceKind::UserInput,
|
|
source_span: None,
|
|
};
|
|
let probe_taint = VarTaint {
|
|
caps: Cap::all(),
|
|
origins: SmallVec::from_elem(origin, 1),
|
|
uses_summary: false,
|
|
};
|
|
seed.insert(
|
|
BindingKey::new(var_name.as_str(), BodyId(0)),
|
|
probe_taint.clone(),
|
|
);
|
|
|
|
// Destructured-arg sibling expansion. When the formal at slot
|
|
// `idx` destructures an object pattern (`({ column, operator,
|
|
// value })`), the SSA body emits a separate [`SsaOp::Param`]
|
|
// for every destructured binding (sequential indices > slot
|
|
// count, since the closure-capture pass treats them as
|
|
// free-identifier reads). The call-site only passes ONE arg
|
|
// for the slot, so the engine never seeds the sibling Param
|
|
// ops at runtime — but the per-parameter SUMMARY probe must
|
|
// model "if this slot is tainted then every binding it
|
|
// produced is tainted too". Seed each sibling's `var_name`
|
|
// with the same caps the primary received. The probe-level
|
|
// `validated_must` check below treats the slot as validated
|
|
// when ANY sibling lands in `validated_must` on a return path.
|
|
//
|
|
// Closes the residual gap behind CVE-2026-25544 (PayloadCMS
|
|
// `@payloadcms/drizzle` SQLi via `createJSONQuery({ value })`):
|
|
// the validator helper `sanitizeValue(value, operator)` lives
|
|
// inside the body and the probe needs to see `value` flow
|
|
// through the `validated_params_to_return` channel before
|
|
// suppressing the caller's sink.
|
|
let slot_siblings: &[String] = formal_destructured_fields
|
|
.and_then(|d| d.get(idx))
|
|
.map(|v| v.as_slice())
|
|
.unwrap_or(&[]);
|
|
for sib in slot_siblings {
|
|
seed.insert(
|
|
BindingKey::new(sib.as_str(), BodyId(0)),
|
|
probe_taint.clone(),
|
|
);
|
|
}
|
|
|
|
// Phantom-Param prefix seeding. SSA lowering of arrow / nested
|
|
// function bodies often exposes free-identifier member-access
|
|
// expressions (e.g. `file._source.uri`) as their own
|
|
// [`SsaOp::Param`] ops with composite `var_name`s like
|
|
// `"file._source.uri"`. These phantom Params are the values
|
|
// actually used as call arguments, not the formal-param SSA
|
|
// value the seed targets. Without this, the per-param probe
|
|
// misses cross-call sinks because the call's arg SSA value is
|
|
// a phantom Param with no seed entry, so `transfer_inst::Param`
|
|
// leaves it untainted and `collect_tainted_sink_values`
|
|
// observes empty caps despite the formal param being seeded.
|
|
//
|
|
// Seed every phantom Param whose `var_name` begins with
|
|
// `formal_var_name + "."` with the same caps the formal param
|
|
// received: semantically "if `file` is tainted, then every
|
|
// observable field path on `file` is tainted too". Bounded
|
|
// by SSA size; cap-equivalent to direct seeding. Mirror this
|
|
// for each destructured sibling (`value.foo` / `column.name`
|
|
// member-projections inside the body).
|
|
let prefixes: Vec<String> = std::iter::once(var_name.clone())
|
|
.chain(slot_siblings.iter().cloned())
|
|
.map(|n| format!("{}.", n))
|
|
.collect();
|
|
for block in &ssa.blocks {
|
|
for inst in block.phis.iter().chain(block.body.iter()) {
|
|
if let SsaOp::Param { .. } = &inst.op {
|
|
if let Some(name) = inst.var_name.as_ref() {
|
|
if prefixes.iter().any(|p| name.starts_with(p)) {
|
|
seed.insert(
|
|
BindingKey::new(name.as_str(), BodyId(0)),
|
|
probe_taint.clone(),
|
|
);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Build slot-wide name list for the validated_must check.
|
|
// Primary first, then siblings, then heap-allocated owned
|
|
// copies — `run_probe` only borrows for its inner loop.
|
|
let mut slot_names: Vec<&str> = Vec::with_capacity(1 + slot_siblings.len());
|
|
slot_names.push(var_name.as_str());
|
|
for sib in slot_siblings {
|
|
slot_names.push(sib.as_str());
|
|
}
|
|
let (return_caps, events, _, per_return_obs) = run_probe(seed, Some(slot_names.as_slice()));
|
|
|
|
// Subtract baseline source_caps, we only want param-contributed caps
|
|
let param_return_caps = return_caps & !source_caps;
|
|
|
|
if !param_return_caps.is_empty() {
|
|
let stripped = Cap::all() & !param_return_caps;
|
|
let transform = if stripped.is_empty() {
|
|
TaintTransform::Identity
|
|
} else {
|
|
TaintTransform::StripBits(stripped)
|
|
};
|
|
param_to_return.push((idx, transform));
|
|
}
|
|
|
|
// Validated-param-to-return detection.
|
|
//
|
|
// When the per-param probe shows that the parameter's
|
|
// `var_name` is in `validated_must` on every return path that
|
|
// *carries the parameter's contributed caps*, record the
|
|
// parameter as validated. The caller will mark each tainted
|
|
// argument passed to this position — and the call's own
|
|
// return value — as `validated_must` / `validated_may`, the
|
|
// same way an inline `if (!regex.test(x)) throw` would
|
|
// validate the surviving branch.
|
|
//
|
|
// Conservative gating:
|
|
// * Skip when the param contributes no caps to the return,
|
|
// a degenerate "validated but irrelevant" record.
|
|
// * Skip when no return block was observed (probes that
|
|
// diverged or hit `MAX_PROBE_PARAMS`).
|
|
// * Require validation on every return path that *carries
|
|
// param caps to the return*. Branches that return
|
|
// constants (e.g. `if (x === null) return 'NULL'`) carry
|
|
// no param taint and don't need a validation predicate.
|
|
// * Require ≥1 path that actually validates the param.
|
|
if !param_return_caps.is_empty() && !per_return_obs.is_empty() {
|
|
let mut any_carrying_path = false;
|
|
let all_carrying_validated = per_return_obs.iter().all(|obs| {
|
|
let carries = !(obs.derived_caps & !source_caps).is_empty()
|
|
|| !(obs.param_caps & !source_caps).is_empty();
|
|
if carries {
|
|
any_carrying_path = true;
|
|
obs.param_validated_must
|
|
} else {
|
|
true
|
|
}
|
|
});
|
|
if any_carrying_path && all_carrying_validated {
|
|
validated_params_to_return.push(idx);
|
|
}
|
|
}
|
|
|
|
// Derive per-return-path decomposition. For each
|
|
// observed return block, derive a `ReturnPathTransform` mirroring
|
|
// the aggregate logic (prefer derived caps, fall back to param
|
|
// caps, strip baseline source caps). Only emit when ≥2 distinct
|
|
// predicate hashes are present, a single-hash summary adds no
|
|
// signal over the aggregate `param_to_return`.
|
|
if per_return_obs.len() >= 2 {
|
|
let mut per_path: SmallVec<[crate::summary::ssa_summary::ReturnPathTransform; 2]> =
|
|
SmallVec::new();
|
|
for obs in &per_return_obs {
|
|
let block_return_caps = if !obs.derived_caps.is_empty() {
|
|
obs.derived_caps
|
|
} else {
|
|
obs.param_caps
|
|
};
|
|
let block_contributed = block_return_caps & !source_caps;
|
|
let transform_kind = if block_contributed.is_empty() {
|
|
// No caps on this path, param does not reach return
|
|
// under this predicate. A `StripBits(all)` records
|
|
// "all bits cleared" so downstream join preserves the
|
|
// disparity with other paths.
|
|
TaintTransform::StripBits(Cap::all())
|
|
} else {
|
|
let stripped = Cap::all() & !block_contributed;
|
|
if stripped.is_empty() {
|
|
TaintTransform::Identity
|
|
} else {
|
|
TaintTransform::StripBits(stripped)
|
|
}
|
|
};
|
|
crate::summary::ssa_summary::merge_return_paths(
|
|
&mut per_path,
|
|
&[crate::summary::ssa_summary::ReturnPathTransform {
|
|
transform: transform_kind,
|
|
path_predicate_hash: obs.predicate_hash,
|
|
known_true: obs.known_true,
|
|
known_false: obs.known_false,
|
|
abstract_contribution: obs.abstract_value.clone(),
|
|
}],
|
|
);
|
|
}
|
|
// Only record when ≥2 distinct predicate gates survived
|
|
// the dedup (a single-entry vector is no finer than the
|
|
// aggregate `param_to_return` and wastes bytes on disk).
|
|
let distinct_hashes = per_path
|
|
.iter()
|
|
.map(|e| e.path_predicate_hash)
|
|
.collect::<std::collections::HashSet<_>>();
|
|
if distinct_hashes.len() >= 2 {
|
|
param_return_paths.push((idx, per_path));
|
|
}
|
|
}
|
|
|
|
// Collect sink caps + primary-location sites from events + per-arg-position detail.
|
|
//
|
|
// Skip events flagged `all_validated`: every tainted SSA value
|
|
// that reached the sink was already proved validated by a
|
|
// dominating predicate (AllowlistCheck / TypeCheck /
|
|
// ValidationCall, including the indirect-validator branch
|
|
// narrowing for `validate*` / `is_valid*` callees). Those
|
|
// events would have been dropped by `ssa_events_to_findings` at
|
|
// the per-file finding step; carrying them into
|
|
// `param_to_sink` / `param_to_sink_param` re-publishes a sink
|
|
// attribution callers can no longer suppress, because the
|
|
// caller can't see the validator that lives inside the
|
|
// callee body.
|
|
//
|
|
// Strict-additive: `all_validated` is set only when every
|
|
// tainted operand at the sink has its `var_name` in
|
|
// `state.validated_may`, single-path single-validator helpers
|
|
// cleanly skip; mixed-tainted-with-some-unvalidated events
|
|
// still propagate. Closes the helper-summary precision gap
|
|
// surfaced by Novu CVE GHSA-4x48-cgf9-q33f.
|
|
let mut param_sites: SmallVec<[SinkSite; 1]> = SmallVec::new();
|
|
for event in &events {
|
|
if event.all_validated {
|
|
continue;
|
|
}
|
|
for pos in extract_sink_arg_positions(event, ssa) {
|
|
param_to_sink_param.push((idx, pos, event.sink_caps));
|
|
}
|
|
// Per-position gate-filter cap lifting.
|
|
//
|
|
// When the sink callee carries multiple gate filters (e.g. `fetch`
|
|
// is both an SSRF gate on the URL arg and a `DATA_EXFIL` gate on
|
|
// the body arg), the multi-gate dispatch has already filtered
|
|
// `event.sink_caps` down to the specific gate's `label_caps` for
|
|
// this probe. Recording `(idx, event.sink_caps)` preserves that
|
|
// narrowing across the function-summary boundary so a caller of
|
|
// the wrapper splits SSRF from DATA_EXFIL findings instead of
|
|
// joining them under a single union.
|
|
//
|
|
// Single-gate / no-gate sinks are skipped, the existing
|
|
// `param_to_sink` machinery already records those without
|
|
// per-position cap conflict.
|
|
if !event.sink_caps.is_empty()
|
|
&& cfg[event.sink_node].call.gate_filters.len() > 1
|
|
&& !param_to_gate_filters
|
|
.iter()
|
|
.any(|&(i, c)| i == idx && c == event.sink_caps)
|
|
{
|
|
param_to_gate_filters.push((idx, event.sink_caps));
|
|
}
|
|
if event.sink_caps.is_empty() {
|
|
continue;
|
|
}
|
|
let site = match locator {
|
|
Some(loc) => {
|
|
loc.site_for_span(cfg[event.sink_node].classification_span(), event.sink_caps)
|
|
}
|
|
None => SinkSite::cap_only(event.sink_caps),
|
|
};
|
|
let key = site.dedup_key();
|
|
if !param_sites.iter().any(|s| s.dedup_key() == key) {
|
|
param_sites.push(site);
|
|
}
|
|
}
|
|
if !param_sites.is_empty() {
|
|
param_to_sink.push((idx, param_sites));
|
|
}
|
|
}
|
|
|
|
let (param_container_to_return, param_to_container_store) =
|
|
extract_container_flow_summary(ssa, lang, effective_params);
|
|
|
|
// Parameter-granularity points-to summary.
|
|
let points_to = crate::ssa::param_points_to::analyse_param_points_to(
|
|
ssa,
|
|
¶m_info,
|
|
effective_params,
|
|
formal_param_names,
|
|
Some(lang),
|
|
);
|
|
|
|
// Infer return type: scan return-reaching blocks for constructor calls.
|
|
let return_type = infer_summary_return_type(ssa, lang);
|
|
|
|
// Detect source_to_callback: internal source taint flowing to calls of
|
|
// parameter functions (e.g., `fn apply(f) { let x = source(); f(x); }`).
|
|
// Re-runs the baseline probe internally to get accurate taint state.
|
|
let source_to_callback = if !source_caps.is_empty() && !param_info.is_empty() {
|
|
let baseline_transfer = SsaTaintTransfer {
|
|
lang,
|
|
namespace,
|
|
interner,
|
|
local_summaries,
|
|
global_summaries,
|
|
interop_edges: &[],
|
|
owner_body_id: BodyId(0),
|
|
parent_body_id: None,
|
|
global_seed: None,
|
|
param_seed: None,
|
|
receiver_seed: None,
|
|
const_values: None,
|
|
type_facts: None,
|
|
ssa_summaries,
|
|
extra_labels: None,
|
|
base_aliases: None,
|
|
callee_bodies: None,
|
|
inline_cache: None,
|
|
context_depth: 0,
|
|
callback_bindings: None,
|
|
points_to: None,
|
|
dynamic_pts: None,
|
|
import_bindings: None,
|
|
promisify_aliases: None,
|
|
module_aliases: None,
|
|
static_map: None,
|
|
auto_seed_handler_params: false,
|
|
cross_file_bodies: None,
|
|
pointer_facts: None,
|
|
};
|
|
detect_source_to_callback_from_states(
|
|
ssa,
|
|
cfg,
|
|
source_caps,
|
|
¶m_info,
|
|
&baseline_transfer,
|
|
)
|
|
} else {
|
|
vec![]
|
|
};
|
|
|
|
// Per-parameter abstract-domain transfers.
|
|
//
|
|
// Derived structurally from the SSA body, no additional taint probes.
|
|
// Three-step inference per parameter:
|
|
// 1. Identity: return SSA value at every return block traces back to
|
|
// this parameter (possibly through assigns / phi merges all feeding
|
|
// from the same param).
|
|
// 2. Callee-intrinsic bound: baseline `return_abstract` carries a
|
|
// concrete fact (bounded interval or known prefix) that holds
|
|
// regardless of caller input, record it once per parameter as
|
|
// `Clamped` / `LiteralPrefix` so the caller sees the bound even
|
|
// when it has no abstract info on its own argument.
|
|
// 3. Top: default; the entry is omitted (empty transfer is meaningless).
|
|
let abstract_transfer = derive_abstract_transfer(ssa, ¶m_info, return_abstract.as_ref());
|
|
|
|
SsaFuncSummary {
|
|
param_to_return,
|
|
param_to_sink,
|
|
source_caps,
|
|
param_to_sink_param,
|
|
param_to_gate_filters,
|
|
param_container_to_return,
|
|
param_to_container_store,
|
|
return_type,
|
|
return_abstract,
|
|
source_to_callback,
|
|
receiver_to_return: None,
|
|
receiver_to_sink: Cap::empty(),
|
|
abstract_transfer,
|
|
param_return_paths,
|
|
return_path_facts,
|
|
points_to,
|
|
// extension, empty until the field-granularity
|
|
// extractor is wired (`NYX_POINTER_ANALYSIS=1` only). Default
|
|
// path stays bit-identical to today.
|
|
field_points_to: crate::summary::points_to::FieldPointsToSummary::empty(),
|
|
// Populated post-extraction in
|
|
// `taint::lower_all_functions_from_bodies` once SSA optimisation
|
|
// has computed `opt.type_facts`. Empty here means the
|
|
// extractor itself doesn't carry receiver-type info, the
|
|
// caller patches it in.
|
|
typed_call_receivers: Vec::new(),
|
|
validated_params_to_return,
|
|
}
|
|
}
|
|
|
|
/// Derive a deterministic predicate-hash + known-true/false intersection
|
|
/// for a return-block exit state.
|
|
///
|
|
/// The hash combines the sorted `(SymbolId, known_true, known_false)` tuples
|
|
/// from the state's `predicates` list with the validated_must bitmask. Two
|
|
/// return blocks whose predicate gates are observationally identical produce
|
|
/// the same hash; the intersection of known_true/false gives the bits that
|
|
/// hold on every path into each return block.
|
|
///
|
|
/// Returns `(0, 0, 0)` for a Top state (no predicates tracked).
|
|
pub(super) fn summarise_return_predicates(state: &SsaTaintState) -> (u64, u8, u8) {
|
|
use std::collections::hash_map::DefaultHasher;
|
|
use std::hash::{Hash, Hasher};
|
|
|
|
if state.predicates.is_empty() && state.validated_must.is_empty() {
|
|
return (0, 0, 0);
|
|
}
|
|
|
|
let mut h = DefaultHasher::new();
|
|
// Validated-must contributes deterministically via bits().
|
|
state.validated_must.bits().hash(&mut h);
|
|
// Sort by SymbolId (predicates list is already sorted by SsaTaintState
|
|
// invariants, but hash-input stability matters here).
|
|
let mut sorted: smallvec::SmallVec<[(u32, u8, u8); 4]> = state
|
|
.predicates
|
|
.iter()
|
|
.map(|(id, s)| (id.0, s.known_true, s.known_false))
|
|
.collect();
|
|
sorted.sort_by_key(|(id, _, _)| *id);
|
|
for (id, kt, kf) in &sorted {
|
|
id.hash(&mut h);
|
|
kt.hash(&mut h);
|
|
kf.hash(&mut h);
|
|
}
|
|
let hash = h.finish();
|
|
// Intersect known_true / known_false across all tracked variables:
|
|
// the bits that hold for EVERY predicate-tracked var at this return.
|
|
let known_true = sorted
|
|
.iter()
|
|
.map(|(_, kt, _)| *kt)
|
|
.fold(u8::MAX, |a, b| a & b);
|
|
let known_false = sorted
|
|
.iter()
|
|
.map(|(_, _, kf)| *kf)
|
|
.fold(u8::MAX, |a, b| a & b);
|
|
// Use `1` for the "no predicates but validated_must non-empty" case to
|
|
// avoid colliding with the unguarded sentinel (0).
|
|
let hash = if hash == 0 { 1 } else { hash };
|
|
(hash, known_true, known_false)
|
|
}
|
|
|
|
/// Derive per-parameter [`AbstractTransfer`] entries for a function's SSA
|
|
/// body.
|
|
///
|
|
/// `return_abstract` is the callee's intrinsic baseline (from the no-seed
|
|
/// probe). When present, it describes a fact that holds for the return
|
|
/// regardless of parameter input, so it can be attached as a
|
|
/// `Clamped` / `LiteralPrefix` transform to every parameter that flows to
|
|
/// the return.
|
|
///
|
|
/// Identity detection is structural: walk the return values back through
|
|
/// [`SsaOp::Assign`] / [`SsaOp::Phi`] chains (bounded) and check whether
|
|
/// every leaf resolves to the same [`SsaOp::Param`]. The trace is cheap
|
|
/// and can only produce `Identity` for passthrough callees, anything
|
|
/// more complex degrades to the baseline fact or `Top`.
|
|
fn derive_abstract_transfer(
|
|
ssa: &SsaBody,
|
|
param_info: &[(usize, String, SsaValue)],
|
|
return_abstract: Option<&crate::abstract_interp::AbstractValue>,
|
|
) -> Vec<(usize, crate::abstract_interp::AbstractTransfer)> {
|
|
use crate::abstract_interp::{AbstractTransfer, IntervalTransfer, StringTransfer};
|
|
|
|
if param_info.is_empty() {
|
|
return Vec::new();
|
|
}
|
|
|
|
// Build a lookup from SsaValue → defining op by scanning the body once.
|
|
let mut defs: HashMap<SsaValue, &SsaOp> = HashMap::new();
|
|
for block in &ssa.blocks {
|
|
for inst in block.phis.iter().chain(block.body.iter()) {
|
|
defs.insert(inst.value, &inst.op);
|
|
}
|
|
}
|
|
|
|
// Trace an SSA value backwards to the single source parameter index it
|
|
// resolves to, if any. Returns `None` when the trace diverges, hits a
|
|
// non-pass-through op, or exceeds the depth bound.
|
|
fn trace_to_param(
|
|
v: SsaValue,
|
|
defs: &HashMap<SsaValue, &SsaOp>,
|
|
depth: usize,
|
|
) -> Option<usize> {
|
|
const MAX_DEPTH: usize = 8;
|
|
if depth > MAX_DEPTH {
|
|
return None;
|
|
}
|
|
match defs.get(&v)? {
|
|
SsaOp::Param { index } => Some(*index),
|
|
SsaOp::Assign(ops) if ops.len() == 1 => trace_to_param(ops[0], defs, depth + 1),
|
|
SsaOp::Phi(preds) => {
|
|
let mut result: Option<usize> = None;
|
|
for (_, pv) in preds {
|
|
let p = trace_to_param(*pv, defs, depth + 1)?;
|
|
match result {
|
|
None => result = Some(p),
|
|
Some(existing) if existing == p => {}
|
|
Some(_) => return None,
|
|
}
|
|
}
|
|
result
|
|
}
|
|
_ => None,
|
|
}
|
|
}
|
|
|
|
// For every return block, trace its return value and record which
|
|
// parameter (if any) it resolves to. If all return blocks agree on the
|
|
// same parameter index, that parameter has `Identity`. If they disagree
|
|
// (or some don't resolve), no parameter gets `Identity` and we fall
|
|
// back to baseline-derived forms.
|
|
let mut identity_param: Option<usize> = None;
|
|
let mut identity_consistent = true;
|
|
for block in &ssa.blocks {
|
|
if let Terminator::Return(Some(rv)) = &block.terminator {
|
|
let traced = trace_to_param(*rv, &defs, 0);
|
|
match (identity_param, traced) {
|
|
(None, Some(p)) => identity_param = Some(p),
|
|
(Some(existing), Some(p)) if existing == p => {}
|
|
_ => {
|
|
identity_consistent = false;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Derive a baseline-invariant transform from `return_abstract`. This is
|
|
// the "callee intrinsic" fact that always holds, each parameter that
|
|
// flows to the return gets it attached as the conservative transfer.
|
|
let baseline_invariant: Option<AbstractTransfer> = return_abstract.map(|av| {
|
|
let interval = match (av.interval.lo, av.interval.hi) {
|
|
(Some(lo), Some(hi)) if lo <= hi => IntervalTransfer::Clamped { lo, hi },
|
|
_ => IntervalTransfer::Top,
|
|
};
|
|
let string = match &av.string.prefix {
|
|
Some(p) if !p.is_empty() => StringTransfer::literal_prefix(p),
|
|
_ => StringTransfer::Unknown,
|
|
};
|
|
AbstractTransfer { interval, string }
|
|
});
|
|
|
|
let mut result: Vec<(usize, AbstractTransfer)> = Vec::new();
|
|
|
|
for (idx, _, _) in param_info {
|
|
let mut transfer = AbstractTransfer::top();
|
|
|
|
if identity_consistent && identity_param == Some(*idx) {
|
|
transfer.interval = IntervalTransfer::Identity;
|
|
transfer.string = StringTransfer::Identity;
|
|
} else if let Some(base) = baseline_invariant.as_ref() {
|
|
// Baseline intrinsic bound applies to every parameter that could
|
|
// reach the return. We conservatively attach it to all params
|
|
//, at apply time the caller meets it with the real return
|
|
// abstract (also from this same summary), so double-counting
|
|
// would collapse to the tighter of the two.
|
|
transfer = base.clone();
|
|
}
|
|
|
|
if !transfer.is_top() {
|
|
result.push((*idx, transfer));
|
|
}
|
|
}
|
|
|
|
result
|
|
}
|
|
|
|
/// Detect callback patterns where internal source taint flows to a call of a
|
|
/// parameter function. Re-runs the baseline probe internally to get accurate
|
|
/// taint state at each instruction point.
|
|
///
|
|
/// Returns `(param_index_of_callee, source_caps)` pairs.
|
|
fn detect_source_to_callback_from_states(
|
|
ssa: &SsaBody,
|
|
cfg: &Cfg,
|
|
source_caps: Cap,
|
|
param_info: &[(usize, String, SsaValue)],
|
|
transfer: &SsaTaintTransfer,
|
|
) -> Vec<(usize, Cap)> {
|
|
use crate::ssa::ir::SsaOp;
|
|
|
|
// Map param var_name → param_index
|
|
let param_name_to_index: HashMap<&str, usize> = param_info
|
|
.iter()
|
|
.map(|(idx, name, _)| (name.as_str(), *idx))
|
|
.collect();
|
|
|
|
// Run taint analysis to get converged block states
|
|
let (_events, block_states) = run_ssa_taint_full(ssa, cfg, transfer);
|
|
|
|
let mut result: Vec<(usize, Cap)> = vec![];
|
|
for (bid, block) in ssa.blocks.iter().enumerate() {
|
|
let Some(entry_state) = &block_states[bid] else {
|
|
continue;
|
|
};
|
|
// Replay block transfer to get accurate taint state at each instruction
|
|
let mut state = entry_state.clone();
|
|
for inst in &block.body {
|
|
// Apply transfer for this instruction to advance state
|
|
transfer_inst(inst, cfg, ssa, transfer, &mut state);
|
|
|
|
// After transfer: check if this is a call to a param with tainted args
|
|
if let SsaOp::Call { callee, args, .. } = &inst.op {
|
|
if let Some(¶m_idx) = param_name_to_index.get(callee.as_str()) {
|
|
let any_arg_tainted = args.iter().any(|arg_vals| {
|
|
arg_vals
|
|
.iter()
|
|
.any(|v| state.get(*v).is_some_and(|t| !t.caps.is_empty()))
|
|
});
|
|
if any_arg_tainted && !result.iter().any(|(idx, _)| *idx == param_idx) {
|
|
result.push((param_idx, source_caps));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
result
|
|
}
|
|
|
|
/// Infer the return type of a function from its SSA body by checking whether
|
|
/// return-reaching blocks produce values from known constructor/factory calls.
|
|
fn infer_summary_return_type(
|
|
ssa: &SsaBody,
|
|
lang: Lang,
|
|
) -> Option<crate::ssa::type_facts::TypeKind> {
|
|
// Find blocks with Return terminators, then look at the last defined value
|
|
// in those blocks, if it's a Call with a known constructor, that's our type.
|
|
for block in &ssa.blocks {
|
|
if !matches!(block.terminator, Terminator::Return(_)) {
|
|
continue;
|
|
}
|
|
// Only inspect the very last instruction in the returning block.
|
|
if let Some(inst) = block.body.last()
|
|
&& let SsaOp::Call { callee, .. } = &inst.op
|
|
&& let Some(ty) = crate::ssa::type_facts::constructor_type(lang, callee)
|
|
{
|
|
return Some(ty);
|
|
}
|
|
}
|
|
None
|
|
}
|
|
|
|
// ── Inter-procedural container flow detection (structural SSA analysis) ──
|
|
|
|
/// Build a map from SsaValue to its defining instruction.
|
|
fn build_inst_map(ssa: &SsaBody) -> HashMap<SsaValue, (SsaOp, Option<SsaValue>)> {
|
|
let mut map = HashMap::new();
|
|
for block in &ssa.blocks {
|
|
for inst in block.phis.iter().chain(block.body.iter()) {
|
|
// Store the op and optionally the receiver for calls
|
|
map.insert(inst.value, (inst.op.clone(), None));
|
|
}
|
|
}
|
|
map
|
|
}
|
|
|
|
/// Trace an SSA value back through Assign/Phi chains to find if it originates
|
|
/// from a `Param { index }`. Returns `Some(index)` if a param is found.
|
|
/// Does NOT trace through Call, Const, Source, or other non-identity ops.
|
|
fn trace_to_param(
|
|
v: SsaValue,
|
|
ssa: &SsaBody,
|
|
inst_map: &HashMap<SsaValue, (SsaOp, Option<SsaValue>)>,
|
|
visited: &mut HashSet<SsaValue>,
|
|
) -> Option<usize> {
|
|
if !visited.insert(v) {
|
|
return None;
|
|
}
|
|
let (op, _) = inst_map.get(&v)?;
|
|
match op {
|
|
SsaOp::Param { index } => Some(*index),
|
|
SsaOp::Assign(uses) => {
|
|
for u in uses {
|
|
if let Some(idx) = trace_to_param(*u, ssa, inst_map, visited) {
|
|
return Some(idx);
|
|
}
|
|
}
|
|
None
|
|
}
|
|
SsaOp::Phi(operands) => {
|
|
for (_, op_val) in operands {
|
|
if let Some(idx) = trace_to_param(*op_val, ssa, inst_map, visited) {
|
|
return Some(idx);
|
|
}
|
|
}
|
|
None
|
|
}
|
|
// Don't trace through Call (new identity), Const, Source, Nop, CatchParam
|
|
_ => None,
|
|
}
|
|
}
|
|
|
|
/// Detect inter-procedural container flow patterns from SSA structure:
|
|
/// - `param_container_to_return`: params whose container identity flows to return
|
|
/// - `param_to_container_store`: (src_param, container_param) pairs where src taint
|
|
/// is stored into container_param's contents
|
|
pub(crate) fn extract_container_flow_summary(
|
|
ssa: &SsaBody,
|
|
lang: Lang,
|
|
formal_param_count: usize,
|
|
) -> (Vec<usize>, Vec<(usize, usize)>) {
|
|
use crate::ssa::pointsto::{ContainerOp, classify_container_op};
|
|
|
|
let inst_map = build_inst_map(ssa);
|
|
let mut container_to_return: HashSet<usize> = HashSet::new();
|
|
let mut container_store: Vec<(usize, usize)> = Vec::new();
|
|
|
|
// 1. param_container_to_return: trace Assign/Phi ops in return blocks to params.
|
|
//
|
|
// `trace_to_param` will happily return any `SsaOp::Param { index }`, but
|
|
// scoped lowering synthesises `Param` ops for external captures (module
|
|
// imports, free identifiers) at indices beyond the formal parameter count.
|
|
// Those must not enter the summary, the key's arity only covers formal
|
|
// params, and an out-of-range index trips `ssa_summary_fits_arity`, forcing
|
|
// the reconciliation probe to generate a synthetic disambiguator that no
|
|
// caller will ever look up.
|
|
for block in &ssa.blocks {
|
|
if !matches!(block.terminator, Terminator::Return(_)) {
|
|
continue;
|
|
}
|
|
for inst in block.phis.iter().chain(block.body.iter()) {
|
|
match &inst.op {
|
|
// Only trace identity-preserving ops (Assign, Phi).
|
|
// Skip Param (would cause false positives in single-block functions),
|
|
// Call (new identity), Const, Source, Nop, CatchParam.
|
|
SsaOp::Assign(_) | SsaOp::Phi(_) => {
|
|
if let Some(idx) =
|
|
trace_to_param(inst.value, ssa, &inst_map, &mut HashSet::new())
|
|
&& idx < formal_param_count
|
|
{
|
|
container_to_return.insert(idx);
|
|
}
|
|
}
|
|
_ => {}
|
|
}
|
|
}
|
|
}
|
|
|
|
// 2. param_to_container_store: find container Store calls, trace args to params
|
|
for block in &ssa.blocks {
|
|
for inst in block.body.iter() {
|
|
if let SsaOp::Call {
|
|
callee,
|
|
args,
|
|
receiver,
|
|
..
|
|
} = &inst.op
|
|
{
|
|
let op = match classify_container_op(callee, lang) {
|
|
Some(ContainerOp::Store { value_args, .. }) => value_args,
|
|
_ => continue,
|
|
};
|
|
|
|
// Resolve container SSA value. With the new call ABI, the
|
|
// receiver is a separate channel and `args` contains only
|
|
// positional arguments. For Go, container ops are plain
|
|
// function calls (no receiver), so args[0] is the container.
|
|
let container_val = if let Some(v) = *receiver {
|
|
Some(v)
|
|
} else if lang == Lang::Go {
|
|
args.first().and_then(|a| a.first().copied())
|
|
} else if let Some(dot_pos) = callee.rfind('.') {
|
|
let receiver_name = &callee[..dot_pos];
|
|
args.iter()
|
|
.flat_map(|a| a.iter())
|
|
.find(|&&v| {
|
|
ssa.value_defs
|
|
.get(v.0 as usize)
|
|
.and_then(|d| d.var_name.as_deref())
|
|
== Some(receiver_name)
|
|
})
|
|
.copied()
|
|
} else {
|
|
None
|
|
};
|
|
|
|
let container_val = match container_val {
|
|
Some(v) => v,
|
|
None => continue,
|
|
};
|
|
|
|
// Trace container to positional param (SelfParam → None, so
|
|
// when the container is the receiver we skip, the caller
|
|
// tracks that via `receiver_to_container_store` if needed).
|
|
// Same arity filter as above: reject synthetic Param ops that
|
|
// were injected for free captures.
|
|
let container_param =
|
|
match trace_to_param(container_val, ssa, &inst_map, &mut HashSet::new()) {
|
|
Some(idx) if idx < formal_param_count => idx,
|
|
_ => continue,
|
|
};
|
|
|
|
// Go container ops are plain function calls with the container
|
|
// at args[0]; value args start at args[1]. Other languages
|
|
// place the container on the receiver channel so args holds
|
|
// only value args starting at index 0.
|
|
let arg_offset = if lang == Lang::Go && receiver.is_none() {
|
|
1usize
|
|
} else {
|
|
0
|
|
};
|
|
|
|
// Trace each value arg to param (same arity filter as above).
|
|
for &va_idx in &op {
|
|
let effective_idx = va_idx + arg_offset;
|
|
if let Some(arg_vals) = args.get(effective_idx) {
|
|
for &av in arg_vals {
|
|
if let Some(src_param) =
|
|
trace_to_param(av, ssa, &inst_map, &mut HashSet::new())
|
|
&& src_param < formal_param_count
|
|
&& src_param != container_param
|
|
&& !container_store.contains(&(src_param, container_param))
|
|
{
|
|
container_store.push((src_param, container_param));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
let mut ctr: Vec<usize> = container_to_return.into_iter().collect();
|
|
ctr.sort();
|
|
container_store.sort();
|
|
(ctr, container_store)
|
|
}
|