use crate::abstract_interp::{AbstractTransfer, AbstractValue, PathFact};
use crate::labels::Cap;
use crate::ssa::type_facts::TypeKind;
use crate::summary::SinkSite;
use crate::summary::points_to::{FieldPointsToSummary, PointsToSummary};
use serde::{Deserialize, Serialize};
use smallvec::SmallVec;

/// Per-parameter taint transform describing how taint flows through a function.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum TaintTransform {
    /// Parameter flows to return value unchanged.
    Identity,
    /// Parameter flows to return minus sanitizer bits.
    StripBits(Cap),
    /// Return value gains additional source bits regardless of input.
    AddBits(Cap),
}

/// Cap on per-parameter return-path entries. Overflow is joined into
/// a single Top-predicate entry so callers always see a bounded vec.
pub const MAX_RETURN_PATHS: usize = 8;

/// One return-path entry in a per-parameter summary. Records the path
/// predicate, the transform on that path, and optionally an abstract
/// contribution. Callers apply only entries consistent with their
/// caller-side path state.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ReturnPathTransform {
    /// Behavioural kind on this path (Identity / StripBits / AddBits).
    pub transform: TaintTransform,
    /// Deterministic hash of the path-predicate gate. `0` = no gate.
    /// Equivalent predicates collide and are joined.
    pub path_predicate_hash: u64,
    /// `known_true` predicate bits (bit 0 = NullCheck, 1 = EmptyCheck,
    /// 2 = ErrorCheck) that hold on every path into this return.
    pub known_true: u8,
    /// `known_false` bits at this return (same bit encoding as `known_true`).
    pub known_false: u8,
    /// Abstract contribution when non-Top. Callers `meet` it with the
    /// caller-side abstract fact.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    // NOTE(review): `Option` carries no type argument here and will not compile
    // as written; presumably `Option<AbstractValue>` (the value is `join`ed in
    // `merge_return_paths`) — confirm against the upstream file.
    pub abstract_contribution: Option,
}

impl ReturnPathTransform {
    /// Dedup key: `(path_predicate_hash, &transform, known_true, known_false)`.
    ///
    /// `abstract_contribution` is intentionally excluded — colliding entries
    /// join their abstract facts instead of being kept as separate entries.
    pub fn dedup_key(&self) -> (u64, &TaintTransform, u8, u8) {
        (
            self.path_predicate_hash,
            &self.transform,
            self.known_true,
            self.known_false,
        )
    }
}

/// Merge `incoming` into `existing`, deduping by
/// [`ReturnPathTransform::dedup_key`] and joining abstract contributions on
/// collision. Caps the final vector at [`MAX_RETURN_PATHS`]; overflow is
/// conservatively joined into a single Top-predicate entry.
///
/// The overflow check runs once, after every `incoming` entry has been
/// merged. When it fires, `existing` is replaced by exactly one entry with
/// `path_predicate_hash == 0` built as follows:
///
/// * `transform`: `AddBits` with the union of all add masks if any entry was
///   `AddBits`; otherwise `StripBits` with the intersection of all strip
///   masks, or `Identity` when that intersection is empty (any `Identity`
///   entry forces it empty).
/// * `known_true` / `known_false`: bitwise AND across all entries.
/// * `abstract_contribution`: fold of all entries' contributions using the
///   same `Some`/`None` rules as the collision case.
pub fn merge_return_paths(
    existing: &mut SmallVec<[ReturnPathTransform; 2]>,
    incoming: &[ReturnPathTransform],
) {
    for new_entry in incoming {
        let key = new_entry.dedup_key();
        if let Some(slot) = existing.iter_mut().find(|e| e.dedup_key() == key) {
            // Collision on the dedup key: only the abstract contribution can
            // differ, so that is the only field that needs combining.
            // NOTE(review): a `None` side is treated as "no contribution" and
            // the other side's fact is kept as-is, even though the field doc
            // describes `None` as Top — confirm this is the intended join.
            slot.abstract_contribution = match (
                slot.abstract_contribution.take(),
                &new_entry.abstract_contribution,
            ) {
                (Some(a), Some(b)) => Some(a.join(b)),
                (Some(a), None) => Some(a),
                (None, Some(b)) => Some(b.clone()),
                (None, None) => None,
            };
        } else {
            existing.push(new_entry.clone());
        }
    }
    if existing.len() > MAX_RETURN_PATHS {
        // Placeholder values; `transform`, `known_*` and
        // `abstract_contribution` are all overwritten below.
        let mut joined = ReturnPathTransform {
            transform: TaintTransform::Identity,
            path_predicate_hash: 0,
            known_true: 0,
            known_false: 0,
            abstract_contribution: None,
        };
        // Start from the identity element of each fold: all bits for the
        // intersection, no bits for the union.
        let mut strip_bits = Cap::all();
        let mut add_bits = Cap::empty();
        let mut saw_add = false;
        // NOTE(review): type argument missing, presumably
        // `Option<AbstractValue>` to match `abstract_contribution` — confirm.
        let mut abs: Option = None;
        let mut known_true = u8::MAX;
        let mut known_false = u8::MAX;
        for e in existing.iter() {
            match &e.transform {
                TaintTransform::Identity => {
                    // Identity strips nothing; join intersects to empty.
                    strip_bits = Cap::empty();
                }
                TaintTransform::StripBits(bits) => strip_bits &= *bits,
                TaintTransform::AddBits(bits) => {
                    add_bits |= *bits;
                    saw_add = true;
                }
            }
            // Keep only the predicate bits that hold on every collapsed path.
            known_true &= e.known_true;
            known_false &= e.known_false;
            abs = match (abs, &e.abstract_contribution) {
                (None, None) => None,
                (Some(a), None) => Some(a),
                (None, Some(b)) => Some(b.clone()),
                (Some(a), Some(b)) => Some(a.join(b)),
            };
        }
        // Any AddBits entry takes precedence; the accumulated strip mask is
        // discarded in that case.
        joined.transform = if saw_add {
            TaintTransform::AddBits(add_bits)
        } else if strip_bits.is_empty() {
            TaintTransform::Identity
        } else {
            TaintTransform::StripBits(strip_bits)
        };
        joined.known_true = known_true;
        joined.known_false = known_false;
        joined.abstract_contribution = abs;
        existing.clear();
        existing.push(joined);
    }
}

/// Precise per-parameter SSA-derived function summary.
///
/// Produced by running SSA taint analysis with each parameter individually
/// seeded, then observing which caps survive to return/sink positions.
/// This is more precise than the legacy `FuncSummary` bitmask approach
/// because it can express per-parameter transforms (e.g., "param 0 flows
/// to return but loses HTML_ESCAPE bits").
#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
pub struct SsaFuncSummary {
    /// Per-parameter flows to return value: (param_index, transform).
    pub param_to_return: Vec<(usize, TaintTransform)>,
    /// Per-parameter flows to internal sinks: each entry binds a parameter
    /// index to one or more [`SinkSite`]s inside this function's body.
    ///
    /// Carrying the callee's sink source-location through the summary
    /// enables primary sink-location attribution: cross-file findings
    /// attribute the finding to the actual dangerous instruction rather
    /// than to the call site. Each `SinkSite` records the bits (`cap`) it
    /// contributes, so consumers deriving a coarse `Cap` union across all
    /// sites for a given parameter remain behavior-compatible.
    #[serde(default)]
    pub param_to_sink: Vec<(usize, SmallVec<[SinkSite; 1]>)>,
    /// Source caps introduced regardless of parameters (e.g., function reads env).
    pub source_caps: Cap,
    /// Per-parameter flows to specific internal sink argument positions:
    /// (caller_param_index, sink_arg_position, sink_caps).
    #[serde(default)]
    pub param_to_sink_param: Vec<(usize, usize, Cap)>,
    /// Per-parameter gate-filter cap masks lifted from inner multi-gate
    /// sink call sites.
    ///
    /// When a function body contains a callee whose
    /// [`crate::cfg::CallMeta::gate_filters`] carries more than one entry
    /// (e.g. `fetch` is both an `SSRF` gate on the URL arg and a
    /// `DATA_EXFIL` gate on the body arg), the multi-gate dispatch in
    /// `collect_block_events` cap-narrows the event's
    /// `sink_caps` to the specific gate's `label_caps`. Each
    /// `(param_idx, label_caps)` entry records that this function's
    /// parameter `param_idx` flowed into a gated sink whose narrowed
    /// caps were `label_caps`.
    ///
    /// Cross-file callers consume this list to preserve per-position cap
    /// attribution through wrapper functions: a wrapper
    /// `fn forward(url, body) { fetch(url, {body}) }` records
    /// `[(0, SSRF), (1, DATA_EXFIL)]` so a caller of `forward` splits
    /// URL-tainted SSRF findings from body-tainted DATA_EXFIL findings
    /// instead of conflating both caps onto every parameter.
    ///
    /// `Vec<(param_idx, label_caps)>` is sufficient at cross-file
    /// granularity: the corresponding `payload_args` and
    /// `destination_uses` are intra-file context that does not survive
    /// the function-summary boundary (field idents reference SSA
    /// values from the callee body).
    ///
    /// Empty (the default) for callees whose internal sinks carry zero
    /// or one gate filter — the existing
    /// [`Self::param_to_sink`] /
    /// [`Self::param_to_sink_param`] machinery already records those
    /// cases without per-position cap conflict.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub param_to_gate_filters: Vec<(usize, Cap)>,
    /// Parameter indices whose container identity flows to the return value
    /// (e.g., function returns the same container it received as input).
    ///
    /// Populated by
    /// `extract_container_flow_summary`
    /// and applied at cross-file call sites to propagate the caller's
    /// points-to set for that argument onto the call's return SSA value.
    #[serde(default)]
    // NOTE(review): `Vec` carries no element type here; presumably
    // `Vec<usize>` since the doc describes parameter indices — confirm.
    pub param_container_to_return: Vec,
    /// `(src_param, container_param)` pairs: `src_param`'s taint is stored
    /// into `container_param`'s container contents inside this function
    /// (e.g., `fn storeInto(value, arr) { arr.push(value); }` → `[(0, 1)]`).
    ///
    /// Populated by
    /// `extract_container_flow_summary`
    /// and applied at cross-file call sites by writing the caller's taint on
    /// the `src_param` argument into the heap objects pointed to by the
    /// `container_param` argument.
    #[serde(default)]
    pub param_to_container_store: Vec<(usize, usize)>,
    /// Inferred return type of the function, when determinable from constructor
    /// calls or type annotations. Enables cross-file type-qualified resolution.
    #[serde(default)]
    // NOTE(review): type argument missing; presumably `Option<TypeKind>`
    // (`TypeKind` is imported above but not otherwise named in code) — confirm.
    pub return_type: Option,
    /// Abstract domain fact for the return value.
    /// When present, callers can use this to seed the return SSA value's
    /// abstract state for cross-procedural interval/string analysis.
    #[serde(default)]
    // NOTE(review): type argument missing; presumably `Option<AbstractValue>` — confirm.
    pub return_abstract: Option,
    /// Internal source taint flows to a call of parameter N with these caps.
    /// Detects callback patterns like `fn apply(f: F) { let x = source(); f(x); }`
    /// where the function invokes a callback parameter with tainted data.
    #[serde(default)]
    pub source_to_callback: Vec<(usize, Cap)>,
    /// How receiver (`self`/`this`) taint flows to the return value.
    /// `None` when receiver taint does not reach the return. Matches the
    /// semantics of `param_to_return`'s `TaintTransform` for positional params.
    #[serde(default)]
    // NOTE(review): type argument missing; presumably `Option<TaintTransform>`
    // given the doc comment above — confirm.
    pub receiver_to_return: Option,
    /// Caps that the receiver's taint reaches in internal sinks.
    /// Empty when the receiver is not used as a sink payload inside the body.
    #[serde(default)]
    pub receiver_to_sink: Cap,
    /// Per-parameter abstract-domain transfer channels.
    ///
    /// Each entry `(param_index, transfer)` describes how a caller-known
    /// abstract value at that parameter maps to the function's return
    /// abstract value. At cross-file call sites the caller applies each
    /// transfer to the corresponding argument's abstract state and joins
    /// the results (then `meet`s with [`Self::return_abstract`]) to
    /// synthesise the return abstract value, recovering interval bounds
    /// and string prefixes that would otherwise be lost to the summary's
    /// Top-seeded baseline.
    ///
    /// Empty when no parameter carries useful abstract flow. Individual
    /// entries are omitted when their transfer is "top" (no knowledge),
    /// so on-disk size grows only when the callee really does propagate
    /// abstract values.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub abstract_transfer: Vec<(usize, AbstractTransfer)>,
    /// Per-parameter return-path decomposition.
    ///
    /// When non-empty, supplies finer-grained per-path data than
    /// `param_to_return`. Each parameter maps to up to
    /// [`MAX_RETURN_PATHS`] [`ReturnPathTransform`] entries, one per
    /// distinct path-predicate gate. Callers consult their own predicate
    /// state at the call site and apply only entries whose predicate is
    /// consistent with the caller's validated set, joining the applicable
    /// set into the effective call-site transform.
    ///
    /// Empty when the callee has a single return path (the aggregate
    /// `param_to_return` is already precise), or when extraction
    /// could not derive per-return state (e.g. early-exit probes).
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub param_return_paths: Vec<(usize, SmallVec<[ReturnPathTransform; 2]>)>,
    /// Parameter-granularity points-to summary.
    ///
    /// Records bounded alias edges between parameter positions and the
    /// return value, so summary-path cross-file resolution can spread
    /// taint through object mutations that do not flow via the return.
    /// Empty (the default) for functions whose parameters do not alias
    /// each other or the return value.
    #[serde(default, skip_serializing_if = "PointsToSummary::is_empty")]
    pub points_to: PointsToSummary,
    /// Field-granularity per-parameter points-to
    /// summary. Records which fields the callee reads from / writes
    /// to on each parameter, so cross-file resolution can spread
    /// taint through field-level mutations the callee performs on
    /// caller-supplied objects.
    ///
    /// Default-empty (most functions don't field-mutate their params)
    /// and elided from serialised output via `skip_serializing_if` so
    /// older summaries without this field deserialise cleanly without migration.
    /// Built by extraction in `summary_extract.rs` when the per-body
    /// [`crate::pointer::PointsToFacts`] are available
    /// (`NYX_POINTER_ANALYSIS=1`); empty otherwise.
    #[serde(default, skip_serializing_if = "FieldPointsToSummary::is_empty")]
    pub field_points_to: FieldPointsToSummary,
    /// Per-return-path abstract [`PathFact`] decomposition.
    ///
    /// When non-empty, supplies per-predicate-gate facts finer than the
    /// aggregate `return_abstract.path` (which is the join over all
    /// return blocks and loses narrowing on any block whose rv is Top).
    /// Each entry records the fact on one distinct predicate gate and,
    /// when the rv is a structural one-arg variant constructor, the
    /// inner fact so match-arm-sensitive callers can pick the arm-
    /// specific fact.
    ///
    /// Empty for callees whose return blocks produce no non-Top fact,
    /// or whose single return path makes the aggregate already precise.
    /// Cross-file callers that cannot pick a specific path fall back to
    /// joining the entries, equivalent to the pre-decomposition
    /// behaviour.
    #[serde(default, skip_serializing_if = "SmallVec::is_empty")]
    pub return_path_facts: SmallVec<[PathFactReturnEntry; 2]>,
    /// Per-call-site receiver-type info: `(call_ordinal, container_name)`.
    ///
    /// Populated during SSA lowering (`lower_all_functions_from_bodies`)
    /// when type-fact analysis can resolve a method call's receiver SSA
    /// value to a concrete [`crate::ssa::type_facts::TypeKind`] with a
    /// non-empty [`crate::ssa::type_facts::TypeKind::container_name`].
    ///
    /// Consumed by [`crate::callgraph::build_call_graph`] to feed
    /// `CalleeQuery.receiver_type` for the matching ordinal, letting
    /// the call graph narrow indirect method-call edges to only those
    /// targets whose defining container matches the inferred type.
    /// Strictly additive: an empty list means today's name-only
    /// resolution applies unchanged.
    ///
    /// Ordinal here is the per-function `CallMeta.call_ordinal` shared
    /// with [`crate::summary::CalleeSite::ordinal`] so the two tables
    /// can be joined by ordinal at call-graph build time.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub typed_call_receivers: Vec<(u32, String)>,
    /// Parameter indices whose taint flow to the return value is fully
    /// validated by a dominating predicate (regex allowlist, type check,
    /// validation call, etc.) on every return path inside the function.
    ///
    /// At a call site, each tainted argument passed to a position in
    /// this list — and the call's own return value — are marked
    /// `validated_must` / `validated_may` in the caller's SSA taint
    /// state, the same way an inline `if (!regex.test(x)) throw` would
    /// validate the surviving branch. Sound because the call only
    /// returns normally on the validating arm; if validation failed,
    /// control would not reach the post-call instruction.
    ///
    /// Populated by
    /// `extract_ssa_func_summary`
    /// when a per-parameter probe shows the parameter's `var_name` in
    /// `validated_must` at every return block of the helper. Empty
    /// (the default) for helpers that do not validate any parameter.
    /// Closes the validated-flow propagation gap that left
    /// CVE-2026-25544 (Payload `sanitizeValue` SQL injection) detecting
    /// on both vulnerable and patched code.
    #[serde(default, skip_serializing_if = "SmallVec::is_empty")]
    pub validated_params_to_return: SmallVec<[usize; 2]>,
    /// Phase-10 Next.js entry-point classification. Mirrors
    /// [`crate::summary::FuncSummary::entry_kind`] — recorded on the
    /// SSA summary so cross-file consumers don't have to consult the
    /// coarse `FuncSummary` to know whether the callee is an entry
    /// point. `None` for ordinary helpers.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    // NOTE(review): type argument missing and not recoverable from this file;
    // take it from the declaration of `FuncSummary::entry_kind`.
    pub entry_kind: Option,
}

/// A per-return-path [`PathFact`] entry.
///
/// `SsaFuncSummary.param_return_paths` decomposes taint transforms by
/// predicate hash; `PathFactReturnEntry` records the matching decomposition
/// for the abstract [`PathFact`] on the return SSA value. Each entry keys
/// on the same `predicate_hash` so the two vectors can be joined at the
/// call site.
///
/// `variant_inner_fact` captures the *inner* fact when the return value on
/// this path is a recognised one-argument variant constructor (e.g.
/// `Some(s)` / `Ok(s)`). Callers that destructure the call result via a
/// `match` or `if let` use the inner fact in the matching arm instead of
/// the outer wrapper's fact. `None` when the return rv is not a variant
/// wrapper.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct PathFactReturnEntry {
    /// Deterministic hash of the predicate gate at this return block.
    /// `0` = unguarded return. Shares the same hash space as
    /// [`ReturnPathTransform::path_predicate_hash`] so the two tables
    /// align per-path.
    pub predicate_hash: u64,
    /// `PredicateSummary::known_true` bits that hold on every path into
    /// this return (same encoding as
    /// [`ReturnPathTransform::known_true`]).
    pub known_true: u8,
    /// `PredicateSummary::known_false` bits at this return.
    pub known_false: u8,
    /// The return value's [`PathFact`] on this path.
    pub path_fact: PathFact,
    /// Inner [`PathFact`] when the rv on this path is a one-arg variant
    /// constructor; [`None`] otherwise. Match-arm-sensitive callers
    /// consume this in the variant-binding arm.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    // NOTE(review): type argument missing; presumably `Option<PathFact>`, which
    // is what `dedup_key`'s `Option<&PathFact>` return type implies — confirm.
    pub variant_inner_fact: Option,
}

impl PathFactReturnEntry {
    /// Dedup key. Two entries with identical `(predicate_hash,
    /// path_fact, variant_inner_fact)` describe the same abstract return
    /// under the same predicate gate and collapse losslessly.
    pub fn dedup_key(&self) -> (u64, &PathFact, Option<&PathFact>) {
        (
            self.predicate_hash,
            &self.path_fact,
            self.variant_inner_fact.as_ref(),
        )
    }
}

/// Maximum per-summary [`PathFactReturnEntry`] count.
///
/// Mirrors [`MAX_RETURN_PATHS`]: a return-heavy callee pays bounded space
/// while still preserving the typical 2-4 path decomposition.
pub const MAX_PATH_FACT_RETURN_ENTRIES: usize = 8;

/// Merge `incoming` into `existing`, keyed by `predicate_hash`.
///
/// The lookup compares `predicate_hash` only (it does not call
/// [`PathFactReturnEntry::dedup_key`]): an incoming entry whose hash matches
/// an existing one is always joined into it (component-wise fact join, and
/// bitwise AND of `known_true` / `known_false`), so the resulting entry
/// over-approximates both paths. Entries with a new hash are appended.
///
/// Caps the result at [`MAX_PATH_FACT_RETURN_ENTRIES`]; the check runs once
/// after all `incoming` entries are merged, and overflow collapses
/// everything into a single entry with `predicate_hash == 0` whose fact is
/// the join of every entry's fact.
pub fn merge_path_fact_return_paths(
    existing: &mut SmallVec<[PathFactReturnEntry; 2]>,
    incoming: &[PathFactReturnEntry],
) {
    for new_entry in incoming {
        if let Some(slot) = existing
            .iter_mut()
            .find(|e| e.predicate_hash == new_entry.predicate_hash)
        {
            // Same predicate gate: join facts (component-wise over-approximation).
            slot.path_fact = slot.path_fact.join(&new_entry.path_fact);
            // NOTE(review): when only one side has an inner fact it is kept
            // unchanged rather than widened — confirm that is intended.
            slot.variant_inner_fact = match (
                slot.variant_inner_fact.take(),
                &new_entry.variant_inner_fact,
            ) {
                (Some(a), Some(b)) => Some(a.join(b)),
                (Some(a), None) => Some(a),
                (None, Some(b)) => Some(b.clone()),
                (None, None) => None,
            };
            // Intersect known_true / known_false: only bits proved on
            // every path into this predicate gate survive.
            slot.known_true &= new_entry.known_true;
            slot.known_false &= new_entry.known_false;
        } else {
            existing.push(new_entry.clone());
        }
    }
    if existing.len() > MAX_PATH_FACT_RETURN_ENTRIES {
        // Fold every entry into one, starting from the join identity
        // (`PathFact::bottom()`) and all-ones masks for the AND folds.
        let mut joined_fact = PathFact::bottom();
        // NOTE(review): type argument missing, presumably `Option<PathFact>`
        // to match `variant_inner_fact` — confirm.
        let mut joined_inner: Option = None;
        let mut kt = u8::MAX;
        let mut kf = u8::MAX;
        for e in existing.iter() {
            joined_fact = joined_fact.join(&e.path_fact);
            joined_inner = match (joined_inner, &e.variant_inner_fact) {
                (None, None) => None,
                (Some(a), None) => Some(a),
                (None, Some(b)) => Some(b.clone()),
                (Some(a), Some(b)) => Some(a.join(b)),
            };
            kt &= e.known_true;
            kf &= e.known_false;
        }
        existing.clear();
        existing.push(PathFactReturnEntry {
            predicate_hash: 0,
            known_true: kt,
            known_false: kf,
            path_fact: joined_fact,
            variant_inner_fact: joined_inner,
        });
    }
}

/// Union-merge two `param_return_paths` lists keyed by parameter
/// index. Each parameter keeps its own deduped [`ReturnPathTransform`] list,
/// joining abstract contributions on collision and enforcing the
/// [`MAX_RETURN_PATHS`] cap. Used by merge paths that combine summaries
/// across iterations or files (SSA summaries are currently last-writer-wins
/// in `GlobalSummaries`, but this helper is the entry point future union
/// paths should call so per-path semantics stay centralised).
pub fn union_param_return_paths(
    existing: &mut Vec<(usize, SmallVec<[ReturnPathTransform; 2]>)>,
    incoming: &[(usize, SmallVec<[ReturnPathTransform; 2]>)],
) {
    for (idx, paths) in incoming {
        if let Some((_, slot)) = existing.iter_mut().find(|(i, _)| *i == *idx) {
            merge_return_paths(slot, paths);
        } else {
            // New parameter index: route through `merge_return_paths` rather
            // than cloning `paths` directly, so the incoming list is itself
            // deduped and capped before it is stored.
            let mut fresh: SmallVec<[ReturnPathTransform; 2]> = SmallVec::new();
            merge_return_paths(&mut fresh, paths);
            existing.push((*idx, fresh));
        }
    }
}

impl SsaFuncSummary {
    /// Per-parameter union of [`Cap`] bits across every [`SinkSite`] recorded
    /// for that parameter.
    ///
    /// Returns one `(param_index, caps)` pair per `param_to_sink` entry, in
    /// the same order, with `caps` being the bitwise OR of every site's own
    /// `cap`. This is the backward-compatible view that pre-`SinkSite`
    /// consumers (resolver, taint engine) still rely on.
    pub fn param_to_sink_caps(&self) -> Vec<(usize, Cap)> {
        self.param_to_sink
            .iter()
            .map(|(idx, sites)| {
                let caps = sites.iter().fold(Cap::empty(), |acc, s| acc | s.cap);
                (*idx, caps)
            })
            .collect()
    }

    /// Total [`Cap`] bits reached across every parameter's recorded sink sites.
    ///
    /// Returns `Cap::empty()` when `param_to_sink` has no sites.
    pub fn total_param_sink_caps(&self) -> Cap {
        self.param_to_sink
            .iter()
            .flat_map(|(_, sites)| sites.iter())
            .fold(Cap::empty(), |acc, s| acc | s.cap)
    }
}