//! Parameter-granularity points-to summaries.
//!
//! Captures the subset of intra-procedural alias behaviour that matters
//! at cross-file call sites: which parameters' heap/field writes are
//! observable to the caller through *another* parameter's alias, and
//! which parameters flow identity to the return value.
//!
//! ## Scope
//!
//! This is **intentionally not** a whole-program points-to analysis.
//! Nyx already has bounded intra-procedural heap tracking
//! ([`crate::ssa::heap`]); this module bridges the cross-file cliff by recording
//! a small, bounded alias graph between parameter positions and the return
//! value, then replaying it at summary-resolution time.
//!
//! ## Edge model
//!
//! Edges are directed `AliasEdge { source, target, kind }`:
//!
//! * `Source(Param(i)) → Target(Param(j))`: the callee stores data
//!   derived from parameter `i` into a field/element of parameter `j`.
//!   Mutation is observable to the caller through its argument for `j`.
//! * `Source(Param(i)) → Target(Return)`: the return value aliases
//!   parameter `i`'s heap identity. Adds heap-level precision on top of
//!   the coarser [`crate::summary::ssa_summary::TaintTransform::Identity`] view already carried in
//!   [`crate::summary::ssa_summary::SsaFuncSummary::param_to_return`].
//!
//! `MustAlias` is intentionally omitted: the ROI on
//! must-alias inference for cross-file summaries is low, and the soundness
//! story for `MayAlias`-only application is straightforward ("take the
//! union").
//!
//! ## Bound and overflow policy
//!
//! Edge count is capped at [`MAX_ALIAS_EDGES`]. When a callee's alias
//! graph exceeds the cap, the summary records `overflow = true` and
//! callers treat the function as "any tainted parameter may spread to
//! every other parameter and to the return", the conservative
//! greatest-lower-bound over the alias lattice.

use serde::{Deserialize, Serialize};
use smallvec::SmallVec;

/// Identity of one endpoint in an alias edge.
///
/// Parameters are identified by their 0-based positional index as reported
/// by [`crate::ssa::ir::SsaOp::Param`]; the implicit receiver (`self`/`this`)
/// is handled outside this table and is deliberately not representable here.
/// `Return` denotes the function's return SSA value, one per function, so
/// no further qualifier is needed.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
pub enum AliasPosition {
    /// Positional parameter, 0-based. Receiver is excluded.
    Param(u32),
    /// The function's return value (union of every `Terminator::Return`).
    Return,
}

/// Strength of an alias edge.
///
/// Only [`AliasKind::MayAlias`] is emitted: the analysis over-approximates
/// identity-level aliasing rather than proving must-alias. The kind is
/// kept as an enum so a future extension that distinguishes the two can
/// slot in without migrating on-disk data.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
pub enum AliasKind {
    /// Under some execution, the two positions may reference the same
    /// heap object. Callers applying the edge take the *union* of
    /// points-to / taint at the source into the target.
    MayAlias,
}

/// A single directed alias edge.
///
/// `(source, target)` are order-sensitive: data flows from `source` to
/// `target` at the callee. Callers apply each edge by reading their
/// argument / return abstraction for `source` and propagating into
/// `target`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub struct AliasEdge {
    /// Position the data flows from.
    pub source: AliasPosition,
    /// Position the data flows into.
    pub target: AliasPosition,
    /// Strength of the alias relation between the two positions.
    pub kind: AliasKind,
}

/// Maximum edges retained per [`PointsToSummary`].
///
/// Chosen so typical callees (≤ 4 parameters, one return, a handful of
/// field writes) fit without approximation while pathological graphs
/// still terminate the analysis in bounded time.
Overflow triggers the /// [`PointsToSummary::overflow`] fallback instead of silently dropping /// edges, so callers can reason about soundness. pub const MAX_ALIAS_EDGES: usize = 8; /// Parameter-granularity alias summary persisted in /// [`crate::summary::ssa_summary::SsaFuncSummary`]. /// /// The summary is empty by default, functions without any parameter / /// return aliasing (pure transformers, sinks that consume but don't /// mutate their arguments) carry no edges and cost nothing on disk. /// /// When the callee's alias graph exceeds [`MAX_ALIAS_EDGES`], extraction /// sets [`overflow = true`](Self::overflow) and callers must treat every /// parameter as reaching every other parameter and the return. This is /// the conservative fallback for bounded alias analysis. #[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)] pub struct PointsToSummary { /// Bounded edge list, deduped by `(source, target, kind)`. The /// `#[serde(default)]` attribute lets summaries pre-dating points-to /// tracking deserialise cleanly (no edges). #[serde(default, skip_serializing_if = "SmallVec::is_empty")] pub edges: SmallVec<[AliasEdge; 4]>, /// Conservative fallback flag, set when extraction hit /// [`MAX_ALIAS_EDGES`] and refused to drop any edge silently. When /// `true`, callers treat the callee as "every parameter may alias /// every other parameter and the return value". #[serde(default, skip_serializing_if = "core::ops::Not::not")] pub overflow: bool, /// At least one return path produces a *fresh* container allocation , /// a container literal (`[]`, `{}`) or a known container constructor /// call (`new Map()`, `list()`, …) that does not trace back to any /// parameter. When this is `true` the caller synthesises a fresh /// [`crate::ssa::heap::HeapObjectId`] keyed on the call's SSA value /// and seeds it into `dynamic_pts`, so later container operations on /// the call result (e.g. 
`bag[0]`, `fillBag(bag, …)`) can find a heap /// cell to read from or store into. /// /// Closes the factory-pattern cross-file gap, `const bag = makeBag()` /// followed by `fillBag(bag, env)` and `exec(bag[0])`, by giving the /// caller's heap analysis a stable identity to attach stores to. /// Combines freely with `Param(i) → Return` edges: a mixed-return /// function (one branch returns a param, another returns a fresh /// allocation) emits both and the caller joins the two points-to /// sets. #[serde(default, skip_serializing_if = "core::ops::Not::not")] pub returns_fresh_alloc: bool, } impl PointsToSummary { /// Empty summary, no aliasing, no overflow. Equivalent to /// [`Self::default`] but explicit at call sites. pub fn empty() -> Self { Self::default() } /// Whether this summary adds any information over the default "no /// aliasing" interpretation. Used by extraction to decide whether /// the field should be persisted or left empty. pub fn is_empty(&self) -> bool { self.edges.is_empty() && !self.overflow && !self.returns_fresh_alloc } /// Insert an edge, preserving dedup and the bounded-size invariant. /// /// Returns `true` when the edge was added, `false` when it was a /// duplicate or when the cap triggered an overflow. The caller can /// ignore the return, the summary always remains in a valid state. pub fn insert(&mut self, source: AliasPosition, target: AliasPosition, kind: AliasKind) { if self.overflow { return; } let edge = AliasEdge { source, target, kind, }; if self.edges.contains(&edge) { return; } if self.edges.len() >= MAX_ALIAS_EDGES { self.overflow = true; // Keep the existing edge list, a consumer that still reads // the vector gets a strict *subset* of the sound over- // approximation conveyed by `overflow`. Correctness is // owned by the overflow flag; the residual edges are purely // diagnostic. return; } self.edges.push(edge); } /// Union two summaries, merging edges and OR-ing the overflow / /// fresh-alloc flags. 
Respects the [`MAX_ALIAS_EDGES`] cap via the /// same overflow promotion used by [`Self::insert`]. pub fn merge(&mut self, other: &Self) { self.returns_fresh_alloc |= other.returns_fresh_alloc; if other.overflow { self.overflow = true; return; } for edge in &other.edges { self.insert(edge.source, edge.target, edge.kind); } } /// Parameter indices referenced by any edge in this summary. Used by /// `ssa_summary_fits_arity` to confirm the summary /// does not reference a parameter beyond the key's declared arity /// (which would indicate a synthetic-param mis-attribution in /// extraction). pub fn max_param_index(&self) -> Option { let mut max: Option = None; for edge in &self.edges { if let AliasPosition::Param(i) = edge.source { max = Some(max.map_or(i, |m| m.max(i))); } if let AliasPosition::Param(i) = edge.target { max = Some(max.map_or(i, |m| m.max(i))); } } max } } #[cfg(test)] mod tests { use super::*; #[test] fn empty_summary_is_noop() { let s = PointsToSummary::empty(); assert!(s.is_empty()); assert!(!s.overflow); assert_eq!(s.edges.len(), 0); } #[test] fn insert_dedups() { let mut s = PointsToSummary::empty(); s.insert( AliasPosition::Param(0), AliasPosition::Param(1), AliasKind::MayAlias, ); s.insert( AliasPosition::Param(0), AliasPosition::Param(1), AliasKind::MayAlias, ); assert_eq!(s.edges.len(), 1); } #[test] fn insert_overflows_at_cap() { let mut s = PointsToSummary::empty(); for i in 0..(MAX_ALIAS_EDGES as u32) { s.insert( AliasPosition::Param(i), AliasPosition::Return, AliasKind::MayAlias, ); } assert_eq!(s.edges.len(), MAX_ALIAS_EDGES); assert!(!s.overflow); s.insert( AliasPosition::Param(99), AliasPosition::Return, AliasKind::MayAlias, ); assert!(s.overflow); assert_eq!(s.edges.len(), MAX_ALIAS_EDGES); } #[test] fn merge_propagates_overflow() { let mut a = PointsToSummary::empty(); let mut b = PointsToSummary::empty(); b.overflow = true; a.merge(&b); assert!(a.overflow); } #[test] fn max_param_index_tracks_both_endpoints() { let mut s = 
PointsToSummary::empty(); s.insert( AliasPosition::Param(0), AliasPosition::Param(3), AliasKind::MayAlias, ); s.insert( AliasPosition::Param(1), AliasPosition::Return, AliasKind::MayAlias, ); assert_eq!(s.max_param_index(), Some(3)); } #[test] fn serde_round_trip_is_stable() { let mut s = PointsToSummary::empty(); s.insert( AliasPosition::Param(0), AliasPosition::Param(1), AliasKind::MayAlias, ); s.insert( AliasPosition::Param(2), AliasPosition::Return, AliasKind::MayAlias, ); let json = serde_json::to_string(&s).unwrap(); let back: PointsToSummary = serde_json::from_str(&json).unwrap(); assert_eq!(s, back); } #[test] fn serde_default_decodes_empty_object() { let back: PointsToSummary = serde_json::from_str("{}").unwrap(); assert!(back.is_empty()); } #[test] fn returns_fresh_alloc_is_not_empty() { let mut s = PointsToSummary::empty(); assert!(s.is_empty()); s.returns_fresh_alloc = true; assert!(!s.is_empty()); } #[test] fn merge_propagates_fresh_alloc_flag() { let mut a = PointsToSummary::empty(); let mut b = PointsToSummary::empty(); b.returns_fresh_alloc = true; a.merge(&b); assert!(a.returns_fresh_alloc); } #[test] fn returns_fresh_alloc_roundtrips() { let mut s = PointsToSummary::empty(); s.returns_fresh_alloc = true; let json = serde_json::to_string(&s).unwrap(); let back: PointsToSummary = serde_json::from_str(&json).unwrap(); assert!(back.returns_fresh_alloc); assert_eq!(s, back); } } // ── field-granularity points-to summary ────────────── /// Maximum field names retained per parameter in [`FieldPointsToSummary`]. /// /// Mirror of [`MAX_ALIAS_EDGES`]. Bounds on-disk + cross-file work /// while leaving room for typical helpers (a handful of fields each). pub const MAX_FIELDS_PER_PARAM: usize = 8; /// field-granularity per-parameter points-to summary. /// /// Records, for each positional parameter index, the set of field /// **names** read from and written to inside the callee body. 
Names /// (not [`crate::ssa::ir::FieldId`]) are persisted because field IDs /// are body-local, the per-body [`crate::ssa::ir::FieldInterner`] /// reassigns IDs across files. Callers re-intern through their own /// body's interner before consulting `field_taint` cells. /// /// The receiver (`self` / `this`) uses sentinel index [`usize::MAX`] /// in the outer `Vec` so positional params and the receiver share the /// same indexing convention as `SsaFuncSummary::receiver_to_*` /// (separate channel). /// /// Empty by default, functions that don't read or write any field on /// their parameters carry no entries and cost nothing on disk. #[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)] pub struct FieldPointsToSummary { /// `(param_index, field_names_read)`, the callee projected each /// listed field on a value derived from `param_index` somewhere /// in its body. Sorted, deduped per-entry. #[serde(default, skip_serializing_if = "Vec::is_empty")] pub param_field_reads: Vec<(u32, SmallVec<[String; 2]>)>, /// `(param_index, field_names_written)`, the callee assigned to /// each listed field on a value derived from `param_index`. #[serde(default, skip_serializing_if = "Vec::is_empty")] pub param_field_writes: Vec<(u32, SmallVec<[String; 2]>)>, /// Set when the read/write graph hit /// [`MAX_FIELDS_PER_PARAM`] for any parameter. Callers seeing /// `overflow=true` treat each parameter as reading/writing every /// field on every other parameter, the conservative greatest /// lower bound that preserves soundness. 
#[serde(default, skip_serializing_if = "core::ops::Not::not")] pub overflow: bool, } impl FieldPointsToSummary { pub fn empty() -> Self { Self::default() } pub fn is_empty(&self) -> bool { self.param_field_reads.is_empty() && self.param_field_writes.is_empty() && !self.overflow } fn insert_into( list: &mut Vec<(u32, SmallVec<[String; 2]>)>, param: u32, field: &str, overflow: &mut bool, ) { let entry = match list.iter_mut().find(|(p, _)| *p == param) { Some(e) => &mut e.1, None => { list.push((param, SmallVec::new())); &mut list.last_mut().unwrap().1 } }; if entry.iter().any(|s| s == field) { return; } if entry.len() >= MAX_FIELDS_PER_PARAM { *overflow = true; return; } entry.push(field.to_string()); entry.sort(); } /// Record a field READ on parameter `param`. Bounded by /// [`MAX_FIELDS_PER_PARAM`] per parameter; over-cap inserts trip /// `overflow`. pub fn add_read(&mut self, param: u32, field: &str) { if self.overflow { return; } let mut overflow = false; Self::insert_into(&mut self.param_field_reads, param, field, &mut overflow); if overflow { self.overflow = true; } } /// Record a field WRITE on parameter `param`. Mirror of [`Self::add_read`]. pub fn add_write(&mut self, param: u32, field: &str) { if self.overflow { return; } let mut overflow = false; Self::insert_into(&mut self.param_field_writes, param, field, &mut overflow); if overflow { self.overflow = true; } } /// Union with `other`. Overflow propagates per /// [`PointsToSummary::merge`]'s semantics, once a callee is /// "any field on any parameter", merging cannot recover precision. 
pub fn merge(&mut self, other: &Self) { if other.overflow { self.overflow = true; return; } for (p, fields) in &other.param_field_reads { for f in fields { self.add_read(*p, f); } } for (p, fields) in &other.param_field_writes { for f in fields { self.add_write(*p, f); } } } } #[cfg(test)] mod field_summary_tests { use super::*; #[test] fn empty_summary_round_trips() { let s = FieldPointsToSummary::empty(); assert!(s.is_empty()); let json = serde_json::to_string(&s).unwrap(); let back: FieldPointsToSummary = serde_json::from_str(&json).unwrap(); assert_eq!(s, back); } #[test] fn add_read_dedupes_and_sorts() { let mut s = FieldPointsToSummary::empty(); s.add_read(0, "name"); s.add_read(0, "id"); s.add_read(0, "name"); // duplicate let entry = s.param_field_reads.iter().find(|(p, _)| *p == 0).unwrap(); assert_eq!(entry.1.as_slice(), &["id".to_string(), "name".to_string()]); } #[test] fn distinct_params_get_distinct_entries() { let mut s = FieldPointsToSummary::empty(); s.add_write(0, "cache"); s.add_write(1, "log"); assert_eq!(s.param_field_writes.len(), 2); } #[test] fn overflow_trips_at_cap() { let mut s = FieldPointsToSummary::empty(); for i in 0..(MAX_FIELDS_PER_PARAM + 4) { s.add_read(0, &format!("field{i}")); } assert!(s.overflow); } #[test] fn merge_unions_disjoint_keys() { let mut a = FieldPointsToSummary::empty(); let mut b = FieldPointsToSummary::empty(); a.add_read(0, "alpha"); b.add_read(1, "beta"); a.merge(&b); assert!(a.param_field_reads.iter().any(|(p, _)| *p == 0)); assert!(a.param_field_reads.iter().any(|(p, _)| *p == 1)); } #[test] fn merge_propagates_overflow() { let mut a = FieldPointsToSummary::empty(); let mut b = FieldPointsToSummary::empty(); b.overflow = true; a.merge(&b); assert!(a.overflow); } #[test] fn round_trip_preserves_entries() { let mut s = FieldPointsToSummary::empty(); s.add_read(0, "name"); s.add_write(1, "cache"); s.add_write(1, "log"); let json = serde_json::to_string(&s).unwrap(); let back: FieldPointsToSummary = 
serde_json::from_str(&json).unwrap(); assert_eq!(s, back); } #[test] fn empty_serializes_as_empty_object() { let s = FieldPointsToSummary::empty(); let json = serde_json::to_string(&s).unwrap(); assert_eq!(json, "{}"); let back: FieldPointsToSummary = serde_json::from_str("{}").unwrap(); assert!(back.is_empty()); } }