//! Formal points-to / heap analysis for SSA-based taint propagation. //! //! Provides bounded intra-procedural points-to analysis: each container //! allocation creates an abstract `HeapObjectId`, assignments and phi nodes //! propagate points-to sets, and the taint engine uses heap state to track //! taint through container store/load operations with proper aliasing. //! //! Key design: //! - HeapObjectId is keyed by allocation-site SsaValue (deterministic, zero-cost) //! - PointsToSet is bounded to `analysis.engine.max_pointsto` entries //! (default 32, widening on overflow, see [`effective_max_pointsto`]). //! Overflow drops emit an [`crate::engine_notes::EngineNote::PointsToTruncated`] //! note and increment `POINTSTO_TRUNCATION_COUNT` so operators can //! tell when the cap is firing on their corpus. //! - HeapState tracks per-(heap-object, slot) taint (monotone lattice) //! - HeapSlot::Index(u64) for constant-index container access (proven by const propagation) //! - HeapSlot::Elements for coarse element access (push/pop, dynamic index, overflow) //! - Intraprocedural: constant-index sensitivity is guaranteed when const propagation proves it //! - Interprocedural: best-effort, relies on correct const_values threading (already handled) //! - Unknown/unproven indices fall back to Elements (conservative) //! - Analysis runs as a pre-pass in optimize_ssa(), like type_facts #![allow(clippy::collapsible_if, clippy::unnecessary_map_or)] use crate::cfg::Cfg; use crate::labels::{Cap, bare_method_name}; use crate::ssa::ir::*; use crate::ssa::pointsto::{ContainerOp, classify_container_op}; use crate::symbol::Lang; use crate::taint::domain::TaintOrigin; use serde::{Deserialize, Serialize}; use smallvec::SmallVec; use std::collections::HashMap; // Heap origin cap used to be `const MAX_HEAP_ORIGINS: usize = 4`, now // governed by the shared `analysis.engine.max_origins` knob through // `crate::taint::ssa_transfer::push_origin_bounded`. 
Unifying the two // lattices behind a single tunable means operators raise *one* value to // eliminate silent truncation everywhere. /// Test-only override for the points-to cap. `cap = 0` restores the /// runtime-configured default (see [`effective_max_pointsto`]). Used to /// force `PointsToTruncated` emission on small fixtures. static MAX_POINTSTO_OVERRIDE: std::sync::atomic::AtomicUsize = std::sync::atomic::AtomicUsize::new(0); /// Total heap-object members dropped by [`PointsToSet`] truncation since /// the last reset. Captured from `insert`/`union` so tests (and /// operators inspecting scan output) can detect truncation events that /// don't propagate to a finding, e.g. when the cap is tight enough /// that no taint flow survives to emit a sink event. pub(crate) static POINTSTO_TRUNCATION_COUNT: std::sync::atomic::AtomicUsize = std::sync::atomic::AtomicUsize::new(0); /// Test-only hook: pin the effective `max_pointsto` cap. `cap = 0` /// clears the override. #[doc(hidden)] pub fn set_max_pointsto_override(cap: usize) { MAX_POINTSTO_OVERRIDE.store(cap, std::sync::atomic::Ordering::Relaxed); } /// Resolve the live points-to cap. /// /// Precedence (highest first): /// 1. The test-only `MAX_POINTSTO_OVERRIDE` atomic /// ([`set_max_pointsto_override`]). /// 2. The runtime `analysis.engine.max_pointsto` option, which itself /// resolves through the installed runtime → `NYX_MAX_POINTSTO` → /// [`crate::utils::analysis_options::DEFAULT_MAX_POINTSTO`]. /// /// The runtime path clamps to /// [`crate::utils::analysis_options::MIN_MAX_POINTSTO`] on ingest, so the /// engine always carries at least one heap-object slot. pub fn effective_max_pointsto() -> usize { let o = MAX_POINTSTO_OVERRIDE.load(std::sync::atomic::Ordering::Relaxed); if o != 0 { return o; } crate::utils::analysis_options::current().max_pointsto as usize } /// Observability: total heap-object members dropped by the points-to /// analysis since the most recent [`reset_points_to_observability`] /// call. 
Monotone-increasing; `0` when no truncation happened. pub fn points_to_truncation_count() -> usize { POINTSTO_TRUNCATION_COUNT.load(std::sync::atomic::Ordering::Relaxed) } /// Reset the points-to truncation counter. Intended for tests. pub fn reset_points_to_observability() { POINTSTO_TRUNCATION_COUNT.store(0, std::sync::atomic::Ordering::Relaxed); } /// Record `dropped` truncated heap-object members on the counter and on /// the active body's engine-note collector. Called from the two /// [`PointsToSet`] cap sites (insert/union). fn record_pointsto_truncation(dropped: usize) { if dropped == 0 { return; } POINTSTO_TRUNCATION_COUNT.fetch_add(dropped, std::sync::atomic::Ordering::Relaxed); crate::taint::ssa_transfer::record_engine_note( crate::engine_notes::EngineNote::PointsToTruncated { dropped: dropped as u32, }, ); } /// Maximum distinct `Index(n)` slots tracked per heap object. /// When exceeded, all indexed entries for that object collapse into `Elements`. pub const MAX_TRACKED_INDICES: usize = 8; // ── HeapSlot ──────────────────────────────────────────────────────────── /// Distinguishes constant-index container access from coarse element access. /// /// `Elements` is the conservative default, all container elements merge into /// a single taint. `Index(n)` provides per-index precision when the index is /// provably a non-negative integer constant (via the function's own const /// propagation pass). /// /// Ordering: `Elements < Index(0) < Index(1) < …` so that sorted merge-join /// in `HeapState` groups all slots for the same `HeapObjectId` together. #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] pub enum HeapSlot { /// Coarse union of all elements (push/pop, dynamic index, overflow). Elements, /// Constant-index slot, proven by the current function's const propagation. 
Index(u64), } // ── HeapObjectId ───────────────────────────────────────────────────────── /// Abstract heap object identity, keyed by the SSA value of the allocation site. /// /// When `items = []` creates SsaValue(5), the heap object is HeapObjectId(SsaValue(5)). /// SSA guarantees each definition is unique, so heap identity is deterministic. #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)] pub struct HeapObjectId(pub SsaValue); // ── PointsToSet ────────────────────────────────────────────────────────── /// Bounded set of heap objects that an SSA value may reference. /// /// Stored as a sorted, deduped SmallVec for O(n) merge-join, matching the /// pattern used by SsaTaintState.values. #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] pub struct PointsToSet { ids: SmallVec<[HeapObjectId; 4]>, } impl PointsToSet { /// Empty points-to set. pub fn empty() -> Self { Self { ids: SmallVec::new(), } } /// Points-to set containing a single heap object. pub fn singleton(id: HeapObjectId) -> Self { let mut ids = SmallVec::new(); ids.push(id); Self { ids } } /// Bounded union of two points-to sets. /// /// Truncates to [`effective_max_pointsto`]; any heap-object member /// that would be admitted after the cap is reached is dropped and /// counted via `record_pointsto_truncation`. Truncation is /// deterministic: the merge proceeds in sorted order, so survivors /// are always the smallest `HeapObjectId`s across the two inputs. 
pub fn union(&self, other: &Self) -> Self { let cap = effective_max_pointsto(); let mut result: SmallVec<[HeapObjectId; 4]> = SmallVec::new(); let mut dropped = 0usize; let (mut i, mut j) = (0, 0); while i < self.ids.len() && j < other.ids.len() { match self.ids[i].cmp(&other.ids[j]) { std::cmp::Ordering::Less => { if result.len() < cap { result.push(self.ids[i]); } else { dropped += 1; } i += 1; } std::cmp::Ordering::Greater => { if result.len() < cap { result.push(other.ids[j]); } else { dropped += 1; } j += 1; } std::cmp::Ordering::Equal => { if result.len() < cap { result.push(self.ids[i]); } else { // The same id is in both sides; count as a single drop. dropped += 1; } i += 1; j += 1; } } } while i < self.ids.len() { if result.len() < cap { result.push(self.ids[i]); } else { dropped += 1; } i += 1; } while j < other.ids.len() { if result.len() < cap { result.push(other.ids[j]); } else { dropped += 1; } j += 1; } record_pointsto_truncation(dropped); Self { ids: result } } /// Insert a single HeapObjectId, maintaining sorted order and bound. /// /// When the set is already at [`effective_max_pointsto`], the new id /// is dropped and the drop is counted via /// `record_pointsto_truncation`. pub fn insert(&mut self, id: HeapObjectId) { match self.ids.binary_search(&id) { Ok(_) => {} // already present Err(pos) => { if self.ids.len() < effective_max_pointsto() { self.ids.insert(pos, id); } else { record_pointsto_truncation(1); } } } } pub fn contains(&self, id: HeapObjectId) -> bool { self.ids.binary_search(&id).is_ok() } pub fn is_empty(&self) -> bool { self.ids.is_empty() } pub fn len(&self) -> usize { self.ids.len() } pub fn iter(&self) -> impl Iterator { self.ids.iter() } } // ── HeapTaint ──────────────────────────────────────────────────────────── /// Taint stored inside an abstract heap object (container contents). 
#[derive(Clone, Debug, PartialEq, Eq)] pub struct HeapTaint { pub caps: Cap, pub origins: SmallVec<[TaintOrigin; 2]>, } impl HeapTaint { /// Monotone merge: OR caps, union origins (bounded, deterministic). /// /// Delegates to /// [`crate::taint::ssa_transfer::push_origin_bounded`] so the heap /// and SSA taint lattices share one origin cap /// (`analysis.engine.max_origins`) and one truncation-notification /// path. fn merge(&mut self, caps: Cap, origins: &[TaintOrigin]) { self.caps |= caps; for orig in origins { crate::taint::ssa_transfer::push_origin_bounded(&mut self.origins, *orig); } } /// Union two HeapTaint values (for load_set). fn union(&self, other: &HeapTaint) -> HeapTaint { let mut result = self.clone(); result.merge(other.caps, &other.origins); result } } // ── HeapState ──────────────────────────────────────────────────────────── /// Per-(heap-object, slot) taint state: abstract contents of all tracked /// containers with optional per-index precision. /// /// Sorted by `(HeapObjectId, HeapSlot)` for O(n) merge-join (lattice join = /// union of per-slot taint), matching the `SsaTaintState` pattern. /// /// Load semantics: /// - `load(id, Index(n))`: union of `(id, Index(n))` and `(id, Elements)` , /// indexed reads also see taint from dynamic/push operations. /// - `load(id, Elements)`: union of `(id, Elements)` and ALL `(id, Index(*))` /// entries, dynamic reads conservatively see all indexed taint. #[derive(Clone, Debug, PartialEq, Eq)] pub struct HeapState { entries: SmallVec<[((HeapObjectId, HeapSlot), HeapTaint); 4]>, } impl HeapState { pub fn empty() -> Self { Self { entries: SmallVec::new(), } } pub fn is_empty(&self) -> bool { self.entries.is_empty() } /// Store taint into a specific (object, slot) pair (monotone merge). /// /// If storing to `Index(n)` would exceed `MAX_TRACKED_INDICES` distinct /// indices for this object, all `Index(*)` entries for the object are /// collapsed into `Elements` and the new taint is merged there instead. 
pub fn store(&mut self, id: HeapObjectId, slot: HeapSlot, caps: Cap, origins: &[TaintOrigin]) { if caps.is_empty() { return; } // Check index overflow before inserting a new Index slot. if let HeapSlot::Index(_) = slot { let key = (id, slot); let already_present = self.entries.binary_search_by_key(&key, |(k, _)| *k).is_ok(); if !already_present { let index_count = self.count_indices_for(id); if index_count >= MAX_TRACKED_INDICES { // Collapse: merge all Index(*) entries into Elements, // then store the new taint into Elements too. self.collapse_indices_to_elements(id); self.store_raw(id, HeapSlot::Elements, caps, origins); return; } } } self.store_raw(id, slot, caps, origins); } /// Raw store without overflow checking. fn store_raw(&mut self, id: HeapObjectId, slot: HeapSlot, caps: Cap, origins: &[TaintOrigin]) { let key = (id, slot); match self.entries.binary_search_by_key(&key, |(k, _)| *k) { Ok(idx) => { self.entries[idx].1.merge(caps, origins); } Err(idx) => { let mut o: SmallVec<[TaintOrigin; 2]> = SmallVec::new(); for orig in origins { crate::taint::ssa_transfer::push_origin_bounded(&mut o, *orig); } self.entries .insert(idx, (key, HeapTaint { caps, origins: o })); } } } /// Store taint into all heap objects in a points-to set. pub fn store_set( &mut self, pts: &PointsToSet, slot: HeapSlot, caps: Cap, origins: &[TaintOrigin], ) { for &id in pts.iter() { self.store(id, slot, caps, origins); } } /// Load taint from a specific (object, slot) pair. /// /// - `Index(n)`: returns union of `(id, Index(n))` ∪ `(id, Elements)`. /// - `Elements`: returns union of `(id, Elements)` ∪ all `(id, Index(*))`. pub fn load(&self, id: HeapObjectId, slot: HeapSlot) -> Option { match slot { HeapSlot::Index(n) => { // Union specific index with Elements. 
let idx_taint = self.load_raw(id, HeapSlot::Index(n)); let elem_taint = self.load_raw(id, HeapSlot::Elements); match (idx_taint, elem_taint) { (Some(a), Some(b)) => Some(a.union(b)), (Some(a), None) => Some(a.clone()), (None, Some(b)) => Some(b.clone()), (None, None) => None, } } HeapSlot::Elements => { // Union Elements with ALL Index(*) entries for this object. let mut result: Option = None; for ((eid, _slot), taint) in &self.entries { if *eid == id { result = Some(match result { Some(r) => r.union(taint), None => taint.clone(), }); } } result } } } /// Direct lookup of a single (id, slot) entry without cross-slot unioning. fn load_raw(&self, id: HeapObjectId, slot: HeapSlot) -> Option<&HeapTaint> { let key = (id, slot); self.entries .binary_search_by_key(&key, |(k, _)| *k) .ok() .map(|idx| &self.entries[idx].1) } /// Load and union taint from all heap objects in a points-to set. pub fn load_set(&self, pts: &PointsToSet, slot: HeapSlot) -> Option { let mut result: Option = None; for &id in pts.iter() { if let Some(ht) = self.load(id, slot) { result = Some(match result { Some(r) => r.union(&ht), None => ht, }); } } result } /// Lattice join: merge-join by (HeapObjectId, HeapSlot), union per-slot taint. pub fn join(&self, other: &Self) -> Self { let mut result = SmallVec::new(); let (mut i, mut j) = (0, 0); while i < self.entries.len() && j < other.entries.len() { let (ka, ta) = &self.entries[i]; let (kb, tb) = &other.entries[j]; match ka.cmp(kb) { std::cmp::Ordering::Less => { result.push((*ka, ta.clone())); i += 1; } std::cmp::Ordering::Greater => { result.push((*kb, tb.clone())); j += 1; } std::cmp::Ordering::Equal => { result.push((*ka, ta.union(tb))); i += 1; j += 1; } } } while i < self.entries.len() { result.push(self.entries[i].clone()); i += 1; } while j < other.entries.len() { result.push(other.entries[j].clone()); j += 1; } Self { entries: result } } /// Lattice ordering: every entry in self must be present in other with subset caps. 
pub fn leq(&self, other: &Self) -> bool { let mut j = 0; for (ka, ta) in &self.entries { loop { if j >= other.entries.len() { return false; } let (kb, _) = &other.entries[j]; match ka.cmp(kb) { std::cmp::Ordering::Equal => break, std::cmp::Ordering::Greater => j += 1, std::cmp::Ordering::Less => return false, } } let (_, tb) = &other.entries[j]; if (ta.caps & !tb.caps) != Cap::empty() { return false; } j += 1; } true } /// Count distinct `Index(*)` slots for a given object. fn count_indices_for(&self, id: HeapObjectId) -> usize { self.entries .iter() .filter(|((eid, slot), _)| *eid == id && matches!(slot, HeapSlot::Index(_))) .count() } /// Collapse all `Index(*)` entries for `id` into `Elements`. fn collapse_indices_to_elements(&mut self, id: HeapObjectId) { // Collect taint from all Index entries for this object. let mut merged_caps = Cap::empty(); let mut merged_origins: SmallVec<[TaintOrigin; 2]> = SmallVec::new(); self.entries.retain(|((eid, slot), taint)| { if *eid == id && matches!(slot, HeapSlot::Index(_)) { merged_caps |= taint.caps; for orig in &taint.origins { crate::taint::ssa_transfer::push_origin_bounded(&mut merged_origins, *orig); } false // remove this entry } else { true // keep } }); // Merge into Elements. if !merged_caps.is_empty() { self.store_raw(id, HeapSlot::Elements, merged_caps, &merged_origins); } } } // ── PointsToResult ─────────────────────────────────────────────────────── /// Result of intra-procedural points-to analysis. #[derive(Clone, Debug, Serialize, Deserialize)] pub struct PointsToResult { pts: HashMap, } impl PointsToResult { pub fn empty() -> Self { Self { pts: HashMap::new(), } } /// Look up the points-to set for an SSA value. pub fn get(&self, v: SsaValue) -> Option<&PointsToSet> { self.pts.get(&v) } pub fn is_empty(&self) -> bool { self.pts.is_empty() } } // ── Allocation site detection ──────────────────────────────────────────── /// Public re-export wrapper for container-literal detection. 
/// /// Called from [`crate::ssa::param_points_to`] to decide whether a return /// path traces to a fresh allocation. Keeps the internal helper private /// while exposing the classification via a stable name. pub fn is_container_literal_public(text: &str) -> bool { is_container_literal(text) } /// Check if a const literal text represents a container/collection literal. fn is_container_literal(text: &str) -> bool { let t = text.trim(); // Empty or non-empty array/list literals if t.starts_with('[') && t.ends_with(']') { return true; } // Empty or non-empty object/dict/map/set literals if t.starts_with('{') && t.ends_with('}') { return true; } // `new Array(...)`, `new Map(...)`, etc. if t.starts_with("new ") { return true; } // Python dict()/list()/set() as literals if t == "dict()" || t == "list()" || t == "set()" { return true; } false } /// Check if a callee creates a new container (constructor/factory). pub fn is_container_constructor(callee: &str, lang: Lang) -> bool { // Extract last segment after '.' 
or '::' (whichever comes last) let after_dot = bare_method_name(callee); let suffix = after_dot.rsplit("::").next().unwrap_or(after_dot); let suffix_lower = suffix.to_ascii_lowercase(); match lang { Lang::JavaScript | Lang::TypeScript => { matches!(suffix, "Array" | "Map" | "Set" | "WeakMap" | "WeakSet") } Lang::Python => matches!( suffix, "list" | "dict" | "set" | "frozenset" | "defaultdict" | "OrderedDict" | "deque" | "Counter" ), Lang::Java => matches!( suffix, "ArrayList" | "LinkedList" | "HashMap" | "TreeMap" | "HashSet" | "TreeSet" | "Vector" | "Stack" | "ArrayDeque" | "PriorityQueue" | "ConcurrentHashMap" | "LinkedHashMap" | "LinkedHashSet" | "CopyOnWriteArrayList" ), Lang::Go => callee == "make", Lang::Ruby => { matches!(suffix, "new") && { // Only for known container types let prefix = callee.rsplit('.').nth(1).unwrap_or(""); matches!(prefix, "Array" | "Hash" | "Set") } } Lang::Php => matches!(suffix, "array"), Lang::C | Lang::Cpp => matches!( suffix_lower.as_str(), "vector" | "map" | "set" | "unordered_map" | "unordered_set" | "list" | "deque" | "queue" | "stack" | "multimap" | "multiset" | "priority_queue" ), Lang::Rust => { // Vec::new, HashMap::new, etc. suffix == "new" && callee.contains("::") && { let type_part = callee.rsplit("::").nth(1).unwrap_or(""); matches!( type_part, "Vec" | "HashMap" | "HashSet" | "BTreeMap" | "BTreeSet" | "VecDeque" | "LinkedList" | "BinaryHeap" ) } } } } // ── Points-to analysis ─────────────────────────────────────────────────── /// Run intra-procedural points-to analysis on an SSA body. /// /// Identifies allocation sites, propagates points-to sets through assignments /// and phi nodes, and returns a result that the taint engine can query. /// /// Runs as a pre-pass in optimize_ssa(), after type_facts. 
pub fn analyze_points_to(body: &SsaBody, _cfg: &Cfg, lang: Option) -> PointsToResult { let mut pts: HashMap = HashMap::new(); // Pass 1: identify allocation sites and seed points-to sets for block in &body.blocks { for inst in block.phis.iter().chain(block.body.iter()) { match &inst.op { SsaOp::Const(Some(text)) if is_container_literal(text) => { pts.insert(inst.value, PointsToSet::singleton(HeapObjectId(inst.value))); } SsaOp::Call { callee, .. } => { if let Some(l) = lang { if is_container_constructor(callee, l) { pts.insert( inst.value, PointsToSet::singleton(HeapObjectId(inst.value)), ); } } } _ => {} } } } if pts.is_empty() { return PointsToResult::empty(); } // Pass 2: forward propagation with fixed-point for phis (max 10 rounds) let max_rounds = 10; for _ in 0..max_rounds { let mut changed = false; for block in &body.blocks { // Process phis for inst in &block.phis { if let SsaOp::Phi(operands) = &inst.op { let mut merged = PointsToSet::empty(); for (_, v) in operands { if let Some(p) = pts.get(v) { merged = merged.union(p); } } if !merged.is_empty() { let old = pts.get(&inst.value); if old.map_or(true, |o| o != &merged) { let existing = pts.entry(inst.value).or_insert_with(PointsToSet::empty); let new = existing.union(&merged); if &new != existing { *existing = new; changed = true; } } } } } // Process body for inst in &block.body { match &inst.op { SsaOp::Assign(uses) => { let mut merged = PointsToSet::empty(); for &u in uses { if let Some(p) = pts.get(&u) { merged = merged.union(p); } } if !merged.is_empty() { let old = pts.get(&inst.value); if old.map_or(true, |o| o != &merged) { pts.insert(inst.value, merged); changed = true; } } } SsaOp::Call { callee, args, receiver, .. } => { // For container Store ops that return the container (Go append), // propagate receiver pts to result. if let Some(l) = lang { if let Some(ContainerOp::Store { .. 
}) = classify_container_op(callee, l) { // Find receiver pts let recv_pts = receiver.and_then(|rv| pts.get(&rv).cloned()).or_else(|| { // Go append: arg 0 is the slice if l == Lang::Go { args.first() .and_then(|a| a.first()) .and_then(|&v| pts.get(&v).cloned()) } else { // JS-style: find receiver from dotted callee let dot_pos = callee.rfind('.')?; let recv_name = &callee[..dot_pos]; for arg_group in args { for &v in arg_group { if let Some(def) = body.value_defs.get(v.0 as usize) { if def.var_name.as_deref() == Some(recv_name) { return pts.get(&v).cloned(); } } } } None } }); // For Go append, result gets receiver pts if l == Lang::Go && receiver.is_none() { if let Some(rp) = recv_pts { let old = pts.get(&inst.value); if old.map_or(true, |o| o != &rp) { pts.insert(inst.value, rp); changed = true; } } } } } } _ => {} } } } if !changed { break; } } PointsToResult { pts } } // ── Tests ──────────────────────────────────────────────────────────────── #[cfg(test)] mod tests { use super::*; use crate::labels::SourceKind; use petgraph::graph::NodeIndex; use std::sync::Mutex; /// Serializes tests that touch [`MAX_POINTSTO_OVERRIDE`] or /// [`POINTSTO_TRUNCATION_COUNT`]. Both are process-wide atomics, so /// parallel tests would otherwise race on the counter and the /// override. 
static TEST_GUARD: Mutex<()> = Mutex::new(()); fn origin(idx: u32) -> TaintOrigin { TaintOrigin { node: NodeIndex::new(idx as usize), source_kind: SourceKind::UserInput, source_span: None, } } // ── PointsToSet tests ──────────────────────────────────────────── #[test] fn pts_singleton() { let s = PointsToSet::singleton(HeapObjectId(SsaValue(0))); assert_eq!(s.len(), 1); assert!(s.contains(HeapObjectId(SsaValue(0)))); assert!(!s.contains(HeapObjectId(SsaValue(1)))); } #[test] fn pts_union() { let a = PointsToSet::singleton(HeapObjectId(SsaValue(1))); let b = PointsToSet::singleton(HeapObjectId(SsaValue(3))); let c = a.union(&b); assert_eq!(c.len(), 2); assert!(c.contains(HeapObjectId(SsaValue(1)))); assert!(c.contains(HeapObjectId(SsaValue(3)))); } #[test] fn pts_union_dedup() { let a = PointsToSet::singleton(HeapObjectId(SsaValue(1))); let b = PointsToSet::singleton(HeapObjectId(SsaValue(1))); let c = a.union(&b); assert_eq!(c.len(), 1); } #[test] fn pts_union_overflow() { let _g = TEST_GUARD.lock().unwrap_or_else(|e| e.into_inner()); // Tight override so the test runs deterministically against the // configured default. set_max_pointsto_override(8); reset_points_to_observability(); // Build a set with `cap` entries. let cap = effective_max_pointsto(); let mut big = PointsToSet::empty(); for i in 0..cap as u32 { big.insert(HeapObjectId(SsaValue(i))); } assert_eq!(big.len(), cap); // Union with one more should not grow, and should count the drop. let extra = PointsToSet::singleton(HeapObjectId(SsaValue(100))); let result = big.union(&extra); assert_eq!(result.len(), cap); assert_eq!(points_to_truncation_count(), 1); set_max_pointsto_override(0); reset_points_to_observability(); } #[test] fn pts_insert_overflow_counts_drops() { let _g = TEST_GUARD.lock().unwrap_or_else(|e| e.into_inner()); set_max_pointsto_override(4); reset_points_to_observability(); let mut s = PointsToSet::empty(); // First 4 fit. 
for i in 0..4u32 { s.insert(HeapObjectId(SsaValue(i))); } assert_eq!(s.len(), 4); assert_eq!(points_to_truncation_count(), 0); // Next 3 are dropped; counter records each drop. for i in 4..7u32 { s.insert(HeapObjectId(SsaValue(i))); } assert_eq!(s.len(), 4); assert_eq!(points_to_truncation_count(), 3); // Duplicates of existing entries are *not* drops. s.insert(HeapObjectId(SsaValue(0))); assert_eq!(points_to_truncation_count(), 3); set_max_pointsto_override(0); reset_points_to_observability(); } #[test] fn pts_union_overflow_counts_exact_drops() { let _g = TEST_GUARD.lock().unwrap_or_else(|e| e.into_inner()); set_max_pointsto_override(4); reset_points_to_observability(); // a = {0,1,2,3}, b = {4,5,6}, union wants 7 members; cap is 4 // so 3 members are dropped. Deterministic order: smallest // ids survive. let mut a = PointsToSet::empty(); for i in 0..4u32 { a.insert(HeapObjectId(SsaValue(i))); } let mut b = PointsToSet::empty(); for i in 4..7u32 { b.insert(HeapObjectId(SsaValue(i))); } // Sanity: the pre-union sets should not themselves have triggered // truncation (both are ≤ cap). 
assert_eq!(points_to_truncation_count(), 0); let c = a.union(&b); assert_eq!(c.len(), 4); assert!(c.contains(HeapObjectId(SsaValue(0)))); assert!(c.contains(HeapObjectId(SsaValue(3)))); assert!(!c.contains(HeapObjectId(SsaValue(6)))); assert_eq!(points_to_truncation_count(), 3); set_max_pointsto_override(0); reset_points_to_observability(); } #[test] fn pts_reset_observability_clears_counter() { let _g = TEST_GUARD.lock().unwrap_or_else(|e| e.into_inner()); set_max_pointsto_override(2); reset_points_to_observability(); let mut s = PointsToSet::empty(); s.insert(HeapObjectId(SsaValue(0))); s.insert(HeapObjectId(SsaValue(1))); s.insert(HeapObjectId(SsaValue(2))); // dropped assert_eq!(points_to_truncation_count(), 1); reset_points_to_observability(); assert_eq!(points_to_truncation_count(), 0); set_max_pointsto_override(0); } #[test] fn pts_effective_cap_defaults_to_runtime() { let _g = TEST_GUARD.lock().unwrap_or_else(|e| e.into_inner()); // With no override, the cap comes from the installed runtime // (which defaults to `DEFAULT_MAX_POINTSTO` in tests). 
set_max_pointsto_override(0); assert_eq!( effective_max_pointsto(), crate::utils::analysis_options::DEFAULT_MAX_POINTSTO as usize ); set_max_pointsto_override(5); assert_eq!(effective_max_pointsto(), 5); set_max_pointsto_override(0); } #[test] fn pts_empty() { let e = PointsToSet::empty(); assert!(e.is_empty()); assert_eq!(e.len(), 0); } #[test] fn pts_insert() { let mut s = PointsToSet::empty(); s.insert(HeapObjectId(SsaValue(5))); s.insert(HeapObjectId(SsaValue(2))); s.insert(HeapObjectId(SsaValue(5))); // dup assert_eq!(s.len(), 2); // Sorted order let ids: Vec<_> = s.iter().collect(); assert_eq!(ids[0].0, SsaValue(2)); assert_eq!(ids[1].0, SsaValue(5)); } // ── HeapState tests ────────────────────────────────────────────── #[test] fn heap_store_and_load() { let mut h = HeapState::empty(); let id = HeapObjectId(SsaValue(0)); h.store(id, HeapSlot::Elements, Cap::HTML_ESCAPE, &[origin(0)]); let t = h.load(id, HeapSlot::Elements).unwrap(); assert_eq!(t.caps, Cap::HTML_ESCAPE); assert_eq!(t.origins.len(), 1); } #[test] fn heap_store_monotone_merge() { let mut h = HeapState::empty(); let id = HeapObjectId(SsaValue(0)); h.store(id, HeapSlot::Elements, Cap::HTML_ESCAPE, &[origin(0)]); h.store(id, HeapSlot::Elements, Cap::SQL_QUERY, &[origin(1)]); let t = h.load(id, HeapSlot::Elements).unwrap(); assert_eq!(t.caps, Cap::HTML_ESCAPE | Cap::SQL_QUERY); assert_eq!(t.origins.len(), 2); } #[test] fn heap_store_empty_caps_noop() { let mut h = HeapState::empty(); h.store( HeapObjectId(SsaValue(0)), HeapSlot::Elements, Cap::empty(), &[origin(0)], ); assert!(h.is_empty()); } #[test] fn heap_load_missing() { let h = HeapState::empty(); assert!( h.load(HeapObjectId(SsaValue(0)), HeapSlot::Elements) .is_none() ); } #[test] fn heap_load_set_unions() { let mut h = HeapState::empty(); h.store( HeapObjectId(SsaValue(0)), HeapSlot::Elements, Cap::HTML_ESCAPE, &[origin(0)], ); h.store( HeapObjectId(SsaValue(1)), HeapSlot::Elements, Cap::SQL_QUERY, &[origin(1)], ); let mut pts = 
PointsToSet::empty();
        pts.insert(HeapObjectId(SsaValue(0)));
        pts.insert(HeapObjectId(SsaValue(1)));

        let t = h.load_set(&pts, HeapSlot::Elements).unwrap();
        assert_eq!(t.caps, Cap::HTML_ESCAPE | Cap::SQL_QUERY);
        assert_eq!(t.origins.len(), 2);
    }

    /// `load_set` through an empty points-to set returns `None`, even
    /// though the heap itself holds a tainted entry.
    #[test]
    fn heap_load_set_empty_pts() {
        let obj = HeapObjectId(SsaValue(0));
        let mut heap = HeapState::empty();
        heap.store(obj, HeapSlot::Elements, Cap::HTML_ESCAPE, &[origin(0)]);

        let no_targets = PointsToSet::empty();
        let loaded = heap.load_set(&no_targets, HeapSlot::Elements);
        assert!(loaded.is_none());
    }

    /// `store_set` writes the taint to every member of the points-to set.
    #[test]
    fn heap_store_set() {
        let first = HeapObjectId(SsaValue(0));
        let second = HeapObjectId(SsaValue(1));

        let mut heap = HeapState::empty();
        let mut targets = PointsToSet::empty();
        targets.insert(first);
        targets.insert(second);
        heap.store_set(&targets, HeapSlot::Elements, Cap::HTML_ESCAPE, &[origin(0)]);

        // Each member must be individually loadable with the stored cap.
        for obj in [first, second] {
            let taint = heap.load(obj, HeapSlot::Elements).unwrap();
            assert_eq!(taint.caps, Cap::HTML_ESCAPE);
        }
    }

    /// Joining two heaps unions caps on a shared object and keeps entries
    /// that exist on only one side.
    #[test]
    fn heap_join() {
        let shared = HeapObjectId(SsaValue(0));
        let rhs_only = HeapObjectId(SsaValue(1));

        let mut lhs = HeapState::empty();
        lhs.store(shared, HeapSlot::Elements, Cap::HTML_ESCAPE, &[origin(0)]);

        let mut rhs = HeapState::empty();
        rhs.store(shared, HeapSlot::Elements, Cap::SQL_QUERY, &[origin(1)]);
        rhs.store(rhs_only, HeapSlot::Elements, Cap::FILE_IO, &[origin(2)]);

        let joined = lhs.join(&rhs);

        let shared_taint = joined.load(shared, HeapSlot::Elements).unwrap();
        assert_eq!(shared_taint.caps, Cap::HTML_ESCAPE | Cap::SQL_QUERY);

        let rhs_only_taint = joined.load(rhs_only, HeapSlot::Elements).unwrap();
        assert_eq!(rhs_only_taint.caps, Cap::FILE_IO);
    }

    /// `leq` holds from the heap with fewer caps to the one with more,
    /// and not the other way round.
    #[test]
    fn heap_leq() {
        let obj = HeapObjectId(SsaValue(0));
        let wider_caps = Cap::HTML_ESCAPE | Cap::SQL_QUERY;

        let mut smaller = HeapState::empty();
        smaller.store(obj, HeapSlot::Elements, Cap::HTML_ESCAPE, &[origin(0)]);

        let mut larger = HeapState::empty();
        larger.store(obj, HeapSlot::Elements, wider_caps, &[origin(0)]);

        assert!(smaller.leq(&larger)); // smaller ⊆ larger
        assert!(!larger.leq(&smaller)); // larger ⊄ smaller
    }

    /// A heap with an entry is not `leq` an empty heap, while the empty
    /// heap is `leq` the populated one.
    #[test]
    fn heap_leq_missing_entry() {
        let obj = HeapObjectId(SsaValue(5));

        let mut populated = HeapState::empty();
        populated.store(obj, HeapSlot::Elements, Cap::HTML_ESCAPE, &[origin(0)]);

        let vacant = HeapState::empty();

        assert!(!populated.leq(&vacant)); // entry is missing on the right
        assert!(vacant.leq(&populated)); // the empty heap is always ⊆
    }

    // ── HeapSlot indexed tests ──────────────────────────────────────

    /// Taint stored at `Index(0)` is visible at `Index(0)` and absent
    /// from `Index(1)`.
    #[test]
    fn heap_indexed_store_load_isolation() {
        let obj = HeapObjectId(SsaValue(0));
        let mut heap = HeapState::empty();
        heap.store(obj, HeapSlot::Index(0), Cap::HTML_ESCAPE, &[origin(0)]);

        // The slot that was written carries the taint.
        let written = heap.load(obj, HeapSlot::Index(0)).unwrap();
        assert_eq!(written.caps, Cap::HTML_ESCAPE);

        // A different index has neither its own entry nor an Elements entry.
        let untouched = heap.load(obj, HeapSlot::Index(1));
        assert!(untouched.is_none());
    }

    /// An indexed load also observes taint stored under `Elements`.
    #[test]
    fn heap_indexed_load_unions_with_elements() {
        let obj = HeapObjectId(SsaValue(0));
        let mut heap = HeapState::empty();
        heap.store(obj, HeapSlot::Elements, Cap::SQL_QUERY, &[origin(0)]);

        // Nothing was stored at Index(1); the taint comes from Elements.
        let via_index = heap.load(obj, HeapSlot::Index(1)).unwrap();
        assert_eq!(via_index.caps, Cap::SQL_QUERY);
    }

    /// An `Elements` load sees the union of every indexed slot.
    #[test]
    fn heap_elements_load_unions_all_indices() {
        let obj = HeapObjectId(SsaValue(0));
        let mut heap = HeapState::empty();
        heap.store(obj, HeapSlot::Index(0), Cap::HTML_ESCAPE, &[origin(0)]);
        heap.store(obj, HeapSlot::Index(2), Cap::SQL_QUERY, &[origin(1)]);

        let coarse = heap.load(obj, HeapSlot::Elements).unwrap();
        assert_eq!(coarse.caps, Cap::HTML_ESCAPE | Cap::SQL_QUERY);
    }

    /// With taint at `Index(0)` and a different cap under `Elements`,
    /// `Index(0)` reports both caps and `Index(1)` reports only the
    /// `Elements` cap.
    #[test]
    fn heap_indexed_and_elements_combined() {
        let obj = HeapObjectId(SsaValue(0));
        let mut heap = HeapState::empty();
        heap.store(obj, HeapSlot::Index(0), Cap::HTML_ESCAPE, &[origin(0)]);
        heap.store(obj, HeapSlot::Elements, Cap::FILE_IO, &[origin(1)]);

        let at_zero = heap.load(obj, HeapSlot::Index(0)).unwrap();
        assert_eq!(at_zero.caps, Cap::HTML_ESCAPE | Cap::FILE_IO);

        let at_one = heap.load(obj, HeapSlot::Index(1)).unwrap();
        assert_eq!(at_one.caps, Cap::FILE_IO); // only Elements taint
    }

    /// Storing one index more than `MAX_TRACKED_INDICES` leaves no
    /// indexed entries for the object, and an `Elements` load still
    /// reports every stored cap.
    #[test]
    fn heap_max_tracked_indices_collapse() {
        let obj = HeapObjectId(SsaValue(0));
        let limit = MAX_TRACKED_INDICES as u64;
        let mut heap = HeapState::empty();

        // Occupy exactly MAX_TRACKED_INDICES distinct index slots.
        for slot in 0..limit {
            heap.store(
                obj,
                HeapSlot::Index(slot),
                Cap::HTML_ESCAPE,
                &[origin(slot as u32)],
            );
        }

        // The next distinct index should trigger the collapse into Elements.
        heap.store(obj, HeapSlot::Index(limit), Cap::SQL_QUERY, &[origin(99)]);

        // No Index entries may remain once the collapse has happened.
        let remaining_indices = heap.count_indices_for(obj);
        assert_eq!(remaining_indices, 0);

        // The coarse load still reports all of the taint.
        let collapsed = heap.load(obj, HeapSlot::Elements).unwrap();
        assert!(collapsed.caps.contains(Cap::HTML_ESCAPE));
        assert!(collapsed.caps.contains(Cap::SQL_QUERY));
    }

    // ── is_container_literal tests ───────────────────────────────────

    /// Container-shaped source text is accepted; scalar literals are
    /// rejected.
    #[test]
    fn container_literal_detection() {
        let containers = [
            "[]",
            "[1, 2, 3]",
            "{}",
            "{a: 1}",
            "new Map()",
            "new ArrayList<>()",
            "dict()",
            "list()",
            "set()",
        ];
        for text in containers {
            assert!(is_container_literal(text), "expected a container literal: {}", text);
        }

        let scalars = ["42", "\"hello\"", "true"];
        for text in scalars {
            assert!(!is_container_literal(text), "expected a non-container: {}", text);
        }
    }

    // ── is_container_constructor tests ───────────────────────────────

    /// JavaScript: `Array`, `Map`, `Set` are constructors; `Object` is not.
    #[test]
    fn container_constructor_js() {
        let cases = [("Array", true), ("Map", true), ("Set", true), ("Object", false)];
        for (name, expected) in cases {
            let actual = is_container_constructor(name, Lang::JavaScript);
            assert_eq!(actual, expected, "constructor name: {}", name);
        }
    }

    /// Python: `list`, `dict`, `defaultdict` are constructors; `str` is not.
    #[test]
    fn container_constructor_python() {
        let cases = [("list", true), ("dict", true), ("defaultdict", true), ("str", false)];
        for (name, expected) in cases {
            let actual = is_container_constructor(name, Lang::Python);
            assert_eq!(actual, expected, "constructor name: {}", name);
        }
    }

    /// Java: `ArrayList`, `HashMap`, `ConcurrentHashMap` are constructors;
    /// `String` is not.
    #[test]
    fn container_constructor_java() {
        let cases = [
            ("ArrayList", true),
            ("HashMap", true),
            ("ConcurrentHashMap", true),
            ("String", false),
        ];
        for (name, expected) in cases {
            let actual = is_container_constructor(name, Lang::Java);
            assert_eq!(actual, expected, "constructor name: {}", name);
        }
    }

    /// Go: `make` is a constructor; `new` is not.
    #[test]
    fn container_constructor_go() {
        let cases = [("make", true), ("new", false)];
        for (name, expected) in cases {
            let actual = is_container_constructor(name, Lang::Go);
            assert_eq!(actual, expected, "constructor name: {}", name);
        }
    }

    /// Rust: `Vec::new` and `HashMap::new` are constructors; `String::new`
    /// and a bare `new` are not.
    #[test]
    fn container_constructor_rust() {
        let cases = [
            ("Vec::new", true),
            ("HashMap::new", true),
            ("String::new", false),
            ("new", false),
        ];
        for (name, expected) in cases {
            let actual = is_container_constructor(name, Lang::Rust);
            assert_eq!(actual, expected, "constructor name: {}", name);
        }
    }

    /// C++: `vector`, `std::map`, `unordered_set` are constructors.
    #[test]
    fn container_constructor_cpp() {
        let names = ["vector", "std::map", "unordered_set"];
        for name in names {
            assert!(
                is_container_constructor(name, Lang::Cpp),
                "constructor name: {}",
                name
            );
        }
    }

    // ── PointsToResult tests ─────────────────────────────────────────

    /// An empty `PointsToResult` reports `is_empty()` and returns `None`
    /// from `get`.
    #[test]
    fn pts_result_empty() {
        let result = PointsToResult::empty();
        assert!(result.is_empty());

        let lookup = result.get(SsaValue(0));
        assert!(lookup.is_none());
    }
}