nyx/src/ssa/heap.rs
Eli Peter 1f2bfe76c1
docs: Enhance module documentation across various files for clarity a… (#62)
* docs: Enhance module documentation across various files for clarity and completeness

* fix: Remove unnecessary blank line in build.rs for cleaner code

* docs: Update documentation to improve clarity and consistency in code comments
2026-05-02 17:46:45 -04:00

1350 lines
49 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//! Formal points-to / heap analysis for SSA-based taint propagation.
//!
//! Provides bounded intra-procedural points-to analysis: each container
//! allocation creates an abstract `HeapObjectId`, assignments and phi nodes
//! propagate points-to sets, and the taint engine uses heap state to track
//! taint through container store/load operations with proper aliasing.
//!
//! Key design:
//! - HeapObjectId is keyed by allocation-site SsaValue (deterministic, zero-cost)
//! - PointsToSet is bounded to `analysis.engine.max_pointsto` entries
//! (default 32, widening on overflow, see [`effective_max_pointsto`]).
//! Overflow drops emit an [`crate::engine_notes::EngineNote::PointsToTruncated`]
//! note and increment `POINTSTO_TRUNCATION_COUNT` so operators can
//! tell when the cap is firing on their corpus.
//! - HeapState tracks per-(heap-object, slot) taint (monotone lattice)
//! - HeapSlot::Index(u64) for constant-index container access (proven by const propagation)
//! - HeapSlot::Elements for coarse element access (push/pop, dynamic index, overflow)
//! - Intraprocedural: constant-index sensitivity is guaranteed when const propagation proves it
//! - Interprocedural: best-effort, relies on correct const_values threading (already handled)
//! - Unknown/unproven indices fall back to Elements (conservative)
//! - Analysis runs as a pre-pass in optimize_ssa(), like type_facts
#![allow(clippy::collapsible_if, clippy::unnecessary_map_or)]
use crate::cfg::Cfg;
use crate::labels::{Cap, bare_method_name};
use crate::ssa::ir::*;
use crate::ssa::pointsto::{ContainerOp, classify_container_op};
use crate::symbol::Lang;
use crate::taint::domain::TaintOrigin;
use serde::{Deserialize, Serialize};
use smallvec::SmallVec;
use std::collections::HashMap;
// Heap origin cap used to be `const MAX_HEAP_ORIGINS: usize = 4`, now
// governed by the shared `analysis.engine.max_origins` knob through
// `crate::taint::ssa_transfer::push_origin_bounded`. Unifying the two
// lattices behind a single tunable means operators raise *one* value to
// eliminate silent truncation everywhere.
/// Test-only override for the points-to cap.
///
/// `0` is the "no override" sentinel: [`effective_max_pointsto`] then falls
/// back to the runtime-configured value. Any non-zero value is returned by
/// [`effective_max_pointsto`] unchanged. Set through
/// [`set_max_pointsto_override`]; used to force `PointsToTruncated` emission
/// on small fixtures.
static MAX_POINTSTO_OVERRIDE: std::sync::atomic::AtomicUsize =
std::sync::atomic::AtomicUsize::new(0);
/// Total heap-object members dropped by [`PointsToSet`] truncation since
/// the last reset.
///
/// Incremented by `record_pointsto_truncation` (called from the
/// `insert`/`union` cap sites), read by [`points_to_truncation_count`], and
/// zeroed by [`reset_points_to_observability`]. Kept as a process-wide
/// counter so tests (and operators inspecting scan output) can detect
/// truncation events that don't propagate to a finding, e.g. when the cap
/// is tight enough that no taint flow survives to emit a sink event.
pub(crate) static POINTSTO_TRUNCATION_COUNT: std::sync::atomic::AtomicUsize =
std::sync::atomic::AtomicUsize::new(0);
/// Test-only hook that pins the cap reported by [`effective_max_pointsto`].
///
/// Passing `cap = 0` removes the pin, so the runtime-configured value
/// applies again.
#[doc(hidden)]
pub fn set_max_pointsto_override(cap: usize) {
    use std::sync::atomic::Ordering::Relaxed;

    MAX_POINTSTO_OVERRIDE.store(cap, Relaxed);
}
/// Returns the points-to cap currently in force.
///
/// Resolution order (first match wins):
/// 1. A non-zero test pin stored in `MAX_POINTSTO_OVERRIDE`
///    (see [`set_max_pointsto_override`]).
/// 2. The runtime `analysis.engine.max_pointsto` option, which itself
///    resolves through the installed runtime → `NYX_MAX_POINTSTO` →
///    [`crate::utils::analysis_options::DEFAULT_MAX_POINTSTO`].
///
/// The runtime path clamps to
/// [`crate::utils::analysis_options::MIN_MAX_POINTSTO`] on ingest, so the
/// engine always carries at least one heap-object slot.
pub fn effective_max_pointsto() -> usize {
    use std::sync::atomic::Ordering::Relaxed;

    match MAX_POINTSTO_OVERRIDE.load(Relaxed) {
        // Zero is the "no override" sentinel: defer to the runtime option.
        0 => crate::utils::analysis_options::current().max_pointsto as usize,
        pinned => pinned,
    }
}
/// Observability accessor: number of heap-object members the points-to
/// analysis has dropped since the most recent
/// [`reset_points_to_observability`] call.
///
/// The value only grows between resets and is `0` when no truncation
/// happened.
pub fn points_to_truncation_count() -> usize {
    use std::sync::atomic::Ordering::Relaxed;

    POINTSTO_TRUNCATION_COUNT.load(Relaxed)
}
/// Zeroes the points-to truncation counter. Intended for tests that need
/// a clean baseline before asserting on [`points_to_truncation_count`].
pub fn reset_points_to_observability() {
    use std::sync::atomic::Ordering::Relaxed;

    POINTSTO_TRUNCATION_COUNT.store(0, Relaxed);
}
/// Record `dropped` truncated heap-object members on the global counter
/// and on the active body's engine-note collector.
///
/// Called from the two [`PointsToSet`] cap sites (`insert`/`union`).
/// A `dropped` of zero is a no-op: neither the counter nor the note
/// collector is touched, so callers may invoke this unconditionally.
fn record_pointsto_truncation(dropped: usize) {
    if dropped == 0 {
        return;
    }
    POINTSTO_TRUNCATION_COUNT.fetch_add(dropped, std::sync::atomic::Ordering::Relaxed);
    // The note payload is a `u32`. Saturate instead of using an `as` cast so
    // an oversized count can never wrap around to a small (or zero) value.
    let dropped_for_note = u32::try_from(dropped).unwrap_or(u32::MAX);
    crate::taint::ssa_transfer::record_engine_note(
        crate::engine_notes::EngineNote::PointsToTruncated {
            dropped: dropped_for_note,
        },
    );
}
/// Maximum distinct `Index(n)` slots tracked per heap object.
///
/// Checked by [`HeapState::store`] before a *new* `Index` slot is created:
/// when the object already holds this many indexed slots, all indexed
/// entries for that object collapse into `Elements` and the incoming taint
/// is stored there instead.
pub const MAX_TRACKED_INDICES: usize = 8;
// ── HeapSlot ────────────────────────────────────────────────────────────
/// Distinguishes constant-index container access from coarse element access.
///
/// `Elements` is the conservative default: all container elements merge into
/// a single taint. `Index(n)` provides per-index precision when the index is
/// provably a non-negative integer constant (via the function's own const
/// propagation pass).
///
/// Ordering: `Elements < Index(0) < Index(1) < …` so that sorted merge-join
/// in `HeapState` groups all slots for the same `HeapObjectId` together.
/// This ordering comes from the derived `Ord`, which follows variant
/// declaration order, so `Elements` must stay declared before `Index`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum HeapSlot {
/// Coarse union of all elements (push/pop, dynamic index, overflow).
Elements,
/// Constant-index slot, proven by the current function's const propagation.
Index(u64),
}
// ── HeapObjectId ─────────────────────────────────────────────────────────
/// Abstract heap object identity, keyed by the SSA value of the allocation site.
///
/// When `items = []` creates SsaValue(5), the heap object is HeapObjectId(SsaValue(5)).
/// SSA guarantees each definition is unique, so heap identity is deterministic.
///
/// The derived `Ord` is what keeps [`PointsToSet`] and `HeapState` entries
/// sorted for binary search and merge-join.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
pub struct HeapObjectId(pub SsaValue);
// ── PointsToSet ──────────────────────────────────────────────────────────
/// Bounded set of heap objects that an SSA value may reference.
///
/// Stored as a sorted, deduped SmallVec for O(n) merge-join, matching the
/// pattern used by SsaTaintState.values.
///
/// The bound is applied by `insert` and `union`, which consult
/// [`effective_max_pointsto`] and count every dropped member.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct PointsToSet {
// Invariant: strictly ascending (sorted, no duplicates); `contains` and
// `insert` binary-search this vector and `union` merge-joins it.
ids: SmallVec<[HeapObjectId; 4]>,
}
impl PointsToSet {
    /// Creates a set that references no heap objects.
    pub fn empty() -> Self {
        Self {
            ids: SmallVec::new(),
        }
    }

    /// Creates a set whose only member is `id`.
    pub fn singleton(id: HeapObjectId) -> Self {
        let mut set = Self::empty();
        set.ids.push(id);
        set
    }

    /// Bounded union of `self` and `other`.
    ///
    /// Both inputs are walked in ascending order and merged without
    /// duplicates. Once the result holds [`effective_max_pointsto`]
    /// members, every further distinct member is dropped and counted via
    /// `record_pointsto_truncation` (an id present on both sides counts as
    /// one drop). Because the walk is ordered, the survivors are always the
    /// smallest `HeapObjectId`s across the two inputs.
    pub fn union(&self, other: &Self) -> Self {
        use std::cmp::Ordering;

        let limit = effective_max_pointsto();
        let mut merged: SmallVec<[HeapObjectId; 4]> = SmallVec::new();
        let mut overflow = 0usize;
        let mut lhs = self.ids.iter().copied().peekable();
        let mut rhs = other.ids.iter().copied().peekable();

        loop {
            // Pick the next distinct id in ascending order, advancing
            // whichever side(s) supplied it.
            let next = match (lhs.peek().copied(), rhs.peek().copied()) {
                (None, None) => break,
                (Some(a), None) => {
                    lhs.next();
                    a
                }
                (None, Some(b)) => {
                    rhs.next();
                    b
                }
                (Some(a), Some(b)) => match a.cmp(&b) {
                    Ordering::Less => {
                        lhs.next();
                        a
                    }
                    Ordering::Greater => {
                        rhs.next();
                        b
                    }
                    Ordering::Equal => {
                        lhs.next();
                        rhs.next();
                        a
                    }
                },
            };

            if merged.len() < limit {
                merged.push(next);
            } else {
                overflow += 1;
            }
        }

        record_pointsto_truncation(overflow);
        Self { ids: merged }
    }

    /// Adds `id` to the set, keeping the members sorted and bounded.
    ///
    /// Re-inserting an existing member is a no-op. When the set already
    /// holds [`effective_max_pointsto`] members, a new id is dropped and
    /// the drop is counted via `record_pointsto_truncation`.
    pub fn insert(&mut self, id: HeapObjectId) {
        if let Err(slot) = self.ids.binary_search(&id) {
            if self.ids.len() >= effective_max_pointsto() {
                record_pointsto_truncation(1);
                return;
            }
            self.ids.insert(slot, id);
        }
    }

    /// Returns `true` when `id` is a member of the set.
    pub fn contains(&self, id: HeapObjectId) -> bool {
        self.ids.binary_search(&id).is_ok()
    }

    /// Returns `true` when the set has no members.
    pub fn is_empty(&self) -> bool {
        self.ids.is_empty()
    }

    /// Number of heap objects in the set.
    pub fn len(&self) -> usize {
        self.ids.len()
    }

    /// Iterates over the members in ascending `HeapObjectId` order.
    pub fn iter(&self) -> impl Iterator<Item = &HeapObjectId> {
        self.ids.iter()
    }
}
// ── HeapTaint ────────────────────────────────────────────────────────────
/// Taint stored inside an abstract heap object (container contents).
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct HeapTaint {
/// Capability bits carried by the stored taint; merged with bitwise OR.
pub caps: Cap,
/// Origins contributing to this taint; appended through
/// `push_origin_bounded`, which enforces the shared origin cap.
pub origins: SmallVec<[TaintOrigin; 2]>,
}
impl HeapTaint {
/// Monotone merge: OR caps, union origins (bounded, deterministic).
///
/// Delegates to
/// [`crate::taint::ssa_transfer::push_origin_bounded`] so the heap
/// and SSA taint lattices share one origin cap
/// (`analysis.engine.max_origins`) and one truncation-notification
/// path.
fn merge(&mut self, caps: Cap, origins: &[TaintOrigin]) {
self.caps |= caps;
for orig in origins {
crate::taint::ssa_transfer::push_origin_bounded(&mut self.origins, *orig);
}
}
/// Union two HeapTaint values (for load_set).
fn union(&self, other: &HeapTaint) -> HeapTaint {
let mut result = self.clone();
result.merge(other.caps, &other.origins);
result
}
}
// ── HeapState ────────────────────────────────────────────────────────────
/// Per-(heap-object, slot) taint state: abstract contents of all tracked
/// containers with optional per-index precision.
///
/// Sorted by `(HeapObjectId, HeapSlot)` for O(n) merge-join (lattice join =
/// union of per-slot taint), matching the `SsaTaintState` pattern.
///
/// Load semantics:
/// - `load(id, Index(n))`: union of `(id, Index(n))` and `(id, Elements)`;
///   indexed reads also see taint from dynamic/push operations.
/// - `load(id, Elements)`: union of `(id, Elements)` and ALL `(id, Index(*))`
///   entries; dynamic reads conservatively see all indexed taint.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct HeapState {
// Invariant: strictly ascending by key `(HeapObjectId, HeapSlot)`; the
// store/load paths binary-search it and `join`/`leq` merge-join it.
entries: SmallVec<[((HeapObjectId, HeapSlot), HeapTaint); 4]>,
}
impl HeapState {
    /// Creates a heap state with no tracked taint.
    pub fn empty() -> Self {
        Self {
            entries: SmallVec::new(),
        }
    }

    /// Returns `true` when no (object, slot) pair carries taint.
    pub fn is_empty(&self) -> bool {
        self.entries.is_empty()
    }

    /// Stores taint into one (object, slot) pair (monotone merge).
    ///
    /// Empty `caps` is a no-op. If the store would create a *new*
    /// `Index(n)` slot while the object already tracks
    /// `MAX_TRACKED_INDICES` distinct indices, all `Index(*)` entries for
    /// the object are collapsed into `Elements` and the new taint is merged
    /// there instead.
    pub fn store(
        &mut self,
        id: HeapObjectId,
        slot: HeapSlot,
        caps: Cap,
        origins: &[TaintOrigin],
    ) {
        if caps.is_empty() {
            return;
        }

        // Only a store that opens a brand-new Index slot can overflow the
        // per-object index budget; updates to existing slots never do.
        let opens_new_index =
            matches!(slot, HeapSlot::Index(_)) && self.load_raw(id, slot).is_none();

        if opens_new_index && self.count_indices_for(id) >= MAX_TRACKED_INDICES {
            self.collapse_indices_to_elements(id);
            self.store_raw(id, HeapSlot::Elements, caps, origins);
        } else {
            self.store_raw(id, slot, caps, origins);
        }
    }

    /// Merges taint into exactly `(id, slot)` with no index-overflow check,
    /// inserting a new entry at its sorted position when none exists.
    fn store_raw(&mut self, id: HeapObjectId, slot: HeapSlot, caps: Cap, origins: &[TaintOrigin]) {
        let key = (id, slot);
        match self.entries.binary_search_by_key(&key, |entry| entry.0) {
            Ok(found) => self.entries[found].1.merge(caps, origins),
            Err(insert_at) => {
                let mut fresh = HeapTaint {
                    caps,
                    origins: SmallVec::new(),
                };
                for origin in origins {
                    crate::taint::ssa_transfer::push_origin_bounded(&mut fresh.origins, *origin);
                }
                self.entries.insert(insert_at, (key, fresh));
            }
        }
    }

    /// Stores the same taint into every heap object of a points-to set.
    pub fn store_set(
        &mut self,
        pts: &PointsToSet,
        slot: HeapSlot,
        caps: Cap,
        origins: &[TaintOrigin],
    ) {
        for id in pts.iter().copied() {
            self.store(id, slot, caps, origins);
        }
    }

    /// Loads the taint visible through one (object, slot) pair.
    ///
    /// - `Index(n)`: union of `(id, Index(n))` and `(id, Elements)`.
    /// - `Elements`: union of `(id, Elements)` and all `(id, Index(*))`.
    ///
    /// Returns `None` when nothing relevant is stored for `id`.
    pub fn load(&self, id: HeapObjectId, slot: HeapSlot) -> Option<HeapTaint> {
        match slot {
            HeapSlot::Index(_) => {
                let exact = self.load_raw(id, slot);
                let coarse = self.load_raw(id, HeapSlot::Elements);
                match (exact, coarse) {
                    (Some(indexed), Some(elements)) => Some(indexed.union(elements)),
                    (one, other) => one.or(other).cloned(),
                }
            }
            // Every entry owned by `id` contributes, whatever its slot.
            HeapSlot::Elements => self
                .entries
                .iter()
                .filter(|((owner, _), _)| *owner == id)
                .fold(None, |acc: Option<HeapTaint>, (_, taint)| {
                    Some(match acc {
                        Some(prev) => prev.union(taint),
                        None => taint.clone(),
                    })
                }),
        }
    }

    /// Exact lookup of a single `(id, slot)` entry, without cross-slot
    /// unioning.
    fn load_raw(&self, id: HeapObjectId, slot: HeapSlot) -> Option<&HeapTaint> {
        let key = (id, slot);
        match self.entries.binary_search_by_key(&key, |entry| entry.0) {
            Ok(found) => Some(&self.entries[found].1),
            Err(_) => None,
        }
    }

    /// Loads `slot` from every heap object in `pts` and unions the results.
    ///
    /// Returns `None` when no object in the set has visible taint.
    pub fn load_set(&self, pts: &PointsToSet, slot: HeapSlot) -> Option<HeapTaint> {
        pts.iter()
            .filter_map(|&id| self.load(id, slot))
            .fold(None, |acc: Option<HeapTaint>, loaded| {
                Some(match acc {
                    Some(prev) => prev.union(&loaded),
                    None => loaded,
                })
            })
    }

    /// Lattice join: merge-join on `(HeapObjectId, HeapSlot)`, unioning the
    /// taint of keys present on both sides.
    pub fn join(&self, other: &Self) -> Self {
        use std::cmp::Ordering;

        let (lhs, rhs) = (&self.entries, &other.entries);
        let mut merged: SmallVec<[((HeapObjectId, HeapSlot), HeapTaint); 4]> = SmallVec::new();
        let (mut a, mut b) = (0usize, 0usize);

        while a < lhs.len() && b < rhs.len() {
            match lhs[a].0.cmp(&rhs[b].0) {
                Ordering::Less => {
                    merged.push(lhs[a].clone());
                    a += 1;
                }
                Ordering::Greater => {
                    merged.push(rhs[b].clone());
                    b += 1;
                }
                Ordering::Equal => {
                    merged.push((lhs[a].0, lhs[a].1.union(&rhs[b].1)));
                    a += 1;
                    b += 1;
                }
            }
        }

        // At most one side still has entries; they are already in order.
        merged.extend(lhs[a..].iter().cloned());
        merged.extend(rhs[b..].iter().cloned());

        Self { entries: merged }
    }

    /// Lattice ordering: every entry of `self` must have an entry with the
    /// same key in `other` whose caps are a superset. Origins are not
    /// compared.
    pub fn leq(&self, other: &Self) -> bool {
        let mut candidates = other.entries.iter();
        for (key, mine) in &self.entries {
            // Skip the entries of `other` that sort before `key`. `find`
            // also consumes the entry it stops on, so the next key resumes
            // right after it.
            let theirs = match candidates.find(|(their_key, _)| their_key >= key) {
                Some((their_key, their_taint)) if their_key == key => their_taint,
                _ => return false,
            };
            if (mine.caps & !theirs.caps) != Cap::empty() {
                return false;
            }
        }
        true
    }

    /// Counts the distinct `Index(*)` slots currently stored for `id`.
    fn count_indices_for(&self, id: HeapObjectId) -> usize {
        self.entries
            .iter()
            .filter(|((owner, slot), _)| matches!(slot, HeapSlot::Index(_)) && *owner == id)
            .count()
    }

    /// Removes every `Index(*)` entry of `id` and folds their taint into
    /// the object's `Elements` slot.
    fn collapse_indices_to_elements(&mut self, id: HeapObjectId) {
        let mut folded_caps = Cap::empty();
        let mut folded_origins: SmallVec<[TaintOrigin; 2]> = SmallVec::new();

        self.entries.retain(|entry| {
            let ((owner, slot), taint) = &*entry;
            if *owner != id || !matches!(slot, HeapSlot::Index(_)) {
                return true; // unrelated entry: keep
            }
            folded_caps |= taint.caps;
            for origin in &taint.origins {
                crate::taint::ssa_transfer::push_origin_bounded(&mut folded_origins, *origin);
            }
            false // indexed entry of `id`: remove
        });

        if !folded_caps.is_empty() {
            self.store_raw(id, HeapSlot::Elements, folded_caps, &folded_origins);
        }
    }
}
// ── PointsToResult ───────────────────────────────────────────────────────
/// Result of intra-procedural points-to analysis.
///
/// Produced by [`analyze_points_to`]; queried per SSA value through `get`.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct PointsToResult {
// SSA value -> heap objects it may reference. Values with no known
// points-to set have no entry.
pts: HashMap<SsaValue, PointsToSet>,
}
impl PointsToResult {
    /// Creates a result in which no SSA value points to any heap object.
    pub fn empty() -> Self {
        let pts = HashMap::new();
        Self { pts }
    }

    /// Returns the points-to set recorded for `v`, or `None` when the
    /// analysis recorded nothing for that value.
    pub fn get(&self, v: SsaValue) -> Option<&PointsToSet> {
        let Self { pts } = self;
        pts.get(&v)
    }

    /// Returns `true` when no SSA value has a recorded points-to set.
    pub fn is_empty(&self) -> bool {
        let Self { pts } = self;
        pts.is_empty()
    }
}
// ── Allocation site detection ────────────────────────────────────────────
/// Public re-export wrapper for container-literal detection.
///
/// Called from [`crate::ssa::param_points_to`] to decide whether a return
/// path traces to a fresh allocation. Keeps the internal helper private
/// while exposing the classification via a stable name.
///
/// Forwards `text` unchanged to `is_container_literal` and returns its
/// verdict.
pub fn is_container_literal_public(text: &str) -> bool {
is_container_literal(text)
}
/// Decides whether the text of a constant looks like a container or
/// collection literal.
///
/// After trimming surrounding whitespace, the text qualifies when it is:
/// - bracketed by `[`…`]` (array/list literal, empty or not),
/// - bracketed by `{`…`}` (object/dict/map/set literal, empty or not),
/// - prefixed with `new ` (e.g. `new Array(...)`, `new Map(...)`), or
/// - exactly `dict()`, `list()` or `set()` (Python empty constructors).
fn is_container_literal(text: &str) -> bool {
    let literal = text.trim();
    let wrapped_in =
        |open: char, close: char| literal.starts_with(open) && literal.ends_with(close);

    wrapped_in('[', ']')
        || wrapped_in('{', '}')
        || literal.starts_with("new ")
        || matches!(literal, "dict()" | "list()" | "set()")
}
/// Check if a callee creates a new container (constructor/factory).
///
/// The callee is reduced to its last segment after `.` (via
/// `bare_method_name`) and then after `::`; that suffix is matched against
/// a per-language list of container type names:
///
/// - JavaScript/TypeScript, Python, Java, PHP: exact, case-sensitive match
///   on the suffix.
/// - C/C++: ASCII-case-insensitive match on the suffix.
/// - Go: only the bare callee `make`.
/// - Ruby: `<Type>.new` where `<Type>` is `Array`, `Hash` or `Set`.
/// - Rust: `<Type>::new` where `<Type>` is a std collection.
pub fn is_container_constructor(callee: &str, lang: Lang) -> bool {
    // C/C++ standard container names, all lowercase so that
    // `eq_ignore_ascii_case` is equivalent to lowercasing the suffix first.
    const C_FAMILY_CONTAINERS: &[&str] = &[
        "vector",
        "map",
        "set",
        "unordered_map",
        "unordered_set",
        "list",
        "deque",
        "queue",
        "stack",
        "multimap",
        "multiset",
        "priority_queue",
    ];

    // Extract last segment after '.' or '::' (whichever comes last).
    let after_dot = bare_method_name(callee);
    let suffix = after_dot.rsplit("::").next().unwrap_or(after_dot);

    match lang {
        Lang::JavaScript | Lang::TypeScript => {
            matches!(suffix, "Array" | "Map" | "Set" | "WeakMap" | "WeakSet")
        }
        Lang::Python => matches!(
            suffix,
            "list"
                | "dict"
                | "set"
                | "frozenset"
                | "defaultdict"
                | "OrderedDict"
                | "deque"
                | "Counter"
        ),
        Lang::Java => matches!(
            suffix,
            "ArrayList"
                | "LinkedList"
                | "HashMap"
                | "TreeMap"
                | "HashSet"
                | "TreeSet"
                | "Vector"
                | "Stack"
                | "ArrayDeque"
                | "PriorityQueue"
                | "ConcurrentHashMap"
                | "LinkedHashMap"
                | "LinkedHashSet"
                | "CopyOnWriteArrayList"
        ),
        Lang::Go => callee == "make",
        Lang::Ruby => {
            // `.new` alone is too generic; require a known container type
            // as the segment immediately before it.
            suffix == "new"
                && matches!(
                    callee.rsplit('.').nth(1).unwrap_or(""),
                    "Array" | "Hash" | "Set"
                )
        }
        Lang::Php => matches!(suffix, "array"),
        // Compared case-insensitively without allocating a lowercased copy;
        // no other language needs the lowercase form.
        Lang::C | Lang::Cpp => C_FAMILY_CONTAINERS
            .iter()
            .any(|name| suffix.eq_ignore_ascii_case(name)),
        Lang::Rust => {
            // Vec::new, HashMap::new, etc.: the path segment right before
            // `new` must be a std collection type.
            suffix == "new"
                && callee.contains("::")
                && matches!(
                    callee.rsplit("::").nth(1).unwrap_or(""),
                    "Vec"
                        | "HashMap"
                        | "HashSet"
                        | "BTreeMap"
                        | "BTreeSet"
                        | "VecDeque"
                        | "LinkedList"
                        | "BinaryHeap"
                )
        }
    }
}
// ── Points-to analysis ───────────────────────────────────────────────────
/// Run intra-procedural points-to analysis on an SSA body.
///
/// Identifies allocation sites, propagates points-to sets through assignments
/// and phi nodes, and returns a result that the taint engine can query.
///
/// Runs as a pre-pass in optimize_ssa(), after type_facts.
///
/// * `body`: the SSA body to analyze.
/// * `_cfg`: currently unused; kept so the signature stays stable.
/// * `lang`: source language. With `None`, constructor calls are not
///   recognised as allocation sites and call instructions propagate
///   nothing; only container literals seed the analysis.
///
/// Returns [`PointsToResult::empty`] when the body has no allocation site.
/// Propagation is capped at a fixed number of sweeps, so the result may be
/// an under-approximation if the fixed point is not reached by then.
pub fn analyze_points_to(body: &SsaBody, _cfg: &Cfg, lang: Option<Lang>) -> PointsToResult {
    /// Upper bound on forward-propagation sweeps over the body.
    const MAX_ROUNDS: usize = 10;

    /// Union of the points-to sets currently known for `values`, folded in
    /// iteration order. Values without a recorded set contribute nothing.
    fn union_of_known(
        pts: &HashMap<SsaValue, PointsToSet>,
        values: impl Iterator<Item = SsaValue>,
    ) -> PointsToSet {
        values
            .filter_map(|v| pts.get(&v))
            .fold(PointsToSet::empty(), |acc, known| acc.union(known))
    }

    let mut pts: HashMap<SsaValue, PointsToSet> = HashMap::new();

    // Pass 1: identify allocation sites and seed each with a singleton set
    // keyed by its own SSA value.
    for block in &body.blocks {
        for inst in block.phis.iter().chain(block.body.iter()) {
            let is_allocation_site = match &inst.op {
                SsaOp::Const(Some(text)) => is_container_literal(text),
                SsaOp::Call { callee, .. } => {
                    lang.map_or(false, |l| is_container_constructor(callee, l))
                }
                _ => false,
            };
            if is_allocation_site {
                pts.insert(inst.value, PointsToSet::singleton(HeapObjectId(inst.value)));
            }
        }
    }
    if pts.is_empty() {
        return PointsToResult::empty();
    }

    // Pass 2: forward propagation, repeated until nothing changes or the
    // sweep budget is exhausted (phis may need several sweeps).
    for _ in 0..MAX_ROUNDS {
        let mut changed = false;
        for block in &body.blocks {
            // Phis: accumulate the union of all operand sets.
            for inst in &block.phis {
                if let SsaOp::Phi(operands) = &inst.op {
                    let merged = union_of_known(&pts, operands.iter().map(|(_, v)| *v));
                    if merged.is_empty() || pts.get(&inst.value) == Some(&merged) {
                        continue;
                    }
                    let existing = pts.entry(inst.value).or_insert_with(PointsToSet::empty);
                    let widened = existing.union(&merged);
                    if widened != *existing {
                        *existing = widened;
                        changed = true;
                    }
                }
            }

            for inst in &block.body {
                match &inst.op {
                    // Assignments alias whatever their uses point to.
                    SsaOp::Assign(uses) => {
                        let merged = union_of_known(&pts, uses.iter().copied());
                        if !merged.is_empty() && pts.get(&inst.value) != Some(&merged) {
                            pts.insert(inst.value, merged);
                            changed = true;
                        }
                    }
                    SsaOp::Call {
                        callee,
                        args,
                        receiver,
                        ..
                    } => {
                        // Container Store ops that return the container (Go
                        // `append`): the call result aliases the slice
                        // passed as argument 0. This receiver-less Go shape
                        // is the only one whose result gets a points-to set
                        // here, so no receiver is resolved for other shapes.
                        if let Some(l) = lang {
                            if let Some(ContainerOp::Store { .. }) =
                                classify_container_op(callee, l)
                            {
                                if l == Lang::Go && receiver.is_none() {
                                    let slice_pts = args
                                        .first()
                                        .and_then(|a| a.first())
                                        .and_then(|&v| pts.get(&v).cloned());
                                    if let Some(rp) = slice_pts {
                                        if pts.get(&inst.value) != Some(&rp) {
                                            pts.insert(inst.value, rp);
                                            changed = true;
                                        }
                                    }
                                }
                            }
                        }
                    }
                    _ => {}
                }
            }
        }
        if !changed {
            break;
        }
    }

    PointsToResult { pts }
}
// ── Tests ────────────────────────────────────────────────────────────────
#[cfg(test)]
mod tests {
use super::*;
use crate::labels::SourceKind;
use petgraph::graph::NodeIndex;
use std::sync::Mutex;
/// Serializes tests that touch [`MAX_POINTSTO_OVERRIDE`] or
/// [`POINTSTO_TRUNCATION_COUNT`]. Both are process-wide atomics, so
/// parallel tests would otherwise race on the counter and the
/// override.
static TEST_GUARD: Mutex<()> = Mutex::new(());
fn origin(idx: u32) -> TaintOrigin {
TaintOrigin {
node: NodeIndex::new(idx as usize),
source_kind: SourceKind::UserInput,
source_span: None,
}
}
// ── PointsToSet tests ────────────────────────────────────────────
#[test]
fn pts_singleton() {
let s = PointsToSet::singleton(HeapObjectId(SsaValue(0)));
assert_eq!(s.len(), 1);
assert!(s.contains(HeapObjectId(SsaValue(0))));
assert!(!s.contains(HeapObjectId(SsaValue(1))));
}
#[test]
fn pts_union() {
let a = PointsToSet::singleton(HeapObjectId(SsaValue(1)));
let b = PointsToSet::singleton(HeapObjectId(SsaValue(3)));
let c = a.union(&b);
assert_eq!(c.len(), 2);
assert!(c.contains(HeapObjectId(SsaValue(1))));
assert!(c.contains(HeapObjectId(SsaValue(3))));
}
#[test]
fn pts_union_dedup() {
let a = PointsToSet::singleton(HeapObjectId(SsaValue(1)));
let b = PointsToSet::singleton(HeapObjectId(SsaValue(1)));
let c = a.union(&b);
assert_eq!(c.len(), 1);
}
#[test]
fn pts_union_overflow() {
let _g = TEST_GUARD.lock().unwrap_or_else(|e| e.into_inner());
// Tight override so the test runs deterministically against the
// configured default.
set_max_pointsto_override(8);
reset_points_to_observability();
// Build a set with `cap` entries.
let cap = effective_max_pointsto();
let mut big = PointsToSet::empty();
for i in 0..cap as u32 {
big.insert(HeapObjectId(SsaValue(i)));
}
assert_eq!(big.len(), cap);
// Union with one more should not grow, and should count the drop.
let extra = PointsToSet::singleton(HeapObjectId(SsaValue(100)));
let result = big.union(&extra);
assert_eq!(result.len(), cap);
assert_eq!(points_to_truncation_count(), 1);
set_max_pointsto_override(0);
reset_points_to_observability();
}
#[test]
fn pts_insert_overflow_counts_drops() {
let _g = TEST_GUARD.lock().unwrap_or_else(|e| e.into_inner());
set_max_pointsto_override(4);
reset_points_to_observability();
let mut s = PointsToSet::empty();
// First 4 fit.
for i in 0..4u32 {
s.insert(HeapObjectId(SsaValue(i)));
}
assert_eq!(s.len(), 4);
assert_eq!(points_to_truncation_count(), 0);
// Next 3 are dropped; counter records each drop.
for i in 4..7u32 {
s.insert(HeapObjectId(SsaValue(i)));
}
assert_eq!(s.len(), 4);
assert_eq!(points_to_truncation_count(), 3);
// Duplicates of existing entries are *not* drops.
s.insert(HeapObjectId(SsaValue(0)));
assert_eq!(points_to_truncation_count(), 3);
set_max_pointsto_override(0);
reset_points_to_observability();
}
#[test]
fn pts_union_overflow_counts_exact_drops() {
let _g = TEST_GUARD.lock().unwrap_or_else(|e| e.into_inner());
set_max_pointsto_override(4);
reset_points_to_observability();
// a = {0,1,2,3}, b = {4,5,6}, union wants 7 members; cap is 4
// so 3 members are dropped. Deterministic order: smallest
// ids survive.
let mut a = PointsToSet::empty();
for i in 0..4u32 {
a.insert(HeapObjectId(SsaValue(i)));
}
let mut b = PointsToSet::empty();
for i in 4..7u32 {
b.insert(HeapObjectId(SsaValue(i)));
}
// Sanity: the pre-union sets should not themselves have triggered
// truncation (both are ≤ cap).
assert_eq!(points_to_truncation_count(), 0);
let c = a.union(&b);
assert_eq!(c.len(), 4);
assert!(c.contains(HeapObjectId(SsaValue(0))));
assert!(c.contains(HeapObjectId(SsaValue(3))));
assert!(!c.contains(HeapObjectId(SsaValue(6))));
assert_eq!(points_to_truncation_count(), 3);
set_max_pointsto_override(0);
reset_points_to_observability();
}
#[test]
fn pts_reset_observability_clears_counter() {
let _g = TEST_GUARD.lock().unwrap_or_else(|e| e.into_inner());
set_max_pointsto_override(2);
reset_points_to_observability();
let mut s = PointsToSet::empty();
s.insert(HeapObjectId(SsaValue(0)));
s.insert(HeapObjectId(SsaValue(1)));
s.insert(HeapObjectId(SsaValue(2))); // dropped
assert_eq!(points_to_truncation_count(), 1);
reset_points_to_observability();
assert_eq!(points_to_truncation_count(), 0);
set_max_pointsto_override(0);
}
#[test]
fn pts_effective_cap_defaults_to_runtime() {
let _g = TEST_GUARD.lock().unwrap_or_else(|e| e.into_inner());
// With no override, the cap comes from the installed runtime
// (which defaults to `DEFAULT_MAX_POINTSTO` in tests).
set_max_pointsto_override(0);
assert_eq!(
effective_max_pointsto(),
crate::utils::analysis_options::DEFAULT_MAX_POINTSTO as usize
);
set_max_pointsto_override(5);
assert_eq!(effective_max_pointsto(), 5);
set_max_pointsto_override(0);
}
#[test]
fn pts_empty() {
let e = PointsToSet::empty();
assert!(e.is_empty());
assert_eq!(e.len(), 0);
}
#[test]
fn pts_insert() {
let mut s = PointsToSet::empty();
s.insert(HeapObjectId(SsaValue(5)));
s.insert(HeapObjectId(SsaValue(2)));
s.insert(HeapObjectId(SsaValue(5))); // dup
assert_eq!(s.len(), 2);
// Sorted order
let ids: Vec<_> = s.iter().collect();
assert_eq!(ids[0].0, SsaValue(2));
assert_eq!(ids[1].0, SsaValue(5));
}
// ── HeapState tests ──────────────────────────────────────────────
#[test]
fn heap_store_and_load() {
let mut h = HeapState::empty();
let id = HeapObjectId(SsaValue(0));
h.store(id, HeapSlot::Elements, Cap::HTML_ESCAPE, &[origin(0)]);
let t = h.load(id, HeapSlot::Elements).unwrap();
assert_eq!(t.caps, Cap::HTML_ESCAPE);
assert_eq!(t.origins.len(), 1);
}
#[test]
fn heap_store_monotone_merge() {
let mut h = HeapState::empty();
let id = HeapObjectId(SsaValue(0));
h.store(id, HeapSlot::Elements, Cap::HTML_ESCAPE, &[origin(0)]);
h.store(id, HeapSlot::Elements, Cap::SQL_QUERY, &[origin(1)]);
let t = h.load(id, HeapSlot::Elements).unwrap();
assert_eq!(t.caps, Cap::HTML_ESCAPE | Cap::SQL_QUERY);
assert_eq!(t.origins.len(), 2);
}
#[test]
fn heap_store_empty_caps_noop() {
let mut h = HeapState::empty();
h.store(
HeapObjectId(SsaValue(0)),
HeapSlot::Elements,
Cap::empty(),
&[origin(0)],
);
assert!(h.is_empty());
}
#[test]
fn heap_load_missing() {
let h = HeapState::empty();
assert!(
h.load(HeapObjectId(SsaValue(0)), HeapSlot::Elements)
.is_none()
);
}
#[test]
fn heap_load_set_unions() {
let mut h = HeapState::empty();
h.store(
HeapObjectId(SsaValue(0)),
HeapSlot::Elements,
Cap::HTML_ESCAPE,
&[origin(0)],
);
h.store(
HeapObjectId(SsaValue(1)),
HeapSlot::Elements,
Cap::SQL_QUERY,
&[origin(1)],
);
let mut pts = PointsToSet::empty();
pts.insert(HeapObjectId(SsaValue(0)));
pts.insert(HeapObjectId(SsaValue(1)));
let t = h.load_set(&pts, HeapSlot::Elements).unwrap();
assert_eq!(t.caps, Cap::HTML_ESCAPE | Cap::SQL_QUERY);
assert_eq!(t.origins.len(), 2);
}
#[test]
fn heap_load_set_empty_pts() {
let mut h = HeapState::empty();
h.store(
HeapObjectId(SsaValue(0)),
HeapSlot::Elements,
Cap::HTML_ESCAPE,
&[origin(0)],
);
let pts = PointsToSet::empty();
assert!(h.load_set(&pts, HeapSlot::Elements).is_none());
}
#[test]
fn heap_store_set() {
let mut h = HeapState::empty();
let mut pts = PointsToSet::empty();
pts.insert(HeapObjectId(SsaValue(0)));
pts.insert(HeapObjectId(SsaValue(1)));
h.store_set(&pts, HeapSlot::Elements, Cap::HTML_ESCAPE, &[origin(0)]);
assert_eq!(
h.load(HeapObjectId(SsaValue(0)), HeapSlot::Elements)
.unwrap()
.caps,
Cap::HTML_ESCAPE
);
assert_eq!(
h.load(HeapObjectId(SsaValue(1)), HeapSlot::Elements)
.unwrap()
.caps,
Cap::HTML_ESCAPE
);
}
#[test]
fn heap_join() {
let mut a = HeapState::empty();
a.store(
HeapObjectId(SsaValue(0)),
HeapSlot::Elements,
Cap::HTML_ESCAPE,
&[origin(0)],
);
let mut b = HeapState::empty();
b.store(
HeapObjectId(SsaValue(0)),
HeapSlot::Elements,
Cap::SQL_QUERY,
&[origin(1)],
);
b.store(
HeapObjectId(SsaValue(1)),
HeapSlot::Elements,
Cap::FILE_IO,
&[origin(2)],
);
let c = a.join(&b);
let t0 = c
.load(HeapObjectId(SsaValue(0)), HeapSlot::Elements)
.unwrap();
assert_eq!(t0.caps, Cap::HTML_ESCAPE | Cap::SQL_QUERY);
let t1 = c
.load(HeapObjectId(SsaValue(1)), HeapSlot::Elements)
.unwrap();
assert_eq!(t1.caps, Cap::FILE_IO);
}
#[test]
fn heap_leq() {
let mut a = HeapState::empty();
a.store(
HeapObjectId(SsaValue(0)),
HeapSlot::Elements,
Cap::HTML_ESCAPE,
&[origin(0)],
);
let mut b = HeapState::empty();
b.store(
HeapObjectId(SsaValue(0)),
HeapSlot::Elements,
Cap::HTML_ESCAPE | Cap::SQL_QUERY,
&[origin(0)],
);
assert!(a.leq(&b)); // a ⊆ b
assert!(!b.leq(&a)); // b ⊄ a
}
#[test]
fn heap_leq_missing_entry() {
let mut a = HeapState::empty();
a.store(
HeapObjectId(SsaValue(5)),
HeapSlot::Elements,
Cap::HTML_ESCAPE,
&[origin(0)],
);
let b = HeapState::empty();
assert!(!a.leq(&b)); // a has entry, b doesn't
assert!(b.leq(&a)); // b empty is always ⊆
}
// ── HeapSlot indexed tests ──────────────────────────────────────
/// A store to `Index(0)` is visible at `Index(0)` but does not leak into a
/// load of `Index(1)` on the same object.
#[test]
fn heap_indexed_store_load_isolation() {
    let obj = HeapObjectId(SsaValue(0));
    let mut heap = HeapState::empty();
    heap.store(obj, HeapSlot::Index(0), Cap::HTML_ESCAPE, &[origin(0)]);

    // The slot that was written reports the stored capability.
    let written = heap.load(obj, HeapSlot::Index(0)).unwrap();
    assert_eq!(written.caps, Cap::HTML_ESCAPE);

    // Nothing was stored to Index(1) or to Elements, so this load is empty.
    let sibling = heap.load(obj, HeapSlot::Index(1));
    assert!(sibling.is_none());
}
/// Taint stored to the coarse `Elements` slot is observed by a load of a
/// specific index.
#[test]
fn heap_indexed_load_unions_with_elements() {
    let obj = HeapObjectId(SsaValue(0));
    let mut heap = HeapState::empty();
    heap.store(obj, HeapSlot::Elements, Cap::SQL_QUERY, &[origin(0)]);

    // Only Elements was written; the indexed load still sees its caps.
    let via_index = heap.load(obj, HeapSlot::Index(1)).unwrap();
    assert_eq!(via_index.caps, Cap::SQL_QUERY);
}
/// A load of `Elements` reports the union of the capabilities stored at
/// individual index slots.
#[test]
fn heap_elements_load_unions_all_indices() {
    let obj = HeapObjectId(SsaValue(0));
    let mut heap = HeapState::empty();

    // Two distinct index slots, each with its own capability and origin.
    heap.store(obj, HeapSlot::Index(0), Cap::HTML_ESCAPE, &[origin(0)]);
    heap.store(obj, HeapSlot::Index(2), Cap::SQL_QUERY, &[origin(1)]);

    let expected = Cap::HTML_ESCAPE | Cap::SQL_QUERY;
    let merged = heap.load(obj, HeapSlot::Elements).unwrap();
    assert_eq!(merged.caps, expected);
}
/// With taint at both `Index(0)` and `Elements`, a load of `Index(0)` sees
/// both capabilities while a load of `Index(1)` sees only the `Elements`
/// capability.
#[test]
fn heap_indexed_and_elements_combined() {
    let obj = HeapObjectId(SsaValue(0));
    let mut heap = HeapState::empty();
    heap.store(obj, HeapSlot::Index(0), Cap::HTML_ESCAPE, &[origin(0)]);
    heap.store(obj, HeapSlot::Elements, Cap::FILE_IO, &[origin(1)]);

    // Written index: its own cap plus the Elements cap.
    let at_written = heap.load(obj, HeapSlot::Index(0)).unwrap();
    assert_eq!(at_written.caps, Cap::HTML_ESCAPE | Cap::FILE_IO);

    // Unwritten index: the Elements cap alone.
    let at_other = heap.load(obj, HeapSlot::Index(1)).unwrap();
    assert_eq!(at_other.caps, Cap::FILE_IO);
}
/// Storing one index slot beyond `MAX_TRACKED_INDICES` leaves no per-index
/// entries for the object, and an `Elements` load still reports the
/// capabilities of both the earlier and the overflowing stores.
#[test]
fn heap_max_tracked_indices_collapse() {
    let obj = HeapObjectId(SsaValue(0));
    let mut heap = HeapState::empty();
    let limit = MAX_TRACKED_INDICES as u64;

    // Occupy index slots 0..limit, one distinct origin per slot.
    for slot in 0..limit {
        let source = origin(slot as u32);
        heap.store(obj, HeapSlot::Index(slot), Cap::HTML_ESCAPE, &[source]);
    }

    // The store at index `limit` is the one that exceeds the cap.
    heap.store(obj, HeapSlot::Index(limit), Cap::SQL_QUERY, &[origin(99)]);

    // No Index entries may remain for this object after the overflow.
    assert_eq!(heap.count_indices_for(obj), 0);

    // The taint from every store is still reachable through Elements.
    let collapsed = heap.load(obj, HeapSlot::Elements).unwrap();
    assert!(collapsed.caps.contains(Cap::HTML_ESCAPE));
    assert!(collapsed.caps.contains(Cap::SQL_QUERY));
}
// ── is_container_literal tests ───────────────────────────────────
/// `is_container_literal` accepts bracket/brace literals and container
/// constructor expressions, and rejects scalar literals.
#[test]
fn container_literal_detection() {
    let containers = [
        "[]",
        "[1, 2, 3]",
        "{}",
        "{a: 1}",
        "new Map()",
        "new ArrayList<>()",
        "dict()",
        "list()",
        "set()",
    ];
    for text in containers {
        assert!(
            is_container_literal(text),
            "expected {:?} to be a container literal",
            text
        );
    }

    let scalars = ["42", "\"hello\"", "true"];
    for text in scalars {
        assert!(
            !is_container_literal(text),
            "expected {:?} not to be a container literal",
            text
        );
    }
}
// ── is_container_constructor tests ───────────────────────────────
/// JavaScript: `Array`, `Map` and `Set` are container constructors;
/// `Object` is not.
#[test]
fn container_constructor_js() {
    let cases = [
        ("Array", true),
        ("Map", true),
        ("Set", true),
        ("Object", false),
    ];
    for (callee, expected) in cases {
        assert_eq!(
            is_container_constructor(callee, Lang::JavaScript),
            expected,
            "callee {:?}",
            callee
        );
    }
}
/// Python: `list`, `dict` and `defaultdict` are container constructors;
/// `str` is not.
#[test]
fn container_constructor_python() {
    let cases = [
        ("list", true),
        ("dict", true),
        ("defaultdict", true),
        ("str", false),
    ];
    for (callee, expected) in cases {
        assert_eq!(
            is_container_constructor(callee, Lang::Python),
            expected,
            "callee {:?}",
            callee
        );
    }
}
/// Java: `ArrayList`, `HashMap` and `ConcurrentHashMap` are container
/// constructors; `String` is not.
#[test]
fn container_constructor_java() {
    let cases = [
        ("ArrayList", true),
        ("HashMap", true),
        ("ConcurrentHashMap", true),
        ("String", false),
    ];
    for (callee, expected) in cases {
        assert_eq!(
            is_container_constructor(callee, Lang::Java),
            expected,
            "callee {:?}",
            callee
        );
    }
}
/// Go: `make` is a container constructor; `new` is not.
#[test]
fn container_constructor_go() {
    let cases = [("make", true), ("new", false)];
    for (callee, expected) in cases {
        assert_eq!(
            is_container_constructor(callee, Lang::Go),
            expected,
            "callee {:?}",
            callee
        );
    }
}
/// Rust: `Vec::new` and `HashMap::new` are container constructors;
/// `String::new` and a bare `new` are not.
#[test]
fn container_constructor_rust() {
    let cases = [
        ("Vec::new", true),
        ("HashMap::new", true),
        ("String::new", false),
        ("new", false),
    ];
    for (callee, expected) in cases {
        assert_eq!(
            is_container_constructor(callee, Lang::Rust),
            expected,
            "callee {:?}",
            callee
        );
    }
}
/// C++: `vector`, `std::map` and `unordered_set` are all recognised as
/// container constructors (both qualified and unqualified spellings appear).
#[test]
fn container_constructor_cpp() {
    let recognised = ["vector", "std::map", "unordered_set"];
    for callee in recognised {
        assert!(
            is_container_constructor(callee, Lang::Cpp),
            "expected {:?} to be a container constructor",
            callee
        );
    }
}
// ── PointsToResult tests ─────────────────────────────────────────
/// A freshly created `PointsToResult::empty()` reports itself as empty and
/// has no entry for SSA value 0.
#[test]
fn pts_result_empty() {
    let result = PointsToResult::empty();
    assert!(result.is_empty());

    // Looking up any value in the empty result yields nothing.
    let lookup = result.get(SsaValue(0));
    assert!(lookup.is_none());
}
}