mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-09 19:45:13 +02:00
* docs: Enhance module documentation across various files for clarity and completeness * fix: Remove unnecessary blank line in build.rs for cleaner code * docs: Update documentation to improve clarity and consistency in code comments
543 lines
20 KiB
Rust
543 lines
20 KiB
Rust
//! Parameter-granularity points-to summaries.
|
|
//!
|
|
//! Captures the subset of intra-procedural alias behaviour that matters
|
|
//! at cross-file call sites: which parameters' heap/field writes are
|
|
//! observable to the caller through *another* parameter's alias, and
|
|
//! which parameters flow identity to the return value.
|
|
//!
|
|
//! ## Scope
|
|
//!
|
|
//! This is **intentionally not** a whole-program points-to analysis.
|
|
//! Nyx already has bounded intra-procedural heap tracking
|
|
//! ([`crate::ssa::heap`]); this module bridges the cross-file cliff by recording
|
|
//! a small, bounded alias graph between parameter positions and the return
|
|
//! value, then replaying it at summary-resolution time.
|
|
//!
|
|
//! ## Edge model
|
|
//!
|
|
//! Edges are directed `AliasEdge { source, target, kind }`:
|
|
//!
|
|
//! * `Source(Param(i)) → Target(Param(j))`, the callee stores data
|
|
//! derived from parameter `i` into a field/element of parameter `j`.
|
|
//! Mutation is observable to the caller through its argument for `j`.
|
|
//! * `Source(Param(i)) → Target(Return)`, the return value aliases
|
|
//! parameter `i`'s heap identity. Adds heap-level precision on top of
|
|
//! the coarser [`crate::summary::ssa_summary::TaintTransform::Identity`] view already carried in
|
|
//! [`crate::summary::ssa_summary::SsaFuncSummary::param_to_return`].
|
|
//!
|
|
//! `MustAlias` is intentionally omitted: the ROI on
//! must-alias inference for cross-file summaries is low, and the soundness
//! story for `MayAlias`-only application is straightforward ("take the
//! union").
|
|
//!
|
|
//! ## Bound and overflow policy
|
|
//!
|
|
//! Edge count is capped at [`MAX_ALIAS_EDGES`]. When a callee's alias
|
|
//! graph exceeds the cap the summary records `overflow = true` and
|
|
//! callers treat the function as "any tainted parameter may spread to
|
|
//! every other parameter and to the return", the conservative
|
|
//! greatest-lower-bound over the alias lattice.
|
|
|
|
use serde::{Deserialize, Serialize};
|
|
use smallvec::SmallVec;
|
|
|
|
/// Identity of one endpoint in an alias edge.
|
|
///
|
|
/// Parameters are identified by their 0-based positional index as reported
|
|
/// by [`crate::ssa::ir::SsaOp::Param`]; the implicit receiver (`self`/`this`)
|
|
/// is handled outside this table and is deliberately not representable here.
|
|
/// `Return` denotes the function's return SSA value, one per function, so
|
|
/// no further qualifier is needed.
|
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
|
|
pub enum AliasPosition {
|
|
/// Positional parameter, 0-based. Receiver is excluded.
|
|
Param(u32),
|
|
/// The function's return value (union of every `Terminator::Return`).
|
|
Return,
|
|
}
|
|
|
|
/// Strength of an alias edge. Only [`AliasKind::MayAlias`] is emitted
|
|
///, the analysis over-approximates identity-level aliasing rather than
|
|
/// proving must-alias. The variant is kept as an enum so a future
|
|
/// extension that distinguishes the two can slot in without migrating
|
|
/// on-disk data.
|
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
|
|
pub enum AliasKind {
|
|
/// Under some execution, the two positions may reference the same
|
|
/// heap object. Callers applying the edge take the *union* of
|
|
/// points-to / taint at the source into the target.
|
|
MayAlias,
|
|
}
|
|
|
|
/// A single directed alias edge.
|
|
///
|
|
/// `(source, target)` are order-sensitive: data flows from `source` to
|
|
/// `target` at the callee. Callers apply each edge by reading their
|
|
/// argument / return abstraction for `source` and propagating into
|
|
/// `target`.
|
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
|
|
pub struct AliasEdge {
|
|
pub source: AliasPosition,
|
|
pub target: AliasPosition,
|
|
pub kind: AliasKind,
|
|
}
|
|
|
|
/// Upper bound on the number of edges a [`PointsToSummary`] retains.
///
/// Sized so that typical callees (at most 4 parameters, one return, a
/// handful of field writes) are represented without approximation, while
/// pathological graphs still let the analysis terminate in bounded time.
/// Exceeding the bound triggers the [`PointsToSummary::overflow`]
/// fallback rather than silently dropping edges, so callers can still
/// reason about soundness.
pub const MAX_ALIAS_EDGES: usize = 8;
|
|
|
|
/// Parameter-granularity alias summary persisted in
|
|
/// [`crate::summary::ssa_summary::SsaFuncSummary`].
|
|
///
|
|
/// The summary is empty by default, functions without any parameter /
|
|
/// return aliasing (pure transformers, sinks that consume but don't
|
|
/// mutate their arguments) carry no edges and cost nothing on disk.
|
|
///
|
|
/// When the callee's alias graph exceeds [`MAX_ALIAS_EDGES`], extraction
|
|
/// sets [`overflow = true`](Self::overflow) and callers must treat every
|
|
/// parameter as reaching every other parameter and the return. This is
|
|
/// the conservative fallback for bounded alias analysis.
|
|
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
|
|
pub struct PointsToSummary {
|
|
/// Bounded edge list, deduped by `(source, target, kind)`. The
|
|
/// `#[serde(default)]` attribute lets summaries pre-dating points-to
|
|
/// tracking deserialise cleanly (no edges).
|
|
#[serde(default, skip_serializing_if = "SmallVec::is_empty")]
|
|
pub edges: SmallVec<[AliasEdge; 4]>,
|
|
/// Conservative fallback flag, set when extraction hit
|
|
/// [`MAX_ALIAS_EDGES`] and refused to drop any edge silently. When
|
|
/// `true`, callers treat the callee as "every parameter may alias
|
|
/// every other parameter and the return value".
|
|
#[serde(default, skip_serializing_if = "core::ops::Not::not")]
|
|
pub overflow: bool,
|
|
/// At least one return path produces a *fresh* container allocation ,
|
|
/// a container literal (`[]`, `{}`) or a known container constructor
|
|
/// call (`new Map()`, `list()`, …) that does not trace back to any
|
|
/// parameter. When this is `true` the caller synthesises a fresh
|
|
/// [`crate::ssa::heap::HeapObjectId`] keyed on the call's SSA value
|
|
/// and seeds it into `dynamic_pts`, so later container operations on
|
|
/// the call result (e.g. `bag[0]`, `fillBag(bag, …)`) can find a heap
|
|
/// cell to read from or store into.
|
|
///
|
|
/// Closes the factory-pattern cross-file gap, `const bag = makeBag()`
|
|
/// followed by `fillBag(bag, env)` and `exec(bag[0])`, by giving the
|
|
/// caller's heap analysis a stable identity to attach stores to.
|
|
/// Combines freely with `Param(i) → Return` edges: a mixed-return
|
|
/// function (one branch returns a param, another returns a fresh
|
|
/// allocation) emits both and the caller joins the two points-to
|
|
/// sets.
|
|
#[serde(default, skip_serializing_if = "core::ops::Not::not")]
|
|
pub returns_fresh_alloc: bool,
|
|
}
|
|
|
|
impl PointsToSummary {
|
|
/// Empty summary, no aliasing, no overflow. Equivalent to
|
|
/// [`Self::default`] but explicit at call sites.
|
|
pub fn empty() -> Self {
|
|
Self::default()
|
|
}
|
|
|
|
/// Whether this summary adds any information over the default "no
|
|
/// aliasing" interpretation. Used by extraction to decide whether
|
|
/// the field should be persisted or left empty.
|
|
pub fn is_empty(&self) -> bool {
|
|
self.edges.is_empty() && !self.overflow && !self.returns_fresh_alloc
|
|
}
|
|
|
|
/// Insert an edge, preserving dedup and the bounded-size invariant.
|
|
///
|
|
/// Returns `true` when the edge was added, `false` when it was a
|
|
/// duplicate or when the cap triggered an overflow. The caller can
|
|
/// ignore the return, the summary always remains in a valid state.
|
|
pub fn insert(&mut self, source: AliasPosition, target: AliasPosition, kind: AliasKind) {
|
|
if self.overflow {
|
|
return;
|
|
}
|
|
let edge = AliasEdge {
|
|
source,
|
|
target,
|
|
kind,
|
|
};
|
|
if self.edges.contains(&edge) {
|
|
return;
|
|
}
|
|
if self.edges.len() >= MAX_ALIAS_EDGES {
|
|
self.overflow = true;
|
|
// Keep the existing edge list, a consumer that still reads
|
|
// the vector gets a strict *subset* of the sound over-
|
|
// approximation conveyed by `overflow`. Correctness is
|
|
// owned by the overflow flag; the residual edges are purely
|
|
// diagnostic.
|
|
return;
|
|
}
|
|
self.edges.push(edge);
|
|
}
|
|
|
|
/// Union two summaries, merging edges and OR-ing the overflow /
|
|
/// fresh-alloc flags. Respects the [`MAX_ALIAS_EDGES`] cap via the
|
|
/// same overflow promotion used by [`Self::insert`].
|
|
pub fn merge(&mut self, other: &Self) {
|
|
self.returns_fresh_alloc |= other.returns_fresh_alloc;
|
|
if other.overflow {
|
|
self.overflow = true;
|
|
return;
|
|
}
|
|
for edge in &other.edges {
|
|
self.insert(edge.source, edge.target, edge.kind);
|
|
}
|
|
}
|
|
|
|
/// Parameter indices referenced by any edge in this summary. Used by
|
|
/// `ssa_summary_fits_arity` to confirm the summary
|
|
/// does not reference a parameter beyond the key's declared arity
|
|
/// (which would indicate a synthetic-param mis-attribution in
|
|
/// extraction).
|
|
pub fn max_param_index(&self) -> Option<u32> {
|
|
let mut max: Option<u32> = None;
|
|
for edge in &self.edges {
|
|
if let AliasPosition::Param(i) = edge.source {
|
|
max = Some(max.map_or(i, |m| m.max(i)));
|
|
}
|
|
if let AliasPosition::Param(i) = edge.target {
|
|
max = Some(max.map_or(i, |m| m.max(i)));
|
|
}
|
|
}
|
|
max
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for inserting a `MayAlias` edge into `summary`.
    fn link(summary: &mut PointsToSummary, source: AliasPosition, target: AliasPosition) {
        summary.insert(source, target, AliasKind::MayAlias);
    }

    /// `empty()` yields a summary with no edges and no overflow.
    #[test]
    fn empty_summary_is_noop() {
        let summary = PointsToSummary::empty();
        assert!(summary.is_empty());
        assert!(!summary.overflow);
        assert_eq!(summary.edges.len(), 0);
    }

    /// Inserting the same edge twice stores it once.
    #[test]
    fn insert_dedups() {
        let mut summary = PointsToSummary::empty();
        for _ in 0..2 {
            link(
                &mut summary,
                AliasPosition::Param(0),
                AliasPosition::Param(1),
            );
        }
        assert_eq!(summary.edges.len(), 1);
    }

    /// Filling to the cap is fine; one more distinct edge flips
    /// `overflow` and leaves the stored edges untouched.
    #[test]
    fn insert_overflows_at_cap() {
        let mut summary = PointsToSummary::empty();
        for index in 0..(MAX_ALIAS_EDGES as u32) {
            link(
                &mut summary,
                AliasPosition::Param(index),
                AliasPosition::Return,
            );
        }
        assert_eq!(summary.edges.len(), MAX_ALIAS_EDGES);
        assert!(!summary.overflow);

        link(
            &mut summary,
            AliasPosition::Param(99),
            AliasPosition::Return,
        );
        assert!(summary.overflow);
        assert_eq!(summary.edges.len(), MAX_ALIAS_EDGES);
    }

    /// Merging an overflowed summary marks the receiver as overflowed.
    #[test]
    fn merge_propagates_overflow() {
        let overflowed = PointsToSummary {
            overflow: true,
            ..PointsToSummary::empty()
        };
        let mut merged = PointsToSummary::empty();
        merged.merge(&overflowed);
        assert!(merged.overflow);
    }

    /// The maximum is taken over sources and targets alike.
    #[test]
    fn max_param_index_tracks_both_endpoints() {
        let mut summary = PointsToSummary::empty();
        link(
            &mut summary,
            AliasPosition::Param(0),
            AliasPosition::Param(3),
        );
        link(
            &mut summary,
            AliasPosition::Param(1),
            AliasPosition::Return,
        );
        assert_eq!(summary.max_param_index(), Some(3));
    }

    /// A summary with edges survives a JSON round trip unchanged.
    #[test]
    fn serde_round_trip_is_stable() {
        let mut summary = PointsToSummary::empty();
        link(
            &mut summary,
            AliasPosition::Param(0),
            AliasPosition::Param(1),
        );
        link(
            &mut summary,
            AliasPosition::Param(2),
            AliasPosition::Return,
        );
        let encoded = serde_json::to_string(&summary).unwrap();
        let decoded: PointsToSummary = serde_json::from_str(&encoded).unwrap();
        assert_eq!(summary, decoded);
    }

    /// An empty JSON object decodes to an empty summary.
    #[test]
    fn serde_default_decodes_empty_object() {
        let decoded: PointsToSummary = serde_json::from_str("{}").unwrap();
        assert!(decoded.is_empty());
    }

    /// Setting only `returns_fresh_alloc` makes the summary non-empty.
    #[test]
    fn returns_fresh_alloc_is_not_empty() {
        let mut summary = PointsToSummary::empty();
        assert!(summary.is_empty());
        summary.returns_fresh_alloc = true;
        assert!(!summary.is_empty());
    }

    /// `merge` ORs the fresh-allocation flag into the receiver.
    #[test]
    fn merge_propagates_fresh_alloc_flag() {
        let fresh = PointsToSummary {
            returns_fresh_alloc: true,
            ..PointsToSummary::empty()
        };
        let mut merged = PointsToSummary::empty();
        merged.merge(&fresh);
        assert!(merged.returns_fresh_alloc);
    }

    /// The fresh-allocation flag survives a JSON round trip.
    #[test]
    fn returns_fresh_alloc_roundtrips() {
        let summary = PointsToSummary {
            returns_fresh_alloc: true,
            ..PointsToSummary::empty()
        };
        let encoded = serde_json::to_string(&summary).unwrap();
        let decoded: PointsToSummary = serde_json::from_str(&encoded).unwrap();
        assert!(decoded.returns_fresh_alloc);
        assert_eq!(summary, decoded);
    }
}
|
|
|
|
// ── field-granularity points-to summary ──────────────
|
|
|
|
/// Upper bound on the field names a [`FieldPointsToSummary`] keeps for a
/// single parameter.
///
/// Mirrors [`MAX_ALIAS_EDGES`]: it bounds on-disk size and cross-file
/// work while leaving room for typical helpers, which touch only a
/// handful of fields each.
pub const MAX_FIELDS_PER_PARAM: usize = 8;
|
|
|
|
/// field-granularity per-parameter points-to summary.
|
|
///
|
|
/// Records, for each positional parameter index, the set of field
|
|
/// **names** read from and written to inside the callee body. Names
|
|
/// (not [`crate::ssa::ir::FieldId`]) are persisted because field IDs
|
|
/// are body-local, the per-body [`crate::ssa::ir::FieldInterner`]
|
|
/// reassigns IDs across files. Callers re-intern through their own
|
|
/// body's interner before consulting `field_taint` cells.
|
|
///
|
|
/// The receiver (`self` / `this`) uses sentinel index [`usize::MAX`]
|
|
/// in the outer `Vec` so positional params and the receiver share the
|
|
/// same indexing convention as `SsaFuncSummary::receiver_to_*`
|
|
/// (separate channel).
|
|
///
|
|
/// Empty by default, functions that don't read or write any field on
|
|
/// their parameters carry no entries and cost nothing on disk.
|
|
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
|
|
pub struct FieldPointsToSummary {
|
|
/// `(param_index, field_names_read)`, the callee projected each
|
|
/// listed field on a value derived from `param_index` somewhere
|
|
/// in its body. Sorted, deduped per-entry.
|
|
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
|
pub param_field_reads: Vec<(u32, SmallVec<[String; 2]>)>,
|
|
/// `(param_index, field_names_written)`, the callee assigned to
|
|
/// each listed field on a value derived from `param_index`.
|
|
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
|
pub param_field_writes: Vec<(u32, SmallVec<[String; 2]>)>,
|
|
/// Set when the read/write graph hit
|
|
/// [`MAX_FIELDS_PER_PARAM`] for any parameter. Callers seeing
|
|
/// `overflow=true` treat each parameter as reading/writing every
|
|
/// field on every other parameter, the conservative greatest
|
|
/// lower bound that preserves soundness.
|
|
#[serde(default, skip_serializing_if = "core::ops::Not::not")]
|
|
pub overflow: bool,
|
|
}
|
|
|
|
impl FieldPointsToSummary {
|
|
pub fn empty() -> Self {
|
|
Self::default()
|
|
}
|
|
|
|
pub fn is_empty(&self) -> bool {
|
|
self.param_field_reads.is_empty() && self.param_field_writes.is_empty() && !self.overflow
|
|
}
|
|
|
|
fn insert_into(
|
|
list: &mut Vec<(u32, SmallVec<[String; 2]>)>,
|
|
param: u32,
|
|
field: &str,
|
|
overflow: &mut bool,
|
|
) {
|
|
let entry = match list.iter_mut().find(|(p, _)| *p == param) {
|
|
Some(e) => &mut e.1,
|
|
None => {
|
|
list.push((param, SmallVec::new()));
|
|
&mut list.last_mut().unwrap().1
|
|
}
|
|
};
|
|
if entry.iter().any(|s| s == field) {
|
|
return;
|
|
}
|
|
if entry.len() >= MAX_FIELDS_PER_PARAM {
|
|
*overflow = true;
|
|
return;
|
|
}
|
|
entry.push(field.to_string());
|
|
entry.sort();
|
|
}
|
|
|
|
/// Record a field READ on parameter `param`. Bounded by
|
|
/// [`MAX_FIELDS_PER_PARAM`] per parameter; over-cap inserts trip
|
|
/// `overflow`.
|
|
pub fn add_read(&mut self, param: u32, field: &str) {
|
|
if self.overflow {
|
|
return;
|
|
}
|
|
let mut overflow = false;
|
|
Self::insert_into(&mut self.param_field_reads, param, field, &mut overflow);
|
|
if overflow {
|
|
self.overflow = true;
|
|
}
|
|
}
|
|
|
|
/// Record a field WRITE on parameter `param`. Mirror of [`Self::add_read`].
|
|
pub fn add_write(&mut self, param: u32, field: &str) {
|
|
if self.overflow {
|
|
return;
|
|
}
|
|
let mut overflow = false;
|
|
Self::insert_into(&mut self.param_field_writes, param, field, &mut overflow);
|
|
if overflow {
|
|
self.overflow = true;
|
|
}
|
|
}
|
|
|
|
/// Union with `other`. Overflow propagates per
|
|
/// [`PointsToSummary::merge`]'s semantics, once a callee is
|
|
/// "any field on any parameter", merging cannot recover precision.
|
|
pub fn merge(&mut self, other: &Self) {
|
|
if other.overflow {
|
|
self.overflow = true;
|
|
return;
|
|
}
|
|
for (p, fields) in &other.param_field_reads {
|
|
for f in fields {
|
|
self.add_read(*p, f);
|
|
}
|
|
}
|
|
for (p, fields) in &other.param_field_writes {
|
|
for f in fields {
|
|
self.add_write(*p, f);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod field_summary_tests {
    use super::*;

    /// Serialises `summary` to JSON and decodes it again.
    fn json_round_trip(summary: &FieldPointsToSummary) -> FieldPointsToSummary {
        let encoded = serde_json::to_string(summary).unwrap();
        serde_json::from_str(&encoded).unwrap()
    }

    /// An empty summary is reported as empty and round-trips unchanged.
    #[test]
    fn empty_summary_round_trips() {
        let summary = FieldPointsToSummary::empty();
        assert!(summary.is_empty());
        assert_eq!(summary, json_round_trip(&summary));
    }

    /// Repeated names are stored once and the list is kept sorted.
    #[test]
    fn add_read_dedupes_and_sorts() {
        let mut summary = FieldPointsToSummary::empty();
        // The third name repeats the first.
        for name in ["name", "id", "name"] {
            summary.add_read(0, name);
        }
        let (_, names) = summary
            .param_field_reads
            .iter()
            .find(|(param, _)| *param == 0)
            .unwrap();
        assert_eq!(names.as_slice(), &["id".to_string(), "name".to_string()]);
    }

    /// Writes on different parameters land in separate entries.
    #[test]
    fn distinct_params_get_distinct_entries() {
        let mut summary = FieldPointsToSummary::empty();
        summary.add_write(0, "cache");
        summary.add_write(1, "log");
        assert_eq!(summary.param_field_writes.len(), 2);
    }

    /// Going past the per-parameter cap sets `overflow`.
    #[test]
    fn overflow_trips_at_cap() {
        let mut summary = FieldPointsToSummary::empty();
        for index in 0..(MAX_FIELDS_PER_PARAM + 4) {
            let name = format!("field{index}");
            summary.add_read(0, &name);
        }
        assert!(summary.overflow);
    }

    /// Merging keeps the entries of both sides when the keys differ.
    #[test]
    fn merge_unions_disjoint_keys() {
        let mut left = FieldPointsToSummary::empty();
        left.add_read(0, "alpha");
        let mut right = FieldPointsToSummary::empty();
        right.add_read(1, "beta");

        left.merge(&right);

        let has_param = |wanted: u32| {
            left.param_field_reads
                .iter()
                .any(|(param, _)| *param == wanted)
        };
        assert!(has_param(0));
        assert!(has_param(1));
    }

    /// Merging an overflowed summary marks the receiver as overflowed.
    #[test]
    fn merge_propagates_overflow() {
        let overflowed = FieldPointsToSummary {
            overflow: true,
            ..FieldPointsToSummary::empty()
        };
        let mut merged = FieldPointsToSummary::empty();
        merged.merge(&overflowed);
        assert!(merged.overflow);
    }

    /// Reads and writes survive a JSON round trip unchanged.
    #[test]
    fn round_trip_preserves_entries() {
        let mut summary = FieldPointsToSummary::empty();
        summary.add_read(0, "name");
        summary.add_write(1, "cache");
        summary.add_write(1, "log");
        assert_eq!(summary, json_round_trip(&summary));
    }

    /// An empty summary encodes as `{}`, and `{}` decodes as empty.
    #[test]
    fn empty_serializes_as_empty_object() {
        let encoded = serde_json::to_string(&FieldPointsToSummary::empty()).unwrap();
        assert_eq!(encoded, "{}");
        let decoded: FieldPointsToSummary = serde_json::from_str("{}").unwrap();
        assert!(decoded.is_empty());
    }
}
|