mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-21 20:18:06 +02:00
339 lines
13 KiB
Rust
339 lines
13 KiB
Rust
|
|
//! Parameter-granularity points-to summaries.
//!
//! Captures the subset of intra-procedural alias behaviour that matters
//! at cross-file call sites: which parameters' heap/field writes are
//! observable to the caller through *another* parameter's alias, and
//! which parameters flow identity to the return value.
//!
//! ## Scope
//!
//! This is **intentionally not** a whole-program points-to analysis.
//! Nyx already has bounded intra-procedural heap tracking
//! ([`crate::ssa::heap`]); this module bridges the cross-file cliff by recording
//! a small, bounded alias graph between parameter positions and the return
//! value, then replaying it at summary-resolution time.
//!
//! ## Edge model
//!
//! Edges are directed `AliasEdge { source, target, kind }`:
//!
//! * `Source(Param(i)) → Target(Param(j))` — the callee stores data
//!   derived from parameter `i` into a field/element of parameter `j`.
//!   Mutation is observable to the caller through its argument for `j`.
//! * `Source(Param(i)) → Target(Return)` — the return value aliases
//!   parameter `i`'s heap identity. Adds heap-level precision on top of
//!   the coarser [`TaintTransform::Identity`] view already carried in
//!   [`crate::summary::ssa_summary::SsaFuncSummary::param_to_return`].
//!
//! `MustAlias` is intentionally omitted — the ROI on
//! must-alias inference for cross-file summaries is low, and the soundness
//! story for `MayAlias`-only application is straightforward ("take the
//! union").
//!
//! ## Bound and overflow policy
//!
//! Edge count is capped at [`MAX_ALIAS_EDGES`]. When a callee's alias
//! graph exceeds the cap the summary records `overflow = true` and
//! callers treat the function as "any tainted parameter may spread to
//! every other parameter and to the return" — the conservative
//! greatest-lower-bound over the alias lattice.
|
||
|
|
|
||
|
|
use serde::{Deserialize, Serialize};
|
||
|
|
use smallvec::SmallVec;
|
||
|
|
|
||
|
|
/// Identity of one endpoint in an alias edge.
|
||
|
|
///
|
||
|
|
/// Parameters are identified by their 0-based positional index as reported
|
||
|
|
/// by [`crate::ssa::ir::SsaOp::Param`]; the implicit receiver (`self`/`this`)
|
||
|
|
/// is handled outside this table and is deliberately not representable here.
|
||
|
|
/// `Return` denotes the function's return SSA value — one per function, so
|
||
|
|
/// no further qualifier is needed.
|
||
|
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
|
||
|
|
pub enum AliasPosition {
|
||
|
|
/// Positional parameter, 0-based. Receiver is excluded.
|
||
|
|
Param(u32),
|
||
|
|
/// The function's return value (union of every `Terminator::Return`).
|
||
|
|
Return,
|
||
|
|
}
|
||
|
|
|
||
|
|
/// Strength of an alias edge. Only [`AliasKind::MayAlias`] is emitted
|
||
|
|
/// — the analysis over-approximates identity-level aliasing rather than
|
||
|
|
/// proving must-alias. The variant is kept as an enum so a future
|
||
|
|
/// extension that distinguishes the two can slot in without migrating
|
||
|
|
/// on-disk data.
|
||
|
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
|
||
|
|
pub enum AliasKind {
|
||
|
|
/// Under some execution, the two positions may reference the same
|
||
|
|
/// heap object. Callers applying the edge take the *union* of
|
||
|
|
/// points-to / taint at the source into the target.
|
||
|
|
MayAlias,
|
||
|
|
}
|
||
|
|
|
||
|
|
/// A single directed alias edge.
|
||
|
|
///
|
||
|
|
/// `(source, target)` are order-sensitive: data flows from `source` to
|
||
|
|
/// `target` at the callee. Callers apply each edge by reading their
|
||
|
|
/// argument / return abstraction for `source` and propagating into
|
||
|
|
/// `target`.
|
||
|
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
|
||
|
|
pub struct AliasEdge {
|
||
|
|
pub source: AliasPosition,
|
||
|
|
pub target: AliasPosition,
|
||
|
|
pub kind: AliasKind,
|
||
|
|
}
|
||
|
|
|
||
|
|
/// Maximum edges retained per [`PointsToSummary`].
///
/// Chosen so typical callees (≤ 4 parameters, one return, a handful of
/// field writes) fit without approximation while pathological graphs
/// still terminate the analysis in bounded time. Overflow triggers the
/// [`PointsToSummary::overflow`] fallback instead of silently dropping
/// edges, so callers can reason about soundness.
pub const MAX_ALIAS_EDGES: usize = 8;
|
||
|
|
|
||
|
|
/// Parameter-granularity alias summary persisted in
|
||
|
|
/// [`crate::summary::ssa_summary::SsaFuncSummary`].
|
||
|
|
///
|
||
|
|
/// The summary is empty by default — functions without any parameter /
|
||
|
|
/// return aliasing (pure transformers, sinks that consume but don't
|
||
|
|
/// mutate their arguments) carry no edges and cost nothing on disk.
|
||
|
|
///
|
||
|
|
/// When the callee's alias graph exceeds [`MAX_ALIAS_EDGES`], extraction
|
||
|
|
/// sets [`overflow = true`](Self::overflow) and callers must treat every
|
||
|
|
/// parameter as reaching every other parameter and the return. This is
|
||
|
|
/// the conservative fallback for bounded alias analysis.
|
||
|
|
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
|
||
|
|
pub struct PointsToSummary {
|
||
|
|
/// Bounded edge list, deduped by `(source, target, kind)`. The
|
||
|
|
/// [`serde(default)`] attribute lets summaries pre-dating points-to
|
||
|
|
/// tracking deserialise cleanly (no edges).
|
||
|
|
#[serde(default, skip_serializing_if = "SmallVec::is_empty")]
|
||
|
|
pub edges: SmallVec<[AliasEdge; 4]>,
|
||
|
|
/// Conservative fallback flag — set when extraction hit
|
||
|
|
/// [`MAX_ALIAS_EDGES`] and refused to drop any edge silently. When
|
||
|
|
/// `true`, callers treat the callee as "every parameter may alias
|
||
|
|
/// every other parameter and the return value".
|
||
|
|
#[serde(default, skip_serializing_if = "core::ops::Not::not")]
|
||
|
|
pub overflow: bool,
|
||
|
|
/// At least one return path produces a *fresh* container allocation —
|
||
|
|
/// a container literal (`[]`, `{}`) or a known container constructor
|
||
|
|
/// call (`new Map()`, `list()`, …) that does not trace back to any
|
||
|
|
/// parameter. When this is `true` the caller synthesises a fresh
|
||
|
|
/// [`crate::ssa::heap::HeapObjectId`] keyed on the call's SSA value
|
||
|
|
/// and seeds it into `dynamic_pts`, so later container operations on
|
||
|
|
/// the call result (e.g. `bag[0]`, `fillBag(bag, …)`) can find a heap
|
||
|
|
/// cell to read from or store into.
|
||
|
|
///
|
||
|
|
/// Closes the factory-pattern cross-file gap — `const bag = makeBag()`
|
||
|
|
/// followed by `fillBag(bag, env)` and `exec(bag[0])` — by giving the
|
||
|
|
/// caller's heap analysis a stable identity to attach stores to.
|
||
|
|
/// Combines freely with `Param(i) → Return` edges: a mixed-return
|
||
|
|
/// function (one branch returns a param, another returns a fresh
|
||
|
|
/// allocation) emits both and the caller joins the two points-to
|
||
|
|
/// sets.
|
||
|
|
#[serde(default, skip_serializing_if = "core::ops::Not::not")]
|
||
|
|
pub returns_fresh_alloc: bool,
|
||
|
|
}
|
||
|
|
|
||
|
|
impl PointsToSummary {
|
||
|
|
/// Empty summary — no aliasing, no overflow. Equivalent to
|
||
|
|
/// [`Self::default`] but explicit at call sites.
|
||
|
|
pub fn empty() -> Self {
|
||
|
|
Self::default()
|
||
|
|
}
|
||
|
|
|
||
|
|
/// Whether this summary adds any information over the default "no
|
||
|
|
/// aliasing" interpretation. Used by extraction to decide whether
|
||
|
|
/// the field should be persisted or left empty.
|
||
|
|
pub fn is_empty(&self) -> bool {
|
||
|
|
self.edges.is_empty() && !self.overflow && !self.returns_fresh_alloc
|
||
|
|
}
|
||
|
|
|
||
|
|
/// Insert an edge, preserving dedup and the bounded-size invariant.
|
||
|
|
///
|
||
|
|
/// Returns `true` when the edge was added, `false` when it was a
|
||
|
|
/// duplicate or when the cap triggered an overflow. The caller can
|
||
|
|
/// ignore the return — the summary always remains in a valid state.
|
||
|
|
pub fn insert(&mut self, source: AliasPosition, target: AliasPosition, kind: AliasKind) {
|
||
|
|
if self.overflow {
|
||
|
|
return;
|
||
|
|
}
|
||
|
|
let edge = AliasEdge {
|
||
|
|
source,
|
||
|
|
target,
|
||
|
|
kind,
|
||
|
|
};
|
||
|
|
if self.edges.contains(&edge) {
|
||
|
|
return;
|
||
|
|
}
|
||
|
|
if self.edges.len() >= MAX_ALIAS_EDGES {
|
||
|
|
self.overflow = true;
|
||
|
|
// Keep the existing edge list — a consumer that still reads
|
||
|
|
// the vector gets a strict *subset* of the sound over-
|
||
|
|
// approximation conveyed by `overflow`. Correctness is
|
||
|
|
// owned by the overflow flag; the residual edges are purely
|
||
|
|
// diagnostic.
|
||
|
|
return;
|
||
|
|
}
|
||
|
|
self.edges.push(edge);
|
||
|
|
}
|
||
|
|
|
||
|
|
/// Union two summaries, merging edges and OR-ing the overflow /
|
||
|
|
/// fresh-alloc flags. Respects the [`MAX_ALIAS_EDGES`] cap via the
|
||
|
|
/// same overflow promotion used by [`Self::insert`].
|
||
|
|
pub fn merge(&mut self, other: &Self) {
|
||
|
|
self.returns_fresh_alloc |= other.returns_fresh_alloc;
|
||
|
|
if other.overflow {
|
||
|
|
self.overflow = true;
|
||
|
|
return;
|
||
|
|
}
|
||
|
|
for edge in &other.edges {
|
||
|
|
self.insert(edge.source, edge.target, edge.kind);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
/// Parameter indices referenced by any edge in this summary. Used by
|
||
|
|
/// [`crate::summary::ssa_summary_fits_arity`] to confirm the summary
|
||
|
|
/// does not reference a parameter beyond the key's declared arity
|
||
|
|
/// (which would indicate a synthetic-param mis-attribution in
|
||
|
|
/// extraction).
|
||
|
|
pub fn max_param_index(&self) -> Option<u32> {
|
||
|
|
let mut max: Option<u32> = None;
|
||
|
|
for edge in &self.edges {
|
||
|
|
if let AliasPosition::Param(i) = edge.source {
|
||
|
|
max = Some(max.map_or(i, |m| m.max(i)));
|
||
|
|
}
|
||
|
|
if let AliasPosition::Param(i) = edge.target {
|
||
|
|
max = Some(max.map_or(i, |m| m.max(i)));
|
||
|
|
}
|
||
|
|
}
|
||
|
|
max
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for inserting a `MayAlias` edge, the only kind emitted.
    fn add(summary: &mut PointsToSummary, source: AliasPosition, target: AliasPosition) {
        summary.insert(source, target, AliasKind::MayAlias);
    }

    #[test]
    fn empty_summary_is_noop() {
        let s = PointsToSummary::empty();
        assert!(s.is_empty());
        assert!(!s.overflow);
        assert_eq!(s.edges.len(), 0);
    }

    #[test]
    fn insert_dedups() {
        let mut s = PointsToSummary::empty();
        add(&mut s, AliasPosition::Param(0), AliasPosition::Param(1));
        add(&mut s, AliasPosition::Param(0), AliasPosition::Param(1));
        assert_eq!(s.edges.len(), 1);
    }

    #[test]
    fn insert_overflows_at_cap() {
        let mut s = PointsToSummary::empty();
        for i in 0..(MAX_ALIAS_EDGES as u32) {
            add(&mut s, AliasPosition::Param(i), AliasPosition::Return);
        }
        assert_eq!(s.edges.len(), MAX_ALIAS_EDGES);
        assert!(!s.overflow);
        add(&mut s, AliasPosition::Param(99), AliasPosition::Return);
        assert!(s.overflow);
        assert_eq!(s.edges.len(), MAX_ALIAS_EDGES);
    }

    #[test]
    fn insert_after_overflow_is_ignored() {
        let mut s = PointsToSummary::empty();
        s.overflow = true;
        add(&mut s, AliasPosition::Param(0), AliasPosition::Return);
        assert!(s.edges.is_empty());
        assert!(s.overflow);
    }

    #[test]
    fn merge_propagates_overflow() {
        let mut a = PointsToSummary::empty();
        let mut b = PointsToSummary::empty();
        b.overflow = true;
        a.merge(&b);
        assert!(a.overflow);
    }

    #[test]
    fn merge_dedups_and_respects_cap() {
        let mut a = PointsToSummary::empty();
        for i in 0..(MAX_ALIAS_EDGES as u32) {
            add(&mut a, AliasPosition::Param(i), AliasPosition::Return);
        }
        let mut b = PointsToSummary::empty();
        // Duplicate of an edge already in `a`: must not trip the cap.
        add(&mut b, AliasPosition::Param(0), AliasPosition::Return);
        a.merge(&b);
        assert!(!a.overflow);
        assert_eq!(a.edges.len(), MAX_ALIAS_EDGES);

        // A genuinely new edge past the cap promotes to overflow.
        add(&mut b, AliasPosition::Param(50), AliasPosition::Return);
        a.merge(&b);
        assert!(a.overflow);
        assert_eq!(a.edges.len(), MAX_ALIAS_EDGES);
    }

    #[test]
    fn max_param_index_tracks_both_endpoints() {
        let mut s = PointsToSummary::empty();
        add(&mut s, AliasPosition::Param(0), AliasPosition::Param(3));
        add(&mut s, AliasPosition::Param(1), AliasPosition::Return);
        assert_eq!(s.max_param_index(), Some(3));
    }

    #[test]
    fn serde_round_trip_is_stable() {
        let mut s = PointsToSummary::empty();
        add(&mut s, AliasPosition::Param(0), AliasPosition::Param(1));
        add(&mut s, AliasPosition::Param(2), AliasPosition::Return);
        let json = serde_json::to_string(&s).unwrap();
        let back: PointsToSummary = serde_json::from_str(&json).unwrap();
        assert_eq!(s, back);
    }

    #[test]
    fn serde_default_decodes_empty_object() {
        let back: PointsToSummary = serde_json::from_str("{}").unwrap();
        assert!(back.is_empty());
    }

    #[test]
    fn returns_fresh_alloc_is_not_empty() {
        let mut s = PointsToSummary::empty();
        assert!(s.is_empty());
        s.returns_fresh_alloc = true;
        assert!(!s.is_empty());
    }

    #[test]
    fn merge_propagates_fresh_alloc_flag() {
        let mut a = PointsToSummary::empty();
        let mut b = PointsToSummary::empty();
        b.returns_fresh_alloc = true;
        a.merge(&b);
        assert!(a.returns_fresh_alloc);
    }

    #[test]
    fn returns_fresh_alloc_roundtrips() {
        let mut s = PointsToSummary::empty();
        s.returns_fresh_alloc = true;
        let json = serde_json::to_string(&s).unwrap();
        let back: PointsToSummary = serde_json::from_str(&json).unwrap();
        assert!(back.returns_fresh_alloc);
        assert_eq!(s, back);
    }
}
|