mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-12 19:55:14 +02:00
629 lines
25 KiB
Rust
629 lines
25 KiB
Rust
use crate::constraint::domain::ConstValue;
|
|
use crate::constraint::lower::ConditionExpr;
|
|
use crate::ssa::type_facts::TypeKind;
|
|
use petgraph::graph::NodeIndex;
|
|
use serde::{Deserialize, Serialize};
|
|
use smallvec::SmallVec;
|
|
use std::collections::{HashMap, HashSet};
|
|
|
|
/// Unique identifier for an SSA value (one per definition point).
|
|
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
|
|
pub struct SsaValue(pub u32);
|
|
|
|
/// Basic block identifier.
|
|
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
|
|
pub struct BlockId(pub u32);
|
|
|
|
/// Interned field-name identifier, scoped to a single [`SsaBody`].
|
|
///
|
|
/// Different bodies may assign different `FieldId`s to the same field name,
|
|
/// so callers MUST resolve through the owning body's [`FieldInterner`]
|
|
/// (`SsaBody::field_name`) before using the name in cross-body contexts
|
|
/// (e.g. summary serialization).
|
|
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
|
|
pub struct FieldId(pub u32);
|
|
|
|
impl FieldId {
|
|
/// Sentinel for the abstract "any element of a container" field.
|
|
/// Every numeric or dynamic index access (`arr[i]`, `arr.shift()`,
|
|
/// `map[k]`) projects through the same `Field(pt(container), ELEM)`
|
|
/// cell. `u32::MAX` is reserved; the per-body interner never
|
|
/// assigns it.
|
|
pub const ELEM: FieldId = FieldId(u32::MAX);
|
|
|
|
/// "Tainted at every field" wildcard sentinel, distinct from
|
|
/// [`Self::ELEM`] (which is container-element semantics: every
|
|
/// numeric/dynamic index access projects through it).
|
|
/// `ANY_FIELD` represents the case where a writeback-shaped sink
|
|
/// (`json.NewDecoder(r.Body).Decode(&dest)`,
|
|
/// `proto.Unmarshal(buf, &msg)`) taints the destination wholesale
|
|
/// without a per-field decomposition the caller can enumerate.
|
|
/// Read by [`SsaOp::FieldProj`] as a fallback when no specific
|
|
/// `(loc, *field)` cell exists, so subsequent struct-field reads
|
|
/// pick up the writeback's taint without over-tainting unrelated
|
|
/// containers' element cells. `u32::MAX - 1` is reserved
|
|
/// alongside `ELEM` and is similarly never assigned by the per-
|
|
/// body interner.
|
|
pub const ANY_FIELD: FieldId = FieldId(u32::MAX - 1);
|
|
}
|
|
|
|
/// Per-body interner for field names referenced by [`SsaOp::FieldProj`].
|
|
///
|
|
/// Names are deduped within a single SSA body: every distinct field-name
|
|
/// string is assigned a stable `FieldId(u32)` for the lifetime of the body.
|
|
/// The interner is serialized alongside the body so deserialization restores
|
|
/// IDs intact; cross-body summary code is responsible for resolving names
|
|
/// before passing them across body boundaries.
|
|
#[derive(Clone, Debug, Default, Serialize, Deserialize)]
|
|
pub struct FieldInterner {
|
|
/// Names indexed by `FieldId.0`.
|
|
names: Vec<String>,
|
|
/// Reverse lookup: name → existing FieldId.
|
|
#[serde(skip)]
|
|
lookup: HashMap<String, u32>,
|
|
}
|
|
|
|
impl FieldInterner {
|
|
/// Create an empty interner.
|
|
pub fn new() -> Self {
|
|
Self::default()
|
|
}
|
|
|
|
/// Intern a field name, returning its [`FieldId`]. Reuses the existing
|
|
/// id if the name has already been interned.
|
|
pub fn intern(&mut self, name: &str) -> FieldId {
|
|
if let Some(&id) = self.lookup.get(name) {
|
|
return FieldId(id);
|
|
}
|
|
let id = self.names.len() as u32;
|
|
self.names.push(name.to_string());
|
|
self.lookup.insert(name.to_string(), id);
|
|
FieldId(id)
|
|
}
|
|
|
|
/// Read-only lookup: returns the [`FieldId`] for `name` if it has
|
|
/// already been interned, or `None` otherwise.
|
|
///
|
|
/// Used by cross-call resolvers to avoid growing the caller's
|
|
/// interner with field names introduced solely by callee summaries
|
|
///, such cells would be write-only.
|
|
pub fn lookup(&self, name: &str) -> Option<FieldId> {
|
|
// Walk `names` directly so we don't require the post-deserialise
|
|
// `ensure_lookup()` rebuild before this method is callable.
|
|
// Callers usually own `&SsaBody`, interning was either done at
|
|
// lowering time or via `ensure_lookup` post-deserialise, so the
|
|
// hot path goes through the `lookup` table; the linear walk is
|
|
// a fallback for the (small) deserialised-but-not-rebuilt case.
|
|
if let Some(&id) = self.lookup.get(name) {
|
|
return Some(FieldId(id));
|
|
}
|
|
for (idx, n) in self.names.iter().enumerate() {
|
|
if n == name {
|
|
return Some(FieldId(idx as u32));
|
|
}
|
|
}
|
|
None
|
|
}
|
|
|
|
/// Resolve a [`FieldId`] back to its interned name.
|
|
pub fn resolve(&self, id: FieldId) -> &str {
|
|
&self.names[id.0 as usize]
|
|
}
|
|
|
|
/// Number of unique interned names.
|
|
pub fn len(&self) -> usize {
|
|
self.names.len()
|
|
}
|
|
|
|
/// Whether the interner contains no names.
|
|
pub fn is_empty(&self) -> bool {
|
|
self.names.is_empty()
|
|
}
|
|
|
|
/// Rebuild the reverse lookup after deserialization. Called lazily by
|
|
/// [`Self::ensure_lookup`] so deserialized interners can still be used
|
|
/// for further interning.
|
|
fn rebuild_lookup(&mut self) {
|
|
self.lookup.clear();
|
|
for (i, n) in self.names.iter().enumerate() {
|
|
self.lookup.entry(n.clone()).or_insert(i as u32);
|
|
}
|
|
}
|
|
|
|
/// Ensure the reverse lookup is populated (rebuilds after a serde
|
|
/// roundtrip when the lookup table was skipped).
|
|
pub fn ensure_lookup(&mut self) {
|
|
if self.lookup.len() != self.names.len() {
|
|
self.rebuild_lookup();
|
|
}
|
|
}
|
|
}
|
|
|
|
/// SSA instruction operation.
|
|
#[derive(Clone, Debug, Serialize, Deserialize)]
|
|
pub enum SsaOp {
|
|
/// Phi: merge values from predecessor blocks.
|
|
Phi(SmallVec<[(BlockId, SsaValue); 2]>),
|
|
/// Assignment: result depends on the listed SSA values.
|
|
Assign(SmallVec<[SsaValue; 4]>),
|
|
/// Function/method call.
|
|
///
|
|
/// `callee` is the canonical name SSA-time consumers should match on.
|
|
/// When SSA lowering decomposes a chained-receiver method call into a
|
|
/// `FieldProj` chain (e.g. `c.mu.Lock()` → `v_mu = FieldProj(v_c, "mu")`,
|
|
/// `Call("Lock", [v_mu])`), `callee` carries the bare method name
|
|
/// (`"Lock"`) and `callee_text` carries the original full path
|
|
/// (`Some("c.mu.Lock")`). When no decomposition happens, `callee_text`
|
|
/// is `None` and `callee` already holds the original textual form.
|
|
Call {
|
|
callee: String,
|
|
/// Original textual full path when SSA decomposed a chained receiver.
|
|
/// `None` when the callee was not rewritten, `callee` already holds
|
|
/// the source-level textual form.
|
|
///
|
|
/// **Debug / display only.** Analysis code must walk the SSA receiver
|
|
/// chain (through `FieldProj` ops) for precise field structure, or
|
|
/// use [`crate::labels::bare_method_name`] when only the terminal
|
|
/// method name is needed from a textual callee.
|
|
#[doc(hidden)]
|
|
#[serde(default)]
|
|
callee_text: Option<String>,
|
|
/// Per-argument SSA value uses.
|
|
args: Vec<SmallVec<[SsaValue; 2]>>,
|
|
/// Receiver SSA value (for method calls).
|
|
receiver: Option<SsaValue>,
|
|
},
|
|
/// Field projection: read field `field` of object `receiver`.
|
|
///
|
|
/// Models member-access expressions (`obj.field`) as a first-class SSA
|
|
/// op. Lowering walks the receiver tree so chained accesses like
|
|
/// `c.writer.header` produce a chain of `FieldProj` ops with explicit
|
|
/// per-step receivers, eliminating the textual-prefix parsing that
|
|
/// previously misclassified deep receivers (the gin/context.go FP).
|
|
///
|
|
/// `field` is interned in the owning [`SsaBody`]'s [`FieldInterner`].
|
|
/// `projected_type` carries the inferred type of the projected field
|
|
/// when known (populated by type-fact analysis), `None` otherwise.
|
|
FieldProj {
|
|
receiver: SsaValue,
|
|
field: FieldId,
|
|
projected_type: Option<TypeKind>,
|
|
},
|
|
/// Taint source introduction.
|
|
Source,
|
|
/// Constant / literal value (no taint).
|
|
/// The optional string carries the raw source text when captured during lowering.
|
|
Const(Option<String>),
|
|
/// Function parameter (positional). Index is the 0-based positional
|
|
/// parameter index, *excluding* any implicit receiver (`self`/`this`).
|
|
/// The receiver, when present, is represented by [`SsaOp::SelfParam`].
|
|
Param { index: usize },
|
|
/// Implicit method receiver (`self` in Rust/Python, `this` in
|
|
/// JS/TS/Java/PHP). Emitted in block 0 of a function body whenever the
|
|
/// body has a receiver (either an explicit `self` formal parameter or an
|
|
/// implicit `this` reference). Having a dedicated IR node keeps
|
|
/// receiver taint tracking entirely separate from positional-parameter
|
|
/// taint, eliminating off-by-receiver arithmetic at call sites.
|
|
SelfParam,
|
|
/// Catch-clause exception binding.
|
|
CatchParam,
|
|
/// Non-defining node (e.g. If condition evaluation, Entry, Exit).
|
|
Nop,
|
|
/// Sentinel for "no reaching definition on this control-flow edge".
|
|
///
|
|
/// Emitted by SSA lowering as a synthesized instruction in the entry
|
|
/// block and referenced from phi operands whose incoming edge does
|
|
/// not carry a definition of the phi's variable, e.g. a try/catch
|
|
/// rejoin where a variable is only defined on the normal path, or
|
|
/// an early-return branch on a later-defined variable.
|
|
///
|
|
/// Having an explicit value lets phis satisfy the invariant that
|
|
/// `phi.operands.len() == block.preds.len()` (one operand per
|
|
/// predecessor). Downstream analyses treat Undef as a
|
|
/// no-taint / unknown / bottom-of-the-lattice contribution: a phi
|
|
/// operand of Undef carries no caps, no concrete value, and no
|
|
/// abstract fact.
|
|
Undef,
|
|
}
|
|
|
|
/// A single SSA instruction.
|
|
#[derive(Clone, Debug, Serialize, Deserialize)]
|
|
pub struct SsaInst {
|
|
/// The SSA value defined by this instruction.
|
|
pub value: SsaValue,
|
|
/// The operation.
|
|
pub op: SsaOp,
|
|
/// The original CFG node this instruction was derived from.
|
|
pub cfg_node: NodeIndex,
|
|
/// Original variable name (for debugging and label lookups).
|
|
pub var_name: Option<String>,
|
|
/// Source byte span from the original file.
|
|
pub span: (usize, usize),
|
|
}
|
|
|
|
/// Basic block terminator.
|
|
#[derive(Clone, Debug, Serialize, Deserialize)]
|
|
pub enum Terminator {
|
|
Goto(BlockId),
|
|
Branch {
|
|
cond: NodeIndex,
|
|
true_blk: BlockId,
|
|
false_blk: BlockId,
|
|
/// Structured condition lowered from CFG metadata during SSA construction.
|
|
/// `None` when the condition could not be lowered (falls back to text-based
|
|
/// lowering in taint transfer).
|
|
condition: Option<Box<ConditionExpr>>,
|
|
},
|
|
/// Multi-way switch dispatch.
|
|
///
|
|
/// `targets` lists the per-case successor blocks (order matches the
|
|
/// source-order of cases in the switch); `default` is the fallback
|
|
/// branch taken when no case matches. Block `succs` remain the
|
|
/// authoritative flow set, the terminator is a structured summary.
|
|
///
|
|
/// Emitted only for switch-like dispatch whose semantics are
|
|
/// guaranteed-exclusive across cases (e.g. Go `switch`, Java
|
|
/// arrow-switch, Rust `match`). Fall-through switches (C, C++, Java
|
|
/// classic switch without `break`) continue to use the cascaded
|
|
/// `Branch` lowering because the precision advantage only holds when
|
|
/// cases are mutually exclusive.
|
|
Switch {
|
|
scrutinee: SsaValue,
|
|
targets: SmallVec<[BlockId; 4]>,
|
|
default: BlockId,
|
|
/// Per-target case literals, aligned 1:1 with `targets`.
|
|
///
|
|
/// `Some(c)` records the constant value the scrutinee must equal for
|
|
/// the corresponding target to be taken. `None` means the literal is
|
|
/// unknown, emitted for synthetic ≥3-way CFG fanouts or for case
|
|
/// patterns that aren't plain literals (OR-patterns, ranges, guards).
|
|
///
|
|
/// When omitted/empty (length zero), all targets behave as "unknown
|
|
/// literal", preserves backward compatibility with consumers that
|
|
/// only inspect `targets`/`default`.
|
|
#[serde(default)]
|
|
case_values: SmallVec<[Option<ConstValue>; 4]>,
|
|
},
|
|
Return(Option<SsaValue>),
|
|
Unreachable,
|
|
}
|
|
|
|
/// A basic block in SSA form.
|
|
#[derive(Clone, Debug, Serialize, Deserialize)]
|
|
pub struct SsaBlock {
|
|
pub id: BlockId,
|
|
/// Phi instructions (always at block start).
|
|
pub phis: Vec<SsaInst>,
|
|
/// Body instructions (after phis).
|
|
pub body: Vec<SsaInst>,
|
|
/// Block terminator.
|
|
pub terminator: Terminator,
|
|
/// Predecessor block IDs.
|
|
pub preds: SmallVec<[BlockId; 2]>,
|
|
/// Successor block IDs.
|
|
pub succs: SmallVec<[BlockId; 2]>,
|
|
}
|
|
|
|
/// Per-value definition metadata.
|
|
#[derive(Clone, Debug, Serialize, Deserialize)]
|
|
pub struct ValueDef {
|
|
/// Original variable name (if any).
|
|
pub var_name: Option<String>,
|
|
/// The CFG node where this value was defined.
|
|
pub cfg_node: NodeIndex,
|
|
/// The block containing the definition.
|
|
pub block: BlockId,
|
|
}
|
|
|
|
/// Complete SSA representation for a function/scope.
|
|
#[derive(Clone, Debug, Serialize, Deserialize)]
|
|
pub struct SsaBody {
|
|
/// All basic blocks, indexed by BlockId.
|
|
pub blocks: Vec<SsaBlock>,
|
|
/// Entry block.
|
|
pub entry: BlockId,
|
|
/// Per-SsaValue definition info, indexed by SsaValue.0.
|
|
pub value_defs: Vec<ValueDef>,
|
|
/// Map from original CFG NodeIndex to the primary SsaValue defined there.
|
|
pub cfg_node_map: std::collections::HashMap<NodeIndex, SsaValue>,
|
|
/// Exception edges: (source block, catch entry block).
|
|
/// Recorded during lowering when exception edges are stripped from the CFG.
|
|
/// Used by taint analysis to seed catch blocks with try-body taint state.
|
|
pub exception_edges: Vec<(BlockId, BlockId)>,
|
|
/// Per-body interner for [`SsaOp::FieldProj`] field names.
|
|
///
|
|
/// Empty until lowering emits FieldProj ops. Cross-body callers
|
|
/// (cross-file summaries, debug serialization) MUST resolve interned
|
|
/// ids through this interner before transporting field references
|
|
/// to other bodies.
|
|
#[serde(default)]
|
|
pub field_interner: FieldInterner,
|
|
/// Side-table mapping a synthetic base-update [`SsaOp::Assign`]'s
|
|
/// defined value back to the `(receiver, field)` pair it
|
|
/// represents. Populated by lowering at the `obj.f = rhs` synthesis
|
|
/// point so the taint engine can treat the synthetic assign as a
|
|
/// structural field WRITE.
|
|
///
|
|
/// Empty by default; only synthetic assigns whose enclosing source
|
|
/// statement was a dotted-path assignment (`a.b.c = …`) appear here.
|
|
/// Lookup is `O(log n)` worst case (`HashMap`), but the per-body
|
|
/// table is small (one entry per synthetic chain link).
|
|
///
|
|
/// Serialized via `#[serde(default)]` so pre-W1 SSA blobs decode
|
|
/// cleanly with an empty map (no migration needed).
|
|
#[serde(default)]
|
|
pub field_writes: HashMap<SsaValue, (SsaValue, FieldId)>,
|
|
/// SSA values that lowering injected for **free / closure-captured**
|
|
/// variables (variables referenced by the body but not declared as
|
|
/// formal parameters and not assigned within the body).
|
|
///
|
|
/// Lowering models every external use as an [`SsaOp::Param`] in block
|
|
/// 0 so the rename pass can reference it. Real formal parameters and
|
|
/// closure captures end up using the same op variant; this side-table
|
|
/// distinguishes the two so downstream analyses (in particular the
|
|
/// JS/TS handler-name auto-seed in
|
|
/// [`crate::taint::ssa_transfer`]) can avoid treating closure
|
|
/// captures as if they were the function's own parameters. Without
|
|
/// this distinction, a lambda body that references an out-of-scope
|
|
/// `userId` / `cmd` / `payload` would have the synthetic Param
|
|
/// auto-seeded as `UserInput`, producing a phantom source on the
|
|
/// enclosing function's declaration line.
|
|
///
|
|
/// `#[serde(default)]` for backward compatibility with summary blobs
|
|
/// produced before this field existed.
|
|
#[serde(default)]
|
|
pub synthetic_externals: HashSet<SsaValue>,
|
|
/// SSA values whose [`SsaOp::Assign`] is a slot-scoped binding from a
|
|
/// bare-array destructure rewrite (see `bare_array_ops` in
|
|
/// [`crate::ssa::lower`]). The Assign transfer arm in
|
|
/// [`crate::taint::ssa_transfer`] consults this set to skip the
|
|
/// `info.taint.labels` Source pickup that would otherwise bleed the
|
|
/// outer destructure node's Source label into the slot-scoped binding.
|
|
///
|
|
/// Operand union still runs normally, so transitive taint via an
|
|
/// inner ident (e.g. `helper(tainted_local)` in slot 1 of
|
|
/// `[req.body.other, helper(tainted_local)]`) propagates through the
|
|
/// Assign's operands without inheriting the outer-node Source.
|
|
///
|
|
/// Empty by default; only the per-slot kill arm in the bare-array
|
|
/// destructure lowering populates this set.
|
|
#[serde(default)]
|
|
pub slot_scoped_assigns: HashSet<SsaValue>,
|
|
}
|
|
|
|
impl SsaBody {
|
|
/// Get a block by its ID.
|
|
pub fn block(&self, id: BlockId) -> &SsaBlock {
|
|
&self.blocks[id.0 as usize]
|
|
}
|
|
|
|
/// Get a mutable block by its ID.
|
|
pub fn block_mut(&mut self, id: BlockId) -> &mut SsaBlock {
|
|
&mut self.blocks[id.0 as usize]
|
|
}
|
|
|
|
/// Total number of SSA values.
|
|
pub fn num_values(&self) -> usize {
|
|
self.value_defs.len()
|
|
}
|
|
|
|
/// Look up definition info for an SSA value.
|
|
pub fn def_of(&self, v: SsaValue) -> &ValueDef {
|
|
&self.value_defs[v.0 as usize]
|
|
}
|
|
|
|
/// Resolve a [`FieldId`] back to the interned field name within this body.
|
|
pub fn field_name(&self, id: FieldId) -> &str {
|
|
self.field_interner.resolve(id)
|
|
}
|
|
|
|
/// Intern a field name in this body's [`FieldInterner`], returning its
|
|
/// stable [`FieldId`].
|
|
pub fn intern_field(&mut self, name: &str) -> FieldId {
|
|
self.field_interner.intern(name)
|
|
}
|
|
}
|
|
|
|
impl SsaInst {
|
|
/// Iterate over the SSA values used (read) by this instruction.
|
|
///
|
|
/// Yields receiver/operand values for `Call`, `Phi`, `Assign`, and
|
|
/// `FieldProj`; nothing for leaf ops (`Const`, `Param`, `Source`, etc.).
|
|
/// Callers that need the values as a `Vec` should `.collect()`.
|
|
pub fn uses_iter(&self) -> SmallVec<[SsaValue; 4]> {
|
|
match &self.op {
|
|
SsaOp::Phi(operands) => operands.iter().map(|(_, v)| *v).collect(),
|
|
SsaOp::Assign(uses) => uses.iter().copied().collect(),
|
|
SsaOp::Call { args, receiver, .. } => {
|
|
let mut out: SmallVec<[SsaValue; 4]> = SmallVec::new();
|
|
if let Some(rv) = receiver {
|
|
out.push(*rv);
|
|
}
|
|
for arg in args {
|
|
out.extend(arg.iter().copied());
|
|
}
|
|
out
|
|
}
|
|
SsaOp::FieldProj { receiver, .. } => {
|
|
let mut out: SmallVec<[SsaValue; 4]> = SmallVec::new();
|
|
out.push(*receiver);
|
|
out
|
|
}
|
|
SsaOp::Source
|
|
| SsaOp::Const(_)
|
|
| SsaOp::Param { .. }
|
|
| SsaOp::SelfParam
|
|
| SsaOp::CatchParam
|
|
| SsaOp::Nop
|
|
| SsaOp::Undef => SmallVec::new(),
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Errors that can occur during SSA lowering.
|
|
#[derive(Debug, Clone)]
|
|
pub enum SsaError {
|
|
/// The CFG has no reachable nodes from the entry.
|
|
EmptyCfg,
|
|
/// Entry node not found in the CFG.
|
|
InvalidEntry,
|
|
}
|
|
|
|
impl std::fmt::Display for SsaError {
|
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
match self {
|
|
SsaError::EmptyCfg => write!(f, "CFG has no reachable nodes"),
|
|
SsaError::InvalidEntry => write!(f, "entry node not found in CFG"),
|
|
}
|
|
}
|
|
}
|
|
|
|
impl std::error::Error for SsaError {}
|
|
|
|
#[cfg(test)]
|
|
mod tests {
|
|
use super::*;
|
|
|
|
#[test]
|
|
fn field_interner_dedupes_names() {
|
|
let mut interner = FieldInterner::new();
|
|
let a = interner.intern("mu");
|
|
let b = interner.intern("mu");
|
|
let c = interner.intern("writer");
|
|
assert_eq!(a, b, "interning same name twice yields same id");
|
|
assert_ne!(a, c, "different names get different ids");
|
|
assert_eq!(interner.resolve(a), "mu");
|
|
assert_eq!(interner.resolve(c), "writer");
|
|
assert_eq!(interner.len(), 2);
|
|
}
|
|
|
|
#[test]
|
|
fn field_interner_serde_roundtrip_rebuilds_lookup() {
|
|
let mut interner = FieldInterner::new();
|
|
let a = interner.intern("mu");
|
|
let b = interner.intern("writer");
|
|
let json = serde_json::to_string(&interner).expect("serialize");
|
|
let mut restored: FieldInterner = serde_json::from_str(&json).expect("deserialize");
|
|
assert_eq!(restored.resolve(a), "mu");
|
|
assert_eq!(restored.resolve(b), "writer");
|
|
// After ensure_lookup, intern("mu") returns the original id (not a new one).
|
|
restored.ensure_lookup();
|
|
assert_eq!(restored.intern("mu"), a);
|
|
assert_eq!(restored.intern("header"), FieldId(2));
|
|
}
|
|
|
|
#[test]
|
|
fn field_proj_use_iter_includes_receiver() {
|
|
let inst = SsaInst {
|
|
value: SsaValue(3),
|
|
op: SsaOp::FieldProj {
|
|
receiver: SsaValue(1),
|
|
field: FieldId(0),
|
|
projected_type: None,
|
|
},
|
|
cfg_node: NodeIndex::new(0),
|
|
var_name: Some("c.mu".into()),
|
|
span: (0, 0),
|
|
};
|
|
let uses: Vec<SsaValue> = inst.uses_iter().into_iter().collect();
|
|
assert_eq!(uses, vec![SsaValue(1)]);
|
|
}
|
|
|
|
/// the [`FieldId::ELEM`] sentinel is
|
|
/// reserved for "any element of a container". The interner assigns
|
|
/// IDs monotonically from `0`, so the sentinel `u32::MAX` can only
|
|
/// collide if the body declares ~4 billion fields, a corner case
|
|
/// no realistic codebase reaches. Pin the contract with a stress
|
|
/// loop so future implementation drift can't silently shift IDs to
|
|
/// the sentinel value.
|
|
#[test]
|
|
fn field_interner_never_assigns_elem_sentinel() {
|
|
let mut interner = FieldInterner::new();
|
|
for i in 0..1024 {
|
|
let id = interner.intern(&format!("f{i}"));
|
|
assert_ne!(
|
|
id,
|
|
FieldId::ELEM,
|
|
"intern('f{i}') yielded the ELEM sentinel — invariant broken",
|
|
);
|
|
}
|
|
// Lookup of the sentinel name (used by W3 to round-trip
|
|
// container-element flow through summary) must NOT match a
|
|
// real interned name even when the same name is interned.
|
|
// The wire-format keeps `<elem>` as a *string marker*, it
|
|
// never goes through `intern`. Instead, callers compare
|
|
// explicitly against `FieldId::ELEM`.
|
|
assert_ne!(interner.intern("<elem>"), FieldId::ELEM);
|
|
}
|
|
|
|
/// A6: the `<elem>` marker round-trips through extraction →
|
|
/// SQLite → caller-side translation without colliding with a
|
|
/// caller-interned `<elem>` field. When a caller's body has its
|
|
/// own `<elem>` field, that gets a regular FieldId, distinct from
|
|
/// the sentinel.
|
|
#[test]
|
|
fn elem_marker_distinct_from_interner_assigned_id() {
|
|
let mut interner = FieldInterner::new();
|
|
let lit_elem = interner.intern("<elem>");
|
|
// Sentinel still compares equal to itself only.
|
|
assert_eq!(FieldId::ELEM, FieldId(u32::MAX));
|
|
assert_ne!(lit_elem, FieldId::ELEM);
|
|
// Resolve the literal-string id back to its interned name.
|
|
assert_eq!(interner.resolve(lit_elem), "<elem>");
|
|
}
|
|
|
|
#[test]
|
|
fn field_proj_serde_roundtrip_with_field_name() {
|
|
// Build a tiny body with one FieldProj op and check that the
|
|
// body's interner survives round-trip and the id resolves back
|
|
// to the original name.
|
|
let mut body = SsaBody {
|
|
blocks: vec![SsaBlock {
|
|
id: BlockId(0),
|
|
phis: vec![],
|
|
body: vec![],
|
|
terminator: Terminator::Return(None),
|
|
preds: SmallVec::new(),
|
|
succs: SmallVec::new(),
|
|
}],
|
|
entry: BlockId(0),
|
|
value_defs: vec![ValueDef {
|
|
var_name: Some("c".into()),
|
|
cfg_node: NodeIndex::new(0),
|
|
block: BlockId(0),
|
|
}],
|
|
cfg_node_map: HashMap::new(),
|
|
exception_edges: vec![],
|
|
field_interner: FieldInterner::new(),
|
|
field_writes: HashMap::new(),
|
|
synthetic_externals: HashSet::new(),
|
|
slot_scoped_assigns: HashSet::new(),
|
|
};
|
|
let fid = body.intern_field("mu");
|
|
body.blocks[0].body.push(SsaInst {
|
|
value: SsaValue(1),
|
|
op: SsaOp::FieldProj {
|
|
receiver: SsaValue(0),
|
|
field: fid,
|
|
projected_type: None,
|
|
},
|
|
cfg_node: NodeIndex::new(0),
|
|
var_name: Some("c.mu".into()),
|
|
span: (0, 0),
|
|
});
|
|
|
|
let json = serde_json::to_string(&body).expect("serialize body");
|
|
let restored: SsaBody = serde_json::from_str(&json).expect("deserialize body");
|
|
|
|
let inst = &restored.blocks[0].body[0];
|
|
match &inst.op {
|
|
SsaOp::FieldProj {
|
|
receiver, field, ..
|
|
} => {
|
|
assert_eq!(*receiver, SsaValue(0));
|
|
assert_eq!(restored.field_name(*field), "mu");
|
|
}
|
|
other => panic!("expected FieldProj, got {other:?}"),
|
|
}
|
|
}
|
|
}
|