mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-27 20:29:39 +02:00
Python fp and docs updtes (#58)
* refactor: Update comments for clarity and add expectations.json files for performance metrics * feat: Implement FP guard for JS/TS local-collection receivers to suppress missing ownership checks * feat: Enhance Rust parameter handling to classify local collections and prevent false ownership checks * refactor: Simplify code formatting for better readability in multiple files * refactor: Improve UTF-8 sequence length handling and enhance clarity in loop iteration * feat: Update Java and Python patterns to include new security rules * refactor: Improve comment clarity and consistency across multiple Rust files * refactor: Simplify code formatting for improved readability in integration tests and module files * refactor: Improve comment formatting and enhance clarity in assertions across multiple files
This commit is contained in:
parent
4db0805de6
commit
a438886217
291 changed files with 9485 additions and 3851 deletions
|
|
@ -1,9 +1,9 @@
|
|||
//! Taint event emission and conversion to [`crate::taint::Finding`].
|
||||
//!
|
||||
//! Extracted from the monolithic `ssa_transfer.rs`. Contains:
|
||||
//! * [`SsaTaintEvent`] — the raw event struct produced by the block-level
|
||||
//! * [`SsaTaintEvent`], the raw event struct produced by the block-level
|
||||
//! worklist each time a tainted value reaches a sink.
|
||||
//! * [`ssa_events_to_findings`] — event → `Finding` conversion with the
|
||||
//! * [`ssa_events_to_findings`], event → `Finding` conversion with the
|
||||
//! `primary_location` invariant and dedup.
|
||||
//! * Flow-path reconstruction helpers ([`reconstruct_flow_path`] and
|
||||
//! operand pickers).
|
||||
|
|
@ -38,14 +38,14 @@ pub struct SsaTaintEvent {
|
|||
/// `sink_caps`. When multiple [`SinkSite`]s for the same `(param_idx,
|
||||
/// cap mask)` match, the emission site produces one event per
|
||||
/// [`SinkSite`] so each downstream [`crate::taint::Finding`] carries a
|
||||
/// single primary attribution — the multi-primary case collapses to
|
||||
/// single primary attribution; the multi-primary case collapses to
|
||||
/// multiple single-primary events.
|
||||
///
|
||||
/// `None` for:
|
||||
/// * intra-procedural sinks (`uses_summary == false`), where the
|
||||
/// caller's sink span already names the dangerous instruction;
|
||||
/// * summary-resolved sinks whose callee summary carried only cap-only
|
||||
/// [`SinkSite`]s (no source coordinates — e.g. pass-2 transient
|
||||
/// [`SinkSite`]s (no source coordinates, e.g. pass-2 transient
|
||||
/// summaries or local `LocalFuncSummary`-only callees).
|
||||
pub primary_sink_site: Option<SinkSite>,
|
||||
}
|
||||
|
|
@ -79,7 +79,7 @@ pub(super) fn block_distance(ssa: &SsaBody, source_node: NodeIndex, sink_node: N
|
|||
}
|
||||
}
|
||||
}
|
||||
0 // unreachable or not connected — conservative default
|
||||
0 // unreachable or not connected, conservative default
|
||||
}
|
||||
|
||||
// ── Flow Path Reconstruction ─────────────────────────────────────────────
|
||||
|
|
@ -204,7 +204,7 @@ pub(super) fn reconstruct_flow_path(
|
|||
SsaOp::FieldProj { receiver, .. } => {
|
||||
// Treat field projection as a one-step assignment for
|
||||
// flow-step reconstruction: taint reaching `obj.f` came
|
||||
// from `obj`. Phase 4 will refine the witness rendering
|
||||
// from `obj`. The analysis may refine the witness rendering
|
||||
// to include the field name in the step.
|
||||
steps.push(FlowStepRaw {
|
||||
cfg_node: inst.cfg_node,
|
||||
|
|
@ -270,7 +270,7 @@ fn pick_tainted_operand_call(
|
|||
///
|
||||
/// Note: this invariant is intentionally independent of `uses_summary`.
|
||||
/// The taint-chain flag tracks summary-propagated *taint*, not summary-
|
||||
/// resolved *sinks* — a local source can reach a cross-file sink, so
|
||||
/// resolved *sinks*; a local source can reach a cross-file sink, so
|
||||
/// `primary_location.is_some()` does not imply `uses_summary == true`.
|
||||
pub fn ssa_events_to_findings(
|
||||
events: &[SsaTaintEvent],
|
||||
|
|
@ -329,7 +329,7 @@ pub fn ssa_events_to_findings(
|
|||
|
||||
// Data-integrity invariant: a populated primary_location must at least
|
||||
// carry resolved line coordinates. `file_rel` may legitimately be
|
||||
// empty — when the scan root is the caller file itself (single-file
|
||||
// empty: when the scan root is the caller file itself (single-file
|
||||
// scans), every namespace normalizes to `""` and the callee's site
|
||||
// inherits that empty path; consumers resolve it against the file
|
||||
// under analysis. Line==0 is the only filter-worthy invariant.
|
||||
|
|
@ -340,7 +340,7 @@ pub fn ssa_events_to_findings(
|
|||
|
||||
// Dedup key includes primary location so multi-site events that
|
||||
// share a single (source, sink) pair still produce distinct findings
|
||||
// — one per resolved callee-internal site.
|
||||
// (one per resolved callee-internal site).
|
||||
let loc_key = primary_location
|
||||
.as_ref()
|
||||
.map(|l| (l.file_rel.clone(), l.line, l.col));
|
||||
|
|
@ -374,6 +374,11 @@ pub fn ssa_events_to_findings(
|
|||
path_hash,
|
||||
finding_id: String::new(),
|
||||
alternative_finding_ids: smallvec::SmallVec::new(),
|
||||
// Per-event mask from the multi-gate dispatch, picks
|
||||
// exactly the cap that fired (e.g. `Cap::DATA_EXFIL`
|
||||
// for a `fetch` body-flow finding versus `Cap::SSRF`
|
||||
// for a URL-flow finding on the same call).
|
||||
effective_sink_caps: event.sink_caps & *caps,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,34 +1,10 @@
|
|||
//! Context-sensitive inline analysis — cache, body, and attribution types.
|
||||
//! Context-sensitive inline analysis: cache, body, and attribution types.
|
||||
//!
|
||||
//! Extracted from the monolithic `ssa_transfer.rs`. Contains:
|
||||
//! * [`ArgTaintSig`] — compact per-arg cap signature used as a cache key.
|
||||
//! * [`InlineResult`] / [`CachedInlineShape`] / [`ReturnShape`] — the
|
||||
//! callsite-adapted and callsite-agnostic inline-analysis result types.
|
||||
//! * [`InlineCache`] — the shared cache map keyed by
|
||||
//! `(FuncKey, ArgTaintSig)`.
|
||||
//! * [`CrossFileNodeMeta`] / [`CalleeSsaBody`] — the serde-able bodies
|
||||
//! persisted to SQLite for cross-file context-sensitive analysis.
|
||||
//! * [`populate_node_meta`] / [`rebuild_body_graph`] — bookkeeping for
|
||||
//! cross-file body proxy CFGs.
|
||||
//!
|
||||
//! The implementation functions (`inline_analyse_callee`,
|
||||
//! `apply_cached_shape`, `extract_inline_return_taint`) remain in the
|
||||
//! parent `mod.rs` because they depend tightly on the block worklist, the
|
||||
//! `run_ssa_taint_full` entry point, and the callee-resolution pipeline.
|
||||
//!
|
||||
//! # Cache key scope and origin attribution
|
||||
//!
|
||||
//! The inline-analysis cache below ([`InlineCache`]) is keyed by
|
||||
//! `(FuncKey, ArgTaintSig)`, where [`ArgTaintSig`] encodes **per-arg
|
||||
//! capability bits only** — not the identity of the source
|
||||
//! [`crate::taint::domain::TaintOrigin`]s that produced those caps. The
|
||||
//! stored value ([`CachedInlineShape`]) captures **only the structural**
|
||||
//! shape of the callee's return taint: return caps, callee-internal
|
||||
//! origins (from `Source` ops inside the callee body), and per-parameter
|
||||
//! provenance flags that record which formal parameters contributed to
|
||||
//! the return. Caller-specific origin identity is *not* stored — it is
|
||||
//! re-attributed at cache-apply time from the current call site's
|
||||
//! argument taint.
|
||||
//! The cache ([`InlineCache`]) is keyed by `(FuncKey, ArgTaintSig)`,
|
||||
//! where [`ArgTaintSig`] is per-arg cap bits only (not origin identity).
|
||||
//! Stored values ([`CachedInlineShape`]) capture the structural shape of
|
||||
//! the callee's return taint; caller-specific origins are re-attributed
|
||||
//! at apply time.
|
||||
|
||||
use crate::labels::Cap;
|
||||
use crate::ssa::ir::{SsaBody, Terminator};
|
||||
|
|
@ -42,61 +18,30 @@ use std::collections::HashMap;
|
|||
/// Maximum SSA blocks in a callee body before skipping inline analysis.
|
||||
pub(super) const MAX_INLINE_BLOCKS: usize = 500;
|
||||
|
||||
/// Compact cache key: per-arg-position cap bits (sorted, non-empty only).
|
||||
///
|
||||
/// Two calls with identical `ArgTaintSig` produce identical inline results
|
||||
/// for soundness purposes (return caps, callee-internal sink activations).
|
||||
/// Origin identity is **not** part of the key — see the module-level note
|
||||
/// above on origin-attribution non-determinism.
|
||||
/// Compact cache key: per-arg-position cap bits (sorted, non-empty
|
||||
/// only). Origin identity is not part of the key.
|
||||
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
|
||||
pub(crate) struct ArgTaintSig(pub(super) SmallVec<[(usize, u16); 4]>);
|
||||
|
||||
/// Call-site-adapted result of inline-analyzing a callee.
|
||||
///
|
||||
/// Constructed fresh per call site by `apply_cached_shape` from a stored
|
||||
/// [`CachedInlineShape`]; carries origins that point to the *current*
|
||||
/// caller's source chain, not to whichever caller first populated the
|
||||
/// cache entry.
|
||||
/// Call-site-adapted result of inline-analyzing a callee. Built fresh
|
||||
/// per call site so origins point to the current caller's chain.
|
||||
#[derive(Clone, Debug)]
|
||||
pub(crate) struct InlineResult {
|
||||
/// Taint on the return value after inline analysis.
|
||||
pub(super) return_taint: Option<VarTaint>,
|
||||
/// PathFact on the return value after inline analysis.
|
||||
///
|
||||
/// Non-top when the callee's body provably narrows the
|
||||
/// [`crate::abstract_interp::PathFact`] of the value it returns (for
|
||||
/// example, a `sanitize_path(s) -> Option<String>` helper that
|
||||
/// early-returns on `s.contains("..")` / `s.starts_with('/')`). At
|
||||
/// apply time the caller sets its call-result SSA value's PathFact to
|
||||
/// this narrowed fact, so downstream FILE_IO sinks see the sanitised
|
||||
/// axis regardless of whether a named label-rule exists for the
|
||||
/// helper. Top when the callee produces no narrowing — matches
|
||||
/// pre-PathFact behaviour exactly.
|
||||
/// PathFact on the return value. Non-top when the callee body
|
||||
/// provably narrows it (e.g. a `sanitize_path` early-returning on
|
||||
/// `s.contains("..")`).
|
||||
pub(super) return_path_fact: crate::abstract_interp::PathFact,
|
||||
/// Per-return-path decomposition of [`Self::return_path_fact`].
|
||||
///
|
||||
/// Non-empty when the callee has ≥2 distinct return blocks whose
|
||||
/// predicate gates differ. Match-arm-sensitive callers pick the
|
||||
/// entry whose `variant_inner_fact` matches the arm binding's
|
||||
/// variant; path-resolvable callers may refuse infeasible entries.
|
||||
/// Callers unable to distinguish paths still consult
|
||||
/// [`Self::return_path_fact`] (the join of all entries) and see
|
||||
/// pre-decomposition behaviour.
|
||||
/// Per-return-path decomposition of `return_path_fact`. Non-empty
|
||||
/// when the callee has ≥2 return blocks with different predicate
|
||||
/// gates.
|
||||
#[allow(dead_code)]
|
||||
pub(super) return_path_facts: SmallVec<[PathFactReturnEntry; 2]>,
|
||||
}
|
||||
|
||||
/// Structural (callsite-agnostic) summary of an inline-analyzed callee.
|
||||
///
|
||||
/// Stored in [`InlineCache`] in place of a fully-attributed `InlineResult`.
|
||||
/// Origin-identity information that depends on the caller's argument chain
|
||||
/// is *not* kept here; instead, [`ReturnShape::param_provenance`]
|
||||
/// records which callee parameter positions contributed seed taint to the
|
||||
/// return, and the actual caller origins are re-unioned in at apply time.
|
||||
///
|
||||
/// `None` means "this callee produced no return taint for the given
|
||||
/// argument shape". A cached `None` is still a meaningful result — it
|
||||
/// short-circuits re-analysis on subsequent calls with matching caps.
|
||||
/// Structural (callsite-agnostic) summary of an inline-analyzed
|
||||
/// callee. `None` means "no return taint for this arg shape"; it is
|
||||
/// still meaningful: it short-circuits subsequent calls with matching caps.
|
||||
#[derive(Clone, Debug)]
|
||||
pub(crate) struct CachedInlineShape(pub(super) Option<ReturnShape>);
|
||||
|
||||
|
|
@ -107,7 +52,7 @@ pub(crate) struct CachedInlineShape(pub(super) Option<ReturnShape>);
|
|||
/// origins. See the module-level note above on origin attribution.
|
||||
#[derive(Clone, Debug)]
|
||||
pub(crate) struct ReturnShape {
|
||||
/// Return value caps (cap bits only — structural).
|
||||
/// Return value caps (cap bits only, structural).
|
||||
pub(super) caps: Cap,
|
||||
/// Origins produced **inside the callee body** (e.g. `Source` op fired
|
||||
/// in the callee). `node` is set to a placeholder; at apply time the
|
||||
|
|
@ -115,31 +60,19 @@ pub(crate) struct ReturnShape {
|
|||
/// stable (from the callee CFG) and preserved as-is.
|
||||
pub(super) internal_origins: SmallVec<[TaintOrigin; 2]>,
|
||||
/// Bit i set = callee's `Param(i)` seed taint reached the return value.
|
||||
/// At apply time, caller's argument origins at matching positions are
|
||||
/// unioned into the applied `VarTaint`. Params beyond index 63 are
|
||||
/// dropped (matching `SmallBitSet` semantics); the capped case is rare
|
||||
/// and still yields cap-correct results.
|
||||
/// At apply time, caller arg origins at matching positions are
|
||||
/// unioned into the applied `VarTaint`. Params beyond 63 are
|
||||
/// dropped (matches `SmallBitSet`); rare and still cap-correct.
|
||||
pub(super) param_provenance: u64,
|
||||
/// Whether the receiver (`SelfParam`) seed taint flowed to the return.
|
||||
/// Whether the receiver (`SelfParam`) seed taint flowed to return.
|
||||
pub(super) receiver_provenance: bool,
|
||||
/// Whether the applied `VarTaint` should be tagged `uses_summary`.
|
||||
pub(super) uses_summary: bool,
|
||||
/// PathFact of the return value observed from the callee's exit
|
||||
/// abstract state. Cache-safe because the callee is inline-analysed
|
||||
/// with [`crate::abstract_interp::PathFact::top`] Param seeds — the
|
||||
/// resulting fact describes the callee's intrinsic narrowing (e.g.
|
||||
/// the `Some` arm of a `sanitize(..) -> Option<String>` body
|
||||
/// proves `dotdot = No`) and does not depend on caller-side
|
||||
/// narrowing of the argument's PathFact. Top when the callee does
|
||||
/// not narrow.
|
||||
/// PathFact of the return value, observed from the callee exit
|
||||
/// state under Top-seeded Params. Describes the callee's intrinsic
|
||||
/// narrowing.
|
||||
pub(super) return_path_fact: crate::abstract_interp::PathFact,
|
||||
/// Per-return-path [`PathFact`] decomposition of the return value.
|
||||
///
|
||||
/// Populated alongside [`Self::return_path_fact`] when the callee
|
||||
/// has ≥2 distinct return blocks with different predicate gates.
|
||||
/// Cache-safe for the same reason as `return_path_fact`: entries
|
||||
/// describe callee-intrinsic narrowing under Top-seeded Params.
|
||||
/// Empty when no per-path distinction was observed.
|
||||
/// Per-return-path decomposition of the return value. Populated
|
||||
/// when the callee has ≥2 return blocks with different predicates.
|
||||
pub(super) return_path_facts: SmallVec<[PathFactReturnEntry; 2]>,
|
||||
}
|
||||
|
||||
|
|
@ -151,50 +84,21 @@ impl CachedInlineShape {
|
|||
}
|
||||
}
|
||||
|
||||
/// Cache for context-sensitive inline analysis results.
|
||||
///
|
||||
/// Keyed by the callee's canonical [`FuncKey`] rather than a bare function
|
||||
/// name so that same-name definitions (e.g. two `process/1` methods on
|
||||
/// different classes in the same file) never share or overwrite each
|
||||
/// other's cache entries. Values are stored as [`CachedInlineShape`]; see
|
||||
/// the module-level note above for why origins are stripped from the
|
||||
/// cache value and re-attributed at apply time.
|
||||
/// Cache for context-sensitive inline analysis results, keyed by
|
||||
/// canonical [`FuncKey`] so same-name definitions in different scopes
|
||||
/// never collide.
|
||||
pub(crate) type InlineCache = HashMap<(FuncKey, ArgTaintSig), CachedInlineShape>;
|
||||
|
||||
/// Drop every entry from an inline cache, marking the start of a new
|
||||
/// convergence epoch.
|
||||
///
|
||||
/// Cross-file SCC fixed-point iteration runs pass 2 repeatedly until the
|
||||
/// merged summaries stop changing. Between iterations the callee-summary
|
||||
/// inputs to inline analysis may have changed, so results cached under a
|
||||
/// stale snapshot must not leak into the next iteration — otherwise the
|
||||
/// engine could converge to a non-fixed-point (reporting a taint result
|
||||
/// that would not reproduce on a fresh run of the same file order).
|
||||
///
|
||||
/// The per-file inline cache is already reconstructed fresh at the top of
|
||||
/// each [`crate::taint::analyse_file`] call, so in the current code this
|
||||
/// call is effectively a no-op plumbing hook. Keeping the method (instead
|
||||
/// of relying on ambient re-construction) makes the lifecycle explicit for
|
||||
/// any future refactor that moves the cache up into the SCC orchestrator.
|
||||
#[allow(dead_code)] // semantic hook; used by tests and future shared-cache refactor
|
||||
/// Drop every entry from the inline cache between SCC fixpoint
|
||||
/// iterations so stale results don't leak forward.
|
||||
#[allow(dead_code)]
|
||||
pub(crate) fn inline_cache_clear_epoch(cache: &mut InlineCache) {
|
||||
cache.clear();
|
||||
}
|
||||
|
||||
/// Set-equal fingerprint of an inline cache, used by the SCC orchestrator
|
||||
/// to detect when cross-file inline analysis has reached a fixed point
|
||||
/// alongside summary convergence.
|
||||
///
|
||||
/// Returns a `HashMap` mapping each `(FuncKey, ArgTaintSig)` cache key to
|
||||
/// the return-value capability bits of its inline result. `HashMap`
|
||||
/// equality is set-equal (unordered), so two caches with the same entries
|
||||
/// compare equal regardless of insertion order.
|
||||
///
|
||||
/// Origins are intentionally omitted — they are non-deterministic across
|
||||
/// callers with identical caps (see the module-level note on origin
|
||||
/// attribution) and would cause the fingerprint to oscillate without
|
||||
/// reflecting a real precision change.
|
||||
#[allow(dead_code)] // observability hook; used by tests and future shared-cache refactor
|
||||
/// Set-equal fingerprint of the inline cache, used by the SCC
|
||||
/// orchestrator to detect convergence.
|
||||
#[allow(dead_code)]
|
||||
pub(crate) fn inline_cache_fingerprint(
|
||||
cache: &InlineCache,
|
||||
) -> HashMap<(FuncKey, ArgTaintSig), u16> {
|
||||
|
|
@ -206,24 +110,11 @@ pub(crate) fn inline_cache_fingerprint(
|
|||
|
||||
/// CFG node metadata embedded in cross-file callee bodies.
|
||||
///
|
||||
/// ## Why a full [`crate::cfg::NodeInfo`] lives here
|
||||
///
|
||||
/// An earlier variant carried only the two fields the symex executor reads
|
||||
/// (`bin_op`, `labels`). That was sufficient for symex but not for the
|
||||
/// taint engine, which reads ~20 fields off `cfg[inst.cfg_node]` across
|
||||
/// `transfer_inst`, `collect_block_events`, `compute_succ_states`, and
|
||||
/// helpers (callee name, `arg_uses`, `arg_callees`, `call_ordinal`,
|
||||
/// `outer_callee`, `kwargs`, `arg_string_literals`, `ast.span`,
|
||||
/// `ast.enclosing_func`, `condition_*`, `all_args_literal`, `catch_param`,
|
||||
/// `parameterized_query`, `in_defer`, `cast_target_type`, `string_prefix`,
|
||||
/// `taint.uses`, `taint.defines`, `taint.extra_defines`,
|
||||
/// `taint.const_text`, …). Rather than shuttling each of those through a
|
||||
/// `CfgView` accessor at every callsite, we store a full serde-able
|
||||
/// [`crate::cfg::NodeInfo`] snapshot here so the indexed-scan path can
|
||||
/// rehydrate an equivalent `Cfg` on load (see [`rebuild_body_graph`]).
|
||||
/// Both scan paths then feed the same `&Cfg` into the taint engine, and
|
||||
/// cross-file inline fires regardless of whether the body came from pass
|
||||
/// 1 or from SQLite.
|
||||
/// Stores a full serde-able [`crate::cfg::NodeInfo`] snapshot rather
|
||||
/// than projecting individual fields, so the indexed-scan path can
|
||||
/// rehydrate an equivalent `Cfg` (see [`rebuild_body_graph`]) and feed
|
||||
/// the same `&Cfg` into the taint engine regardless of whether the
|
||||
/// body came from pass 1 or SQLite.
|
||||
#[derive(Clone, Debug, Default, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
|
||||
pub struct CrossFileNodeMeta {
|
||||
/// Full `NodeInfo` snapshot for this body-local NodeIndex.
|
||||
|
|
@ -268,7 +159,7 @@ pub fn populate_node_meta(body: &mut CalleeSsaBody, cfg: &crate::cfg::Cfg) -> bo
|
|||
// `compute_succ_states` via `cfg[*cond]`, so without it the synthesized
|
||||
// cross-file proxy CFG (`rebuild_body_graph`) ends up too small whenever
|
||||
// the callee body has any conditional branch whose `cond` index sits
|
||||
// past the maximum `inst.cfg_node` index — inline analysis then panics
|
||||
// past the maximum `inst.cfg_node` index; inline analysis then panics
|
||||
// with an out-of-bounds index.
|
||||
let mut referenced: Vec<NodeIndex> = Vec::new();
|
||||
for block in &body.ssa.blocks {
|
||||
|
|
@ -320,7 +211,7 @@ pub fn rebuild_body_graph(body: &mut CalleeSsaBody) -> bool {
|
|||
// index. We fill any unreferenced intermediate indices with
|
||||
// `NodeInfo::default()`.
|
||||
//
|
||||
// Walks both instruction `cfg_node`s and `Terminator::Branch.cond` —
|
||||
// Walks both instruction `cfg_node`s and `Terminator::Branch.cond`;
|
||||
// the latter is read by `compute_succ_states` via `cfg[*cond]`, so
|
||||
// missing it produces an OOB panic when a conditional branch's cond
|
||||
// node has a higher index than any `inst.cfg_node` in the body.
|
||||
|
|
@ -339,7 +230,7 @@ pub fn rebuild_body_graph(body: &mut CalleeSsaBody) -> bool {
|
|||
}
|
||||
}
|
||||
}
|
||||
// Also consider node_meta keys — they should be a subset of the
|
||||
// Also consider node_meta keys; they should be a subset of the
|
||||
// SSA-referenced indices, but be defensive.
|
||||
for &k in body.node_meta.keys() {
|
||||
if k > max_idx {
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -2,7 +2,7 @@
|
|||
//! the original monolithic `ssa_transfer.rs`.
|
||||
//!
|
||||
//! Contains:
|
||||
//! * [`SsaTaintState`] — the per-block lattice value with `values`,
|
||||
//! * [`SsaTaintState`], the per-block lattice value with `values`,
|
||||
//! `validated_must`/`validated_may`, `predicates`, `heap`, `path_env`,
|
||||
//! `abstract_state`.
|
||||
//! * [`BindingKey`] / [`seed_lookup`] for cross-body taint seeding.
|
||||
|
|
@ -25,7 +25,7 @@ use std::collections::HashMap;
|
|||
|
||||
// NOTE: The per-SSA-value origin cap used to be a hardcoded
|
||||
// `MAX_ORIGINS: usize = 4`. It is now governed by the stable
|
||||
// `analysis.engine.max_origins` option (default `32`) — see
|
||||
// `analysis.engine.max_origins` option (default `32`), see
|
||||
// `crate::utils::analysis_options` and [`effective_max_origins`]. The
|
||||
// test-only override below still short-circuits the config read so
|
||||
// `engine_notes_tests.rs` can force a tiny cap to trigger truncation
|
||||
|
|
@ -42,7 +42,7 @@ static WORKLIST_CAP_OVERRIDE: std::sync::atomic::AtomicUsize =
|
|||
std::sync::atomic::AtomicUsize::new(0);
|
||||
/// Records the MAX iteration count observed across every
|
||||
/// `run_ssa_taint_full` call since the most recent reset. Cheaper and
|
||||
/// more useful for regression tests than the last-call value — a cap
|
||||
/// more useful for regression tests than the last-call value: a cap
|
||||
/// hit anywhere in the scan is remembered.
|
||||
pub(super) static MAX_WORKLIST_ITERATIONS: std::sync::atomic::AtomicUsize =
|
||||
std::sync::atomic::AtomicUsize::new(0);
|
||||
|
|
@ -90,7 +90,7 @@ pub fn reset_worklist_observability() {
|
|||
/// force `OriginsTruncated` emission on small fixtures.
|
||||
static MAX_ORIGINS_OVERRIDE: std::sync::atomic::AtomicUsize =
|
||||
std::sync::atomic::AtomicUsize::new(0);
|
||||
/// Total number of origins dropped since the most recent reset — captured
|
||||
/// Total number of origins dropped since the most recent reset, captured
|
||||
/// from `merge_origins` and the post-hoc saturation scan. Used by tests
|
||||
/// to detect truncation events that don't propagate to a finding (e.g.
|
||||
/// when the cap is so tight no taint flow survives to emit a sink event).
|
||||
|
|
@ -136,7 +136,7 @@ pub fn reset_origins_observability() {
|
|||
thread_local! {
|
||||
/// Per-body engine-note collector. Cleared at the start of each
|
||||
/// `analyse_body_with_seed` invocation and drained after
|
||||
/// `run_ssa_taint_full` returns — notes are then attached to every
|
||||
/// `run_ssa_taint_full` returns; notes are then attached to every
|
||||
/// finding emitted from that body. Living as a thread-local avoids
|
||||
/// threading a `&RefCell` through the nearly-10-argument transfer
|
||||
/// struct; inline analysis recursion is intentionally allowed to
|
||||
|
|
@ -148,7 +148,7 @@ thread_local! {
|
|||
/// was suppressed by an SSA-engine path-safety proof (PathFact
|
||||
/// `dotdot=No && absolute=No`). Populated by `is_path_safe_for_sink`
|
||||
/// and consumed by the state-analysis pass to suppress
|
||||
/// `state-unauthed-access` on the same sink — when the taint engine
|
||||
/// `state-unauthed-access` on the same sink: when the taint engine
|
||||
/// has already proved the user-controlled input cannot escape into a
|
||||
/// privileged location, the auth concern on that sink is reduced.
|
||||
/// Reset at start of `analyse_file`, drained before state analysis.
|
||||
|
|
@ -156,7 +156,7 @@ thread_local! {
|
|||
RefCell::new(std::collections::HashSet::new());
|
||||
|
||||
/// File-level set of CFG sink spans where the SSA engine emitted an
|
||||
/// `all_validated` event — every tainted input to the sink passed
|
||||
/// `all_validated` event: every tainted input to the sink passed
|
||||
/// through a recognised validation/sanitisation predicate before
|
||||
/// reaching it. Distinct from `PATH_SAFE_SUPPRESSED_SPANS`, which
|
||||
/// is FILE_IO-scoped and feeds state analysis: this set is
|
||||
|
|
@ -167,7 +167,7 @@ thread_local! {
|
|||
///
|
||||
/// Without this signal the suppression gate has to fall back to
|
||||
/// "function emitted at least one taint-unsanitised-flow finding"
|
||||
/// or "function contains a labelled Sanitizer node" — both of
|
||||
/// or "function contains a labelled Sanitizer node", both of
|
||||
/// which miss validated/dominated/early-return safety where the
|
||||
/// engine cleared the flow without firing or hitting an explicit
|
||||
/// sanitiser.
|
||||
|
|
@ -227,7 +227,7 @@ pub fn take_path_safe_suppressed_spans() -> std::collections::HashSet<(usize, us
|
|||
|
||||
/// Record a sink CFG-node span where the SSA engine proved every
|
||||
/// tainted input was validated (`SsaTaintEvent::all_validated`).
|
||||
/// Cap-agnostic — fires for any sink the engine evaluated and cleared.
|
||||
/// Cap-agnostic: fires for any sink the engine evaluated and cleared.
|
||||
/// Consumed by `TaintSuppressionCtx::build` as positive evidence that
|
||||
/// taint analysis reached this line and proved safety, so AST-pattern
|
||||
/// findings on the same line can be suppressed without misclassifying
|
||||
|
|
@ -263,7 +263,7 @@ pub fn take_all_validated_spans() -> std::collections::HashSet<(usize, usize)> {
|
|||
/// into the seed map always specify the owning body's id; readers look
|
||||
/// up by the scope they know they want (typically their own
|
||||
/// `parent_body_id`, with a fallback to `BodyId(0)` for entries that
|
||||
/// the JS/TS two-level solve has re-keyed onto the top-level scope —
|
||||
/// the JS/TS two-level solve has re-keyed onto the top-level scope;
|
||||
/// see [`crate::taint::ssa_transfer::filter_seed_to_toplevel`]).
|
||||
#[derive(Debug, Clone, Hash, Eq, PartialEq)]
|
||||
pub struct BindingKey {
|
||||
|
|
@ -284,7 +284,7 @@ impl BindingKey {
|
|||
/// Look up a binding in a seed map.
|
||||
///
|
||||
/// Thin wrapper over [`HashMap::get`] retained for call-site readability
|
||||
/// — every seed entry is now exactly scoped to a single `(name,
|
||||
/// now that every seed entry is exactly scoped to a single `(name,
|
||||
/// BodyId)`, so the lookup is O(1) with no fallback. Writers that want
|
||||
/// cross-scope reachability must explicitly re-key their entries (see
|
||||
/// [`crate::taint::ssa_transfer::filter_seed_to_toplevel`]).
|
||||
|
|
@ -299,7 +299,7 @@ pub fn seed_lookup<'a>(
|
|||
|
||||
/// Compact key for a heap-field taint cell.
|
||||
///
|
||||
/// `(loc, field)` — `loc` is the abstract location of the *parent*
|
||||
/// `(loc, field)`: `loc` is the abstract location of the *parent*
|
||||
/// (interned by the body's [`crate::pointer::LocInterner`]), `field`
|
||||
/// is the [`FieldId`] of the projected field. The pair survives lattice
|
||||
/// joins / leq comparisons by `Ord`-derived sort.
|
||||
|
|
@ -309,16 +309,16 @@ pub struct FieldTaintKey {
|
|||
pub field: FieldId,
|
||||
}
|
||||
|
||||
/// Pointer-Phase 4 / W4: per-field-cell taint record.
|
||||
/// Per-field-cell taint record.
|
||||
///
|
||||
/// Carries the union of writers' taint for the abstract field cell plus
|
||||
/// two validation channels:
|
||||
/// * `validated_must` — set when *every* writer recorded a value that was
|
||||
/// * `validated_must`, set when *every* writer recorded a value that was
|
||||
/// `validated_must` in its own SSA scope. Lattice join intersects
|
||||
/// (`AND`) — matching the symbol-keyed [`SsaTaintState::validated_must`]
|
||||
/// (`AND`), matching the symbol-keyed [`SsaTaintState::validated_must`]
|
||||
/// semantics for "validated on every path".
|
||||
/// * `validated_may` — set when *any* writer recorded a `validated_may`
|
||||
/// value. Lattice join unions (`OR`) — matching the symbol-keyed
|
||||
/// * `validated_may`, set when *any* writer recorded a `validated_may`
|
||||
/// value. Lattice join unions (`OR`), matching the symbol-keyed
|
||||
/// [`SsaTaintState::validated_may`] semantics for "validated on some
|
||||
/// path".
|
||||
///
|
||||
|
|
@ -332,7 +332,7 @@ pub struct FieldCell {
|
|||
}
|
||||
|
||||
impl FieldCell {
|
||||
/// Construct a cell with no validation bits — convenience for the
|
||||
/// Construct a cell with no validation bits, convenience for the
|
||||
/// pre-W4 callers that don't propagate symbol-level validation.
|
||||
pub fn unvalidated(taint: VarTaint) -> Self {
|
||||
Self {
|
||||
|
|
@ -365,17 +365,17 @@ pub struct SsaTaintState {
|
|||
/// interpretation is disabled (`analysis.engine.abstract_interpretation
|
||||
/// = false`).
|
||||
pub abstract_state: Option<AbstractState>,
|
||||
/// Pointer-Phase 3: per-heap-field taint cells, keyed by
|
||||
/// Per-heap-field taint cells, keyed by
|
||||
/// `(parent_loc, field)`. Sorted by [`FieldTaintKey`] for O(n)
|
||||
/// merge-join. Populated only when the body's
|
||||
/// [`crate::pointer::PointsToFacts`] is available
|
||||
/// (`NYX_POINTER_ANALYSIS=1`); empty otherwise so the lattice join
|
||||
/// is a strict no-op for pointer-disabled runs. Field reads
|
||||
/// (`SsaOp::FieldProj`) consult the cells; field writes record into
|
||||
/// them. Cross-call propagation lands in Phase 5 via the
|
||||
/// them. Cross-call propagation lands during lowering via the
|
||||
/// field-granularity `PointsToSummary`.
|
||||
///
|
||||
/// Cell shape (Phase 4 / W4): [`FieldCell`] carries `taint` plus
|
||||
/// Cell shape: [`FieldCell`] carries `taint` plus
|
||||
/// `validated_must` / `validated_may` flags so validation flows
|
||||
/// through abstract field / element identity.
|
||||
pub field_taint: SmallVec<[(FieldTaintKey, FieldCell); 4]>,
|
||||
|
|
@ -403,7 +403,7 @@ impl SsaTaintState {
|
|||
}
|
||||
}
|
||||
|
||||
/// Pointer-Phase 3: read the field cell at `key`. Returns `None`
|
||||
/// Read the field cell at `key`. Returns `None`
|
||||
/// when no cell has been recorded (caller should treat as
|
||||
/// untainted). O(log n) on the sorted [`field_taint`] list.
|
||||
pub fn get_field(&self, key: FieldTaintKey) -> Option<&FieldCell> {
|
||||
|
|
@ -413,13 +413,13 @@ impl SsaTaintState {
|
|||
.map(|idx| &self.field_taint[idx].1)
|
||||
}
|
||||
|
||||
/// Pointer-Phase 3 / W4: union `t` into the field cell at `key`,
|
||||
/// Union `t` into the field cell at `key`,
|
||||
/// recording per-write `validated_must` / `validated_may` channels.
|
||||
///
|
||||
/// Maintains sorted invariant. No-op when `t.caps` is empty (so the
|
||||
/// lattice bottom stays `[]`). When the cell already exists, the
|
||||
/// validation channels merge with the lattice-join semantics —
|
||||
/// `must` AND-intersects, `may` OR-unions — matching the symbol-
|
||||
/// validation channels merge with the lattice-join semantics:
|
||||
/// `must` AND-intersects, `may` OR-unions, matching the symbol-
|
||||
/// keyed [`SsaTaintState::validated_must`] / `validated_may`
|
||||
/// semantics so a write coming through a non-validated path tears
|
||||
/// down `must` while preserving `may` of any earlier validated path.
|
||||
|
|
@ -563,15 +563,15 @@ impl Lattice for SsaTaintState {
|
|||
}
|
||||
}
|
||||
|
||||
/// Pointer-Phase 3 / W4: merge-join two sorted `field_taint` lists.
|
||||
/// Merge-join two sorted `field_taint` lists.
|
||||
/// Same shape as [`merge_join_ssa_vars`] but keyed on [`FieldTaintKey`]:
|
||||
/// * `taint.caps` — OR-union
|
||||
/// * `taint.origins` — merged with cap-respecting de-dup
|
||||
/// * `taint.uses_summary` — OR-union
|
||||
/// * `validated_must` — AND-intersect (matches the symbol-keyed
|
||||
/// * `taint.caps`, OR-union
|
||||
/// * `taint.origins`, merged with cap-respecting de-dup
|
||||
/// * `taint.uses_summary`, OR-union
|
||||
/// * `validated_must`, AND-intersect (matches the symbol-keyed
|
||||
/// `validated_must` lattice: a path that didn't validate this cell
|
||||
/// breaks the invariant)
|
||||
/// * `validated_may` — OR-union (any path's validation contributes)
|
||||
/// * `validated_may`, OR-union (any path's validation contributes)
|
||||
pub(super) fn merge_join_field_taint(
|
||||
a: &[(FieldTaintKey, FieldCell)],
|
||||
b: &[(FieldTaintKey, FieldCell)],
|
||||
|
|
@ -581,7 +581,7 @@ pub(super) fn merge_join_field_taint(
|
|||
while i < a.len() && j < b.len() {
|
||||
match a[i].0.cmp(&b[j].0) {
|
||||
std::cmp::Ordering::Less => {
|
||||
// Cell present only in `a` — counterpart in `b` is the
|
||||
// Cell present only in `a`, counterpart in `b` is the
|
||||
// lattice bottom (no validation, no taint), so:
|
||||
// must = a.must AND false = false
|
||||
// may = a.may OR false = a.may
|
||||
|
|
@ -637,11 +637,11 @@ pub(super) fn merge_join_field_taint(
|
|||
/// `a ≤ b` for sorted `field_taint` lists. Used by the convergence
|
||||
/// check in [`Lattice::leq`]. Per-cell criteria:
|
||||
///
|
||||
/// * `taint.caps` — `a ⊆ b` (sub-state on caps; matches per-SSA-value
|
||||
/// * `taint.caps`, `a ⊆ b` (sub-state on caps; matches per-SSA-value
|
||||
/// `ssa_vars_leq`).
|
||||
/// * `validated_must` — `a.must ⊇ b.must` (super-state on must; same
|
||||
/// * `validated_must`, `a.must ⊇ b.must` (super-state on must; same
|
||||
/// shape as the symbol-keyed `validated_must` leq).
|
||||
/// * `validated_may` — `a.may ⊆ b.may` (sub-state on may).
|
||||
/// * `validated_may`, `a.may ⊆ b.may` (sub-state on may).
|
||||
///
|
||||
/// When `b` lacks a key present in `a`, `b`'s side is the lattice
|
||||
/// bottom: no caps, no validation. `a`'s caps must also be empty
|
||||
|
|
@ -669,12 +669,12 @@ pub(super) fn field_taint_leq(
|
|||
if (ca.taint.caps - cb.taint.caps).bits() != 0 {
|
||||
return false;
|
||||
}
|
||||
// Must: a ⊇ b — every must-validated key in b is must-validated
|
||||
// Must: a ⊇ b, every must-validated key in b is must-validated
|
||||
// in a. Equivalently: !cb.must OR ca.must.
|
||||
if cb.validated_must && !ca.validated_must {
|
||||
return false;
|
||||
}
|
||||
// May: a ⊆ b — every may-validated key in a is may-validated
|
||||
// May: a ⊆ b, every may-validated key in a is may-validated
|
||||
// in b. Equivalently: !ca.may OR cb.may.
|
||||
if ca.validated_may && !cb.validated_may {
|
||||
return false;
|
||||
|
|
@ -735,7 +735,7 @@ pub(super) fn merge_join_ssa_vars(
|
|||
///
|
||||
/// Ordering is lexicographic over
|
||||
/// `(source_span_start, source_span_end, source_kind_tag, node_index)`.
|
||||
/// `source_span` is the most stable component across bodies — cross-body
|
||||
/// `source_span` is the most stable component across bodies, cross-body
|
||||
/// remapped origins carry the original byte span explicitly; intra-body
|
||||
/// origins default to `(0, 0)` and fall through to the secondary keys.
|
||||
///
|
||||
|
|
@ -760,7 +760,7 @@ fn origin_sort_key(o: &TaintOrigin) -> (usize, usize, u8, usize) {
|
|||
/// Bounded, deterministic insertion of an origin into a sorted origin
|
||||
/// set. Returns `true` when `new` was admitted (or de-duplicated against
|
||||
/// an existing entry), `false` when the cap forced a drop. On drop,
|
||||
/// the origin with the *largest* sort key is evicted first — the caller
|
||||
/// the origin with the *largest* sort key is evicted first, the caller
|
||||
/// sees a survivor set that depends only on the input multiset and
|
||||
/// [`effective_max_origins`], not on insertion order.
|
||||
///
|
||||
|
|
@ -774,7 +774,7 @@ pub(crate) fn push_origin_bounded(
|
|||
) -> bool {
|
||||
// Identity check: same node counts as the same origin. We keep
|
||||
// node-only dedup to match [`ssa_vars_leq`], which compares origin
|
||||
// sets by node membership — widening dedup here without tightening
|
||||
// sets by node membership, widening dedup here without tightening
|
||||
// there would break the monotonicity invariant.
|
||||
if target.iter().any(|o| o.node == new.node) {
|
||||
return true;
|
||||
|
|
@ -814,7 +814,7 @@ pub(crate) fn push_origin_bounded(
|
|||
target.insert(pos, new);
|
||||
true
|
||||
} else {
|
||||
// `new` itself is the worst — drop it instead of the survivor.
|
||||
// `new` itself is the worst, drop it instead of the survivor.
|
||||
false
|
||||
}
|
||||
}
|
||||
|
|
@ -829,7 +829,7 @@ pub(super) fn merge_origins(
|
|||
a: &SmallVec<[TaintOrigin; 2]>,
|
||||
b: &SmallVec<[TaintOrigin; 2]>,
|
||||
) -> SmallVec<[TaintOrigin; 2]> {
|
||||
// Seed the result with `a` — but re-sort defensively in case the
|
||||
// Seed the result with `a`, but re-sort defensively in case the
|
||||
// caller constructed `a` through non-bounded paths. Historically
|
||||
// every write goes through `push_origin_bounded` (or `merge_origins`
|
||||
// itself), so this resort is a no-op on the steady state but costs
|
||||
|
|
@ -911,7 +911,7 @@ pub(super) fn merge_join_ssa_predicates(
|
|||
mod origin_cap_tests {
|
||||
//! Tests for the deterministic, config-driven origin cap. These
|
||||
//! cover the behavior at the `push_origin_bounded` / `merge_origins`
|
||||
//! boundary — the end-to-end engine-note signal is exercised in
|
||||
//! boundary, the end-to-end engine-note signal is exercised in
|
||||
//! `tests/engine_notes_tests.rs`.
|
||||
|
||||
use super::*;
|
||||
|
|
@ -1037,7 +1037,7 @@ mod origin_cap_tests {
|
|||
fn effective_cap_reads_runtime_config_when_override_zero() {
|
||||
// Override takes priority; override=0 falls through to config.
|
||||
// `current()` returns the default (32) when no runtime is
|
||||
// installed — which is the state the rest of the test suite runs
|
||||
// installed, which is the state the rest of the test suite runs
|
||||
// under. Guard that the fallback path reaches 32.
|
||||
let _g = TEST_GUARD.lock().unwrap_or_else(|e| e.into_inner());
|
||||
set_max_origins_override(0);
|
||||
|
|
@ -1053,7 +1053,7 @@ mod origin_cap_tests {
|
|||
|
||||
#[cfg(test)]
|
||||
mod field_taint_tests {
|
||||
//! Pointer-Phase 3: tests for the heap-field taint cells on
|
||||
//! Tests for the heap-field taint cells on
|
||||
//! [`SsaTaintState`]. Cover get/add round-trip, lattice join
|
||||
//! (cap union + origin merge), and `leq` convergence semantics.
|
||||
use super::*;
|
||||
|
|
@ -1202,7 +1202,7 @@ mod field_taint_tests {
|
|||
assert!(cell.validated_must, "a.must AND b.must = true");
|
||||
assert!(cell.validated_may);
|
||||
|
||||
// Now make `b`'s validated_must false — must should drop to
|
||||
// Now make `b`'s validated_must false, must should drop to
|
||||
// false on the join, may stays at OR.
|
||||
let mut c = SsaTaintState::initial();
|
||||
c.add_field(k, taint(Cap::ENV_VAR), false, true);
|
||||
|
|
@ -1213,7 +1213,7 @@ mod field_taint_tests {
|
|||
}
|
||||
|
||||
/// W4 audit: `merge_join_field_taint` OR-unions `validated_may`
|
||||
/// — any path's may-validation contributes to the joined cell.
|
||||
/// (i.e. any path's may-validation contributes to the joined cell).
|
||||
#[test]
|
||||
fn lattice_validated_may_unions_on_join() {
|
||||
let k = key(1, 7);
|
||||
|
|
@ -1275,7 +1275,7 @@ mod field_taint_tests {
|
|||
a.leq(&b),
|
||||
"must super-state and equal caps: a ≤ b should hold"
|
||||
);
|
||||
// Reverse: b.must=false, a.must=true — for b ≤ a, we need
|
||||
// Reverse: b.must=false, a.must=true, for b ≤ a, we need
|
||||
// b.must ⊇ a.must which is false ⊇ true = false. So b ≤ a
|
||||
// must fail.
|
||||
assert!(!b.leq(&a), "b lacks the must invariant a holds");
|
||||
|
|
@ -1289,7 +1289,7 @@ mod field_taint_tests {
|
|||
assert!(!a2.leq(&b2), "a.may=true is NOT ⊆ b.may=false");
|
||||
}
|
||||
|
||||
/// Pointer-Phase 3 / A8 audit: the field_taint lattice is monotone
|
||||
/// The field_taint lattice is monotone
|
||||
/// and converges under a deterministic enumeration of inputs.
|
||||
/// Caps grow (OR), `uses_summary` grows (OR), origins grow modulo
|
||||
/// the cap (merge_origins is bounded). Joins must:
|
||||
|
|
@ -1409,7 +1409,7 @@ mod field_taint_tests {
|
|||
|
||||
/// `field_taint_leq` is the soundness gate for worklist
|
||||
/// convergence: once `next ≤ acc`, the worklist halts. Pin that
|
||||
/// `leq` is consistent with `join` — i.e. `s.leq(s.join(t))` holds
|
||||
/// `leq` is consistent with `join`, i.e. `s.leq(s.join(t))` holds
|
||||
/// for any `s, t`. Without this, the worklist could loop
|
||||
/// indefinitely on inputs whose join produces a state not
|
||||
/// dominated by both inputs.
|
||||
|
|
|
|||
|
|
@ -1,11 +1,11 @@
|
|||
//! SSA function-summary and container-flow extraction.
|
||||
//!
|
||||
//! Extracted from the monolithic `ssa_transfer.rs`. Contains:
|
||||
//! * [`extract_ssa_func_summary`] — runs per-parameter taint probes and
|
||||
//! * [`extract_ssa_func_summary`], runs per-parameter taint probes and
|
||||
//! synthesises an [`crate::summary::ssa_summary::SsaFuncSummary`] with
|
||||
//! source caps, return transforms, per-path transforms, and sink site
|
||||
//! attribution.
|
||||
//! * [`extract_container_flow_summary`] — structural scan for
|
||||
//! * [`extract_container_flow_summary`], structural scan for
|
||||
//! `param_container_to_return` + `param_to_container_store` pairs.
|
||||
//! * Private helpers for predicate-hash summarisation, abstract-transfer
|
||||
//! derivation, callback source detection, and return-type inference.
|
||||
|
|
@ -123,15 +123,15 @@ pub fn extract_ssa_func_summary_full(
|
|||
.collect();
|
||||
|
||||
// Collect all param SSA values to exclude from return cap collection.
|
||||
// Param values persist with their seeded taint throughout the function —
|
||||
// Param values persist with their seeded taint throughout the function;
|
||||
// we only want caps on derived values (call results, assigns) at return.
|
||||
let all_param_values: std::collections::HashSet<SsaValue> =
|
||||
param_info.iter().map(|(_, _, v)| *v).collect();
|
||||
|
||||
// Per-return-block observation captured alongside the aggregate return
|
||||
// caps. Each entry records one return block's exit state — caps
|
||||
// caps. Each entry records one return block's exit state, caps
|
||||
// contributed on that path, path-predicate hash, known_true/false bits,
|
||||
// and the return SSA value's abstract fact — so the per-param loop can
|
||||
// and the return SSA value's abstract fact, so the per-param loop can
|
||||
// emit one [`ReturnPathTransform`] per distinct predicate gate.
|
||||
struct ReturnBlockObs {
|
||||
/// Caps at the return SSA value (or joined live values for
|
||||
|
|
@ -141,7 +141,7 @@ pub fn extract_ssa_func_summary_full(
|
|||
/// (passthrough fallback).
|
||||
param_caps: Cap,
|
||||
/// Deterministic hash of the predicate gate at this return.
|
||||
/// `0` means "no predicate gate" — an unguarded return.
|
||||
/// `0` means "no predicate gate", an unguarded return.
|
||||
predicate_hash: u64,
|
||||
/// `PredicateSummary::known_true` bits intersected across all
|
||||
/// tracked variables at this return. Encoded via
|
||||
|
|
@ -268,7 +268,7 @@ pub fn extract_ssa_func_summary_full(
|
|||
}
|
||||
}
|
||||
} else {
|
||||
// Return(None): implicit return — fall back to all live values.
|
||||
// Return(None): implicit return, fall back to all live values.
|
||||
for (val, taint) in &exit.values {
|
||||
if all_param_values.contains(val) {
|
||||
block_param_caps |= taint.caps;
|
||||
|
|
@ -348,7 +348,7 @@ pub fn extract_ssa_func_summary_full(
|
|||
|
||||
// Per-return-path PathFact decomposition derived from the baseline
|
||||
// probe (no seeded taint). Abstract facts on the return rv are
|
||||
// independent of taint seeding — they describe the function's
|
||||
// independent of taint seeding, they describe the function's
|
||||
// intrinsic narrowing, so the baseline run captures them without
|
||||
// per-param noise.
|
||||
//
|
||||
|
|
@ -388,7 +388,7 @@ pub fn extract_ssa_func_summary_full(
|
|||
let mut param_to_sink: Vec<(usize, SmallVec<[SinkSite; 1]>)> = Vec::new();
|
||||
let mut param_to_sink_param = Vec::new();
|
||||
// Per-param return-path decomposition. Populated only when the param
|
||||
// has ≥2 distinct return-block predicate hashes — a single-return-path
|
||||
// has ≥2 distinct return-block predicate hashes, a single-return-path
|
||||
// callee is already precise via `param_to_return`.
|
||||
let mut param_return_paths: Vec<(
|
||||
usize,
|
||||
|
|
@ -417,7 +417,7 @@ pub fn extract_ssa_func_summary_full(
|
|||
// expressions (e.g. `file._source.uri`) as their own
|
||||
// [`SsaOp::Param`] ops with composite `var_name`s like
|
||||
// `"file._source.uri"`. These phantom Params are the values
|
||||
// actually used as call arguments — not the formal-param SSA
|
||||
// actually used as call arguments, not the formal-param SSA
|
||||
// value the seed targets. Without this, the per-param probe
|
||||
// misses cross-call sinks because the call's arg SSA value is
|
||||
// a phantom Param with no seed entry, so `transfer_inst::Param`
|
||||
|
|
@ -447,7 +447,7 @@ pub fn extract_ssa_func_summary_full(
|
|||
|
||||
let (return_caps, events, _, per_return_obs) = run_probe(seed);
|
||||
|
||||
// Subtract baseline source_caps — we only want param-contributed caps
|
||||
// Subtract baseline source_caps, we only want param-contributed caps
|
||||
let param_return_caps = return_caps & !source_caps;
|
||||
|
||||
if !param_return_caps.is_empty() {
|
||||
|
|
@ -464,7 +464,7 @@ pub fn extract_ssa_func_summary_full(
|
|||
// observed return block, derive a `ReturnPathTransform` mirroring
|
||||
// the aggregate logic (prefer derived caps, fall back to param
|
||||
// caps, strip baseline source caps). Only emit when ≥2 distinct
|
||||
// predicate hashes are present — a single-hash summary adds no
|
||||
// predicate hashes are present, a single-hash summary adds no
|
||||
// signal over the aggregate `param_to_return`.
|
||||
if per_return_obs.len() >= 2 {
|
||||
let mut per_path: SmallVec<[crate::summary::ssa_summary::ReturnPathTransform; 2]> =
|
||||
|
|
@ -477,7 +477,7 @@ pub fn extract_ssa_func_summary_full(
|
|||
};
|
||||
let block_contributed = block_return_caps & !source_caps;
|
||||
let transform_kind = if block_contributed.is_empty() {
|
||||
// No caps on this path — param does not reach return
|
||||
// No caps on this path, param does not reach return
|
||||
// under this predicate. A `StripBits(all)` records
|
||||
// "all bits cleared" so downstream join preserves the
|
||||
// disparity with other paths.
|
||||
|
|
@ -513,9 +513,31 @@ pub fn extract_ssa_func_summary_full(
|
|||
}
|
||||
}
|
||||
|
||||
// Collect sink caps + primary-location sites from events + per-arg-position detail
|
||||
// Collect sink caps + primary-location sites from events + per-arg-position detail.
|
||||
//
|
||||
// Skip events flagged `all_validated`: every tainted SSA value
|
||||
// that reached the sink was already proved validated by a
|
||||
// dominating predicate (AllowlistCheck / TypeCheck /
|
||||
// ValidationCall, including the indirect-validator branch
|
||||
// narrowing for `validate*` / `is_valid*` callees). Those
|
||||
// events would have been dropped by `ssa_events_to_findings` at
|
||||
// the per-file finding step; carrying them into
|
||||
// `param_to_sink` / `param_to_sink_param` re-publishes a sink
|
||||
// attribution callers can no longer suppress, because the
|
||||
// caller can't see the validator that lives inside the
|
||||
// callee body.
|
||||
//
|
||||
// Strict-additive: `all_validated` is set only when every
|
||||
// tainted operand at the sink has its `var_name` in
|
||||
// `state.validated_may`, single-path single-validator helpers
|
||||
// cleanly skip; mixed-tainted-with-some-unvalidated events
|
||||
// still propagate. Closes the helper-summary precision gap
|
||||
// surfaced by Novu CVE GHSA-4x48-cgf9-q33f.
|
||||
let mut param_sites: SmallVec<[SinkSite; 1]> = SmallVec::new();
|
||||
for event in &events {
|
||||
if event.all_validated {
|
||||
continue;
|
||||
}
|
||||
for pos in extract_sink_arg_positions(event, ssa) {
|
||||
param_to_sink_param.push((idx, pos, event.sink_caps));
|
||||
}
|
||||
|
|
@ -601,14 +623,14 @@ pub fn extract_ssa_func_summary_full(
|
|||
|
||||
// Per-parameter abstract-domain transfers.
|
||||
//
|
||||
// Derived structurally from the SSA body — no additional taint probes.
|
||||
// Derived structurally from the SSA body, no additional taint probes.
|
||||
// Three-step inference per parameter:
|
||||
// 1. Identity: return SSA value at every return block traces back to
|
||||
// this parameter (possibly through assigns / phi merges all feeding
|
||||
// from the same param).
|
||||
// 2. Callee-intrinsic bound: baseline `return_abstract` carries a
|
||||
// concrete fact (bounded interval or known prefix) that holds
|
||||
// regardless of caller input — record it once per parameter as
|
||||
// regardless of caller input, record it once per parameter as
|
||||
// `Clamped` / `LiteralPrefix` so the caller sees the bound even
|
||||
// when it has no abstract info on its own argument.
|
||||
// 3. Top: default; the entry is omitted (empty transfer is meaningless).
|
||||
|
|
@ -630,14 +652,14 @@ pub fn extract_ssa_func_summary_full(
|
|||
param_return_paths,
|
||||
return_path_facts,
|
||||
points_to,
|
||||
// Pointer-Phase 5 extension — empty until the field-granularity
|
||||
// extension, empty until the field-granularity
|
||||
// extractor is wired (`NYX_POINTER_ANALYSIS=1` only). Default
|
||||
// path stays bit-identical to today.
|
||||
field_points_to: crate::summary::points_to::FieldPointsToSummary::empty(),
|
||||
// Populated post-extraction in
|
||||
// `taint::lower_all_functions_from_bodies` once SSA optimisation
|
||||
// has computed `opt.type_facts`. Empty here means the
|
||||
// extractor itself doesn't carry receiver-type info — the
|
||||
// extractor itself doesn't carry receiver-type info, the
|
||||
// caller patches it in.
|
||||
typed_call_receivers: Vec::new(),
|
||||
}
|
||||
|
|
@ -699,14 +721,14 @@ pub(super) fn summarise_return_predicates(state: &SsaTaintState) -> (u64, u8, u8
|
|||
///
|
||||
/// `return_abstract` is the callee's intrinsic baseline (from the no-seed
|
||||
/// probe). When present, it describes a fact that holds for the return
|
||||
/// regardless of parameter input — so it can be attached as a
|
||||
/// regardless of parameter input, so it can be attached as a
|
||||
/// `Clamped` / `LiteralPrefix` transform to every parameter that flows to
|
||||
/// the return.
|
||||
///
|
||||
/// Identity detection is structural: walk the return values back through
|
||||
/// [`SsaOp::Assign`] / [`SsaOp::Phi`] chains (bounded) and check whether
|
||||
/// every leaf resolves to the same [`SsaOp::Param`]. The trace is cheap
|
||||
/// and can only produce `Identity` for passthrough callees — anything
|
||||
/// and can only produce `Identity` for passthrough callees, anything
|
||||
/// more complex degrades to the baseline fact or `Top`.
|
||||
fn derive_abstract_transfer(
|
||||
ssa: &SsaBody,
|
||||
|
|
@ -780,7 +802,7 @@ fn derive_abstract_transfer(
|
|||
}
|
||||
|
||||
// Derive a baseline-invariant transform from `return_abstract`. This is
|
||||
// the "callee intrinsic" fact that always holds — each parameter that
|
||||
// the "callee intrinsic" fact that always holds, each parameter that
|
||||
// flows to the return gets it attached as the conservative transfer.
|
||||
let baseline_invariant: Option<AbstractTransfer> = return_abstract.map(|av| {
|
||||
let interval = match (av.interval.lo, av.interval.hi) {
|
||||
|
|
@ -805,7 +827,7 @@ fn derive_abstract_transfer(
|
|||
} else if let Some(base) = baseline_invariant.as_ref() {
|
||||
// Baseline intrinsic bound applies to every parameter that could
|
||||
// reach the return. We conservatively attach it to all params
|
||||
// — at apply time the caller meets it with the real return
|
||||
// since at apply time the caller meets it with the real return
|
||||
// abstract (also from this same summary), so double-counting
|
||||
// would collapse to the tighter of the two.
|
||||
transfer = base.clone();
|
||||
|
|
@ -879,7 +901,7 @@ fn infer_summary_return_type(
|
|||
lang: Lang,
|
||||
) -> Option<crate::ssa::type_facts::TypeKind> {
|
||||
// Find blocks with Return terminators, then look at the last defined value
|
||||
// in those blocks — if it's a Call with a known constructor, that's our type.
|
||||
// in those blocks, if it's a Call with a known constructor, that's our type.
|
||||
for block in &ssa.blocks {
|
||||
if !matches!(block.terminator, Terminator::Return(_)) {
|
||||
continue;
|
||||
|
|
@ -965,7 +987,7 @@ pub(crate) fn extract_container_flow_summary(
|
|||
// `trace_to_param` will happily return any `SsaOp::Param { index }`, but
|
||||
// scoped lowering synthesises `Param` ops for external captures (module
|
||||
// imports, free identifiers) at indices beyond the formal parameter count.
|
||||
// Those must not enter the summary — the key's arity only covers formal
|
||||
// Those must not enter the summary, the key's arity only covers formal
|
||||
// params, and an out-of-range index trips `ssa_summary_fits_arity`, forcing
|
||||
// the reconciliation probe to generate a synthetic disambiguator that no
|
||||
// caller will ever look up.
|
||||
|
|
@ -1035,7 +1057,7 @@ pub(crate) fn extract_container_flow_summary(
|
|||
};
|
||||
|
||||
// Trace container to positional param (SelfParam → None, so
|
||||
// when the container is the receiver we skip — the caller
|
||||
// when the container is the receiver we skip, the caller
|
||||
// tracks that via `receiver_to_container_store` if needed).
|
||||
// Same arity filter as above: reject synthetic Param ops that
|
||||
// were injected for free captures.
|
||||
|
|
|
|||
|
|
@ -221,7 +221,7 @@ mod cross_file_tests {
|
|||
mod inline_cache_epoch_tests {
|
||||
//! Hooks for cross-file SCC joint fixed-point iteration.
|
||||
//!
|
||||
//! These do not exercise the full inline pipeline — they lock down the
|
||||
//! These do not exercise the full inline pipeline, they lock down the
|
||||
//! semantic contract of [`inline_cache_clear_epoch`] and
|
||||
//! [`inline_cache_fingerprint`] so the SCC orchestrator can rely on:
|
||||
//!
|
||||
|
|
@ -229,7 +229,7 @@ mod inline_cache_epoch_tests {
|
|||
//! * `fingerprint` is deterministic across equivalent caches (same
|
||||
//! keys → same bytes). Two caches with identical entries produce
|
||||
//! identical fingerprints regardless of insertion order.
|
||||
//! * `fingerprint` changes when return caps change — the signal the
|
||||
//! * `fingerprint` changes when return caps change, the signal the
|
||||
//! orchestrator will use to detect inline-cache convergence.
|
||||
|
||||
use super::super::*;
|
||||
|
|
@ -675,7 +675,7 @@ mod worklist_tests {
|
|||
|
||||
#[test]
|
||||
fn dense_successors_no_duplicates() {
|
||||
// Many successors, some repeated — old O(n) contains() would be slow here
|
||||
// Many successors, some repeated, old O(n) contains() would be slow here
|
||||
let mut wl = VecDeque::new();
|
||||
let mut in_wl = HashSet::new();
|
||||
|
||||
|
|
@ -735,8 +735,8 @@ mod primary_sink_location_tests {
|
|||
//! [`SsaTaintEvent::primary_sink_site`] →
|
||||
//! [`crate::taint::Finding::primary_location`].
|
||||
//!
|
||||
//! The test is deliberately low-level — it wires up synthetic SSA and
|
||||
//! drives the three emission stages directly — so any future refactor
|
||||
//! The test is deliberately low-level, it wires up synthetic SSA and
|
||||
//! drives the three emission stages directly, so any future refactor
|
||||
//! that drops the site on the floor between stages fails here rather
|
||||
//! than only at the corpus/benchmark layer.
|
||||
use super::super::*;
|
||||
|
|
@ -841,7 +841,7 @@ mod primary_sink_location_tests {
|
|||
/// If this fails, something on the summary→event→finding path
|
||||
/// (`pick_primary_sink_sites`, `emit_ssa_taint_events`, or
|
||||
/// `ssa_events_to_findings`) has silently stopped forwarding
|
||||
/// coordinates. Fixing that path — not this test — is the right
|
||||
/// coordinates. Fixing that path, not this test, is the right
|
||||
/// response.
|
||||
#[test]
|
||||
fn ssa_summary_sinksite_surfaces_as_finding_primary_location() {
|
||||
|
|
@ -863,7 +863,7 @@ mod primary_sink_location_tests {
|
|||
};
|
||||
|
||||
// Drive the three emission stages with the summary's own
|
||||
// `param_to_sink` — that is what summary resolution feeds in the
|
||||
// `param_to_sink`, that is what summary resolution feeds in the
|
||||
// real pipeline.
|
||||
let tainted: Vec<(SsaValue, Cap, SmallVec<[TaintOrigin; 2]>)> = vec![(
|
||||
SsaValue(0),
|
||||
|
|
@ -944,7 +944,7 @@ mod goto_succ_propagation_tests {
|
|||
|
||||
#[test]
|
||||
fn goto_propagates_to_every_succ_on_three_way_collapse() {
|
||||
// Build a block with Terminator::Goto(1) but succs = [1, 2, 3] — the
|
||||
// Build a block with Terminator::Goto(1) but succs = [1, 2, 3], the
|
||||
// shape lowering emits for a 3-way fanout.
|
||||
let block = SsaBlock {
|
||||
id: BlockId(0),
|
||||
|
|
@ -1001,7 +1001,7 @@ mod goto_succ_propagation_tests {
|
|||
pointer_facts: None,
|
||||
};
|
||||
|
||||
// A non-bottom exit state — the test only cares that *every* succ
|
||||
// A non-bottom exit state, the test only cares that *every* succ
|
||||
// receives a clone of it, so any distinguishable state works.
|
||||
let mut exit_state = SsaTaintState::initial();
|
||||
exit_state.values.push((
|
||||
|
|
@ -1259,7 +1259,7 @@ mod goto_succ_propagation_tests {
|
|||
fn is_path_safe_for_sink_unknown_axis_returns_false() {
|
||||
use crate::abstract_interp::PathFact;
|
||||
|
||||
// Only dotdot is cleared — absolute stays Maybe → not path-safe.
|
||||
// Only dotdot is cleared, absolute stays Maybe → not path-safe.
|
||||
let half_fact = PathFact::default().with_dotdot_cleared();
|
||||
assert!(!half_fact.is_path_safe());
|
||||
}
|
||||
|
|
@ -1328,9 +1328,9 @@ mod goto_succ_propagation_tests {
|
|||
}
|
||||
}
|
||||
|
||||
// ── Phase 4.2: receiver_candidates_for_type_lookup walks FieldProj ──────
|
||||
// ── receiver_candidates_for_type_lookup walks FieldProj ──────
|
||||
//
|
||||
// After Phase 2 SSA decomposition, `c.client.send(req)` lowers to
|
||||
// After SSA decomposition, `c.client.send(req)` lowers to
|
||||
// v_c = Param("c", 0)
|
||||
// v_client = FieldProj(v_c, "client")
|
||||
// v_call = Call("send", receiver: v_client, args: [v_req])
|
||||
|
|
@ -1430,7 +1430,7 @@ mod receiver_candidates_field_proj_tests {
|
|||
fn field_proj_receiver_walks_to_typed_root_in_go() {
|
||||
// Go is not Rust, so pre-Phase-4 the candidate walk would have
|
||||
// returned ONLY the immediate receiver (v2 = FieldProj). With
|
||||
// Phase 4 we walk through FieldProj.receiver to recover v0 (the
|
||||
// the extended walk we go through FieldProj.receiver to recover v0 (the
|
||||
// typed root `c`).
|
||||
let body = body_with_field_proj_chain();
|
||||
let cands =
|
||||
|
|
@ -1516,7 +1516,7 @@ mod receiver_candidates_field_proj_tests {
|
|||
}
|
||||
}
|
||||
|
||||
// ── Phase 6 hierarchy fan-out: ResolvedSummary union semantics ──────────
|
||||
// ── Hierarchy: ResolvedSummary union semantics ──────────
|
||||
//
|
||||
// `merge_resolved_summaries_fanout` is invoked at virtual-dispatch call
|
||||
// sites where the receiver's static type has multiple concrete
|
||||
|
|
@ -1553,7 +1553,7 @@ mod fanout_merge_tests {
|
|||
}
|
||||
}
|
||||
|
||||
/// B1 — caps that grow taint signal (source/sink/receiver_to_sink)
|
||||
/// B1, caps that grow taint signal (source/sink/receiver_to_sink)
|
||||
/// are unioned. sanitizer_caps are intersected so only bits
|
||||
/// stripped by EVERY implementer count as cleared at the call site.
|
||||
#[test]
|
||||
|
|
@ -1581,7 +1581,7 @@ mod fanout_merge_tests {
|
|||
);
|
||||
}
|
||||
|
||||
/// B2 — propagates_taint is OR'd; propagating_params is the union
|
||||
/// B2, propagates_taint is OR'd; propagating_params is the union
|
||||
/// (any implementer's propagator counts).
|
||||
#[test]
|
||||
fn merge_propagation_unions() {
|
||||
|
|
@ -1600,7 +1600,7 @@ mod fanout_merge_tests {
|
|||
assert_eq!(params, vec![0, 1, 2]);
|
||||
}
|
||||
|
||||
/// B3 — param_to_sink merges per-parameter caps (OR). An impl
|
||||
/// B3, param_to_sink merges per-parameter caps (OR). An impl
|
||||
/// that adds a sink at param N composes with another impl that
|
||||
/// adds a different cap at the same N.
|
||||
#[test]
|
||||
|
|
@ -1630,7 +1630,7 @@ mod fanout_merge_tests {
|
|||
);
|
||||
}
|
||||
|
||||
/// B4 — param_to_sink_sites merges per-parameter site lists with
|
||||
/// B4, param_to_sink_sites merges per-parameter site lists with
|
||||
/// PartialEq dedup. The same site appearing in both impls (e.g.
|
||||
/// inherited definition) must not be reported twice.
|
||||
#[test]
|
||||
|
|
@ -1675,7 +1675,7 @@ mod fanout_merge_tests {
|
|||
assert!(sites.iter().any(|s| s == &unique_b));
|
||||
}
|
||||
|
||||
/// B5 — SSA-precision fields are dropped on disagreement. Two
|
||||
/// B5, SSA-precision fields are dropped on disagreement. Two
|
||||
/// summaries with different `return_type` collapse to None;
|
||||
/// agreement is preserved.
|
||||
#[test]
|
||||
|
|
@ -1704,7 +1704,7 @@ mod fanout_merge_tests {
|
|||
);
|
||||
}
|
||||
|
||||
/// B6 — abstract_transfer + param_return_paths drop on
|
||||
/// B6: abstract_transfer + param_return_paths drop on
|
||||
/// disagreement (precise predicate-path data is not safely
|
||||
/// composable across distinct function bodies).
|
||||
#[test]
|
||||
|
|
@ -1737,7 +1737,7 @@ mod fanout_merge_tests {
|
|||
);
|
||||
}
|
||||
|
||||
/// B7 — empty + empty = empty (no panic on degenerate inputs).
|
||||
/// B7: empty + empty = empty (no panic on degenerate inputs).
|
||||
#[test]
|
||||
fn merge_empties_is_identity() {
|
||||
let m = merge_resolved_summaries_fanout(empty(), empty());
|
||||
|
|
@ -1748,7 +1748,7 @@ mod fanout_merge_tests {
|
|||
}
|
||||
}
|
||||
|
||||
// ── Pointer-Phase 3 / W1: synthetic field-WRITE round-trip ──────────────
|
||||
// ── synthetic field-WRITE round-trip ──────────────
|
||||
//
|
||||
// SSA lowering populates `SsaBody.field_writes` with entries that lift a
|
||||
// synthetic base-update Assign (`obj.f = rhs`) into a structural field
|
||||
|
|
@ -1918,8 +1918,8 @@ mod field_write_tests {
|
|||
crate::pointer::analyse_body(body, crate::cfg::BodyId(7))
|
||||
}
|
||||
|
||||
/// Reuse `make_cfg`'s nodes — the body's instructions all reference
|
||||
/// them — so `transfer_inst` can index `cfg[cfg_node]`.
|
||||
/// Reuse `make_cfg`'s nodes, the body's instructions all reference
|
||||
/// them, so `transfer_inst` can index `cfg[cfg_node]`.
|
||||
fn drive(body: &SsaBody, pf: &PointsToFacts) -> SsaTaintState {
|
||||
// We need a CFG that contains the bodies' cfg_nodes.
|
||||
let (cfg, _, _, _, _) = make_cfg();
|
||||
|
|
@ -1998,7 +1998,7 @@ mod field_write_tests {
|
|||
|
||||
/// Pointer-disabled run (`pointer_facts: None`): no field cell is
|
||||
/// recorded, no taint flows through the `obj.cache` projection. The
|
||||
/// strict-additive contract — pointer-disabled behaviour is the
|
||||
/// strict-additive contract, pointer-disabled behaviour is the
|
||||
/// pre-W1 baseline.
|
||||
#[test]
|
||||
fn pointer_disabled_run_produces_no_field_taint() {
|
||||
|
|
@ -2047,8 +2047,8 @@ mod field_write_tests {
|
|||
state.field_taint.is_empty(),
|
||||
"pointer-disabled run must not populate field_taint",
|
||||
);
|
||||
// FieldProj reads still produce the receiver's existing taint —
|
||||
// none — so no entry for SsaValue(3) either.
|
||||
// FieldProj reads still produce the receiver's existing taint,
|
||||
// none, so no entry for SsaValue(3) either.
|
||||
assert!(state.get(SsaValue(3)).is_none());
|
||||
let _ = cache_id;
|
||||
}
|
||||
|
|
@ -2059,7 +2059,7 @@ mod field_write_tests {
|
|||
/// projected value's symbol-level `validated_must` from the cell.
|
||||
///
|
||||
/// This is the key invariant: validation flows *through* abstract
|
||||
/// field identity — the read recovers what the write recorded.
|
||||
/// field identity, the read recovers what the write recorded.
|
||||
#[test]
|
||||
fn write_then_read_preserves_validated_must() {
|
||||
let (body, cache_id) = make_body();
|
||||
|
|
@ -2208,7 +2208,7 @@ mod field_write_tests {
|
|||
},
|
||||
};
|
||||
let pf = crate::pointer::analyse_body(&body, crate::cfg::BodyId(0));
|
||||
// v0 is Const → empty pt — the hook should not insert anything.
|
||||
// v0 is Const → empty pt, the hook should not insert anything.
|
||||
assert!(
|
||||
pf.pt(SsaValue(0)).is_empty(),
|
||||
"Const value should have empty pt set",
|
||||
|
|
@ -2259,7 +2259,7 @@ mod field_write_tests {
|
|||
}
|
||||
}
|
||||
|
||||
// ── Pointer-Phase 4 / W2: container ELEM write/read round-trip ──────────
|
||||
// ── container ELEM write/read round-trip ──────────
|
||||
//
|
||||
// Container methods like `arr.push(v)` / `arr.shift()` flow per-element
|
||||
// taint through the `Field(_, ELEM)` cells on `SsaTaintState`. These
|
||||
|
|
@ -2351,7 +2351,7 @@ mod container_elem_tests {
|
|||
state
|
||||
}
|
||||
|
||||
/// `arr.push(source()); arr.shift()` — the read picks the source's
|
||||
/// `arr.push(source()); arr.shift()`, the read picks the source's
|
||||
/// caps up via the ELEM cell.
|
||||
#[test]
|
||||
fn container_write_then_read_round_trips_taint() {
|
||||
|
|
@ -2456,7 +2456,7 @@ mod container_elem_tests {
|
|||
);
|
||||
|
||||
// Drive the transfer. `e := arr.shift()` goes through the
|
||||
// existing Call arm — the W2 path is the *write* on `push`.
|
||||
// existing Call arm, the W2 path is the *write* on `push`.
|
||||
// The element-read side already exists on `analyse_body`; the
|
||||
// taint engine doesn't yet read field cells through call-result
|
||||
// paths (Call args are walked by Call's own argument-taint
|
||||
|
|
@ -2482,7 +2482,7 @@ mod container_elem_tests {
|
|||
}
|
||||
}
|
||||
|
||||
/// W4: `arr.push(validate(src)); arr.shift()` — the push records
|
||||
/// W4: `arr.push(validate(src)); arr.shift()`, the push records
|
||||
/// `validated_must = true` on the ELEM cell because the pushed
|
||||
/// value's symbol carried `validated_must`. The shift call result
|
||||
/// reads through the cell and seeds the result symbol's
|
||||
|
|
@ -2761,7 +2761,7 @@ mod container_elem_tests {
|
|||
}
|
||||
}
|
||||
|
||||
// ── Pointer-Phase 5 / W3: cross-call field-points-to application ────────
|
||||
// ── cross-call field-points-to application ────────
|
||||
//
|
||||
// `apply_field_points_to_writes` is the resolver-side hook that turns
|
||||
// callee-summary `field_points_to.param_field_writes` into caller-side
|
||||
|
|
@ -2783,7 +2783,7 @@ mod cross_call_field_tests {
|
|||
use smallvec::smallvec;
|
||||
use std::collections::HashMap;
|
||||
|
||||
/// W3 / W4: shared empty interner — these unit tests don't seed
|
||||
/// W3 / W4: shared empty interner, these unit tests don't seed
|
||||
/// validation bits, so a fresh interner is sufficient for the
|
||||
/// `interner` parameter on `apply_field_points_to_writes`.
|
||||
fn empty_interner() -> SymbolInterner {
|
||||
|
|
@ -2861,23 +2861,23 @@ mod cross_call_field_tests {
|
|||
state
|
||||
}
|
||||
|
||||
/// Callee summary with `param_field_writes[(0, ["cache"])]` —
|
||||
/// Callee summary with `param_field_writes[(0, ["cache"])]`,
|
||||
/// "callee writes cache field on parameter 0 (obj)".
|
||||
/// Caller passes `(obj, source)` to this callee — `arg 0 = obj`,
|
||||
/// Caller passes `(obj, source)` to this callee, `arg 0 = obj`,
|
||||
/// but the W3 hook resolves the *value at arg position 0* as the
|
||||
/// receiver of the field write, populating its pt's cells.
|
||||
///
|
||||
/// We model the caller as `callee(obj, source)` with arg 0 = obj
|
||||
/// (the receiver) and arg 1 = source (the value being written).
|
||||
/// The callee's signature is `fn store(obj, value) { obj.cache = value; }`
|
||||
/// — so the field write on param 0 is keyed by `pt(obj)` and the
|
||||
/// so the field write on param 0 is keyed by `pt(obj)` and the
|
||||
/// taint comes from arg 1's caps. Our helper conservatively unions
|
||||
/// every arg's taint into the cell — which over-tints (for this
|
||||
/// every arg's taint into the cell, which over-tints (for this
|
||||
/// shape, arg 0's pt member becomes the loc, with arg 0's own taint
|
||||
/// applied), but is sound.
|
||||
///
|
||||
/// To make the test precise, we model the simpler shape `fn store(obj)
|
||||
/// { obj.cache = source(); }` — callee writes a literal source into
|
||||
/// { obj.cache = source(); }`, callee writes a literal source into
|
||||
/// `obj.cache`, with no value parameter. Then the caller-side hook
|
||||
/// only sees param 0's taint (zero), so the cell is empty and the
|
||||
/// test fails.
|
||||
|
|
@ -2886,7 +2886,7 @@ mod cross_call_field_tests {
|
|||
/// at the call site arg 0 carries source taint. The hook then
|
||||
/// records (pt(arg0_value), cache) ← arg0_value's taint. In a
|
||||
/// real callee this corresponds to "callee writes its parameter
|
||||
/// value into a self.cache field internally" — but the spread we
|
||||
/// value into a self.cache field internally", but the spread we
|
||||
/// validate is just substitute-and-mirror.
|
||||
#[test]
|
||||
fn cross_call_writes_into_param_field_cell() {
|
||||
|
|
@ -2947,7 +2947,7 @@ mod cross_call_field_tests {
|
|||
fn cross_call_receiver_field_uses_max_sentinel() {
|
||||
let (body, cache_id, pf) = caller_body();
|
||||
let mut state = SsaTaintState::initial();
|
||||
// Seed receiver with taint — SsaValue(0) is the param/receiver.
|
||||
// Seed receiver with taint, SsaValue(0) is the param/receiver.
|
||||
state.set(
|
||||
SsaValue(0),
|
||||
VarTaint {
|
||||
|
|
@ -3026,7 +3026,7 @@ mod cross_call_field_tests {
|
|||
);
|
||||
}
|
||||
|
||||
/// Field names the caller never interned are skipped silently —
|
||||
/// Field names the caller never interned are skipped silently,
|
||||
/// no FieldProj read in the caller could observe such a cell.
|
||||
#[test]
|
||||
fn cross_call_unknown_field_name_skipped() {
|
||||
|
|
@ -3062,7 +3062,7 @@ mod cross_call_field_tests {
|
|||
);
|
||||
}
|
||||
|
||||
/// Overflow summary is treated conservatively as no-op — the
|
||||
/// Overflow summary is treated conservatively as no-op, the
|
||||
/// engine cannot soundly cell-flood, so it skips entirely.
|
||||
#[test]
|
||||
fn cross_call_overflow_summary_is_noop() {
|
||||
|
|
@ -3117,7 +3117,7 @@ mod cross_call_field_tests {
|
|||
//
|
||||
// `SsaTaintState.add_field` already routes through `merge_origins`, but
|
||||
// the FieldProj READ path used to walk the cell's origins inline,
|
||||
// deduping by node only — meaning a cell with N>cap origins surfaced
|
||||
// deduping by node only, meaning a cell with N>cap origins surfaced
|
||||
// all N to the projected SSA value. After A7, the read path uses
|
||||
// `push_origin_bounded`, ensuring the cap-driven survivor selection
|
||||
// applies on read too.
|
||||
|
|
@ -3225,7 +3225,7 @@ mod field_taint_origin_cap_tests {
|
|||
let (body, cache_id, cfg, _n_proj) = build_body();
|
||||
let pf = crate::pointer::analyse_body(&body, crate::cfg::BodyId(0));
|
||||
|
||||
// Pre-populate the (Param, cache) cell with 4 origins —
|
||||
// Pre-populate the (Param, cache) cell with 4 origins,
|
||||
// 2× the cap. The `add_field` path already truncates via
|
||||
// `merge_origins`, so we go through it 4 times to grow.
|
||||
let mut state = SsaTaintState::initial();
|
||||
|
|
@ -3326,14 +3326,14 @@ mod field_taint_origin_cap_tests {
|
|||
// the field_taint cells.
|
||||
//
|
||||
// Two scenarios:
|
||||
// 1. `must_validated_flows_through_join` — both predecessor blocks
|
||||
// 1. `must_validated_flows_through_join`, both predecessor blocks
|
||||
// write the cell with `validated_must = true`. After the join, the
|
||||
// cell at the read site retains `validated_must = true` (AND
|
||||
// intersection of two `true`s).
|
||||
// 2. `early_exit_branch_drops_validated_must` — only one predecessor
|
||||
// 2. `early_exit_branch_drops_validated_must`, only one predecessor
|
||||
// writes; the other reaches the read block via an empty branch.
|
||||
// After the join, the cell has `validated_must = false`,
|
||||
// `validated_may = true` — W4's must/may intersection in action.
|
||||
// `validated_may = true`, W4's must/may intersection in action.
|
||||
#[cfg(test)]
|
||||
mod pointer_lattice_worklist_tests {
|
||||
use super::super::*;
|
||||
|
|
@ -3425,7 +3425,7 @@ mod pointer_lattice_worklist_tests {
|
|||
succs: smallvec![BlockId(1), BlockId(2)],
|
||||
};
|
||||
|
||||
// Block 1: synth `obj.cache = src` — field_writes[v2] = (v0, cache_id)
|
||||
// Block 1: synth `obj.cache = src`, field_writes[v2] = (v0, cache_id)
|
||||
let block1 = SsaBlock {
|
||||
id: BlockId(1),
|
||||
phis: vec![],
|
||||
|
|
@ -3441,7 +3441,7 @@ mod pointer_lattice_worklist_tests {
|
|||
succs: smallvec![BlockId(3)],
|
||||
};
|
||||
|
||||
// Block 2: identical synth write — keeps both branches
|
||||
// Block 2: identical synth write, keeps both branches
|
||||
// contributing the same cell so AND-intersection of must
|
||||
// preserves true on the join.
|
||||
let block2 = SsaBlock {
|
||||
|
|
@ -3459,7 +3459,7 @@ mod pointer_lattice_worklist_tests {
|
|||
succs: smallvec![BlockId(3)],
|
||||
};
|
||||
|
||||
// Block 3: read — FieldProj uses obj from a phi between B1 and B2.
|
||||
// Block 3: read, FieldProj uses obj from a phi between B1 and B2.
|
||||
let block3 = SsaBlock {
|
||||
id: BlockId(3),
|
||||
phis: vec![SsaInst {
|
||||
|
|
@ -3634,7 +3634,7 @@ mod pointer_lattice_worklist_tests {
|
|||
);
|
||||
}
|
||||
|
||||
/// A2.b: early-exit branch — only B1 writes, B2 reaches B3 via
|
||||
/// A2.b: early-exit branch, only B1 writes, B2 reaches B3 via
|
||||
/// an empty body. After the join, the cell exists (B1 wrote
|
||||
/// it), but `validated_must` is `false` (B2 didn't write, the
|
||||
/// orphan-side merge clears `must` per the W4 lattice rule);
|
||||
|
|
@ -3642,7 +3642,7 @@ mod pointer_lattice_worklist_tests {
|
|||
///
|
||||
/// To exercise the validation channels we synthesise the cell
|
||||
/// directly at the appropriate exit state, then run the
|
||||
/// worklist's join via two `SsaTaintState::join()` calls — the
|
||||
/// worklist's join via two `SsaTaintState::join()` calls, the
|
||||
/// body's worklist itself doesn't seed `validated_must` on the
|
||||
/// rhs of an Assign, so we model the "writer recorded must=true"
|
||||
/// scenario at the lattice level rather than driving it through
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue