mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-18 20:15:14 +02:00
Python fp and docs updates (#58)
* refactor: Update comments for clarity and add expectations.json files for performance metrics * feat: Implement FP guard for JS/TS local-collection receivers to suppress missing ownership checks * feat: Enhance Rust parameter handling to classify local collections and prevent false ownership checks * refactor: Simplify code formatting for better readability in multiple files * refactor: Improve UTF-8 sequence length handling and enhance clarity in loop iteration * feat: Update Java and Python patterns to include new security rules * refactor: Improve comment clarity and consistency across multiple Rust files * refactor: Simplify code formatting for improved readability in integration tests and module files * refactor: Improve comment formatting and enhance clarity in assertions across multiple files
This commit is contained in:
parent
4db0805de6
commit
a438886217
291 changed files with 9485 additions and 3851 deletions
|
|
@ -11,31 +11,10 @@ use std::hash::{Hash, Hasher};
|
|||
|
||||
// ── Sink site (primary sink-location attribution) ───────────────────────
|
||||
|
||||
/// A single dangerous-instruction site recorded inside a function's body.
|
||||
///
|
||||
/// `SinkSite` pairs a [`Cap`] (the bits this particular site consumes) with
|
||||
/// the file-relative source location of the instruction that consumes them.
|
||||
/// Carrying this alongside a summary's `param_to_sink` map lets cross-file
|
||||
/// findings attribute the finding line to the actual dangerous call inside
|
||||
/// the callee, rather than to the caller's call-site (which is all a
|
||||
/// bare `(param_idx, Cap)` pair could support).
|
||||
///
|
||||
/// Primary sink-location attribution stores this data in the summary so
|
||||
/// `build_taint_diag()` can consume it and overwrite the caller-site
|
||||
/// `Finding.line` when the sink was resolved via summary.
|
||||
///
|
||||
/// Fields
|
||||
/// ──────
|
||||
/// * `file_rel` — the callee file's path relative to the workspace root
|
||||
/// being scanned. Matches the `FuncKey::namespace` convention so the
|
||||
/// site's origin is addressable without additional workspace context.
|
||||
/// * `line` / `col` — 1-based source coordinates of the sink instruction.
|
||||
/// `0` indicates the extractor could not resolve coordinates (e.g. a
|
||||
/// pass-2 transient summary without tree access).
|
||||
/// * `snippet` — the trimmed source line, capped at 120 characters, empty
|
||||
/// when coordinates could not be resolved.
|
||||
/// * `cap` — the [`Cap`] bits this specific site consumes. A parameter's
|
||||
/// total sink caps is the union across every site associated with it.
|
||||
/// A single dangerous-instruction site inside a function's body.
|
||||
/// Pairs a [`Cap`] with the source location of the consuming
|
||||
/// instruction so cross-file findings can attribute to the callee
|
||||
/// rather than the caller call-site.
|
||||
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
|
||||
pub struct SinkSite {
|
||||
#[serde(default, skip_serializing_if = "String::is_empty")]
|
||||
|
|
@ -50,19 +29,14 @@ pub struct SinkSite {
|
|||
}
|
||||
|
||||
impl SinkSite {
|
||||
/// Dedup key comparing the full identity of a site. Two sites with the
|
||||
/// same `(file_rel, line, col, cap)` describe the same consumption of
|
||||
/// the same bits at the same source location and should collapse when
|
||||
/// summaries are merged.
|
||||
/// Dedup key: two sites with the same `(file_rel, line, col, cap)`
|
||||
/// describe the same consumption and collapse on merge.
|
||||
pub(crate) fn dedup_key(&self) -> (&str, u32, u32, u16) {
|
||||
// Borrow `file_rel` as `&str` and reduce `cap` to its raw bits so the key is a plain tuple.
(self.file_rel.as_str(), self.line, self.col, self.cap.bits())
|
||||
}
|
||||
|
||||
/// Build a site that only carries a [`Cap`] — no resolved source
|
||||
/// coordinates. Used by extraction paths that have no tree/bytes
|
||||
/// context (e.g. pass-2 transient summaries), so downstream consumers
|
||||
/// unioning caps across sites still see the correct bits even when
|
||||
/// primary-location attribution is not available.
|
||||
/// Build a cap-only site for extraction paths with no tree/bytes
|
||||
/// context (pass-2 transient summaries).
|
||||
pub fn cap_only(cap: Cap) -> Self {
|
||||
Self {
|
||||
file_rel: String::new(),
|
||||
|
|
@ -75,13 +49,8 @@ impl SinkSite {
|
|||
}
|
||||
|
||||
/// Tree/bytes context for resolving a CFG span to a [`SinkSite`].
|
||||
///
|
||||
/// Summary extraction runs deep inside the taint engine, far from the
|
||||
/// `ParsedFile` that owns the tree; `SinkSiteLocator` is the narrow
|
||||
/// reference bundle the extractor needs to populate `SinkSite.line`,
|
||||
/// `col`, and `snippet`. The struct is intentionally plain references
|
||||
/// so construction is free and threading it as `Option<&Locator>` is
|
||||
/// cheap.
|
||||
/// Threaded as `Option<&Locator>` so extraction paths without tree
|
||||
/// access can pass `None` cheaply.
|
||||
pub struct SinkSiteLocator<'a> {
|
||||
pub tree: &'a tree_sitter::Tree,
|
||||
pub bytes: &'a [u8],
|
||||
|
|
@ -89,10 +58,8 @@ pub struct SinkSiteLocator<'a> {
|
|||
}
|
||||
|
||||
impl<'a> SinkSiteLocator<'a> {
|
||||
/// Resolve a `(start_byte, end_byte)` span to a [`SinkSite`] with the
|
||||
/// given `cap`. Coordinates fall back to `(0, 0)` and the snippet to
|
||||
/// empty when the byte offset is out of range (should not happen for
|
||||
/// spans that came from the same tree).
|
||||
/// Resolve a span to a [`SinkSite`]. Coordinates fall back to
|
||||
/// `(0, 0)` and the snippet to empty when out of range.
|
||||
pub fn site_for_span(&self, span: (usize, usize), cap: Cap) -> SinkSite {
|
||||
let byte = span.0;
|
||||
let point = self
|
||||
|
|
@ -148,7 +115,7 @@ pub(crate) fn union_param_sink_sites(
|
|||
/// Real disambigs come from `tree_sitter::Node::start_byte` (see
|
||||
/// `cfg.rs:fn_disambig`), which is a byte offset into the source file.
|
||||
/// Source files in practice are far below 2 GiB, so bit 31 of a real
|
||||
/// disambig is always zero — setting it marks a value as synthetic and
|
||||
/// disambig is always zero; setting it marks a value as synthetic and
|
||||
/// keeps it in a disjoint namespace from byte-offset disambigs.
|
||||
const SYNTHETIC_DISAMBIG_BIT: u32 = 0x8000_0000;
|
||||
|
||||
|
|
@ -160,17 +127,17 @@ const SYNTHETIC_DISAMBIG_BIT: u32 = 0x8000_0000;
|
|||
/// to disambiguate same-name overloads and method calls at resolution time
|
||||
/// without having to re-parse the raw callee string.
|
||||
///
|
||||
/// * `name` — the raw callee text as it appeared in source
|
||||
/// * `name`, the raw callee text as it appeared in source
|
||||
/// (`"obj.method"`, `"env::var"`, `"helper"`). Preserved for diagnostics.
|
||||
/// * `arity` — number of positional arguments at the call site. `None`
|
||||
/// * `arity`, number of positional arguments at the call site. `None`
|
||||
/// when splats / keyword-args / rest-params make the count unreliable.
|
||||
/// * `receiver` — structured receiver identifier for method calls
|
||||
/// * `receiver`, structured receiver identifier for method calls
|
||||
/// (e.g. `"obj"` in `obj.method()`). Carries the root receiver for
|
||||
/// chained calls; `None` for non-method or complex receivers.
|
||||
/// * `qualifier` — the segment immediately before the leaf for non-method
|
||||
/// * `qualifier`, the segment immediately before the leaf for non-method
|
||||
/// qualified calls (e.g. `"env"` in `env::var`). Extracted once at CFG
|
||||
/// time rather than re-parsed downstream.
|
||||
/// * `ordinal` — the per-function call ordinal matching
|
||||
/// * `ordinal`, the per-function call ordinal matching
|
||||
/// `CallMeta.call_ordinal`, allowing cross-file consumers to address a
|
||||
/// specific call site rather than just a callee name.
|
||||
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Hash)]
|
||||
|
|
@ -293,15 +260,15 @@ pub struct FuncSummary {
|
|||
|
||||
// ── Taint behaviour ──────────────────────────────────────────────────
|
||||
// Stored as raw `u16` so serde doesn't need to know about `bitflags`.
|
||||
/// Caps this function **introduces** — i.e. the return value carries
|
||||
/// Caps this function **introduces**, i.e. the return value carries
|
||||
/// freshly‑tainted data even if no argument was tainted.
|
||||
pub source_caps: u16,
|
||||
|
||||
/// Caps this function **cleans** — passing tainted data through this
|
||||
/// Caps this function **cleans**, passing tainted data through this
|
||||
/// function strips the corresponding bits.
|
||||
pub sanitizer_caps: u16,
|
||||
|
||||
/// Caps this function **consumes unsafely** — calling it with tainted
|
||||
/// Caps this function **consumes unsafely**, calling it with tainted
|
||||
/// arguments that still carry these bits is a finding.
|
||||
pub sink_caps: u16,
|
||||
|
||||
|
|
@ -309,7 +276,7 @@ pub struct FuncSummary {
|
|||
#[serde(default)]
|
||||
pub propagating_params: Vec<usize>,
|
||||
|
||||
/// Legacy field — kept only for deserialising old JSON from SQLite.
|
||||
/// Legacy field, kept only for deserialising old JSON from SQLite.
|
||||
/// New code should use `propagating_params` instead.
|
||||
#[serde(default, skip_serializing)]
|
||||
pub propagates_taint: bool,
|
||||
|
|
@ -317,7 +284,7 @@ pub struct FuncSummary {
|
|||
/// Indices of parameters that flow to internal sinks (0‑based).
|
||||
pub tainted_sink_params: Vec<usize>,
|
||||
|
||||
/// Per-parameter [`SinkSite`] records — mirrors
|
||||
/// Per-parameter [`SinkSite`] records, mirrors
|
||||
/// [`SsaFuncSummary::param_to_sink`] so the coarse legacy summary also
|
||||
/// carries primary sink-location attribution through the two-pass
|
||||
/// architecture. Empty when the extractor lacked tree access.
|
||||
|
|
@ -394,7 +361,7 @@ pub struct FuncSummary {
|
|||
///
|
||||
/// Empty for files with no declared inheritance / impl
|
||||
/// relationships and for Go (which uses implicit interface
|
||||
/// satisfaction — Phase 6 does not try to compute it).
|
||||
/// satisfaction, not computed).
|
||||
///
|
||||
/// **Per-file duplication.** Every `FuncSummary` produced from a
|
||||
/// given file carries the **same** `hierarchy_edges` vector so the
|
||||
|
|
@ -457,7 +424,7 @@ pub enum CalleeResolution {
|
|||
Resolved(FuncKey),
|
||||
/// No candidates found at all.
|
||||
NotFound,
|
||||
/// Multiple candidates — ambiguous, cannot pick one.
|
||||
/// Multiple candidates, ambiguous, cannot pick one.
|
||||
Ambiguous(Vec<FuncKey>),
|
||||
}
|
||||
|
||||
|
|
@ -470,19 +437,19 @@ pub enum CalleeResolution {
|
|||
///
|
||||
/// Hint categories, ordered from strongest to weakest:
|
||||
///
|
||||
/// * `receiver_type` — authoritative class/impl/module name (e.g. from
|
||||
/// * `receiver_type`, authoritative class/impl/module name (e.g. from
|
||||
/// type inference or a `use ...` resolution). When set, the resolver
|
||||
/// *requires* the callee's container to equal this name and refuses to
|
||||
/// fall back to a leaf-name collision if the qualified lookup misses.
|
||||
/// * `namespace_qualifier` — syntactic qualifier parsed from the callee
|
||||
/// * `namespace_qualifier`, syntactic qualifier parsed from the callee
|
||||
/// (e.g. `"env"` in `env::var`, `"http"` in `http.Get`). Treated as a
|
||||
/// container hint but not authoritative: a miss falls through.
|
||||
/// * `receiver_var` — syntactic receiver variable name (e.g. `"obj"` in
|
||||
/// * `receiver_var`, syntactic receiver variable name (e.g. `"obj"` in
|
||||
/// `obj.method()`). Soft hint, used only to tie-break ambiguity.
|
||||
/// * `caller_container` — caller's own enclosing container, used to
|
||||
/// * `caller_container`, caller's own enclosing container, used to
|
||||
/// resolve bare self-calls inside a class/impl body.
|
||||
///
|
||||
/// `arity` is a hard filter — when `Some`, every candidate whose arity
|
||||
/// `arity` is a hard filter: when `Some`, every candidate whose arity
|
||||
/// differs is excluded from consideration.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct CalleeQuery<'a> {
|
||||
|
|
@ -502,7 +469,7 @@ pub struct CalleeQuery<'a> {
|
|||
/// `std::env::var` in Rust the caller passes `"env"`; for `http.Get`
|
||||
/// in Go, `"http"`. Left `None` for purely bare calls.
|
||||
pub namespace_qualifier: Option<&'a str>,
|
||||
/// Syntactic receiver variable name. Used only as a tie-breaker — a
|
||||
/// Syntactic receiver variable name. Used only as a tie-breaker, a
|
||||
/// variable name is a weak proxy for a class name.
|
||||
pub receiver_var: Option<&'a str>,
|
||||
/// Positional-argument count at the call site. Hard filter when set.
|
||||
|
|
@ -527,14 +494,14 @@ impl<'a> CalleeQuery<'a> {
|
|||
///
|
||||
/// Functions are partitioned by language + namespace + name + arity. Two
|
||||
/// functions with the same bare name but different languages or namespaces
|
||||
/// are stored separately — no implicit cross-language merging occurs.
|
||||
/// are stored separately, no implicit cross-language merging occurs.
|
||||
///
|
||||
/// A secondary index `(Lang, name)` supports fast lookup by language + name
|
||||
/// for same-language resolution in the taint engine.
|
||||
#[derive(Default)]
|
||||
pub struct GlobalSummaries {
|
||||
by_key: HashMap<FuncKey, FuncSummary>,
|
||||
/// Bare leaf-name index — kept for compatibility with callers that only
|
||||
/// Bare leaf-name index, kept for compatibility with callers that only
|
||||
/// see an unqualified call string. A single name may map to many keys
|
||||
/// across containers / files / arities.
|
||||
by_lang_name: HashMap<(Lang, String), Vec<FuncKey>>,
|
||||
|
|
@ -548,7 +515,7 @@ pub struct GlobalSummaries {
|
|||
/// `module_path` set. Used by use-map driven resolution to look up
|
||||
/// candidates by their crate-relative module rather than their
|
||||
/// filesystem path. Same name / module / arity overloads land on the
|
||||
/// same vector — arity narrowing happens at resolution time.
|
||||
/// same vector, arity narrowing happens at resolution time.
|
||||
by_rust_module: HashMap<(String, String), Vec<FuncKey>>,
|
||||
/// Precise SSA-derived per-parameter summaries, keyed by `FuncKey`.
|
||||
/// These take precedence over `FuncSummary` during callee resolution.
|
||||
|
|
@ -562,14 +529,14 @@ pub struct GlobalSummaries {
|
|||
/// pass 1 and consumed by
|
||||
/// [`crate::auth_analysis::run_auth_analysis`] during pass 2.
|
||||
auth_by_key: HashMap<FuncKey, crate::auth_analysis::model::AuthCheckSummary>,
|
||||
/// Phase 6 type hierarchy index for runtime virtual-dispatch fan-out.
|
||||
/// Type hierarchy index for runtime virtual-dispatch fan-out.
|
||||
///
|
||||
/// Installed by [`Self::install_hierarchy`] after pass 1 from the
|
||||
/// merged `FuncSummary::hierarchy_edges` vectors. Consumed by
|
||||
/// [`Self::resolve_callee_widened`] during pass 2 so the taint
|
||||
/// engine sees every concrete implementer of a method when the
|
||||
/// receiver is statically typed as a super-class / trait /
|
||||
/// interface — recovering the dispatch precision that today's
|
||||
/// interface, recovering the dispatch precision that today's
|
||||
/// single-result [`Self::resolve_callee`] discards.
|
||||
///
|
||||
/// `None` until installed: every consumer treats `None` as
|
||||
|
|
@ -590,7 +557,7 @@ impl GlobalSummaries {
|
|||
/// Identity collisions are extraordinarily rare in practice (they
|
||||
/// require two structurally distinct functions to land on the same
|
||||
/// non-synthetic key, e.g. both with `disambig: None`). The loop
|
||||
/// bound is defensive — if synthetic probing still collides after
|
||||
/// bound is defensive: if synthetic probing still collides after
|
||||
/// 1024 attempts we fall through and let the caller merge, which
|
||||
/// degrades gracefully to the old behaviour rather than looping
|
||||
/// forever.
|
||||
|
|
@ -619,12 +586,12 @@ impl GlobalSummaries {
|
|||
/// SSA-summary variant of [`Self::reconcile_func_summary_key`].
|
||||
///
|
||||
/// Distinctness signals for SSA summaries are weaker than for
|
||||
/// coarse `FuncSummary`s — the summary itself carries no explicit
|
||||
/// coarse `FuncSummary`s: the summary itself carries no explicit
|
||||
/// `param_count`, only references to parameter indices. We combine:
|
||||
///
|
||||
/// * **Key arity fit** — any parameter index referenced by the new
|
||||
/// * **Key arity fit**, any parameter index referenced by the new
|
||||
/// summary that exceeds `key.arity` is a structural mismatch.
|
||||
/// * **Existing-entry compare** — if an entry already lives at
|
||||
/// * **Existing-entry compare**, if an entry already lives at
|
||||
/// this key and it disagrees on the set of referenced parameter
|
||||
/// indices, the two cannot both describe the same function.
|
||||
fn reconcile_ssa_summary_key(&self, mut key: FuncKey, summary: &SsaFuncSummary) -> FuncKey {
|
||||
|
|
@ -856,7 +823,7 @@ impl GlobalSummaries {
|
|||
pub fn merge(&mut self, other: GlobalSummaries) {
|
||||
// `insert` rebuilds every secondary index (by_lang_name, by_lang_qualified,
|
||||
// by_rust_module) from the summary itself, so we do not need to copy
|
||||
// `other.by_rust_module` explicitly — draining `other.by_key` is enough.
|
||||
// `other.by_rust_module` explicitly, draining `other.by_key` is enough.
|
||||
for (key, summary) in other.by_key {
|
||||
self.insert(key, summary);
|
||||
}
|
||||
|
|
@ -874,7 +841,7 @@ impl GlobalSummaries {
|
|||
}
|
||||
// Hierarchy index: invalidate after a merge so the next consumer
|
||||
// sees a freshly-built view that includes `other`'s edges. The
|
||||
// alternative — point-merging two indexes — is racy when the
|
||||
// alternative, point-merging two indexes, is racy when the
|
||||
// same `(lang, super)` key carries different sub-orderings in
|
||||
// each input; rebuild is O(n) over `by_key.iter()` and is the
|
||||
// single source of truth.
|
||||
|
|
@ -889,9 +856,9 @@ impl GlobalSummaries {
|
|||
/// caller genuinely wants the new one to replace the old.
|
||||
///
|
||||
/// When the existing entry is **incompatible** with the incoming
|
||||
/// one — the key's `arity` disagrees with the new summary's referenced
|
||||
/// one, the key's `arity` disagrees with the new summary's referenced
|
||||
/// parameter indices, or the two summaries would describe different
|
||||
/// functions — we synthesize a disambig so both are kept. Silent
|
||||
/// functions, we synthesize a disambig so both are kept. Silent
|
||||
/// replacement in that case would drop one function's cross-file
|
||||
/// taint signal entirely, which the caller cannot recover.
|
||||
///
|
||||
|
|
@ -906,23 +873,21 @@ impl GlobalSummaries {
|
|||
/// `ssa_summary_fits_arity` would reject the summary and
|
||||
/// `reconcile_ssa_summary_key` would synthesise a disambig that
|
||||
/// uncouples the SSA FuncKey from the matching FuncSummary FuncKey
|
||||
/// (audit gap A.2.1.G1 —
|
||||
/// (audit gap A.2.1.G1,
|
||||
/// `project_typed_callgraph_audit_gap_ssa_disambig.md`).
|
||||
pub fn insert_ssa(&mut self, key: FuncKey, summary: SsaFuncSummary) {
|
||||
// The summary may reference a parameter index ≥ `key.arity` when
|
||||
// scoped SSA lowering synthesised `Param` ops for **external
|
||||
// captures** (free identifiers like `this`, module imports,
|
||||
// unresolved method names) — see audit gap A.2.1.G1
|
||||
// unresolved method names), see audit gap A.2.1.G1
|
||||
// (`project_typed_callgraph_audit_gap_ssa_disambig.md`). These
|
||||
// synthetic refs are useful inside the file they were extracted
|
||||
// in (the caller's implicit-uses argument group at the same
|
||||
// index aligns with the synthetic Param) and stay useful when
|
||||
// resolved cross-file by name from this map (the same
|
||||
// implicit-uses alignment applies). But they would trip
|
||||
// [`ssa_summary_fits_arity`] inside [`reconcile_ssa_summary_key`],
|
||||
// forcing a synthetic disambig that uncouples the SSA FuncKey
|
||||
// from the matching FuncSummary FuncKey — and Phase 3's
|
||||
// `summaries.get_ssa(caller_key)` lookup (consuming
|
||||
// in (caller implicit-uses align with the synthetic Param) and
|
||||
// stay useful when resolved cross-file by name. But they trip
|
||||
// [`ssa_summary_fits_arity`] inside
|
||||
// [`reconcile_ssa_summary_key`], forcing a synthetic disambig
|
||||
// that uncouples the SSA FuncKey from the FuncSummary FuncKey
|
||||
// so `summaries.get_ssa(caller_key)` (consuming
|
||||
// `typed_call_receivers` at the FuncSummary-aligned key) would
|
||||
// miss.
|
||||
//
|
||||
|
|
@ -930,23 +895,22 @@ impl GlobalSummaries {
|
|||
// arity):
|
||||
//
|
||||
// * **No existing entry, or existing entry also has out-of-range
|
||||
// refs** — keep the (untrimmed) summary at the original key,
|
||||
// bypassing the disambig synthesis. Phase 3 finds the entry
|
||||
// under the FuncSummary's own disambig; cross-file resolvers
|
||||
// find the same entry with its full per-param signal
|
||||
// (closures, lambdas, captured-var sinks). The "existing also
|
||||
// refs**: keep the untrimmed summary at the original key,
|
||||
// bypassing disambig synthesis. Resolution finds the entry
|
||||
// under the FuncSummary's own disambig with its full
|
||||
// per-param signal (closures, lambdas, captured-var sinks). The "existing also
|
||||
// has out-of-range refs" branch covers the iterative-rescan
|
||||
// case where round 2's incoming summary lands on top of round
|
||||
// 1's already-installed copy of the same function.
|
||||
//
|
||||
// * **Existing entry fits arity (legit) but new doesn't** — fall
|
||||
// * **Existing entry fits arity (legit) but new doesn't**, fall
|
||||
// back to the disambig synthesis. This preserves the
|
||||
// `insert_ssa_arity_overflow_rekeys` invariant: a structurally
|
||||
// incompatible incoming summary (different function sharing
|
||||
// name + container + arity, with param refs at indices that
|
||||
// don't even exist in the legitimate function) cannot
|
||||
// dethrone the existing entry by silent overwrite. Both
|
||||
// summaries survive — the existing one at the original key,
|
||||
// summaries survive, the existing one at the original key,
|
||||
// the new one at the synthesised disambig.
|
||||
let key = if key.arity.is_some() && !ssa_summary_fits_arity(&summary, key.arity) {
|
||||
let existing_also_overflows = self
|
||||
|
|
@ -1044,7 +1008,7 @@ impl GlobalSummaries {
|
|||
}
|
||||
|
||||
/// Count of cross-file bodies currently loaded. Exposed for
|
||||
/// `tracing::debug!` observability — lets callers distinguish "no
|
||||
/// `tracing::debug!` observability, lets callers distinguish "no
|
||||
/// bodies available" from "bodies available but inline didn't fire".
|
||||
pub fn bodies_len(&self) -> usize {
|
||||
self.bodies_by_key.len()
|
||||
|
|
@ -1081,7 +1045,7 @@ impl GlobalSummaries {
|
|||
///
|
||||
/// Returns `(source_caps, sanitizer_caps, sink_caps, propagating_params)`
|
||||
/// per key. Used by the SCC fixed-point loop to detect when an iteration
|
||||
/// has not changed any summary — i.e. convergence.
|
||||
/// has not changed any summary, i.e. convergence.
|
||||
pub fn snapshot_caps(&self) -> HashMap<FuncKey, (u16, u16, u16, Vec<usize>)> {
|
||||
self.by_key
|
||||
.iter()
|
||||
|
|
@ -1127,7 +1091,7 @@ impl GlobalSummaries {
|
|||
/// `(wildcard_prefix, name)` in the module index. If across all
|
||||
/// wildcards exactly one arity-filtered candidate appears → resolved.
|
||||
/// 3. Otherwise fall through to [`resolve_callee_key_with_container`]
|
||||
/// with no `container_hint` — meaning only the existing namespace /
|
||||
/// with no `container_hint`, meaning only the existing namespace /
|
||||
/// arity disambiguation applies.
|
||||
///
|
||||
/// A `None` use_map (non-Rust file or no `use` declarations) makes this
|
||||
|
|
@ -1229,7 +1193,7 @@ impl GlobalSummaries {
|
|||
|
||||
/// Resolve a callee name with an optional container hint.
|
||||
///
|
||||
/// Legacy entry point — kept so tests and older callers compile
|
||||
/// Legacy entry point, kept so tests and older callers compile
|
||||
/// unchanged. `container_hint` is interpreted as a syntactic
|
||||
/// container qualifier (not an authoritative receiver type), so a
|
||||
/// miss is allowed to fall through to leaf-name lookup. New
|
||||
|
|
@ -1261,35 +1225,35 @@ impl GlobalSummaries {
|
|||
/// **New resolution order** (qualified identity primary, leaf name
|
||||
/// fallback):
|
||||
///
|
||||
/// 1. **Receiver-type qualified** — if `receiver_type` is set,
|
||||
/// 1. **Receiver-type qualified**, if `receiver_type` is set,
|
||||
/// consult `by_lang_qualified[{receiver_type}::{name}]` with the
|
||||
/// arity filter. Exactly-one → resolved; same-namespace
|
||||
/// tie-breaker if multiple. *Receiver types are authoritative*:
|
||||
/// a miss does not fall back to bare leaf lookup (that would be
|
||||
/// a silent reinterpretation).
|
||||
/// 2. **Namespace-qualifier qualified** — if `namespace_qualifier`
|
||||
/// 2. **Namespace-qualifier qualified**, if `namespace_qualifier`
|
||||
/// is set, try the qualified index with that container.
|
||||
/// Non-authoritative: a miss falls through.
|
||||
/// 3. **Caller-self-container** — when the caller lives inside a
|
||||
/// 3. **Caller-self-container**, when the caller lives inside a
|
||||
/// container (method body), try the qualified index against the
|
||||
/// caller's own container. Resolves bare `foo()` self-calls
|
||||
/// inside a class without collapsing into an unrelated same-leaf
|
||||
/// definition in another file.
|
||||
/// 4. **Same-namespace unique leaf** — intra-file bare-leaf call:
|
||||
/// 4. **Same-namespace unique leaf**, intra-file bare-leaf call:
|
||||
/// if the caller's namespace contains exactly one arity-matched
|
||||
/// candidate with this leaf, resolve to it.
|
||||
/// 5. **Receiver-variable tie-break** — if the same-namespace
|
||||
/// 5. **Receiver-variable tie-break**, if the same-namespace
|
||||
/// lookup misses but the raw call came with a receiver variable,
|
||||
/// try `{receiver_var}::{name}` as a last qualified attempt.
|
||||
///
|
||||
/// 5.5. **Bare-call free-function preference** — for a truly bare
|
||||
/// 5.5. **Bare-call free-function preference**, for a truly bare
|
||||
/// call (no receiver type, no namespace qualifier, no receiver
|
||||
/// variable), if exactly one same-namespace arity-matched
|
||||
/// candidate has an empty container, resolve to it. A class
|
||||
/// method cannot be invoked with bare-call syntax from outside
|
||||
/// its class, so this disambiguation is safe even when same-name
|
||||
/// methods exist elsewhere in the file.
|
||||
/// 6. **Leaf-name fallback** — arity-filtered same-language lookup.
|
||||
/// 6. **Leaf-name fallback**, arity-filtered same-language lookup.
|
||||
/// Unique → resolved. Multiple + we had any qualified hint →
|
||||
/// Ambiguous (refuse to guess when a qualifier exists but
|
||||
/// missed). Multiple + no qualified hint → narrow by namespace,
|
||||
|
|
@ -1411,7 +1375,7 @@ impl GlobalSummaries {
|
|||
// outside its own class (intra-class self-calls were already
|
||||
// resolved by step 3). When the same-namespace candidate set
|
||||
// contains exactly one empty-container entry, it is the
|
||||
// unambiguous target — returning Ambiguous here would be a
|
||||
// unambiguous target, returning Ambiguous here would be a
|
||||
// silent false negative whenever a top-level helper happens to
|
||||
// share a name with some method elsewhere in the file.
|
||||
let syntactic_bare = q.receiver_type.is_none()
|
||||
|
|
@ -1434,7 +1398,7 @@ impl GlobalSummaries {
|
|||
}
|
||||
|
||||
// Multiple arity-matched candidates remain. When a qualified
|
||||
// hint was supplied but missed, refuse to guess — a silent
|
||||
// hint was supplied but missed, refuse to guess: a silent
|
||||
// leaf-name pick would defeat the point of qualified-first
|
||||
// resolution. (`receiver_type` is handled in Step 1 and never
|
||||
// reaches here; `namespace_qualifier` / `caller_container`
|
||||
|
|
@ -1443,7 +1407,7 @@ impl GlobalSummaries {
|
|||
return CalleeResolution::Ambiguous(arity_filtered.into_iter().cloned().collect());
|
||||
}
|
||||
|
||||
// No qualified hints whatsoever — tolerate namespace narrowing.
|
||||
// No qualified hints whatsoever, tolerate namespace narrowing.
|
||||
match same_ns.len() {
|
||||
1 => CalleeResolution::Resolved(same_ns[0].clone()),
|
||||
0 => CalleeResolution::Ambiguous(arity_filtered.into_iter().cloned().collect()),
|
||||
|
|
@ -1452,11 +1416,11 @@ impl GlobalSummaries {
|
|||
}
|
||||
|
||||
/// Install / refresh the type-hierarchy index from the currently
|
||||
/// loaded summaries. Idempotent — calling twice rebuilds.
|
||||
/// loaded summaries. Idempotent: calling twice rebuilds.
|
||||
///
|
||||
/// Call this once after pass-1 merge (and again whenever
|
||||
/// summary state changes in a way that could affect virtual
|
||||
/// dispatch — typically: after the call-graph is rebuilt mid-fixed-point).
|
||||
/// dispatch, typically: after the call-graph is rebuilt mid-fixed-point).
|
||||
/// `merge()` automatically invalidates so a forgotten reinstall
|
||||
/// degrades to today's behaviour rather than a stale lookup.
|
||||
pub fn install_hierarchy(&mut self) {
|
||||
|
|
@ -1469,7 +1433,7 @@ impl GlobalSummaries {
|
|||
self.hierarchy.as_ref()
|
||||
}
|
||||
|
||||
/// Hard cap on hierarchy fan-out from a single call site — see
|
||||
/// Hard cap on hierarchy fan-out from a single call site, see
|
||||
/// [`Self::resolve_callee_widened`] for rationale. Public for tests
|
||||
/// that need to assert cap behaviour without hard-coding the value.
|
||||
pub const MAX_HIERARCHY_FANOUT: usize = 8;
|
||||
|
|
@ -1494,14 +1458,14 @@ impl GlobalSummaries {
|
|||
///
|
||||
/// Hard cap: at most [`Self::MAX_HIERARCHY_FANOUT`] keys are
|
||||
/// returned. When the cap fires, the cap-hit is logged at `debug`
|
||||
/// and the tail impls are silently dropped — over-fanning is a
|
||||
/// and the tail impls are silently dropped; over-fanning is a
|
||||
/// precision-tax knob, not a soundness one.
|
||||
///
|
||||
/// Empty result + non-empty `subs` triggers a
|
||||
/// secondary fall-through to [`Self::resolve_callee`] so a
|
||||
/// type-fact misclassification (receiver typed as a super-class
|
||||
/// that has no method by this name on any sub) does not silently
|
||||
/// regress to "no resolution at all" — the leaf-name path can still
|
||||
/// regress to "no resolution at all", the leaf-name path can still
|
||||
/// pick up a match. This preserves the
|
||||
/// "subset of today's targets, never a superset" rule under
|
||||
/// hierarchy-aware resolution failure.
|
||||
|
|
@ -1584,7 +1548,7 @@ impl GlobalSummaries {
|
|||
// Hierarchy widening produced nothing (e.g., none of the
|
||||
// recorded sub-types declare this method). Fall back to
|
||||
// today's qualified-first resolver so the misclassified-
|
||||
// type case still finds a leaf match — the same
|
||||
// type case still finds a leaf match, the same
|
||||
// "preserve today's behaviour on miss" rule the call-graph
|
||||
// builder applies.
|
||||
return single_fallback();
|
||||
|
|
@ -1615,15 +1579,15 @@ impl std::fmt::Debug for GlobalSummaries {
|
|||
///
|
||||
/// Comparison rules
|
||||
/// ────────────────
|
||||
/// * **`param_count` / `kind` / `container`** — unconditional agreement.
|
||||
/// * **`param_count` / `kind` / `container`**, unconditional agreement.
|
||||
/// Any mismatch is a hard collision between distinct functions.
|
||||
/// * **`file_path`** — agree when both sides are populated. A blank path
|
||||
/// * **`file_path`**: agree when both sides are populated. A blank path
|
||||
/// can come from synthetic summaries constructed in tests / interop
|
||||
/// configs and should not force a split.
|
||||
/// * **`param_names`** — agree when both sides are populated. Legacy
|
||||
/// * **`param_names`**: agree when both sides are populated. Legacy
|
||||
/// summaries may persist with empty names; treating empty as "unknown"
|
||||
/// avoids gratuitous splits while still catching real divergence.
|
||||
/// * **`module_path`** — Rust-only. Agreed when both sides are `Some`.
|
||||
/// * **`module_path`**: Rust-only. Agreed when both sides are `Some`.
|
||||
/// A missing module path on one side is legacy-compatible; two *distinct*
|
||||
/// `Some` values mean the two summaries belong to different crates'
|
||||
/// module trees.
|
||||
|
|
@ -1653,7 +1617,7 @@ pub(crate) fn summaries_compatible(a: &FuncSummary, b: &FuncSummary) -> bool {
|
|||
/// Derive a deterministic synthetic disambiguator from the
|
||||
/// identity-relevant fields of a `FuncSummary`.
|
||||
///
|
||||
/// The top bit is **not** set here — the caller composes the final value
|
||||
/// The top bit is **not** set here; the caller composes the final value
|
||||
/// via `SYNTHETIC_DISAMBIG_BIT | (hash & !SYNTHETIC_DISAMBIG_BIT)` so that
|
||||
/// (a) the caller can safely bump the low bits to probe for a free slot,
|
||||
/// and (b) the synthetic namespace stays disjoint from byte-offset
|
||||
|
|
@ -1678,7 +1642,7 @@ pub(crate) fn synthesize_disambig(summary: &FuncSummary) -> u32 {
|
|||
/// `SsaFuncSummary` carries no explicit `param_count`; we approximate
|
||||
/// it via the maximum parameter index referenced by either summary.
|
||||
/// Two summaries are compatible when neither references a parameter
|
||||
/// index the other cannot — an upward compatibility check, so a refined
|
||||
/// index the other cannot, an upward compatibility check, so a refined
|
||||
/// summary that merely adds flows for previously-silent parameters is
|
||||
/// still considered compatible.
|
||||
fn ssa_summaries_compatible(
|
||||
|
|
|
|||
|
|
@ -17,15 +17,15 @@
|
|||
//!
|
||||
//! Edges are directed `AliasEdge { source, target, kind }`:
|
||||
//!
|
||||
//! * `Source(Param(i)) → Target(Param(j))` — the callee stores data
|
||||
//! * `Source(Param(i)) → Target(Param(j))`: the callee stores data
|
||||
//! derived from parameter `i` into a field/element of parameter `j`.
|
||||
//! Mutation is observable to the caller through its argument for `j`.
|
||||
//! * `Source(Param(i)) → Target(Return)` — the return value aliases
|
||||
//! * `Source(Param(i)) → Target(Return)`: the return value aliases
|
||||
//! parameter `i`'s heap identity. Adds heap-level precision on top of
|
||||
//! the coarser [`TaintTransform::Identity`] view already carried in
|
||||
//! [`crate::summary::ssa_summary::SsaFuncSummary::param_to_return`].
|
||||
//!
|
||||
//! `MustAlias` is intentionally omitted — the ROI on
|
||||
//! `MustAlias` is intentionally omitted, the ROI on
|
||||
//! must-alias inference for cross-file summaries is low, and the soundness
|
||||
//! story for `MayAlias`-only application is straightforward ("take the
|
||||
//! union").
|
||||
|
|
@ -35,7 +35,7 @@
|
|||
//! Edge count is capped at [`MAX_ALIAS_EDGES`]. When a callee's alias
|
||||
//! graph exceeds the cap the summary records `overflow = true` and
|
||||
//! callers treat the function as "any tainted parameter may spread to
|
||||
//! every other parameter and to the return" — the conservative
|
||||
//! every other parameter and to the return", the conservative
|
||||
//! greatest-lower-bound over the alias lattice.
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
|
@ -46,7 +46,7 @@ use smallvec::SmallVec;
|
|||
/// Parameters are identified by their 0-based positional index as reported
|
||||
/// by [`crate::ssa::ir::SsaOp::Param`]; the implicit receiver (`self`/`this`)
|
||||
/// is handled outside this table and is deliberately not representable here.
|
||||
/// `Return` denotes the function's return SSA value — one per function, so
|
||||
/// `Return` denotes the function's return SSA value, one per function, so
|
||||
/// no further qualifier is needed.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
|
||||
pub enum AliasPosition {
|
||||
|
|
@ -57,7 +57,7 @@ pub enum AliasPosition {
|
|||
}
|
||||
|
||||
/// Strength of an alias edge. Only [`AliasKind::MayAlias`] is emitted
|
||||
/// — the analysis over-approximates identity-level aliasing rather than
|
||||
/// because the analysis over-approximates identity-level aliasing rather than
|
||||
/// proving must-alias. The variant is kept as an enum so a future
|
||||
/// extension that distinguishes the two can slot in without migrating
|
||||
/// on-disk data.
|
||||
|
|
@ -94,7 +94,7 @@ pub const MAX_ALIAS_EDGES: usize = 8;
|
|||
/// Parameter-granularity alias summary persisted in
|
||||
/// [`crate::summary::ssa_summary::SsaFuncSummary`].
|
||||
///
|
||||
/// The summary is empty by default — functions without any parameter /
|
||||
/// The summary is empty by default, functions without any parameter /
|
||||
/// return aliasing (pure transformers, sinks that consume but don't
|
||||
/// mutate their arguments) carry no edges and cost nothing on disk.
|
||||
///
|
||||
|
|
@ -109,13 +109,13 @@ pub struct PointsToSummary {
|
|||
/// tracking deserialise cleanly (no edges).
|
||||
#[serde(default, skip_serializing_if = "SmallVec::is_empty")]
|
||||
pub edges: SmallVec<[AliasEdge; 4]>,
|
||||
/// Conservative fallback flag — set when extraction hit
|
||||
/// Conservative fallback flag, set when extraction hit
|
||||
/// [`MAX_ALIAS_EDGES`] and refused to drop any edge silently. When
|
||||
/// `true`, callers treat the callee as "every parameter may alias
|
||||
/// every other parameter and the return value".
|
||||
#[serde(default, skip_serializing_if = "core::ops::Not::not")]
|
||||
pub overflow: bool,
|
||||
/// At least one return path produces a *fresh* container allocation —
|
||||
/// At least one return path produces a *fresh* container allocation:
|
||||
/// a container literal (`[]`, `{}`) or a known container constructor
|
||||
/// call (`new Map()`, `list()`, …) that does not trace back to any
|
||||
/// parameter. When this is `true` the caller synthesises a fresh
|
||||
|
|
@ -124,8 +124,8 @@ pub struct PointsToSummary {
|
|||
/// the call result (e.g. `bag[0]`, `fillBag(bag, …)`) can find a heap
|
||||
/// cell to read from or store into.
|
||||
///
|
||||
/// Closes the factory-pattern cross-file gap — `const bag = makeBag()`
|
||||
/// followed by `fillBag(bag, env)` and `exec(bag[0])` — by giving the
|
||||
/// Closes the factory-pattern cross-file gap (`const bag = makeBag()`
|
||||
/// followed by `fillBag(bag, env)` and `exec(bag[0])`) by giving the
|
||||
/// caller's heap analysis a stable identity to attach stores to.
|
||||
/// Combines freely with `Param(i) → Return` edges: a mixed-return
|
||||
/// function (one branch returns a param, another returns a fresh
|
||||
|
|
@ -136,7 +136,7 @@ pub struct PointsToSummary {
|
|||
}
|
||||
|
||||
impl PointsToSummary {
|
||||
/// Empty summary — no aliasing, no overflow. Equivalent to
|
||||
/// Empty summary: no aliasing, no overflow. Equivalent to
|
||||
/// [`Self::default`] but explicit at call sites.
|
||||
pub fn empty() -> Self {
|
||||
Self::default()
|
||||
|
|
@ -153,7 +153,7 @@ impl PointsToSummary {
|
|||
///
|
||||
/// Returns `true` when the edge was added, `false` when it was a
|
||||
/// duplicate or when the cap triggered an overflow. The caller can
|
||||
/// ignore the return — the summary always remains in a valid state.
|
||||
/// ignore the return, the summary always remains in a valid state.
|
||||
pub fn insert(&mut self, source: AliasPosition, target: AliasPosition, kind: AliasKind) {
|
||||
if self.overflow {
|
||||
return;
|
||||
|
|
@ -168,7 +168,7 @@ impl PointsToSummary {
|
|||
}
|
||||
if self.edges.len() >= MAX_ALIAS_EDGES {
|
||||
self.overflow = true;
|
||||
// Keep the existing edge list — a consumer that still reads
|
||||
// Keep the existing edge list, a consumer that still reads
|
||||
// the vector gets a strict *subset* of the sound over-
|
||||
// approximation conveyed by `overflow`. Correctness is
|
||||
// owned by the overflow flag; the residual edges are purely
|
||||
|
|
@ -337,7 +337,7 @@ mod tests {
|
|||
}
|
||||
}
|
||||
|
||||
// ── Pointer-Phase 5: field-granularity points-to summary ──────────────
|
||||
// ── field-granularity points-to summary ──────────────
|
||||
|
||||
/// Maximum field names retained per parameter in [`FieldPointsToSummary`].
|
||||
///
|
||||
|
|
@ -345,12 +345,12 @@ mod tests {
|
|||
/// while leaving room for typical helpers (a handful of fields each).
|
||||
pub const MAX_FIELDS_PER_PARAM: usize = 8;
|
||||
|
||||
/// Pointer-Phase 5: field-granularity per-parameter points-to summary.
|
||||
/// Field-granularity per-parameter points-to summary.
|
||||
///
|
||||
/// Records, for each positional parameter index, the set of field
|
||||
/// **names** read from and written to inside the callee body. Names
|
||||
/// (not [`crate::ssa::ir::FieldId`]) are persisted because field IDs
|
||||
/// are body-local — the per-body [`crate::ssa::ir::FieldInterner`]
|
||||
/// are body-local, the per-body [`crate::ssa::ir::FieldInterner`]
|
||||
/// reassigns IDs across files. Callers re-intern through their own
|
||||
/// body's interner before consulting `field_taint` cells.
|
||||
///
|
||||
|
|
@ -359,23 +359,23 @@ pub const MAX_FIELDS_PER_PARAM: usize = 8;
|
|||
/// same indexing convention as `SsaFuncSummary::receiver_to_*`
|
||||
/// (separate channel).
|
||||
///
|
||||
/// Empty by default — functions that don't read or write any field on
|
||||
/// Empty by default, functions that don't read or write any field on
|
||||
/// their parameters carry no entries and cost nothing on disk.
|
||||
#[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct FieldPointsToSummary {
|
||||
/// `(param_index, field_names_read)` — the callee projected each
|
||||
/// `(param_index, field_names_read)`: the callee projected each
|
||||
/// listed field on a value derived from `param_index` somewhere
|
||||
/// in its body. Sorted, deduped per-entry.
|
||||
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||||
pub param_field_reads: Vec<(u32, SmallVec<[String; 2]>)>,
|
||||
/// `(param_index, field_names_written)` — the callee assigned to
|
||||
/// `(param_index, field_names_written)`: the callee assigned to
|
||||
/// each listed field on a value derived from `param_index`.
|
||||
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||||
pub param_field_writes: Vec<(u32, SmallVec<[String; 2]>)>,
|
||||
/// Set when the read/write graph hit
|
||||
/// [`MAX_FIELDS_PER_PARAM`] for any parameter. Callers seeing
|
||||
/// `overflow=true` treat each parameter as reading/writing every
|
||||
/// field on every other parameter — the conservative greatest
|
||||
/// field on every other parameter, the conservative greatest
|
||||
/// lower bound that preserves soundness.
|
||||
#[serde(default, skip_serializing_if = "core::ops::Not::not")]
|
||||
pub overflow: bool,
|
||||
|
|
@ -441,7 +441,7 @@ impl FieldPointsToSummary {
|
|||
}
|
||||
|
||||
/// Union with `other`. Overflow propagates per
|
||||
/// [`PointsToSummary::merge`]'s semantics — once a callee is
|
||||
/// [`PointsToSummary::merge`]'s semantics, once a callee is
|
||||
/// "any field on any parameter", merging cannot recover precision.
|
||||
pub fn merge(&mut self, other: &Self) {
|
||||
if other.overflow {
|
||||
|
|
|
|||
|
|
@ -17,61 +17,35 @@ pub enum TaintTransform {
|
|||
AddBits(Cap),
|
||||
}
|
||||
|
||||
/// Maximum [`ReturnPathTransform`] entries retained per parameter.
|
||||
///
|
||||
/// Most functions have one or two return paths; eight is a generous bound
|
||||
/// that still keeps per-summary memory O(1). Beyond the cap, extraction
|
||||
/// joins the overflow into a single Top-predicate entry so the caller-side
|
||||
/// application always sees a bounded vector.
|
||||
/// Cap on per-parameter return-path entries. Overflow is joined into
|
||||
/// a single Top-predicate entry so callers always see a bounded vec.
|
||||
pub const MAX_RETURN_PATHS: usize = 8;
|
||||
|
||||
/// A single return-path entry in a per-parameter summary.
|
||||
///
|
||||
/// Per-return-path decomposition preserves callee-internal path splits that
|
||||
/// the aggregate [`TaintTransform`] would erase. Each entry records the
|
||||
/// path predicate under which this return is reached, the behavioural
|
||||
/// transform on that path, and (optionally) an abstract-domain contribution.
|
||||
///
|
||||
/// Callers carry their own path-state at the call site and apply only
|
||||
/// entries whose predicate is consistent with the caller's validated set;
|
||||
/// the remainder are skipped. Applicable entries are joined to produce
|
||||
/// the effective transform at the call site.
|
||||
///
|
||||
/// When a callee has a single return path, `param_return_paths` stays empty
|
||||
/// and the caller falls back to `param_to_return`'s union view.
|
||||
/// One return-path entry in a per-parameter summary. Records the path
|
||||
/// predicate, the transform on that path, and optionally an abstract
|
||||
/// contribution. Callers apply only entries consistent with their
|
||||
/// caller-side path state.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct ReturnPathTransform {
|
||||
/// Behavioural kind on this path (Identity / StripBits / AddBits).
|
||||
pub transform: TaintTransform,
|
||||
/// Deterministic hash of the path-predicate gate at this return.
|
||||
///
|
||||
/// `0` is reserved for "no predicate gate" — a return reached under
|
||||
/// no known predicate. Two return blocks whose path predicates are
|
||||
/// observationally equivalent hash to the same value and are joined.
|
||||
/// Deterministic hash of the path-predicate gate. `0` = no gate.
|
||||
/// Equivalent predicates collide and are joined.
|
||||
pub path_predicate_hash: u64,
|
||||
/// `PredicateSummary::known_true` bits that must hold on every path
|
||||
/// into this return. Encoded using [`crate::taint::domain::predicate_kind_bit`]:
|
||||
/// bit 0 = NullCheck, 1 = EmptyCheck, 2 = ErrorCheck.
|
||||
/// `known_true` predicate bits (bit 0 = NullCheck, 1 = EmptyCheck,
|
||||
/// 2 = ErrorCheck) that hold on every path into this return.
|
||||
pub known_true: u8,
|
||||
/// `PredicateSummary::known_false` bits at this return (same encoding
|
||||
/// as [`Self::known_true`]).
|
||||
/// `known_false` bits at this return.
|
||||
pub known_false: u8,
|
||||
/// Abstract contribution for this return path, when non-Top.
|
||||
///
|
||||
/// Callers combine this with their own abstract fact on the call
|
||||
/// site's argument using `AbstractValue::meet` to recover bounds that
|
||||
/// survive a specific return.
|
||||
/// Abstract contribution when non-Top. Callers `meet` it with the
|
||||
/// caller-side abstract fact.
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub abstract_contribution: Option<AbstractValue>,
|
||||
}
|
||||
|
||||
impl ReturnPathTransform {
|
||||
/// Dedup key combining the semantic fields of a path entry. Two entries
|
||||
/// with the same `(path_predicate_hash, transform, known_true, known_false)`
|
||||
/// describe the same behaviour on paths gated by the same predicate and
|
||||
/// can collapse without losing information. `abstract_contribution` is
|
||||
/// deliberately ignored — the dedup path joins the two entries'
|
||||
/// abstract facts rather than dropping one.
|
||||
/// Dedup key. `abstract_contribution` is intentionally excluded
|
||||
/// so that colliding entries join their abstract facts.
|
||||
pub fn dedup_key(&self) -> (u64, &TaintTransform, u8, u8) {
|
||||
(
|
||||
self.path_predicate_hash,
|
||||
|
|
@ -234,7 +208,7 @@ pub struct SsaFuncSummary {
|
|||
/// abstract value. At cross-file call sites the caller applies each
|
||||
/// transfer to the corresponding argument's abstract state and joins
|
||||
/// the results (then `meet`s with [`Self::return_abstract`]) to
|
||||
/// synthesise the return abstract value — recovering interval bounds
|
||||
/// synthesise the return abstract value, recovering interval bounds
|
||||
/// and string prefixes that would otherwise be lost to the summary's
|
||||
/// Top-seeded baseline.
|
||||
///
|
||||
|
|
@ -254,8 +228,8 @@ pub struct SsaFuncSummary {
|
|||
/// consistent with the caller's validated set, joining the applicable
|
||||
/// set into the effective call-site transform.
|
||||
///
|
||||
/// Empty when the callee has a single return path — the aggregate
|
||||
/// [`param_to_return`] is already precise — or when extraction
|
||||
/// Empty when the callee has a single return path (the aggregate
|
||||
/// [`param_to_return`] is already precise) or when extraction
|
||||
/// could not derive per-return state (e.g. early-exit probes).
|
||||
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||||
pub param_return_paths: Vec<(usize, SmallVec<[ReturnPathTransform; 2]>)>,
|
||||
|
|
@ -268,7 +242,7 @@ pub struct SsaFuncSummary {
|
|||
/// each other or the return value.
|
||||
#[serde(default, skip_serializing_if = "PointsToSummary::is_empty")]
|
||||
pub points_to: PointsToSummary,
|
||||
/// Pointer-Phase 5: field-granularity per-parameter points-to
|
||||
/// Field-granularity per-parameter points-to
|
||||
/// summary. Records which fields the callee reads from / writes
|
||||
/// to on each parameter, so cross-file resolution can spread
|
||||
/// taint through field-level mutations the callee performs on
|
||||
|
|
@ -295,7 +269,7 @@ pub struct SsaFuncSummary {
|
|||
/// Empty for callees whose return blocks produce no non-Top fact,
|
||||
/// or whose single return path makes the aggregate already precise.
|
||||
/// Cross-file callers that cannot pick a specific path fall back to
|
||||
/// joining the entries — equivalent to the pre-decomposition
|
||||
/// joining the entries, equivalent to the pre-decomposition
|
||||
/// behaviour.
|
||||
#[serde(default, skip_serializing_if = "SmallVec::is_empty")]
|
||||
pub return_path_facts: SmallVec<[PathFactReturnEntry; 2]>,
|
||||
|
|
@ -307,7 +281,7 @@ pub struct SsaFuncSummary {
|
|||
/// non-empty [`crate::ssa::type_facts::TypeKind::container_name`].
|
||||
///
|
||||
/// Consumed by [`crate::callgraph::build_call_graph`] to feed
|
||||
/// `CalleeQuery.receiver_type` for the matching ordinal — letting
|
||||
/// `CalleeQuery.receiver_type` for the matching ordinal, letting
|
||||
/// the call graph narrow indirect method-call edges to only those
|
||||
/// targets whose defining container matches the inferred type.
|
||||
/// Strictly additive: an empty map means today's name-only
|
||||
|
|
|
|||
|
|
@ -580,7 +580,7 @@ fn global_summaries_insert_ssa_exact_key_replacement() {
|
|||
gs.insert_ssa(key.clone(), v1.clone());
|
||||
assert_eq!(gs.get_ssa(&key), Some(&v1));
|
||||
|
||||
// Replace with a different summary — exact replacement, not union
|
||||
// Replace with a different summary, exact replacement, not union
|
||||
let v2 = SsaFuncSummary {
|
||||
param_to_return: vec![(0, TaintTransform::StripBits(Cap::HTML_ESCAPE))],
|
||||
param_to_sink: vec![(0, cap_sites(Cap::SQL_QUERY))],
|
||||
|
|
@ -1492,7 +1492,7 @@ fn free_function_and_method_with_same_name_resolve_separately() {
|
|||
assert_eq!(method, CalleeResolution::Resolved(km));
|
||||
|
||||
// Without any qualifier, receiver, or receiver_type, a bare
|
||||
// `process()` call is syntactically a free-function invocation — a
|
||||
// `process()` call is syntactically a free-function invocation, a
|
||||
// method cannot be invoked that way from outside its class. The
|
||||
// resolver's bare-call preference (step 5.5) picks the sole
|
||||
// empty-container candidate deterministically.
|
||||
|
|
@ -1709,7 +1709,7 @@ fn legacy_callees_string_array_deserializes() {
|
|||
#[test]
|
||||
fn mixed_callee_form_deserializes() {
|
||||
// Interop / partial-migration rows may mix legacy strings with
|
||||
// structured entries in the same array — deserializer accepts both.
|
||||
// structured entries in the same array, deserializer accepts both.
|
||||
let json = r#"{
|
||||
"name": "mixed",
|
||||
"file_path": "m.rs",
|
||||
|
|
@ -1936,7 +1936,7 @@ fn rust_wildcard_import_resolves_uniquely() {
|
|||
|
||||
#[test]
|
||||
fn rust_use_map_fallback_when_absent() {
|
||||
// No use_map entry — falls through to generic same-language resolution,
|
||||
// No use_map entry, falls through to generic same-language resolution,
|
||||
// which for an unqualified caller in the same namespace still works.
|
||||
let helper = rust_summary_with_mod("helper", "/proj/src/lib.rs", 0, Some(""), &[], &[], vec![]);
|
||||
let caller = rust_summary_with_mod(
|
||||
|
|
@ -1960,7 +1960,7 @@ fn rust_use_map_fallback_when_absent() {
|
|||
|
||||
#[test]
|
||||
fn rust_use_map_ambiguous_stays_ambiguous_without_hint() {
|
||||
// Two modules define `validate`; no use-map on the caller — resolution
|
||||
// Two modules define `validate`; no use-map on the caller, resolution
|
||||
// should remain Ambiguous rather than silently picking one.
|
||||
let token = rust_summary_with_mod(
|
||||
"validate",
|
||||
|
|
@ -2135,7 +2135,7 @@ fn query_prefers_receiver_type_over_leaf_collision() {
|
|||
// Old behaviour-parity regression: `resolve_callee_key_with_container`
|
||||
// (now a thin wrapper) used to treat `MessageQueue` as an authoritative
|
||||
// qualifier that *only* picked on exact match. The new resolver must
|
||||
// still do that — swap to `MessageQueue` and we get its method back.
|
||||
// still do that, swap to `MessageQueue` and we get its method back.
|
||||
let resolved_queue = gs.resolve_callee(&CalleeQuery {
|
||||
name: "send",
|
||||
caller_lang: Lang::Java,
|
||||
|
|
@ -2164,7 +2164,7 @@ fn query_prefers_receiver_type_over_leaf_collision() {
|
|||
fn query_authoritative_receiver_miss_does_not_fall_through_to_leaf() {
|
||||
// When `receiver_type = HttpClient` is supplied but no
|
||||
// `HttpClient::send` exists, the resolver MUST NOT silently pick a
|
||||
// same-leaf collision in another container — that would be the
|
||||
// same-leaf collision in another container, that would be the
|
||||
// classic "resolved by leaf name" bug the refactor aims to prevent.
|
||||
let mut gs = GlobalSummaries::new();
|
||||
let (k_queue, s_queue) = method_summary("src/queue.java", "MessageQueue", "send", 1, 0x02);
|
||||
|
|
@ -2326,7 +2326,7 @@ fn query_caller_container_resolves_self_call() {
|
|||
fn query_leaf_same_namespace_still_resolves_intra_file_calls() {
|
||||
// Two definitions share a leaf name but live in different files.
|
||||
// A same-namespace call (intra-file) must resolve to the local one
|
||||
// without requiring any structured hint — this is the common case
|
||||
// without requiring any structured hint, this is the common case
|
||||
// for bare top-level function calls.
|
||||
let mut gs = GlobalSummaries::new();
|
||||
let (k_a, s_a) = free_summary("src/a.js", "helper", 1, 0x01);
|
||||
|
|
@ -2369,7 +2369,7 @@ fn query_leaf_same_namespace_still_resolves_intra_file_calls() {
|
|||
|
||||
#[test]
|
||||
fn query_arity_filter_is_hard() {
|
||||
// Same container and leaf, different arities — resolution must
|
||||
// Same container and leaf, different arities, resolution must
|
||||
// honour the arity filter before any qualifier-based tie-break.
|
||||
let mut gs = GlobalSummaries::new();
|
||||
let (k_1arg, s_1arg) = method_summary("src/svc.py", "Svc", "render", 1, 0x01);
|
||||
|
|
@ -2402,7 +2402,7 @@ fn query_arity_filter_is_hard() {
|
|||
assert_eq!(two, CalleeResolution::Resolved(k_2arg));
|
||||
|
||||
// With a non-existent arity, arity filter prunes everything and we
|
||||
// get NotFound — not a "closest match" guess.
|
||||
// get NotFound, not a "closest match" guess.
|
||||
let mismatched = gs.resolve_callee(&CalleeQuery {
|
||||
name: "render",
|
||||
caller_lang: Lang::Java,
|
||||
|
|
@ -2427,7 +2427,7 @@ fn query_receiver_var_is_soft_tiebreak_not_primary() {
|
|||
// happens to also be called "obj". The old resolver used the
|
||||
// variable name as container_hint #1, which could mis-pick when
|
||||
// the qualified index had a coincidental hit. The new resolver
|
||||
// treats `receiver_var` as a *soft* tie-break — it only fires
|
||||
// treats `receiver_var` as a *soft* tie-break, it only fires
|
||||
// after same-namespace unique-leaf resolution fails.
|
||||
let mut gs = GlobalSummaries::new();
|
||||
let (k_same_ns, s_same_ns) = free_summary("src/app.js", "method", 1, 0xAA);
|
||||
|
|
@ -2514,7 +2514,7 @@ fn legacy_wrapper_preserves_test_contract() {
|
|||
gs.insert(k_a.clone(), s_a);
|
||||
|
||||
// container_hint doesn't match any container, but the leaf name has
|
||||
// exactly one candidate — the wrapper should still resolve.
|
||||
// exactly one candidate, the wrapper should still resolve.
|
||||
let resolved = gs.resolve_callee_key_with_container(
|
||||
"only",
|
||||
Lang::Java,
|
||||
|
|
@ -2530,7 +2530,7 @@ fn legacy_wrapper_preserves_test_contract() {
|
|||
// These tests target the most error-prone identity cases: two or more
|
||||
// definitions that share `(lang, namespace, name, arity)` but differ in
|
||||
// `container`. The resolver must either resolve to the exact container
|
||||
// target or refuse to guess — silently falling back to a same-leaf
|
||||
// target or refuse to guess, silently falling back to a same-leaf
|
||||
// collision in a different container is a correctness bug, and mis-
|
||||
// ordering the resolution steps can cause either false positives (wrong
|
||||
// summary picked) or false negatives (missed flow because Ambiguous
|
||||
|
|
@ -2542,7 +2542,7 @@ fn same_file_two_classes_same_method_typed_receiver_picks_exact() {
|
|||
// incompatible security behaviour: `Safe::run` is a sanitizer-ish
|
||||
// passthrough (no sink bits) while `Unsafe::run` is a shell sink.
|
||||
// When the caller has a typed receiver (via type inference), the
|
||||
// resolver must pick the exact class — the wrong pick would either
|
||||
// resolver must pick the exact class, the wrong pick would either
|
||||
// miss the Unsafe sink or wrongly flag the Safe path.
|
||||
let mut gs = GlobalSummaries::new();
|
||||
let (k_safe, s_safe) = method_summary("src/app.java", "Safe", "run", 1, 0x00);
|
||||
|
|
@ -2595,7 +2595,7 @@ fn same_file_two_classes_same_method_typed_receiver_picks_exact() {
|
|||
#[test]
|
||||
fn same_file_two_classes_same_method_untyped_receiver_is_ambiguous_not_wrong() {
|
||||
// Same setup as above, but the caller only has a variable-name
|
||||
// receiver (no type facts). `receiver_var` is a SOFT hint — and in
|
||||
// receiver (no type facts). `receiver_var` is a SOFT hint, and in
|
||||
// the common case `s`/`u` don't match any container. The resolver
|
||||
// MUST refuse to pick one arbitrarily; returning `Safe::run` when
|
||||
// the call was `u.run(...)` would be a silent false negative of the
|
||||
|
|
@ -2635,8 +2635,8 @@ fn same_file_two_classes_same_method_untyped_receiver_is_ambiguous_not_wrong() {
|
|||
#[test]
|
||||
fn same_file_free_function_and_method_bare_call_prefers_free_function() {
|
||||
// Classic "I wrote a top-level helper AND a method with the same
|
||||
// name in the same file" trap. A bare `process()` call — no
|
||||
// receiver, no qualifier, caller outside any container — is
|
||||
// name in the same file" trap. A bare `process()` call (no
|
||||
// receiver, no qualifier, caller outside any container) is
|
||||
// syntactically a FREE function call; the method cannot be invoked
|
||||
// this way. The resolver MUST resolve to the free function, not
|
||||
// return Ambiguous.
|
||||
|
|
@ -2682,7 +2682,7 @@ fn same_file_method_calling_sibling_free_function_resolves_to_free() {
|
|||
// Variant of the previous test with the caller LIVING INSIDE a
|
||||
// class whose own container does NOT define `process`. Bare
|
||||
// `process()` inside `Runner::kick()` must still resolve to the
|
||||
// file-local free function — not get lost in Ambiguous because the
|
||||
// file-local free function, not get lost in Ambiguous because the
|
||||
// caller_container hint (`Runner`) misses both candidates.
|
||||
let mut gs = GlobalSummaries::new();
|
||||
let (k_free, s_free) = free_summary("src/app.java", "process", 1, 0x0F);
|
||||
|
|
@ -2727,7 +2727,7 @@ fn same_file_method_calling_own_container_sibling_prefers_self_class() {
|
|||
// Inverse of the previous: caller is INSIDE `Worker::other()` and
|
||||
// calls bare `process()`. Both a free `process` AND `Worker::process`
|
||||
// exist in the file. The caller's own container resolution (step 3)
|
||||
// must prefer `Worker::process` — otherwise intra-class self calls
|
||||
// must prefer `Worker::process`, otherwise intra-class self calls
|
||||
// would get misresolved to a free function with possibly different
|
||||
// security behaviour.
|
||||
let mut gs = GlobalSummaries::new();
|
||||
|
|
@ -2804,7 +2804,7 @@ fn same_file_nested_container_same_method_disambiguates_by_container() {
|
|||
"`Outer` receiver_type must pick only Outer::foo — not Outer::Inner::foo via prefix match"
|
||||
);
|
||||
|
||||
// Exact cap pinning — guards against merge_summaries accidentally
|
||||
// Exact cap pinning, guards against merge_summaries accidentally
|
||||
// unioning caps across the two nested keys.
|
||||
assert_eq!(gs.get(&k_inner).unwrap().sink_caps, 0x02);
|
||||
}
|
||||
|
|
@ -2814,7 +2814,7 @@ fn same_file_same_name_different_security_behaviour_no_cap_leak() {
|
|||
// Three `validate/1` entries in the same file: a sanitizer
|
||||
// passthrough (free function), an HTML-escape sanitizer in one
|
||||
// class, and a shell-exec sink in another class. These must end
|
||||
// up as three distinct keys with their caps preserved exactly —
|
||||
// up as three distinct keys with their caps preserved exactly:
|
||||
// no merge of sink caps into the sanitizer entry, no cross-leak
|
||||
// via `by_lang_name` fallback.
|
||||
let mut gs = GlobalSummaries::new();
|
||||
|
|
@ -2873,7 +2873,7 @@ fn same_file_same_name_different_security_behaviour_no_cap_leak() {
|
|||
// (typically `disambig: None` from legacy/interop/DB-loaded summaries) where
|
||||
// the old code silently collapsed structurally distinct functions.
|
||||
|
||||
/// Build a minimal `FuncSummary` with `disambig: None` — mirrors the shape
|
||||
/// Build a minimal `FuncSummary` with `disambig: None`, mirrors the shape
|
||||
/// produced by legacy JSON rows / interop configs that don't know byte
|
||||
/// offsets. `file_path` is left blank so namespace normalisation doesn't
|
||||
/// separate the two otherwise-identical keys.
|
||||
|
|
@ -2956,7 +2956,7 @@ fn insert_mismatched_module_path_does_not_silently_merge() {
|
|||
.find(|(_, s)| s.module_path.as_deref() == Some("billing::invoice"))
|
||||
.expect("billing::invoice summary preserved");
|
||||
// Cross-contamination guard: the two crates must not have their
|
||||
// caps unioned — that's the observable failure mode of a silent
|
||||
// caps unioned, that's the observable failure mode of a silent
|
||||
// merge.
|
||||
assert_eq!(auth.1.sink_caps, Cap::SHELL_ESCAPE.bits());
|
||||
assert_eq!(billing.1.sink_caps, Cap::SQL_QUERY.bits());
|
||||
|
|
@ -2967,7 +2967,7 @@ fn insert_mismatched_module_path_does_not_silently_merge() {
|
|||
#[test]
|
||||
fn insert_mismatched_kind_does_not_silently_merge() {
|
||||
// A free function and a method with the same name, arity, namespace,
|
||||
// and container ("" vs "") can't actually occur — but kind alone
|
||||
// and container ("" vs "") can't actually occur, but kind alone
|
||||
// mismatching does happen in interop configs where a getter is
|
||||
// described as a function. Make sure the two end up distinct.
|
||||
let mut gs = GlobalSummaries::new();
|
||||
|
|
@ -2996,7 +2996,7 @@ fn insert_mismatched_kind_does_not_silently_merge() {
|
|||
let hits = gs.lookup_same_lang(Lang::Java, "size");
|
||||
assert_eq!(hits.len(), 2);
|
||||
// The getter's sink caps must not have been unioned into the
|
||||
// function — that would be a security-relevant leak.
|
||||
// function, that would be a security-relevant leak.
|
||||
let func_hit = hits
|
||||
.iter()
|
||||
.find(|(k, _)| k.kind == FuncKind::Function)
|
||||
|
|
@ -3010,7 +3010,7 @@ fn insert_mismatched_kind_does_not_silently_merge() {
|
|||
#[test]
|
||||
fn insert_mismatched_param_names_does_not_silently_merge() {
|
||||
// Two overloads in Java/C++ with the same arity but different
|
||||
// parameter types/names — a classic case where arity-only identity
|
||||
// parameter types/names, a classic case where arity-only identity
|
||||
// collapses distinct functions. Neither summary ships a disambig
|
||||
// because it was loaded from legacy JSON.
|
||||
let mut gs = GlobalSummaries::new();
|
||||
|
|
@ -3052,7 +3052,7 @@ fn insert_mismatched_param_names_does_not_silently_merge() {
|
|||
#[test]
|
||||
fn insert_synthetic_disambig_bit_set_only_for_collisions() {
|
||||
// A single legacy-style insert with `disambig: None` must NOT gain a
|
||||
// synthetic disambig — we only rekey to resolve collisions, never
|
||||
// synthetic disambig, we only rekey to resolve collisions, never
|
||||
// speculatively. This prevents downstream lookups keyed with
|
||||
// `disambig: None` from spuriously missing legitimately-single
|
||||
// summaries.
|
||||
|
|
@ -3075,7 +3075,7 @@ fn insert_synthetic_disambig_bit_set_only_for_collisions() {
|
|||
#[test]
|
||||
fn insert_compatible_refinement_still_unions() {
|
||||
// Two summaries describing the same function (structurally identical
|
||||
// head, differing only on behaviour fields) must still union — the
|
||||
// head, differing only on behaviour fields) must still union, the
|
||||
// tightened check doesn't regress the classic parallel-fold merge.
|
||||
let mut gs = GlobalSummaries::new();
|
||||
let a = FuncSummary {
|
||||
|
|
@ -3109,7 +3109,7 @@ fn insert_compatible_refinement_still_unions() {
|
|||
let merged = gs.get(&k).expect("compatible summaries still merge");
|
||||
assert_eq!(merged.source_caps, Cap::ENV_VAR.bits());
|
||||
assert_eq!(merged.sink_caps, Cap::SHELL_ESCAPE.bits());
|
||||
// Single entry — no accidental split for the compatible case.
|
||||
// Single entry, no accidental split for the compatible case.
|
||||
let hits = gs.lookup_same_lang(Lang::Rust, "f");
|
||||
assert_eq!(hits.len(), 1);
|
||||
}
|
||||
|
|
@ -3129,7 +3129,7 @@ fn insert_body_param_count_mismatch_rekeys() {
|
|||
..Default::default()
|
||||
};
|
||||
gs.insert_body(key.clone(), make_callee_body(2, 2));
|
||||
// Incoming body with a different param_count — must not overwrite.
|
||||
// Incoming body with a different param_count; it must not overwrite.
|
||||
gs.insert_body(key.clone(), make_callee_body(5, 4));
|
||||
|
||||
// Invariant 1: the original body stays at the original key (not
|
||||
|
|
@ -3164,7 +3164,7 @@ fn insert_body_param_count_mismatch_rekeys() {
|
|||
#[test]
|
||||
fn insert_ssa_arity_overflow_rekeys() {
|
||||
// Key claims arity 1, but the incoming SSA summary references
|
||||
// param index 3 — structurally impossible for the same function.
|
||||
// param index 3, structurally impossible for the same function.
|
||||
// The fix must split so the key arity invariant is preserved.
|
||||
let mut gs = GlobalSummaries::new();
|
||||
let key = FuncKey {
|
||||
|
|
@ -3185,7 +3185,7 @@ fn insert_ssa_arity_overflow_rekeys() {
|
|||
vec![(0, TaintTransform::Identity)]
|
||||
);
|
||||
|
||||
// Bad-arity incoming summary — must not overwrite the legitimate one.
|
||||
// Bad-arity incoming summary, must not overwrite the legitimate one.
|
||||
let overflowing = SsaFuncSummary {
|
||||
param_to_return: vec![(3, TaintTransform::Identity)],
|
||||
param_to_sink: vec![(2, cap_sites(Cap::SQL_QUERY))],
|
||||
|
|
@ -3207,10 +3207,10 @@ fn insert_ssa_arity_overflow_rekeys() {
|
|||
///
|
||||
/// This is the case `lower_to_ssa` produces for Java instance/static
|
||||
/// methods that reference free identifiers (e.g. `f.close()` where
|
||||
/// `close` is treated as an external capture — the synthetic Param 0
|
||||
/// `close` is treated as an external capture, the synthetic Param 0
|
||||
/// then leaks into `param_to_return`/`param_to_sink`). Without the
|
||||
/// audit-gap fix, `reconcile_ssa_summary_key` would synthesise a
|
||||
/// disambig and Phase 3's `summaries.get_ssa(caller_key)` lookup
|
||||
/// disambig and the analysis's `summaries.get_ssa(caller_key)` lookup
|
||||
/// (consuming `typed_call_receivers` at the FuncSummary-aligned key)
|
||||
/// would miss.
|
||||
#[test]
|
||||
|
|
@ -3229,7 +3229,7 @@ fn insert_ssa_arity_overflow_keeps_original_key_when_no_collision() {
|
|||
};
|
||||
let summary = SsaFuncSummary {
|
||||
// Synthetic Param-0 for the external `close` identifier inside
|
||||
// the static `read()` body — `param_count == 0` per the source-
|
||||
// the static `read()` body, `param_count == 0` per the source-
|
||||
// level signature.
|
||||
param_to_return: vec![(0, TaintTransform::Identity)],
|
||||
typed_call_receivers: vec![(1, "FileHandle".to_string())],
|
||||
|
|
@ -3241,7 +3241,7 @@ fn insert_ssa_arity_overflow_keeps_original_key_when_no_collision() {
|
|||
.get_ssa(&key)
|
||||
.expect("Reader::read SSA must be reachable at the FuncSummary-aligned key");
|
||||
assert_eq!(kept.typed_call_receivers, summary.typed_call_receivers);
|
||||
// The synthetic Param-0 reference is preserved verbatim — pass-2
|
||||
// The synthetic Param-0 reference is preserved verbatim, pass-2
|
||||
// analysis still aligns it with the caller's implicit-uses
|
||||
// argument group at the same index.
|
||||
assert_eq!(kept.param_to_return, summary.param_to_return);
|
||||
|
|
@ -3288,7 +3288,7 @@ fn insert_ssa_arity_overflow_iterative_rescan_stays_at_original_key() {
|
|||
assert_eq!(kept.param_to_return, round2.param_to_return);
|
||||
}
|
||||
|
||||
// ── Primary sink-location attribution — SinkSite round-trips ────────────
|
||||
// ── Primary sink-location attribution, SinkSite round-trips ────────────
|
||||
|
||||
#[test]
|
||||
fn sink_site_serde_round_trip_solo() {
|
||||
|
|
@ -3549,7 +3549,7 @@ fn cf4_merge_return_paths_caps_at_max() {
|
|||
"overflow collapses to a single Top-predicate entry"
|
||||
);
|
||||
// Joined entry has no predicate gate (hash=0) and conservatively takes
|
||||
// the intersection of all strip bits — which here is HTML_ESCAPE.
|
||||
// the intersection of all strip bits, which here is HTML_ESCAPE.
|
||||
let joined = &existing[0];
|
||||
assert_eq!(joined.path_predicate_hash, 0);
|
||||
assert!(matches!(
|
||||
|
|
@ -3626,11 +3626,11 @@ fn cf4_union_param_return_paths_by_index() {
|
|||
#[test]
|
||||
fn cf4_ssa_summary_fits_arity_keeps_out_of_range_path_idx_at_original_key() {
|
||||
// A path whose param index exceeds the key's arity is treated as a
|
||||
// synthetic external-capture artefact (audit gap A.2.1.G1 — see
|
||||
// synthetic external-capture artefact (audit gap A.2.1.G1, see
|
||||
// `project_typed_callgraph_audit_gap_ssa_disambig.md`). When no
|
||||
// existing entry sits at the key, `insert_ssa` keeps the (untrimmed)
|
||||
// summary at the original key so the SSA FuncKey stays aligned with
|
||||
// the matching FuncSummary FuncKey — Phase 3's
|
||||
// the matching FuncSummary FuncKey, the analysis's
|
||||
// `summaries.get_ssa(caller_key)` lookup (consuming
|
||||
// `typed_call_receivers`) depends on this alignment.
|
||||
let bad = SsaFuncSummary {
|
||||
|
|
@ -3641,7 +3641,7 @@ fn cf4_ssa_summary_fits_arity_keeps_out_of_range_path_idx_at_original_key() {
|
|||
lang: Lang::Rust,
|
||||
namespace: "test.rs".into(),
|
||||
name: "helper".into(),
|
||||
arity: Some(2), // too small for idx 5 — synthetic-Param marker
|
||||
arity: Some(2), // too small for idx 5, synthetic-Param marker
|
||||
..Default::default()
|
||||
};
|
||||
let mut gs = GlobalSummaries::new();
|
||||
|
|
@ -3730,9 +3730,9 @@ fn cf6_ssa_summary_fits_arity_keeps_out_of_range_points_to_idx_at_original_key()
|
|||
assert_eq!(kept.points_to.max_param_index(), Some(7));
|
||||
}
|
||||
|
||||
/// Phase 4 (typed call-graph devirtualisation): two `findById`
|
||||
/// Typed call-graph devirtualisation: two `findById`
|
||||
/// definitions on different containers must remain structurally
|
||||
/// disjoint after [`merge_summaries`] — no cap union may leak
|
||||
/// disjoint after [`merge_summaries`], no cap union may leak
|
||||
/// across them. The FuncKey identity model already keys on
|
||||
/// `(lang, namespace, container, name, arity, ...)` so this is
|
||||
/// supposed to be true today; the test pins it down so a future
|
||||
|
|
@ -3741,7 +3741,7 @@ fn cf6_ssa_summary_fits_arity_keeps_out_of_range_points_to_idx_at_original_key()
|
|||
/// Concretely: `Repository::findById` is parameterised (no
|
||||
/// `SQL_QUERY` sink cap), `UnsafeCache::findById` runs a string-
|
||||
/// concatenated query (carries `Cap::SQL_QUERY`). After merge,
|
||||
/// each FuncKey must own only its own caps — Repository must NOT
|
||||
/// each FuncKey must own only its own caps, Repository must NOT
|
||||
/// inherit Cache's `SQL_QUERY` bit.
|
||||
#[test]
|
||||
fn cross_file_devirt_does_not_union_unrelated_findbyids() {
|
||||
|
|
@ -3777,7 +3777,7 @@ fn cross_file_devirt_does_not_union_unrelated_findbyids() {
|
|||
|
||||
let gs = merge_summaries(vec![safe_repo, unsafe_cache], None);
|
||||
|
||||
// Two distinct keys must coexist — no merge collision.
|
||||
// Two distinct keys must coexist, no merge collision.
|
||||
let repo_key = FuncKey {
|
||||
lang: Lang::Rust,
|
||||
namespace: "src/repo.rs".into(),
|
||||
|
|
@ -3798,7 +3798,7 @@ fn cross_file_devirt_does_not_union_unrelated_findbyids() {
|
|||
let repo_sum = gs.get(&repo_key).expect("Repository::findById missing");
|
||||
let cache_sum = gs.get(&cache_key).expect("UnsafeCache::findById missing");
|
||||
|
||||
// Sink caps stay on their own owner — the whole point of
|
||||
// Sink caps stay on their own owner, the whole point of
|
||||
// devirtualisation. Repository must not have inherited the
|
||||
// SQL_QUERY bit from UnsafeCache.
|
||||
assert_eq!(
|
||||
|
|
@ -3812,7 +3812,7 @@ fn cross_file_devirt_does_not_union_unrelated_findbyids() {
|
|||
Cap::SQL_QUERY.bits(),
|
||||
"UnsafeCache::findById lost its own sink cap during merge"
|
||||
);
|
||||
// Same invariant on tainted_sink_params — must not bleed across.
|
||||
// Same invariant on tainted_sink_params, must not bleed across.
|
||||
assert!(
|
||||
repo_sum.tainted_sink_params.is_empty(),
|
||||
"Repository::findById inherited tainted_sink_params from UnsafeCache: {:?}",
|
||||
|
|
@ -3821,7 +3821,7 @@ fn cross_file_devirt_does_not_union_unrelated_findbyids() {
|
|||
assert_eq!(cache_sum.tainted_sink_params, vec![0]);
|
||||
}
|
||||
|
||||
// ── Phase 6 hierarchy fan-out at runtime resolution ────────────────────
|
||||
// ── Hierarchy fan-out at runtime resolution ────────────────────
|
||||
//
|
||||
// `GlobalSummaries::resolve_callee_widened` is the runtime counterpart of
|
||||
// the call-graph builder's `TypeHierarchyIndex::resolve_with_hierarchy`.
|
||||
|
|
@ -3855,7 +3855,7 @@ mod hierarchy_widened_tests {
|
|||
(key, summary)
|
||||
}
|
||||
|
||||
/// A1 — no hierarchy installed. Widening collapses to today's
|
||||
/// A1, no hierarchy installed. Widening collapses to today's
|
||||
/// single-result behaviour: one key in / one key out.
|
||||
#[test]
|
||||
fn widened_without_hierarchy_returns_single_resolved() {
|
||||
|
|
@ -3877,7 +3877,7 @@ mod hierarchy_widened_tests {
|
|||
assert_eq!(widened, vec![k]);
|
||||
}
|
||||
|
||||
/// A2 — hierarchy installed but the receiver type has no recorded
|
||||
/// A2, hierarchy installed but the receiver type has no recorded
|
||||
/// sub-types. Falls through to today's single-result behaviour.
|
||||
#[test]
|
||||
fn widened_no_subtypes_returns_single() {
|
||||
|
|
@ -3899,7 +3899,7 @@ mod hierarchy_widened_tests {
|
|||
assert_eq!(widened, vec![k]);
|
||||
}
|
||||
|
||||
/// A3 — hierarchy with one sub-type implementer. Widening returns
|
||||
/// A3, hierarchy with one sub-type implementer. Widening returns
|
||||
/// both the direct receiver match and the sub-type's match.
|
||||
#[test]
|
||||
fn widened_one_subtype_returns_two_keys() {
|
||||
|
|
@ -3938,14 +3938,14 @@ mod hierarchy_widened_tests {
|
|||
assert!(widened.contains(&k_impl));
|
||||
}
|
||||
|
||||
/// A4 — hierarchy with multiple sub-types: every implementer's
|
||||
/// A4, hierarchy with multiple sub-types: every implementer's
|
||||
/// matching method is in the result, deduplicated.
|
||||
#[test]
|
||||
fn widened_multiple_subtypes_returns_all() {
|
||||
let mut gs = GlobalSummaries::new();
|
||||
// Three impls + one interface. The interface itself has no
|
||||
// body so we omit a method on it (that is the more common
|
||||
// shape — a pure interface plus concrete classes).
|
||||
// shape, a pure interface plus concrete classes).
|
||||
let edges = vec![
|
||||
("FileLogger".to_string(), "ILogger".to_string()),
|
||||
("NetLogger".to_string(), "ILogger".to_string()),
|
||||
|
|
@ -3984,7 +3984,7 @@ mod hierarchy_widened_tests {
|
|||
assert!(widened.contains(&k_std));
|
||||
}
|
||||
|
||||
/// A5 — the arity filter must apply across the whole fan-out, not
|
||||
/// A5, the arity filter must apply across the whole fan-out, not
|
||||
/// just the direct-receiver leg. An implementer with a different
|
||||
/// arity must not leak into the result.
|
||||
#[test]
|
||||
|
|
@ -4013,10 +4013,10 @@ mod hierarchy_widened_tests {
|
|||
assert_eq!(widened, vec![k_one], "arity-2 impl must be filtered out");
|
||||
}
|
||||
|
||||
/// A6 — fan-out is bounded at `MAX_HIERARCHY_FANOUT`. Build a
|
||||
/// A6, fan-out is bounded at `MAX_HIERARCHY_FANOUT`. Build a
|
||||
/// hierarchy with more impls than the cap allows and assert the
|
||||
/// result is exactly capped (and that early impls are preserved
|
||||
/// — the cap drops the *tail*, not the head).
|
||||
/// i.e. the cap drops the *tail*, not the head).
|
||||
#[test]
|
||||
fn widened_caps_at_max_hierarchy_fanout() {
|
||||
let cap = GlobalSummaries::MAX_HIERARCHY_FANOUT;
|
||||
|
|
@ -4030,7 +4030,7 @@ mod hierarchy_widened_tests {
|
|||
.map(|i| (format!("Impl{i:02}"), "IBase".to_string()))
|
||||
.collect();
|
||||
|
||||
// Carrier — first impl carries every edge so the index is
|
||||
// Carrier, first impl carries every edge so the index is
|
||||
// populated in one shot.
|
||||
let (k0, s0) = java_method("src/impl00.java", "Impl00", "run", 0, 0x01, edges);
|
||||
gs.insert(k0.clone(), s0);
|
||||
|
|
@ -4065,18 +4065,18 @@ mod hierarchy_widened_tests {
|
|||
);
|
||||
}
|
||||
|
||||
/// A7 — when hierarchy widening produces no candidates AND the
|
||||
/// A7, when hierarchy widening produces no candidates AND the
|
||||
/// receiver_type lookup is authoritative (Step 1), the secondary
|
||||
/// fall-through goes through `resolve_callee` which returns
|
||||
/// Ambiguous/NotFound rather than silently picking an unrelated
|
||||
/// leaf — exactly the "subset of today's targets, never a
|
||||
/// leaf, exactly the "subset of today's targets, never a
|
||||
/// superset" rule. Test asserts the empty result is preserved.
|
||||
#[test]
|
||||
fn widened_empty_does_not_silently_pick_unrelated_leaf() {
|
||||
let mut gs = GlobalSummaries::new();
|
||||
// Edge: IUnused has a sub Used, but neither declares
|
||||
// `something`. An unrelated free function `something` exists
|
||||
// in the same namespace — under today's authoritative
|
||||
// in the same namespace, under today's authoritative
|
||||
// receiver_type rules, that function MUST NOT be picked when
|
||||
// the call is annotated with receiver_type "IUnused".
|
||||
let edges = vec![("Used".to_string(), "IUnused".to_string())];
|
||||
|
|
@ -4104,7 +4104,7 @@ mod hierarchy_widened_tests {
|
|||
);
|
||||
}
|
||||
|
||||
/// A7b — when hierarchy widening produces nothing AND today's
|
||||
/// A7b, when hierarchy widening produces nothing AND today's
|
||||
/// `resolve_callee` *does* resolve (no receiver_type, just bare
|
||||
/// leaf or qualifier hint), the fallback returns the single key.
|
||||
/// This pins the secondary-fallback contract on the path where it
|
||||
|
|
@ -4131,7 +4131,7 @@ mod hierarchy_widened_tests {
|
|||
assert_eq!(widened, vec![k_free]);
|
||||
}
|
||||
|
||||
/// A8 — receiver_type is None → no widening; behaves identically
|
||||
/// A8, receiver_type is None → no widening; behaves identically
|
||||
/// to `resolve_callee` (single-result wrap).
|
||||
#[test]
|
||||
fn widened_no_receiver_type_collapses_to_resolve_callee() {
|
||||
|
|
@ -4153,7 +4153,7 @@ mod hierarchy_widened_tests {
|
|||
assert_eq!(widened, vec![k_free]);
|
||||
}
|
||||
|
||||
/// A9 — `merge()` must invalidate the cached hierarchy index so a
|
||||
/// A9, `merge()` must invalidate the cached hierarchy index so a
|
||||
/// post-merge call to `resolve_callee_widened` doesn't look up a
|
||||
/// stale view. Since `install_hierarchy` is required after merges,
|
||||
/// the test asserts: post-merge, before reinstall, fan-out must
|
||||
|
|
@ -4180,7 +4180,7 @@ mod hierarchy_widened_tests {
|
|||
});
|
||||
assert_eq!(pre_merge.len(), 2);
|
||||
|
||||
// Merge in an empty `gs_b` — should invalidate the cached
|
||||
// Merge in an empty `gs_b`, should invalidate the cached
|
||||
// hierarchy.
|
||||
gs_a.merge(GlobalSummaries::new());
|
||||
assert!(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue