Python fp and docs updates (#58)

* refactor: Update comments for clarity and add expectations.json files for performance metrics * feat: Implement FP guard for JS/TS local-collection receivers to suppress missing ownership checks * feat: Enhance Rust parameter handling to classify local collections and prevent false ownership checks * refactor: Simplify code formatting for better readability in multiple files * refactor: Improve UTF-8 sequence length handling and enhance clarity in loop iteration * feat: Update Java and Python patterns to include new security rules * refactor: Improve comment clarity and consistency across multiple Rust files * refactor: Simplify code formatting for improved readability in integration tests and module files * refactor: Improve comment formatting and enhance clarity in assertions across multiple files
2026-06-24 20:28:06 +02:00 · 2026-04-29 19:53:34 -04:00 · 2026-04-29 19:53:34 -04:00 · a438886217
commit a438886217
parent 4db0805de6
291 changed files with 9485 additions and 3851 deletions
--- a/src/taint/backwards.rs
+++ b/src/taint/backwards.rs
@ -3,7 +3,7 @@
 //! The forward taint engine (`ssa_transfer.rs`) proceeds source-to-sink,
 //! spending analysis budget on every function the source might touch.  Its
 //! precision ceiling is fixed by what summaries + inline re-analysis can
-//! preserve on every edge of a flow — a single lossy edge drops the finding.
+//! preserve on every edge of a flow; a single lossy edge drops the finding.
 //!
 //! This module implements the opposite direction: start at each sink value,
 //! walk *reverse* SSA edges and (when needed) cross-file callee bodies on
@ -16,7 +16,7 @@
 //!   reaches a matching source, we append `backwards-confirmed` to the
 //!   finding's evidence notes.
 //! * When the backwards walk proves the flow infeasible via accumulated
-//!   path predicates, we append `backwards-infeasible` — consumed by the
+//!   path predicates, we append `backwards-infeasible`, consumed by the
 //!   confidence scorer as a cap-to-Low signal.
 //! * Backward flows that reach a source with no matching forward finding
 //!   become standalone `taint-backwards-flow` diags (a separate rule id so
@ -63,7 +63,7 @@ pub const MAX_BACKWARDS_CALLEE_BLOCKS: usize = 500;
 /// the finding, and which predicate evidence (if any) has been gathered so
 /// far.
 ///
-/// `caps` is monotone — the walk can only narrow the demand (by proving
+/// `caps` is monotone: the walk can only narrow the demand (by proving
 /// operands validated or sanitized against specific capability bits), never
 /// widen it.  This keeps backwards composition with summary-derived
 /// transforms sound.
@ -140,7 +140,7 @@ pub const MAX_CHAIN_LEN: usize = 16;
 /// The context is intentionally narrow: it borrows from whatever analysis
 /// objects the caller has already prepared (summaries, the current body,
 /// cross-file body maps) and does not build its own.  This keeps the
-/// backwards pass cheap to enable — when off, none of this code is touched.
+/// backwards pass cheap to enable: when off, none of this code is touched.
 pub struct BackwardsCtx<'a> {
    /// Callee's SSA body.
    pub ssa: &'a SsaBody,
@ -178,7 +178,7 @@ impl<'a> BackwardsCtx<'a> {

 /// One step of the backwards transfer: given a demand on `value`, compute
 /// the demand on its immediate SSA operands.  Returns the list of
-/// `(operand, demand)` pairs — possibly empty if the defining op terminates
+/// `(operand, demand)` pairs, possibly empty if the defining op terminates
 /// the walk (Source/Const/Param).
 ///
 /// This is a pure function over the op and demand; cycle detection and
@ -224,7 +224,7 @@ pub fn backward_transfer(
        SsaOp::CatchParam => (BackwardStep::ReachedCatchParam, SmallVec::new()),
        SsaOp::Nop => (BackwardStep::Unknown, SmallVec::new()),
        // Undef is a phi-operand sentinel on edges with no reaching
-        // definition — nothing to trace backwards through.
+        // definition, nothing to trace backwards through.
        SsaOp::Undef => (BackwardStep::ReachedConst, SmallVec::new()),
        SsaOp::Phi(operands) => {
            // Demand fans out to every incoming value: the runtime value of
@ -254,7 +254,7 @@ pub fn backward_transfer(
            ..
        } => {
            // For Call ops the full demand transfer depends on callee
-            // metadata (summary or body).  The driver handles that —
+            // metadata (summary or body).  The driver handles that;
            // return a `BackwardStep::Call` carrying the receiver + args
            // so the driver can consult [`GlobalSummaries`] / bodies_by_key.
            let mut flat: SmallVec<[(SsaValue, DemandState); 4]> = SmallVec::new();
@ -276,7 +276,7 @@ pub fn backward_transfer(
        SsaOp::FieldProj { receiver, .. } => {
            // Field projection: demand for `obj.f` flows to `obj`.  Treated
            // structurally like a single-operand Assign for the backwards
-            // walk — sufficient until Phase 4 introduces field-sensitive
+            // walk, sufficient until future passes introduce field-sensitive
            // demand discrimination.
            let mut next: SmallVec<[(SsaValue, DemandState); 4]> = SmallVec::new();
            next.push((*receiver, demand.clone()));
@ -290,12 +290,12 @@ pub fn backward_transfer(
 /// resolution.
 #[derive(Clone, Debug, PartialEq, Eq)]
 pub enum BackwardStep {
-    /// Defining op is a tainted [`SsaOp::Source`] — walk terminates with a
+    /// Defining op is a tainted [`SsaOp::Source`], walk terminates with a
    /// confirmed flow.
    ReachedSource(NodeIndex),
-    /// Defining op is a [`SsaOp::Const`] — walk terminates without a source.
+    /// Defining op is a [`SsaOp::Const`], walk terminates without a source.
    ReachedConst,
-    /// Defining op is an [`SsaOp::Param`] / [`SsaOp::SelfParam`] — walk may
+    /// Defining op is an [`SsaOp::Param`] / [`SsaOp::SelfParam`], walk may
    /// continue by resolving the parameter against the caller's arguments
    /// (requires reverse call-graph expansion, which is out of scope for
    /// the current cut and is handled as a terminal step).
@ -305,13 +305,13 @@ pub enum BackwardStep {
    /// the actual exception source requires exception-edge traversal not
    /// performed here.
    ReachedCatchParam,
-    /// Phi node — driver fans out to predecessors.
+    /// Phi node, driver fans out to predecessors.
    Phi,
-    /// Arithmetic / copy / cast — driver fans out to operands.
+    /// Arithmetic / copy / cast, driver fans out to operands.
    Assign,
-    /// Call op — driver consults summaries and/or callee bodies.
+    /// Call op, driver consults summaries and/or callee bodies.
    Call { callee: String },
-    /// Defining op could not be located or was a [`SsaOp::Nop`] — walk
+    /// Defining op could not be located or was a [`SsaOp::Nop`], walk
    /// terminates as inconclusive.
    Unknown,
 }
@ -321,7 +321,7 @@ pub enum BackwardStep {
 /// Walk backwards from `sink_value` in `ctx.ssa`, producing at most one
 /// [`BackwardFlow`] per reached source (phi fan-outs can produce multiple).
 ///
-/// Does not consult forward findings — the caller is responsible for
+/// Does not consult forward findings, the caller is responsible for
 /// matching the returned flows against its finding set.
 pub fn analyse_sink_backwards(
    ctx: &BackwardsCtx<'_>,
@ -385,7 +385,7 @@ fn walk_dfs(
    // Before dispatching on the SSA op kind, consult the defining CFG node's
    // label set.  Many Source-labelled callables in the CFG lower to an
    // `SsaOp::Call` rather than `SsaOp::Source` (request.args.get,
-    // os.getenv, …) — recognising the label here keeps the walk in
+    // os.getenv, …), recognising the label here keeps the walk in
    // sync with the forward engine's source model.
    let def_cfg_node = ctx.ssa.def_of(value).cfg_node;
    if def_cfg_node.index() < ctx.cfg.node_count() {
@ -429,7 +429,7 @@ fn walk_dfs(
            });
        }
        BackwardStep::ReachedConst => {
-            // Constants never supply taint — treat as a silent prune.
+            // Constants never supply taint, treat as a silent prune.
        }
        BackwardStep::ReachedParam { index: _, node } => {
            // Reverse-call-graph expansion is intentionally left out of the
@ -452,7 +452,7 @@ fn walk_dfs(
            });
        }
        BackwardStep::ReachedCatchParam => {
-            // Exception-borne taint — record but don't confirm.  Marked
+            // Exception-borne taint, record but don't confirm.  Marked
            // non-confirmatory so unit tests can distinguish "walk reached
            // catch-param" from "walk reached source".
        }
@ -514,7 +514,7 @@ fn walk_dfs(
                    }
                }
                // Prevent an unused-variable warning while still accepting
-                // the key in the matcher — the key is useful for debug
+                // the key in the matcher, the key is useful for debug
                // logging in bigger expansions.
                let _ = callee_key;
                return;
@ -539,7 +539,7 @@ fn walk_dfs(
            }
        }
        BackwardStep::Unknown => {
-            // No information — terminate silently.
+            // No information, terminate silently.
        }
    }
 }
@ -632,12 +632,12 @@ pub const NOTE_BUDGET: &str = "backwards-budget-exhausted";
 /// Classification for a forward finding after backwards post-processing.
 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
 pub enum FindingVerdict {
-    /// Backwards reached a matching source — finding corroborated.
+    /// Backwards reached a matching source, finding corroborated.
    Confirmed,
    /// Backwards was inconclusive (no source, not infeasible).  Finding
    /// keeps its forward-assigned confidence.
    Inconclusive,
-    /// Backwards proved the flow infeasible — finding confidence must drop.
+    /// Backwards proved the flow infeasible, finding confidence must drop.
    Infeasible,
    /// Budget exhausted before a verdict was reached.
    BudgetExhausted,
@ -658,7 +658,7 @@ pub fn aggregate_verdict(flows: &[BackwardFlow]) -> FindingVerdict {
 }

 /// Apply a verdict as a note on a [`Finding`].  No-ops when the verdict is
-/// [`FindingVerdict::Inconclusive`] — the forward finding retains its
+/// [`FindingVerdict::Inconclusive`], the forward finding retains its
 /// original metadata.
 pub fn annotate_finding(finding: &mut Finding, verdict: FindingVerdict) {
    // `Finding` does not own an Evidence struct directly (that lives on
@ -1079,6 +1079,7 @@ mod tests {
            path_hash: 0,
            finding_id: String::new(),
            alternative_finding_ids: smallvec::SmallVec::new(),
+            effective_sink_caps: crate::labels::Cap::empty(),
        };
        annotate_finding(&mut f, FindingVerdict::Confirmed);
        let sv = f.symbolic.as_ref().expect("symbolic verdict created");
@ -1116,6 +1117,7 @@ mod tests {
            path_hash: 0,
            finding_id: String::new(),
            alternative_finding_ids: smallvec::SmallVec::new(),
+            effective_sink_caps: crate::labels::Cap::empty(),
        };
        annotate_finding(&mut f, FindingVerdict::Inconclusive);
        assert!(f.symbolic.is_none());
--- a/src/taint/domain.rs
+++ b/src/taint/domain.rs
@ -13,7 +13,7 @@ pub struct VarTaint {
    pub uses_summary: bool,
 }

-/// A single taint origin — the node and classification of where taint came from.
+/// A single taint origin, the node and classification of where taint came from.
 #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
 pub struct TaintOrigin {
    pub node: NodeIndex,
@ -30,7 +30,7 @@ pub struct TaintOrigin {
 /// # Capacity limit
 ///
 /// `SmallBitSet` is a fixed-size 64-slot bitset backed by a single `u64`.
-/// Inserting a `SymbolId` with ordinal ≥ 64 is a no-op — the bit is silently
+/// Inserting a `SymbolId` with ordinal ≥ 64 is a no-op: the bit is silently
 /// dropped. This is a deliberate precision-over-completeness trade: the
 /// bitset underpins predicate / validation tracking in the SSA taint engine,
 /// and functions with more than 64 distinct predicate-relevant variables are
--- a/src/taint/mod.rs
+++ b/src/taint/mod.rs
@ -1,4 +1,5 @@
 #![allow(clippy::collapsible_if, clippy::too_many_arguments)]
+#![doc = include_str!(concat!(env!("OUT_DIR"), "/taint.md"))]

 pub mod backwards;
 pub mod domain;
@ -84,7 +85,7 @@ fn js_ts_pass2_cap() -> usize {
 //
 // Active only when the slot is `Some`.  Production code path leaves it
 // `None`, making instrumentation cost a single thread-local borrow + a
-// `match Option::None` per measured chunk — sub-nanosecond.
+// `match Option::None` per measured chunk, sub-nanosecond.
 thread_local! {
    static PERF_LOWER_TIMINGS: std::cell::Cell<Option<[u128; 7]>> =
        const { std::cell::Cell::new(None) };
@ -112,10 +113,10 @@ fn perf_lower_record(slot: usize, micros: u128) {

 /// Test-only override for the Gauss-Seidel toggle.  Values:
 ///
-/// * `0` — respect `NYX_JS_GAUSS_SEIDEL` env var (default production
+/// * `0`: respect `NYX_JS_GAUSS_SEIDEL` env var (default production
 ///   behaviour).
-/// * `1` — force Jacobi (env ignored).
-/// * `2` — force Gauss-Seidel (env ignored).
+/// * `1`: force Jacobi (env ignored).
+/// * `2`: force Gauss-Seidel (env ignored).
 ///
 /// Used exclusively by integration tests that need to assert both
 /// variants produce equal findings without per-test process isolation.
@ -209,7 +210,7 @@ pub struct Finding {
    /// The kind of source that originated the taint.
    pub source_kind: SourceKind,
    /// Whether all tainted sink variables are guarded by a validation
-    /// predicate on this path (metadata only — does not change severity).
+    /// predicate on this path (metadata only, does not change severity).
    pub path_validated: bool,
    /// The kind of validation guard protecting this path, if any.
    pub guard_kind: Option<PredicateKind>,
@ -233,7 +234,7 @@ pub struct Finding {
    /// sink was resolved via a function summary carrying a
    /// [`crate::summary::SinkSite`] with concrete coordinates for primary
    /// sink-location attribution.  `None` for:
-    /// * intra-procedural / label-based sinks — the caller's `cfg[sink]`
+    /// * intra-procedural / label-based sinks, the caller's `cfg[sink]`
    ///   span already names the dangerous instruction;
    /// * summary-resolved sinks whose `SinkSite` was cap-only (no tree or
    ///   bytes context at extraction time).
@ -245,7 +246,7 @@ pub struct Finding {
    /// the scan root is the file itself (every namespace normalizes to
    /// `""`); consumers resolve empty `file_rel` against the file under
    /// analysis.  Enforced at `ssa_events_to_findings` by a
-    /// `debug_assert!` — upstream filters drop cap-only sites before
+    /// `debug_assert!`, upstream filters drop cap-only sites before
    /// they reach this field.
    ///
    /// Deliberately independent of `uses_summary`: that flag tracks whether
@ -255,13 +256,13 @@ pub struct Finding {
    /// `primary_location`.
    pub primary_location: Option<SinkLocation>,
    /// Engine provenance notes recorded during the analysis that produced
-    /// this finding.  Populated when an internal budget/cap was hit — see
+    /// this finding.  Populated when an internal budget/cap was hit, see
    /// [`crate::engine_notes::EngineNote`].  Empty for the typical
    /// under-budget finding.
    pub engine_notes: SmallVec<[EngineNote; 2]>,
    /// Stable hash of the intermediate-variable sequence between `source`
    /// and `sink`.  Used to keep distinct paths through different
-    /// variables as separate findings during deduplication — two
+    /// variables as separate findings during deduplication, two
    /// `(body_id, sink, source)` siblings with different `path_hash`
    /// values represent flows along different data paths and are
    /// preserved as alternatives rather than collapsed.
@ -289,6 +290,13 @@ pub struct Finding {
    /// formatters can present them as "this flow … and N alternative
    /// path(s)" rather than silently dropping one.
    pub alternative_finding_ids: SmallVec<[String; 2]>,
+    /// Sink-cap mask that this specific finding fired against.  Carries the
+    /// per-event `sink_caps` from the multi-gate dispatch (e.g.
+    /// `Cap::SSRF` for a URL-flow finding on `fetch`, `Cap::DATA_EXFIL`
+    /// for a body-flow finding on the same call).  Used by `ast.rs` to
+    /// route the finding to a cap-specific rule id rather than the
+    /// generic `taint-unsanitised-flow` bucket.
+    pub effective_sink_caps: crate::labels::Cap,
 }

 impl Finding {
@ -425,7 +433,7 @@ pub(crate) fn analyse_file_with_lowered(

    // 3. Unified multi-body analysis with lexical containment propagation.
    //
-    // `max_iterations` is the safety cap, not an expected depth — the
+    // `max_iterations` is the safety cap, not an expected depth; the
    // pass-2 loop breaks on seed equality (monotone lattice, finite
    // height) and only rides the cap when convergence legitimately
    // needs more rounds than the cap allows.  See
@ -481,7 +489,7 @@ pub(crate) fn analyse_file_with_lowered(
    //        dedup_by_key(|f| (body_id, sink, source));
    //
    //    which silently collapsed an *unguarded* flow reaching the same
-    //    `(sink, source)` as a guarded flow — the `!path_validated` sort
+    //    `(sink, source)` as a guarded flow, the `!path_validated` sort
    //    ordered `path_validated == true` first, so the exploitable
    //    branch was the one that got dropped.
    //
@ -541,7 +549,7 @@ fn make_finding_id(f: &Finding) -> String {
 /// Cross-link findings that share `(body_id, sink, source)` but differ
 /// on `path_validated` or `path_hash`.  After this call each such
 /// finding's `alternative_finding_ids` lists every sibling's
-/// [`Finding::finding_id`] — so a guarded flow links to the unguarded
+/// [`Finding::finding_id`], so a guarded flow links to the unguarded
 /// sibling and vice versa.  Isolated findings (no sibling) get an
 /// empty list.
 fn link_alternative_paths(findings: &mut [Finding]) {
@ -576,7 +584,7 @@ fn link_alternative_paths(findings: &mut [Finding]) {
 /// Compute containment-topological order: parent bodies before children.
 ///
 /// Uses BFS from roots (bodies with no parent), ensuring a body is always
-/// processed after its parent — required for lexical seed propagation.
+/// processed after its parent, required for lexical seed propagation.
 /// Returns indices into `file_cfg.bodies` in processing order.
 fn containment_order(bodies: &[BodyCfg]) -> Vec<usize> {
    let mut children: HashMap<BodyId, Vec<usize>> = HashMap::new();
@ -637,7 +645,7 @@ fn analyse_body_with_seed(
    // Per-body graphs contain only the body's own nodes.
    // For non-toplevel bodies, use lower_to_ssa_with_params with scope to
    // create SsaOp::Param ops for external/captured variables and formal
-    // parameters — required for global_seed to inject taint from the parent.
+    // parameters, required for global_seed to inject taint from the parent.
    // Top-level bodies use lower_to_ssa with scope_all=true (no Param ops).
    let is_toplevel = body.meta.parent_body_id.is_none();
    // JS/TS function bodies always use scoped lowering to create Param ops
@ -708,12 +716,9 @@ fn analyse_body_with_seed(
            } else {
                Some(static_map)
            };
-            // Pointer-Phase 3 / W1+W2+W3: per-body field-sensitive points-to
-            // facts.  Computed only when `NYX_POINTER_ANALYSIS=1`; the
-            // per-body `analyse_body` cost is amortised across the three
-            // hooks (W1 field-write read-back, W2 container ELEM cells,
-            // W3 cross-call resolver).  Strict-additive: `None` keeps
-            // pointer-disabled behaviour bit-identical.
+            // Per-body field-sensitive points-to facts. Cost is
+            // amortised across field-write read-back, container ELEM
+            // cells, and the cross-call resolver.
            let pointer_facts = if crate::pointer::is_enabled() {
                Some(crate::pointer::analyse_body(&ssa_body, body.meta.id))
            } else {
@ -836,7 +841,7 @@ fn analyse_body_with_seed(
        Err(e) => {
            // SSA lowering produced no analyzable body.  We still surface
            // the event so downstream tooling can tell "we tried and gave
-            // up" from "we ran clean" — a TRACE-level log records the
+            // up" from "we ran clean", a TRACE-level log records the
            // reason (no synthetic Finding is manufactured because a
            // diag pointing at no source location would be misleading).
            tracing::trace!(
@ -948,7 +953,7 @@ fn analyse_multi_body(
        let top_cfg = &top.graph;

        // Collect top-level binding keys for seed filtering.  Always
-        // keyed under `BodyId(0)` — `filter_seed_to_toplevel` matches
+        // keyed under `BodyId(0)`; `filter_seed_to_toplevel` matches
        // by name and re-keys every surviving entry to `BodyId(0)`
        // anyway, so the body_id on the probe keys is informational.
        let toplevel_keys: HashSet<ssa_transfer::BindingKey> = {
@ -969,7 +974,7 @@ fn analyse_multi_body(
        // re-analysis when a name it reads via Param or via the
        // global_seed ancestor-lookup path has actually changed in
        // the combined seed.  `reads` is a superset of the body's
-        // top-level dependencies — we err on the side of over-running
+        // top-level dependencies, we err on the side of over-running
        // (false dirty) rather than missing a dependency.
        let body_reads: HashMap<BodyId, HashSet<String>> = {
            let mut m: HashMap<BodyId, HashSet<String>> = HashMap::new();
@ -1060,7 +1065,7 @@ fn analyse_multi_body(

            // Re-run non-toplevel bodies with updated seed.
            body_exit_states.insert(BodyId(0), current_seed.clone());
-            // Phase-C: Gauss-Seidel variant — as each body is
+            // Phase-C: Gauss-Seidel variant, as each body is
            // re-analysed, merge its new exit into `current_seed`
            // immediately so subsequent bodies in the same round see
            // the fresh value.  Order matters here; we pin to
@ -1137,7 +1142,7 @@ fn analyse_multi_body(

    // Record observability counter.  `iters_used == 0` covers the
    // non-JS/TS path (`max_iterations == 1`) and the JS/TS case where
-    // the convergence loop did not enter — report `1` so the counter
+    // the convergence loop did not enter; report `1` so the counter
    // always reflects "at least the lexical-containment pass ran".
    let reported_iters = if iters_used == 0 { 1 } else { iters_used };
    LAST_JS_TS_PASS2_ITERATIONS.store(reported_iters, Ordering::Relaxed);
@ -1287,7 +1292,7 @@ fn lookup_formal_params(local_summaries: &FuncSummaries, func_name: &str) -> Vec
 /// When exactly one `(name, arity)`-matching entry exists we use its full
 /// identity (container / disambig / kind preserved).  When zero or multiple
 /// match we fall back to a free-function key so the caller still has a
-/// well-formed key — this can only happen in legacy discovery paths that
+/// well-formed key, this can only happen in legacy discovery paths that
 /// cannot see through same-name siblings, and those paths were already
 /// collision-prone before this refactor.  New intra-file analysis code
 /// should prefer [`BodyMeta::func_key`].
@ -1300,7 +1305,7 @@ fn lookup_canonical_func_key(
 ) -> FuncKey {
    // `local_summaries` is file-local, so every entry's namespace agrees with
    // whatever `build_cfg` wrote (raw file path). We match by lang + name +
-    // arity and fall back to name-only — the caller's `namespace` argument is
+    // arity and fall back to name-only, the caller's `namespace` argument is
    // only used when we have to synthesise a key as a last resort.
    let mut matches = local_summaries
        .keys()
@ -1372,7 +1377,7 @@ pub(crate) fn extract_intra_file_ssa_summaries(
                .count()
        };

-        // Zero-param helpers are normally elided — a fixture with no
+        // Zero-param helpers are normally elided, a fixture with no
        // parameters cannot carry per-parameter taint transforms.  But
        // zero-arg factories (`function makeBag() { return []; }`) do
        // have one observable cross-file effect: the return is a fresh
@ -1409,7 +1414,7 @@ pub(crate) fn extract_intra_file_ssa_summaries(
        // must survive this filter so summary application at cross-file
        // call sites can replay the alias edges.  Zero-param factories
        // are kept via the `returns_fresh_alloc` leg of
-        // `points_to.is_empty()` — `is_empty()` returns false when the
+        // `points_to.is_empty()`: `is_empty()` returns false when the
        // fresh-alloc flag is set.
        if !summary.param_to_return.is_empty()
            || !summary.param_to_sink.is_empty()
@ -1436,7 +1441,7 @@ pub(crate) fn extract_intra_file_ssa_summaries(
 }

 /// Lower all function bodies from `FileCfg` to produce SSA summaries + cached
-/// bodies.  Each body's own graph is used directly — no scope filtering needed.
+/// bodies.  Each body's own graph is used directly, no scope filtering needed.
 ///
 /// Both returned maps are keyed by each body's canonical [`FuncKey`] (carried
 /// on [`crate::cfg::BodyMeta::func_key`]).  This is the most collision-
@ -1503,7 +1508,7 @@ pub(crate) fn lower_all_functions_from_bodies(
        // `build_cfg` wrote. The caller passes `namespace` already normalized
        // against `scan_root`, which is what FuncSummary keys use on the
        // cross-file side (`FuncSummary::func_key`). Overriding the namespace
-        // here keeps both sides of `GlobalSummaries` agreement — otherwise
+        // here keeps both sides of `GlobalSummaries` agreement, otherwise
        // `resolve_callee` resolves to the normalized FuncSummary key and
        // misses the raw-path SSA entry.
        let mut key = body.meta.func_key.clone().unwrap_or_else(|| {
@ -1542,7 +1547,7 @@ pub(crate) fn lower_all_functions_from_bodies(

            // Always insert the summary, even when all fields are empty/default.
            // An empty summary tells resolve_callee "this function exists and has
-            // no taint effects" — preventing fallthrough to the less precise old
+            // no taint effects", preventing fallthrough to the less precise old
            // FuncSummary which may report false source_caps from internal sources.
            // For zero-param functions we only insert when the summary carries
            // the fresh-container signal (the only observable effect worth
@ -1563,34 +1568,23 @@ pub(crate) fn lower_all_functions_from_bodies(
        perf_lower_record(2, _t_opt.elapsed().as_micros());

        let _t_typed = std::time::Instant::now();
-        // Phase 2 (typed call-graph devirtualisation): walk every SSA
-        // method call in this body, look up the receiver SSA value's
-        // [`crate::ssa::type_facts::TypeKind`] in the just-computed
-        // `opt.type_facts`, and record `(call_ordinal, container_name)`
-        // on the matching summary so Phase 3 in `build_call_graph` can
-        // narrow the indirect-method-call edge to the receiver-typed
-        // container.  Free-function calls (`receiver: None`) and
-        // unknown receiver types are silently skipped — the bare-name
-        // resolution path applies unchanged in that case.
+        // For every SSA method call, look up the receiver's TypeKind
+        // and record `(call_ordinal, container_name)` so devirtualisation
+        // in `build_call_graph` can narrow the edge to the receiver-typed
+        // container. Free-function calls and unknown types fall back to
+        // bare-name resolution.
        let typed_receivers = collect_typed_call_receivers(&func_ssa, &body.graph, &opt.type_facts);
        if !typed_receivers.is_empty() {
-            // The summary may not have been inserted above (zero-param,
-            // no-fresh-alloc bodies are skipped).  Force-insert in that
-            // case so the receiver-type info reaches Phase 3 — without
-            // it, the cross-file devirtualisation signal would be lost
-            // for any method invoked inside a parameterless caller.
+            // Zero-param/no-fresh-alloc bodies are skipped above;
+            // force-insert so receiver-type info still reaches
+            // build_call_graph.
            let entry = summaries.entry(key.clone()).or_default();
            entry.typed_call_receivers = typed_receivers;
        }

-        // Pointer-Phase 5 / W3: populate `field_points_to` from the
-        // body's pointer facts when the analysis is enabled.  Strict
-        // opt-in via `NYX_POINTER_ANALYSIS=1`; off-by-default keeps
-        // bit-for-bit identity with the pre-W3 behaviour.
-        //
-        // `extract_field_points_to` covers both reads (via
-        // `SsaOp::FieldProj` walks) and writes (via the W1
-        // `field_writes` side-table on the body) in a single pass.
+        // Populate `field_points_to` from the body's pointer facts.
+        // `extract_field_points_to` covers both reads (FieldProj walks)
+        // and writes (`field_writes` side-table) in one pass.
        if crate::pointer::is_enabled() {
            let facts = crate::pointer::analyse_body(&func_ssa, body.meta.id);
            let fpt = crate::pointer::extract_field_points_to(&func_ssa, &facts);
@ -1621,7 +1615,7 @@ pub(crate) fn lower_all_functions_from_bodies(
    // Lift child-body sinks into the parent's `param_to_sink` for
    // every parent body with lexically contained children. This
    // handles the direct-wrapper case
-    // `f(x) { return new Promise((res, rej) => sink(x)) }` — the
+    // `f(x) { return new Promise((res, rej) => sink(x)) }`, the
    // executor's gated http.get sink becomes visible to callers of
    // `f` via `f.summary.param_to_sink`.
    //
@ -1635,8 +1629,8 @@ pub(crate) fn lower_all_functions_from_bodies(
    // propagation at summary-extraction time so cross-call
    // resolution sees the sink at every caller of `f`.
    //
-    // Strict-additive: only ADDs `param_to_sink` entries — never
-    // removes or modifies existing data — so it cannot regress
+    // Strict-additive: only ADDs `param_to_sink` entries, never
+    // removes or modifies existing data, so it cannot regress
    // detection. Bounded: each parent-param probe runs each child
    // body's analysis exactly once.
    let _t_aug = std::time::Instant::now();
@ -1665,7 +1659,7 @@ pub(crate) fn lower_all_functions_from_bodies(
    // OR-merge: only adds `param_to_sink` / `param_to_sink_param`
    // entries to existing summaries. Existing entries (return
    // transforms, source caps, augment-populated sinks, etc.) are
-    // preserved. Strict-additive — cannot regress detection.
+    // preserved. Strict-additive, cannot regress detection.
    let _t_rerun = std::time::Instant::now();
    rerun_extraction_with_augmented_summaries(
        file_cfg,
@ -1919,7 +1913,7 @@ fn augment_summaries_with_child_sinks(
        let parent_interner = crate::state::symbol::SymbolInterner::from_cfg(parent_cfg);

        // Collect (formal_param_idx, var_name, ssa_value) for the parent's
-        // formal params — mirrors `extract_ssa_func_summary`'s param scan.
+        // formal params, mirrors `extract_ssa_func_summary`'s param scan.
        let mut parent_param_info: Vec<(usize, String)> = Vec::new();
        for block in &parent_ssa.blocks {
            for inst in block.phis.iter().chain(block.body.iter()) {
@ -2055,7 +2049,7 @@ fn augment_summaries_with_child_sinks(
                }

                // Aggregate sink caps across all child events into one
-                // entry per parent param (cap-only SinkSite — the
+                // entry per parent param (cap-only SinkSite, the
                // exact location lives in the child body's CFG and is
                // not directly addressable from the parent's summary).
                let mut union_caps = Cap::empty();
@ -2088,7 +2082,7 @@ fn augment_summaries_with_child_sinks(
                // engine's primary sink-site picker uses
                // `param_to_sink_param` for arg-position filtering)
                // sees this captured-flow sink. Position 0 is a
-                // best-effort placeholder — the actual filtering at
+                // best-effort placeholder; the actual filtering at
                // the caller is by SSRF cap, not arg position, when
                // the wrapper is itself non-gated.
                if !entry
@ -2109,7 +2103,7 @@ fn augment_summaries_with_child_sinks(
 /// non-empty [`crate::ssa::type_facts::TypeKind::container_name`].
 ///
 /// Free-function calls (`receiver: None`) and unknown receiver types
-/// are skipped — the cross-file call-graph builder will fall back to
+/// are skipped; the cross-file call-graph builder will fall back to
 /// today's name-only resolution for those, preserving the
 /// "subset of today's targets, never a superset" invariant from
 /// `docs/typed-call-graph-prompt.md`.
@ -2135,13 +2129,13 @@ fn collect_typed_call_receivers(
                continue;
            };
            let Some(receiver_val) = receiver else {
-                continue; // free-function call — no devirtualisation possible
+                continue; // free-function call, no devirtualisation possible
            };
            let Some(kind) = type_facts.get_type(*receiver_val) else {
-                continue; // type unknown — fall back to name-only resolution
+                continue; // type unknown, fall back to name-only resolution
            };
            let Some(container) = kind.container_name() else {
-                continue; // scalar/unknown type — no useful container
+                continue; // scalar/unknown type, no useful container
            };
            let Some(node_info) = cfg.node_weight(inst.cfg_node) else {
                continue;
@ -2150,7 +2144,7 @@ fn collect_typed_call_receivers(
            // A single SSA call instruction maps 1:1 with a CFG call
            // node, so each ordinal should appear at most once.  The
            // dedup guard exists in case lowering ever introduces a
-            // second SSA Call sharing a cfg_node — first wins.
+            // second SSA Call sharing a cfg_node, first wins.
            if !seen.insert(ordinal) {
                continue;
            }
@ -2211,7 +2205,7 @@ pub(crate) fn build_eligible_bodies(
                continue;
            }
            // Populate node metadata against the per-body graph whose NodeIndex
-            // space the SSA was produced on — otherwise cross-file replay can't
+            // space the SSA was produced on; otherwise cross-file replay can't
            // find the original CFG nodes.
            //
            // `key.namespace` was already normalised against `scan_root` in
--- a/src/taint/path_state.rs
+++ b/src/taint/path_state.rs
@ -35,13 +35,13 @@ pub enum PredicateKind {
    /// Commonly paired with [`ShellMetaValidated`] in OR-chain rejection
    /// idioms (`if x.len() > MAX || x.contains(";") { reject }`).  Counts as
    /// a dominator guard for `cfg-unguarded-sink` purposes, but intentionally
-    /// does **not** mark variables as validated — the rejection direction is
+    /// does **not** mark variables as validated; the rejection direction is
    /// ambiguous from the condition alone (a `.len() > 5 { sink(x) }`
    /// gate is a precondition, not a rejection).
    BoundedLength,
    /// Comparison operators: `x == 5`, `x > threshold`
    Comparison,
-    /// Generic boolean test — cannot classify further.
+    /// Generic boolean test, cannot classify further.
    Unknown,
 }

@ -50,7 +50,7 @@ pub enum PredicateKind {
 ///
 /// Presence of any of these in user input is sufficient to enable shell
 /// injection, so rejecting input that contains them is a real sanitizer.
-/// `"foo"` or other non-metachar needles don't qualify — a rejection of
+/// `"foo"` or other non-metachar needles don't qualify; a rejection of
 /// those is business logic, not security.
 const SHELL_METACHARS: &[&str] = &[";", "|", "&", "`", "$", ">", "<", "\n", "\r", "\0"];

@ -65,7 +65,7 @@ const SHELL_METACHARS: &[&str] = &[";", "|", "&", "`", "$", ">", "<", "\n", "\r"
 ///   character class containing only metacharacters.
 ///
 /// Returns `false` if the needle is a non-metachar literal or cannot be
-/// extracted — falls through to broader classification.
+/// extracted, falls through to broader classification.
 fn is_shell_metachar_rejection(text: &str) -> bool {
    // Method-call form: `.contains(…)` / `.includes(…)` / `.include?(…)`
    for method in [".contains(", ".includes(", ".include?("] {
@ -134,7 +134,7 @@ fn extract_first_string_arg(after_open: &str) -> Option<String> {
 }

 /// For Python `"<METACHAR>" in x` (needle on the left side of ` in `), return
-/// the needle.  Returns `None` for `x in ALLOWED` (identifier on the left) —
+/// the needle.  Returns `None` for `x in ALLOWED` (identifier on the left);
 /// that is an allowlist check, not a rejection.
 fn extract_python_in_needle(text: &str) -> Option<String> {
    let pos = text.find(" in ")?;
@ -155,7 +155,7 @@ fn extract_python_in_needle(text: &str) -> Option<String> {

 /// Detect regex character classes that contain only shell metacharacters:
 /// `[;|&]`, `[;&`$]`, etc.  Missing: escape-class metacharacters inside the
-/// class (e.g. `[\n]`) — conservative, returns false there.
+/// class (e.g. `[\n]`); conservatively returns false there.
 fn is_metachar_regex_class(text: &str) -> bool {
    // Find `[` followed by content and `]`, anywhere in the text.
    let mut rest = text;
@ -180,7 +180,7 @@ fn is_metachar_regex_class(text: &str) -> bool {

 /// Check whether `text` looks like a bounded-length rejection:
 /// `x.len() > N`, `x.len() < N`, `x.length >= N`, etc. where `N` is an
-/// integer literal >= 2.  Excludes `> 0` / `>= 1` / `< 1` — those are
+/// integer literal >= 2.  Excludes `> 0` / `>= 1` / `< 1`, those are
 /// non-empty checks, which are not length-bound validations.
 fn is_bounded_length_check(lower: &str) -> bool {
    const PROBES: &[&str] = &[
@ -290,7 +290,7 @@ pub fn classify_condition(text: &str) -> PredicateKind {
    // Matched BEFORE AllowlistCheck so that `x.contains(";")` is recognized
    // as a rejection idiom rather than a membership test.  Checked on the
    // raw (non-lowercased) text so metacharacter comparisons stay
-    // case-accurate — `;` / `|` / `&` have no case.
+    // case-accurate, `;` / `|` / `&` have no case.
    if is_shell_metachar_rejection(text) {
        return PredicateKind::ShellMetaValidated;
    }
@ -409,7 +409,7 @@ pub fn classify_condition(text: &str) -> PredicateKind {
 /// validator's effect is opaque: we can't tell which argument is being
 /// checked. Returning the original kind with `None` target would cause
 /// upstream code to over-validate (mark every `condition_var` as validated).
-/// Instead, we fall back to `PredicateKind::Unknown` — safer to assume the
+/// Instead, we fall back to `PredicateKind::Unknown`, safer to assume the
 /// validator did nothing than to assume it validated every variable in the
 /// condition. Single-argument calls retain `(kind, None)` so downstream code
 /// can still use the predicate-summary bit tracking.
@ -442,7 +442,7 @@ pub fn classify_condition_with_target(text: &str) -> (PredicateKind, Option<Stri
            (kind, target)
        }
        PredicateKind::Comparison => {
-            // `x === '/login'`, `x == 5`, `null != obj` — when exactly one
+            // `x === '/login'`, `x == 5`, `null != obj`, when exactly one
            // side is a literal, extract the identifier side as the target.
            // Downstream `apply_branch_predicates` uses this to mark the
            // variable as `validated_may` on the true (equal) branch.
@ -464,7 +464,7 @@ pub fn classify_condition_with_target(text: &str) -> (PredicateKind, Option<Stri
 /// - `'a' == 'b'` → `None` (both sides are literals)
 /// - `obj.field == 3` → `None` (not a bare identifier)
 ///
-/// Best-effort text analysis — kept conservative to avoid false validation.
+/// Best-effort text analysis, kept conservative to avoid false validation.
 fn extract_comparison_target(text: &str) -> Option<String> {
    let trimmed = text.trim();

@ -537,7 +537,7 @@ fn is_comparison_literal(s: &str) -> bool {
 /// `Some(0)` for a call with empty argument list. Respects paren/bracket/brace
 /// nesting so `f(g(a, b), c)` counts as 2 top-level args.
 ///
-/// Best-effort — operates on source text, not an AST. Used by
+/// Best-effort, operates on source text, not an AST. Used by
 /// `classify_condition_with_target` to distinguish single-arg vs multi-arg
 /// validator calls when target extraction fails.
 fn count_call_args(text: &str) -> Option<usize> {
@ -592,7 +592,7 @@ fn extract_validation_target(text: &str) -> Option<String> {
        }
    }

-    // Function call pattern: `func(x, ...)` — extract first argument
+    // Function call pattern: `func(x, ...)`, extract first argument
    // Strip closing paren if present
    let args_inner = args_part.trim_end().strip_suffix(')').unwrap_or(args_part);
    // Take text up to first comma (first argument)
@ -653,7 +653,7 @@ fn extract_allowlist_target(text: &str) -> Option<String> {

    // Python `in` operator: `cmd in ALLOWED` / `cmd not in ALLOWED`
    if lower.contains(" in ") {
-        // Find the leftmost ` in ` — everything before it is the target expression
+        // Find the leftmost ` in `, everything before it is the target expression
        // Handle `not in` by looking for ` not in ` first
        let target_part = if let Some(pos) = lower.find(" not in ") {
            &trimmed[..pos]
@ -857,7 +857,7 @@ mod tests {

    #[test]
    fn classify_validation_requires_paren() {
-        // `x_valid == true` should NOT be ValidationCall — no `(` call syntax.
+        // `x_valid == true` should NOT be ValidationCall, no `(` call syntax.
        assert_eq!(
            classify_condition("x_valid == true"),
            PredicateKind::Comparison
@ -978,7 +978,7 @@ mod tests {

    #[test]
    fn target_multi_arg_fallback_opaque_expr_is_unknown() {
-        // `validate(x + 1, y)` — first arg is an expression, not an identifier.
+        // `validate(x + 1, y)`, first arg is an expression, not an identifier.
        // Target extraction fails. Multi-arg call, so fall back to Unknown
        // rather than letting upstream validate every condition var.
        let (kind, target) = classify_condition_with_target("validate(x + 1, y)");
--- a/src/taint/ssa_transfer/events.rs
+++ b/src/taint/ssa_transfer/events.rs
@ -1,9 +1,9 @@
 //! Taint event emission and conversion to [`crate::taint::Finding`].
 //!
 //! Extracted from the monolithic `ssa_transfer.rs`.  Contains:
-//! * [`SsaTaintEvent`] — the raw event struct produced by the block-level
+//! * [`SsaTaintEvent`], the raw event struct produced by the block-level
 //!   worklist each time a tainted value reaches a sink.
-//! * [`ssa_events_to_findings`] — event → `Finding` conversion with the
+//! * [`ssa_events_to_findings`], event → `Finding` conversion with the
 //!   `primary_location` invariant and dedup.
 //! * Flow-path reconstruction helpers ([`reconstruct_flow_path`] and
 //!   operand pickers).
@ -38,14 +38,14 @@ pub struct SsaTaintEvent {
    /// `sink_caps`.  When multiple [`SinkSite`]s for the same `(param_idx,
    /// cap mask)` match, the emission site produces one event per
    /// [`SinkSite`] so each downstream [`crate::taint::Finding`] carries a
-    /// single primary attribution — the multi-primary case collapses to
+    /// single primary attribution; the multi-primary case collapses to
    /// multiple single-primary events.
    ///
    /// `None` for:
    /// * intra-procedural sinks (`uses_summary == false`), where the
    ///   caller's sink span already names the dangerous instruction;
    /// * summary-resolved sinks whose callee summary carried only cap-only
-    ///   [`SinkSite`]s (no source coordinates — e.g. pass-2 transient
+    ///   [`SinkSite`]s (no source coordinates, e.g. pass-2 transient
    ///   summaries or local `LocalFuncSummary`-only callees).
    pub primary_sink_site: Option<SinkSite>,
 }
@ -79,7 +79,7 @@ pub(super) fn block_distance(ssa: &SsaBody, source_node: NodeIndex, sink_node: N
            }
        }
    }
-    0 // unreachable or not connected — conservative default
+    0 // unreachable or not connected, conservative default
 }

 // ── Flow Path Reconstruction ─────────────────────────────────────────────
@ -204,7 +204,7 @@ pub(super) fn reconstruct_flow_path(
            SsaOp::FieldProj { receiver, .. } => {
                // Treat field projection as a one-step assignment for
                // flow-step reconstruction: taint reaching `obj.f` came
-                // from `obj`.  Phase 4 will refine the witness rendering
+                // from `obj`.  A later phase may refine the witness rendering
                // to include the field name in the step.
                steps.push(FlowStepRaw {
                    cfg_node: inst.cfg_node,
@ -270,7 +270,7 @@ fn pick_tainted_operand_call(
 ///
 /// Note: this invariant is intentionally independent of `uses_summary`.
 /// The taint-chain flag tracks summary-propagated *taint*, not summary-
-/// resolved *sinks* — a local source can reach a cross-file sink, so
+/// resolved *sinks*; a local source can reach a cross-file sink, so
 /// `primary_location.is_some()` does not imply `uses_summary == true`.
 pub fn ssa_events_to_findings(
    events: &[SsaTaintEvent],
@ -329,7 +329,7 @@ pub fn ssa_events_to_findings(

        // Data-integrity invariant: a populated primary_location must at least
        // carry resolved line coordinates.  `file_rel` may legitimately be
-        // empty — when the scan root is the caller file itself (single-file
+        // empty: when the scan root is the caller file itself (single-file
        // scans), every namespace normalizes to `""` and the callee's site
        // inherits that empty path; consumers resolve it against the file
        // under analysis.  Line==0 is the only filter-worthy invariant.
@ -340,7 +340,7 @@ pub fn ssa_events_to_findings(

        // Dedup key includes primary location so multi-site events that
        // share a single (source, sink) pair still produce distinct findings
-        // — one per resolved callee-internal site.
+        // (one per resolved callee-internal site).
        let loc_key = primary_location
            .as_ref()
            .map(|l| (l.file_rel.clone(), l.line, l.col));
@ -374,6 +374,11 @@ pub fn ssa_events_to_findings(
                        path_hash,
                        finding_id: String::new(),
                        alternative_finding_ids: smallvec::SmallVec::new(),
+                        // Per-event mask from the multi-gate dispatch, picks
+                        // exactly the cap that fired (e.g. `Cap::DATA_EXFIL`
+                        // for a `fetch` body-flow finding versus `Cap::SSRF`
+                        // for a URL-flow finding on the same call).
+                        effective_sink_caps: event.sink_caps & *caps,
                    });
                }
            }
--- a/src/taint/ssa_transfer/inline.rs
+++ b/src/taint/ssa_transfer/inline.rs
@ -1,34 +1,10 @@
-//! Context-sensitive inline analysis — cache, body, and attribution types.
+//! Context-sensitive inline analysis: cache, body, and attribution types.
 //!
-//! Extracted from the monolithic `ssa_transfer.rs`.  Contains:
-//! * [`ArgTaintSig`] — compact per-arg cap signature used as a cache key.
-//! * [`InlineResult`] / [`CachedInlineShape`] / [`ReturnShape`] — the
-//!   callsite-adapted and callsite-agnostic inline-analysis result types.
-//! * [`InlineCache`] — the shared cache map keyed by
-//!   `(FuncKey, ArgTaintSig)`.
-//! * [`CrossFileNodeMeta`] / [`CalleeSsaBody`] — the serde-able bodies
-//!   persisted to SQLite for cross-file context-sensitive analysis.
-//! * [`populate_node_meta`] / [`rebuild_body_graph`] — bookkeeping for
-//!   cross-file body proxy CFGs.
-//!
-//! The implementation functions (`inline_analyse_callee`,
-//! `apply_cached_shape`, `extract_inline_return_taint`) remain in the
-//! parent `mod.rs` because they depend tightly on the block worklist, the
-//! `run_ssa_taint_full` entry point, and the callee-resolution pipeline.
-//!
-//! # Cache key scope and origin attribution
-//!
-//! The inline-analysis cache below ([`InlineCache`]) is keyed by
-//! `(FuncKey, ArgTaintSig)`, where [`ArgTaintSig`] encodes **per-arg
-//! capability bits only** — not the identity of the source
-//! [`crate::taint::domain::TaintOrigin`]s that produced those caps.  The
-//! stored value ([`CachedInlineShape`]) captures **only the structural**
-//! shape of the callee's return taint: return caps, callee-internal
-//! origins (from `Source` ops inside the callee body), and per-parameter
-//! provenance flags that record which formal parameters contributed to
-//! the return.  Caller-specific origin identity is *not* stored — it is
-//! re-attributed at cache-apply time from the current call site's
-//! argument taint.
+//! The cache ([`InlineCache`]) is keyed by `(FuncKey, ArgTaintSig)`,
+//! where [`ArgTaintSig`] is per-arg cap bits only (not origin identity).
+//! Stored values ([`CachedInlineShape`]) capture the structural shape of
+//! the callee's return taint; caller-specific origins are re-attributed
+//! at apply time.

 use crate::labels::Cap;
 use crate::ssa::ir::{SsaBody, Terminator};
@ -42,61 +18,30 @@ use std::collections::HashMap;
 /// Maximum SSA blocks in a callee body before skipping inline analysis.
 pub(super) const MAX_INLINE_BLOCKS: usize = 500;

-/// Compact cache key: per-arg-position cap bits (sorted, non-empty only).
-///
-/// Two calls with identical `ArgTaintSig` produce identical inline results
-/// for soundness purposes (return caps, callee-internal sink activations).
-/// Origin identity is **not** part of the key — see the module-level note
-/// above on origin-attribution non-determinism.
+/// Compact cache key: per-arg-position cap bits (sorted, non-empty
+/// only). Origin identity is not part of the key.
 #[derive(Clone, Debug, PartialEq, Eq, Hash)]
 pub(crate) struct ArgTaintSig(pub(super) SmallVec<[(usize, u16); 4]>);

-/// Call-site-adapted result of inline-analyzing a callee.
-///
-/// Constructed fresh per call site by `apply_cached_shape` from a stored
-/// [`CachedInlineShape`]; carries origins that point to the *current*
-/// caller's source chain, not to whichever caller first populated the
-/// cache entry.
+/// Call-site-adapted result of inline-analyzing a callee. Built fresh
+/// per call site so origins point to the current caller's chain.
 #[derive(Clone, Debug)]
 pub(crate) struct InlineResult {
-    /// Taint on the return value after inline analysis.
    pub(super) return_taint: Option<VarTaint>,
-    /// PathFact on the return value after inline analysis.
-    ///
-    /// Non-top when the callee's body provably narrows the
-    /// [`crate::abstract_interp::PathFact`] of the value it returns (for
-    /// example, a `sanitize_path(s) -> Option<String>` helper that
-    /// early-returns on `s.contains("..")` / `s.starts_with('/')`).  At
-    /// apply time the caller sets its call-result SSA value's PathFact to
-    /// this narrowed fact, so downstream FILE_IO sinks see the sanitised
-    /// axis regardless of whether a named label-rule exists for the
-    /// helper.  Top when the callee produces no narrowing — matches
-    /// pre-PathFact behaviour exactly.
+    /// PathFact on the return value. Non-top when the callee body
+    /// provably narrows it (e.g. a `sanitize_path` early-returning on
+    /// `s.contains("..")`).
    pub(super) return_path_fact: crate::abstract_interp::PathFact,
-    /// Per-return-path decomposition of [`Self::return_path_fact`].
-    ///
-    /// Non-empty when the callee has ≥2 distinct return blocks whose
-    /// predicate gates differ.  Match-arm-sensitive callers pick the
-    /// entry whose `variant_inner_fact` matches the arm binding's
-    /// variant; path-resolvable callers may refuse infeasible entries.
-    /// Callers unable to distinguish paths still consult
-    /// [`Self::return_path_fact`] (the join of all entries) and see
-    /// pre-decomposition behaviour.
+    /// Per-return-path decomposition of `return_path_fact`. Non-empty
+    /// when the callee has ≥2 return blocks with different predicate
+    /// gates.
    #[allow(dead_code)]
    pub(super) return_path_facts: SmallVec<[PathFactReturnEntry; 2]>,
 }

-/// Structural (callsite-agnostic) summary of an inline-analyzed callee.
-///
-/// Stored in [`InlineCache`] in place of a fully-attributed `InlineResult`.
-/// Origin-identity information that depends on the caller's argument chain
-/// is *not* kept here; instead, [`ReturnShape::param_provenance`]
-/// records which callee parameter positions contributed seed taint to the
-/// return, and the actual caller origins are re-unioned in at apply time.
-///
-/// `None` means "this callee produced no return taint for the given
-/// argument shape".  A cached `None` is still a meaningful result — it
-/// short-circuits re-analysis on subsequent calls with matching caps.
+/// Structural (callsite-agnostic) summary of an inline-analyzed
+/// callee. `None` means "no return taint for this arg shape"; it is
+/// still meaningful and short-circuits subsequent calls with matching caps.
 #[derive(Clone, Debug)]
 pub(crate) struct CachedInlineShape(pub(super) Option<ReturnShape>);

@ -107,7 +52,7 @@ pub(crate) struct CachedInlineShape(pub(super) Option<ReturnShape>);
 /// origins.  See the module-level note above on origin attribution.
 #[derive(Clone, Debug)]
 pub(crate) struct ReturnShape {
-    /// Return value caps (cap bits only — structural).
+    /// Return value caps (cap bits only, structural).
    pub(super) caps: Cap,
    /// Origins produced **inside the callee body** (e.g. `Source` op fired
    /// in the callee).  `node` is set to a placeholder; at apply time the
@ -115,31 +60,19 @@ pub(crate) struct ReturnShape {
    /// stable (from the callee CFG) and preserved as-is.
    pub(super) internal_origins: SmallVec<[TaintOrigin; 2]>,
    /// Bit i set = callee's `Param(i)` seed taint reached the return value.
-    /// At apply time, caller's argument origins at matching positions are
-    /// unioned into the applied `VarTaint`.  Params beyond index 63 are
-    /// dropped (matching `SmallBitSet` semantics); the capped case is rare
-    /// and still yields cap-correct results.
+    /// At apply time, caller arg origins at matching positions are
+    /// unioned into the applied `VarTaint`. Params beyond 63 are
+    /// dropped (matches `SmallBitSet`); rare and still cap-correct.
    pub(super) param_provenance: u64,
-    /// Whether the receiver (`SelfParam`) seed taint flowed to the return.
+    /// Whether the receiver (`SelfParam`) seed taint flowed to return.
    pub(super) receiver_provenance: bool,
-    /// Whether the applied `VarTaint` should be tagged `uses_summary`.
    pub(super) uses_summary: bool,
-    /// PathFact of the return value observed from the callee's exit
-    /// abstract state.  Cache-safe because the callee is inline-analysed
-    /// with [`crate::abstract_interp::PathFact::top`] Param seeds — the
-    /// resulting fact describes the callee's intrinsic narrowing (e.g.
-    /// the `Some` arm of a `sanitize(..) -> Option<String>` body
-    /// proves `dotdot = No`) and does not depend on caller-side
-    /// narrowing of the argument's PathFact.  Top when the callee does
-    /// not narrow.
+    /// PathFact of the return value, observed from the callee exit
+    /// state under Top-seeded Params. Describes the callee's intrinsic
+    /// narrowing.
    pub(super) return_path_fact: crate::abstract_interp::PathFact,
-    /// Per-return-path [`PathFact`] decomposition of the return value.
-    ///
-    /// Populated alongside [`Self::return_path_fact`] when the callee
-    /// has ≥2 distinct return blocks with different predicate gates.
-    /// Cache-safe for the same reason as `return_path_fact`: entries
-    /// describe callee-intrinsic narrowing under Top-seeded Params.
-    /// Empty when no per-path distinction was observed.
+    /// Per-return-path decomposition of the return value. Populated
+    /// when the callee has ≥2 return blocks with different predicates.
    pub(super) return_path_facts: SmallVec<[PathFactReturnEntry; 2]>,
 }

@ -151,50 +84,21 @@ impl CachedInlineShape {
    }
 }

-/// Cache for context-sensitive inline analysis results.
-///
-/// Keyed by the callee's canonical [`FuncKey`] rather than a bare function
-/// name so that same-name definitions (e.g. two `process/1` methods on
-/// different classes in the same file) never share or overwrite each
-/// other's cache entries.  Values are stored as [`CachedInlineShape`]; see
-/// the module-level note above for why origins are stripped from the
-/// cache value and re-attributed at apply time.
+/// Cache for context-sensitive inline analysis results, keyed by
+/// canonical [`FuncKey`] so same-name definitions in different scopes
+/// never collide.
 pub(crate) type InlineCache = HashMap<(FuncKey, ArgTaintSig), CachedInlineShape>;

-/// Drop every entry from an inline cache, marking the start of a new
-/// convergence epoch.
-///
-/// Cross-file SCC fixed-point iteration runs pass 2 repeatedly until the
-/// merged summaries stop changing.  Between iterations the callee-summary
-/// inputs to inline analysis may have changed, so results cached under a
-/// stale snapshot must not leak into the next iteration — otherwise the
-/// engine could converge to a non-fixed-point (reporting a taint result
-/// that would not reproduce on a fresh run of the same file order).
-///
-/// The per-file inline cache is already reconstructed fresh at the top of
-/// each [`crate::taint::analyse_file`] call, so in the current code this
-/// call is effectively a no-op plumbing hook.  Keeping the method (instead
-/// of relying on ambient re-construction) makes the lifecycle explicit for
-/// any future refactor that moves the cache up into the SCC orchestrator.
-#[allow(dead_code)] // semantic hook; used by tests and future shared-cache refactor
+/// Drop every entry from the inline cache between SCC fixpoint
+/// iterations so stale results don't leak forward.
+#[allow(dead_code)]
 pub(crate) fn inline_cache_clear_epoch(cache: &mut InlineCache) {
    cache.clear();
 }

-/// Set-equal fingerprint of an inline cache, used by the SCC orchestrator
-/// to detect when cross-file inline analysis has reached a fixed point
-/// alongside summary convergence.
-///
-/// Returns a `HashMap` mapping each `(FuncKey, ArgTaintSig)` cache key to
-/// the return-value capability bits of its inline result.  `HashMap`
-/// equality is set-equal (unordered), so two caches with the same entries
-/// compare equal regardless of insertion order.
-///
-/// Origins are intentionally omitted — they are non-deterministic across
-/// callers with identical caps (see the module-level note on origin
-/// attribution) and would cause the fingerprint to oscillate without
-/// reflecting a real precision change.
-#[allow(dead_code)] // observability hook; used by tests and future shared-cache refactor
+/// Set-equal fingerprint of the inline cache, used by the SCC
+/// orchestrator to detect convergence.
+#[allow(dead_code)]
 pub(crate) fn inline_cache_fingerprint(
    cache: &InlineCache,
 ) -> HashMap<(FuncKey, ArgTaintSig), u16> {
@ -206,24 +110,11 @@ pub(crate) fn inline_cache_fingerprint(

 /// CFG node metadata embedded in cross-file callee bodies.
 ///
-/// ## Why a full [`crate::cfg::NodeInfo`] lives here
-///
-/// An earlier variant carried only the two fields the symex executor reads
-/// (`bin_op`, `labels`).  That was sufficient for symex but not for the
-/// taint engine, which reads ~20 fields off `cfg[inst.cfg_node]` across
-/// `transfer_inst`, `collect_block_events`, `compute_succ_states`, and
-/// helpers (callee name, `arg_uses`, `arg_callees`, `call_ordinal`,
-/// `outer_callee`, `kwargs`, `arg_string_literals`, `ast.span`,
-/// `ast.enclosing_func`, `condition_*`, `all_args_literal`, `catch_param`,
-/// `parameterized_query`, `in_defer`, `cast_target_type`, `string_prefix`,
-/// `taint.uses`, `taint.defines`, `taint.extra_defines`,
-/// `taint.const_text`, …).  Rather than shuttling each of those through a
-/// `CfgView` accessor at every callsite, we store a full serde-able
-/// [`crate::cfg::NodeInfo`] snapshot here so the indexed-scan path can
-/// rehydrate an equivalent `Cfg` on load (see [`rebuild_body_graph`]).
-/// Both scan paths then feed the same `&Cfg` into the taint engine, and
-/// cross-file inline fires regardless of whether the body came from pass
-/// 1 or from SQLite.
+/// Stores a full serde-able [`crate::cfg::NodeInfo`] snapshot rather
+/// than projecting individual fields, so the indexed-scan path can
+/// rehydrate an equivalent `Cfg` (see [`rebuild_body_graph`]) and feed
+/// the same `&Cfg` into the taint engine regardless of whether the
+/// body came from pass 1 or SQLite.
 #[derive(Clone, Debug, Default, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
 pub struct CrossFileNodeMeta {
    /// Full `NodeInfo` snapshot for this body-local NodeIndex.
@ -268,7 +159,7 @@ pub fn populate_node_meta(body: &mut CalleeSsaBody, cfg: &crate::cfg::Cfg) -> bo
    // `compute_succ_states` via `cfg[*cond]`, so without it the synthesized
    // cross-file proxy CFG (`rebuild_body_graph`) ends up too small whenever
    // the callee body has any conditional branch whose `cond` index sits
-    // past the maximum `inst.cfg_node` index — inline analysis then panics
+    // past the maximum `inst.cfg_node` index; inline analysis then panics
    // with an out-of-bounds index.
    let mut referenced: Vec<NodeIndex> = Vec::new();
    for block in &body.ssa.blocks {
@ -320,7 +211,7 @@ pub fn rebuild_body_graph(body: &mut CalleeSsaBody) -> bool {
    // index.  We fill any unreferenced intermediate indices with
    // `NodeInfo::default()`.
    //
-    // Walks both instruction `cfg_node`s and `Terminator::Branch.cond` —
+    // Walks both instruction `cfg_node`s and `Terminator::Branch.cond`;
    // the latter is read by `compute_succ_states` via `cfg[*cond]`, so
    // missing it produces an OOB panic when a conditional branch's cond
    // node has a higher index than any `inst.cfg_node` in the body.
@ -339,7 +230,7 @@ pub fn rebuild_body_graph(body: &mut CalleeSsaBody) -> bool {
            }
        }
    }
-    // Also consider node_meta keys — they should be a subset of the
+    // Also consider node_meta keys, they should be a subset of the
    // SSA-referenced indices, but be defensive.
    for &k in body.node_meta.keys() {
        if k > max_idx {
--- a/src/taint/ssa_transfer/mod.rs
+++ b/src/taint/ssa_transfer/mod.rs
--- a/src/taint/ssa_transfer/state.rs
+++ b/src/taint/ssa_transfer/state.rs
@ -2,7 +2,7 @@
 //! the original monolithic `ssa_transfer.rs`.
 //!
 //! Contains:
-//! * [`SsaTaintState`] — the per-block lattice value with `values`,
+//! * [`SsaTaintState`], the per-block lattice value with `values`,
 //!   `validated_must`/`validated_may`, `predicates`, `heap`, `path_env`,
 //!   `abstract_state`.
 //! * [`BindingKey`] / [`seed_lookup`] for cross-body taint seeding.
@ -25,7 +25,7 @@ use std::collections::HashMap;

 // NOTE: The per-SSA-value origin cap used to be a hardcoded
 // `MAX_ORIGINS: usize = 4`.  It is now governed by the stable
-// `analysis.engine.max_origins` option (default `32`) — see
+// `analysis.engine.max_origins` option (default `32`), see
 // `crate::utils::analysis_options` and [`effective_max_origins`].  The
 // test-only override below still short-circuits the config read so
 // `engine_notes_tests.rs` can force a tiny cap to trigger truncation
@ -42,7 +42,7 @@ static WORKLIST_CAP_OVERRIDE: std::sync::atomic::AtomicUsize =
    std::sync::atomic::AtomicUsize::new(0);
 /// Records the MAX iteration count observed across every
 /// `run_ssa_taint_full` call since the most recent reset.  Cheaper and
-/// more useful for regression tests than the last-call value — a cap
+/// more useful for regression tests than the last-call value, a cap
 /// hit anywhere in the scan is remembered.
 pub(super) static MAX_WORKLIST_ITERATIONS: std::sync::atomic::AtomicUsize =
    std::sync::atomic::AtomicUsize::new(0);
@ -90,7 +90,7 @@ pub fn reset_worklist_observability() {
 /// force `OriginsTruncated` emission on small fixtures.
 static MAX_ORIGINS_OVERRIDE: std::sync::atomic::AtomicUsize =
    std::sync::atomic::AtomicUsize::new(0);
-/// Total number of origins dropped since the most recent reset — captured
+/// Total number of origins dropped since the most recent reset, captured
 /// from `merge_origins` and the post-hoc saturation scan.  Used by tests
 /// to detect truncation events that don't propagate to a finding (e.g.
 /// when the cap is so tight no taint flow survives to emit a sink event).
@ -136,7 +136,7 @@ pub fn reset_origins_observability() {
 thread_local! {
    /// Per-body engine-note collector.  Cleared at the start of each
    /// `analyse_body_with_seed` invocation and drained after
-    /// `run_ssa_taint_full` returns — notes are then attached to every
+    /// `run_ssa_taint_full` returns, notes are then attached to every
    /// finding emitted from that body.  Living as a thread-local avoids
    /// threading a `&RefCell` through the nearly-10-argument transfer
    /// struct; inline analysis recursion is intentionally allowed to
@ -148,7 +148,7 @@ thread_local! {
    /// was suppressed by an SSA-engine path-safety proof (PathFact
    /// `dotdot=No && absolute=No`).  Populated by `is_path_safe_for_sink`
    /// and consumed by the state-analysis pass to suppress
-    /// `state-unauthed-access` on the same sink — when the taint engine
+    /// `state-unauthed-access` on the same sink, when the taint engine
    /// has already proved the user-controlled input cannot escape into a
    /// privileged location, the auth concern on that sink is reduced.
    /// Reset at start of `analyse_file`, drained before state analysis.
@ -156,7 +156,7 @@ thread_local! {
        RefCell::new(std::collections::HashSet::new());

    /// File-level set of CFG sink spans where the SSA engine emitted an
-    /// `all_validated` event — every tainted input to the sink passed
+    /// `all_validated` event, every tainted input to the sink passed
    /// through a recognised validation/sanitisation predicate before
    /// reaching it.  Distinct from `PATH_SAFE_SUPPRESSED_SPANS`, which
    /// is FILE_IO-scoped and feeds state analysis: this set is
@ -167,7 +167,7 @@ thread_local! {
    ///
    /// Without this signal the suppression gate has to fall back to
    /// "function emitted at least one taint-unsanitised-flow finding"
-    /// or "function contains a labelled Sanitizer node" — both of
+    /// or "function contains a labelled Sanitizer node", both of
    /// which miss validated/dominated/early-return safety where the
    /// engine cleared the flow without firing or hitting an explicit
    /// sanitiser.
@ -227,7 +227,7 @@ pub fn take_path_safe_suppressed_spans() -> std::collections::HashSet<(usize, us

 /// Record a sink CFG-node span where the SSA engine proved every
 /// tainted input was validated (`SsaTaintEvent::all_validated`).
-/// Cap-agnostic — fires for any sink the engine evaluated and cleared.
+/// Cap-agnostic: fires for any sink the engine evaluated and cleared.
 /// Consumed by `TaintSuppressionCtx::build` as positive evidence that
 /// taint analysis reached this line and proved safety, so AST-pattern
 /// findings on the same line can be suppressed without misclassifying
@ -263,7 +263,7 @@ pub fn take_all_validated_spans() -> std::collections::HashSet<(usize, usize)> {
 /// into the seed map always specify the owning body's id; readers look
 /// up by the scope they know they want (typically their own
 /// `parent_body_id`, with a fallback to `BodyId(0)` for entries that
-/// the JS/TS two-level solve has re-keyed onto the top-level scope —
+/// the JS/TS two-level solve has re-keyed onto the top-level scope;
 /// see [`crate::taint::ssa_transfer::filter_seed_to_toplevel`]).
 #[derive(Debug, Clone, Hash, Eq, PartialEq)]
 pub struct BindingKey {
@ -284,7 +284,7 @@ impl BindingKey {
 /// Look up a binding in a seed map.
 ///
 /// Thin wrapper over [`HashMap::get`] retained for call-site readability
-/// — every seed entry is now exactly scoped to a single `(name,
+/// only: every seed entry is now exactly scoped to a single `(name,
 /// BodyId)`, so the lookup is O(1) with no fallback.  Writers that want
 /// cross-scope reachability must explicitly re-key their entries (see
 /// [`crate::taint::ssa_transfer::filter_seed_to_toplevel`]).
@ -299,7 +299,7 @@ pub fn seed_lookup<'a>(

 /// Compact key for a heap-field taint cell.
 ///
-/// `(loc, field)` — `loc` is the abstract location of the *parent*
+/// `(loc, field)`, `loc` is the abstract location of the *parent*
 /// (interned by the body's [`crate::pointer::LocInterner`]), `field`
 /// is the [`FieldId`] of the projected field.  The pair survives lattice
 /// joins / leq comparisons by `Ord`-derived sort.
@ -309,16 +309,16 @@ pub struct FieldTaintKey {
    pub field: FieldId,
 }

-/// Pointer-Phase 4 / W4: per-field-cell taint record.
+/// Per-field-cell taint record.
 ///
 /// Carries the union of writers' taint for the abstract field cell plus
 /// two validation channels:
-/// * `validated_must` — set when *every* writer recorded a value that was
+/// * `validated_must`, set when *every* writer recorded a value that was
 ///   `validated_must` in its own SSA scope.  Lattice join intersects
-///   (`AND`) — matching the symbol-keyed [`SsaTaintState::validated_must`]
+///   (`AND`), matching the symbol-keyed [`SsaTaintState::validated_must`]
 ///   semantics for "validated on every path".
-/// * `validated_may` — set when *any* writer recorded a `validated_may`
-///   value.  Lattice join unions (`OR`) — matching the symbol-keyed
+/// * `validated_may`, set when *any* writer recorded a `validated_may`
+///   value.  Lattice join unions (`OR`), matching the symbol-keyed
 ///   [`SsaTaintState::validated_may`] semantics for "validated on some
 ///   path".
 ///
@ -332,7 +332,7 @@ pub struct FieldCell {
 }

 impl FieldCell {
-    /// Construct a cell with no validation bits — convenience for the
+    /// Construct a cell with no validation bits, convenience for the
    /// pre-W4 callers that don't propagate symbol-level validation.
    pub fn unvalidated(taint: VarTaint) -> Self {
        Self {
@ -365,17 +365,17 @@ pub struct SsaTaintState {
    /// interpretation is disabled (`analysis.engine.abstract_interpretation
    /// = false`).
    pub abstract_state: Option<AbstractState>,
-    /// Pointer-Phase 3: per-heap-field taint cells, keyed by
+    /// Per-heap-field taint cells, keyed by
    /// `(parent_loc, field)`.  Sorted by [`FieldTaintKey`] for O(n)
    /// merge-join.  Populated only when the body's
    /// [`crate::pointer::PointsToFacts`] is available
    /// (`NYX_POINTER_ANALYSIS=1`); empty otherwise so the lattice join
    /// is a strict no-op for pointer-disabled runs.  Field reads
    /// (`SsaOp::FieldProj`) consult the cells; field writes record into
-    /// them.  Cross-call propagation lands in Phase 5 via the
+    /// them.  Cross-call propagation lands during lowering via the
    /// field-granularity `PointsToSummary`.
    ///
-    /// Cell shape (Phase 4 / W4): [`FieldCell`] carries `taint` plus
+    /// Cell shape: [`FieldCell`] carries `taint` plus
    /// `validated_must` / `validated_may` flags so validation flows
    /// through abstract field / element identity.
    pub field_taint: SmallVec<[(FieldTaintKey, FieldCell); 4]>,
@ -403,7 +403,7 @@ impl SsaTaintState {
        }
    }

-    /// Pointer-Phase 3: read the field cell at `key`.  Returns `None`
+    /// Read the field cell at `key`.  Returns `None`
    /// when no cell has been recorded (caller should treat as
    /// untainted).  O(log n) on the sorted [`field_taint`] list.
    pub fn get_field(&self, key: FieldTaintKey) -> Option<&FieldCell> {
@ -413,13 +413,13 @@ impl SsaTaintState {
            .map(|idx| &self.field_taint[idx].1)
    }

-    /// Pointer-Phase 3 / W4: union `t` into the field cell at `key`,
+    /// Union `t` into the field cell at `key`,
    /// recording per-write `validated_must` / `validated_may` channels.
    ///
    /// Maintains sorted invariant.  No-op when `t.caps` is empty (so the
    /// lattice bottom stays `[]`).  When the cell already exists, the
-    /// validation channels merge with the lattice-join semantics —
-    /// `must` AND-intersects, `may` OR-unions — matching the symbol-
+    /// validation channels merge with the lattice-join semantics:
+    /// `must` AND-intersects, `may` OR-unions, matching the symbol-
    /// keyed [`SsaTaintState::validated_must`] / `validated_may`
    /// semantics so a write coming through a non-validated path tears
    /// down `must` while preserving `may` of any earlier validated path.
@ -563,15 +563,15 @@ impl Lattice for SsaTaintState {
    }
 }

-/// Pointer-Phase 3 / W4: merge-join two sorted `field_taint` lists.
+/// Merge-join two sorted `field_taint` lists.
 /// Same shape as [`merge_join_ssa_vars`] but keyed on [`FieldTaintKey`]:
-/// * `taint.caps`  — OR-union
-/// * `taint.origins` — merged with cap-respecting de-dup
-/// * `taint.uses_summary` — OR-union
-/// * `validated_must` — AND-intersect (matches the symbol-keyed
+/// * `taint.caps`, OR-union
+/// * `taint.origins`, merged with cap-respecting de-dup
+/// * `taint.uses_summary`, OR-union
+/// * `validated_must`, AND-intersect (matches the symbol-keyed
 ///   `validated_must` lattice: a path that didn't validate this cell
 ///   breaks the invariant)
-/// * `validated_may` — OR-union (any path's validation contributes)
+/// * `validated_may`, OR-union (any path's validation contributes)
 pub(super) fn merge_join_field_taint(
    a: &[(FieldTaintKey, FieldCell)],
    b: &[(FieldTaintKey, FieldCell)],
@ -581,7 +581,7 @@ pub(super) fn merge_join_field_taint(
    while i < a.len() && j < b.len() {
        match a[i].0.cmp(&b[j].0) {
            std::cmp::Ordering::Less => {
-                // Cell present only in `a` — counterpart in `b` is the
+                // Cell present only in `a`, counterpart in `b` is the
                // lattice bottom (no validation, no taint), so:
                //   must = a.must AND false = false
                //   may  = a.may  OR  false = a.may
@ -637,11 +637,11 @@ pub(super) fn merge_join_field_taint(
 /// `a ≤ b` for sorted `field_taint` lists.  Used by the convergence
 /// check in [`Lattice::leq`].  Per-cell criteria:
 ///
-/// * `taint.caps` — `a ⊆ b` (sub-state on caps; matches per-SSA-value
+/// * `taint.caps`, `a ⊆ b` (sub-state on caps; matches per-SSA-value
 ///   `ssa_vars_leq`).
-/// * `validated_must` — `a.must ⊇ b.must` (super-state on must; same
+/// * `validated_must`, `a.must ⊇ b.must` (super-state on must; same
 ///   shape as the symbol-keyed `validated_must` leq).
-/// * `validated_may` — `a.may ⊆ b.may` (sub-state on may).
+/// * `validated_may`, `a.may ⊆ b.may` (sub-state on may).
 ///
 /// When `b` lacks a key present in `a`, `b`'s side is the lattice
 /// bottom: no caps, no validation.  `a`'s caps must also be empty
@ -669,12 +669,12 @@ pub(super) fn field_taint_leq(
        if (ca.taint.caps - cb.taint.caps).bits() != 0 {
            return false;
        }
-        // Must: a ⊇ b — every must-validated key in b is must-validated
+        // Must: a ⊇ b, every must-validated key in b is must-validated
        // in a.  Equivalently: !cb.must OR ca.must.
        if cb.validated_must && !ca.validated_must {
            return false;
        }
-        // May: a ⊆ b — every may-validated key in a is may-validated
+        // May: a ⊆ b, every may-validated key in a is may-validated
        // in b.  Equivalently: !ca.may OR cb.may.
        if ca.validated_may && !cb.validated_may {
            return false;
@ -735,7 +735,7 @@ pub(super) fn merge_join_ssa_vars(
 ///
 /// Ordering is lexicographic over
 /// `(source_span_start, source_span_end, source_kind_tag, node_index)`.
-/// `source_span` is the most stable component across bodies — cross-body
+/// `source_span` is the most stable component across bodies, cross-body
 /// remapped origins carry the original byte span explicitly; intra-body
 /// origins default to `(0, 0)` and fall through to the secondary keys.
 ///
@ -760,7 +760,7 @@ fn origin_sort_key(o: &TaintOrigin) -> (usize, usize, u8, usize) {
 /// Bounded, deterministic insertion of an origin into a sorted origin
 /// set.  Returns `true` when `new` was admitted (or de-duplicated against
 /// an existing entry), `false` when the cap forced a drop.  On drop,
-/// the origin with the *largest* sort key is evicted first — the caller
+/// the origin with the *largest* sort key is evicted first, the caller
 /// sees a survivor set that depends only on the input multiset and
 /// [`effective_max_origins`], not on insertion order.
 ///
@ -774,7 +774,7 @@ pub(crate) fn push_origin_bounded(
 ) -> bool {
    // Identity check: same node counts as the same origin.  We keep
    // node-only dedup to match [`ssa_vars_leq`], which compares origin
-    // sets by node membership — widening dedup here without tightening
+    // sets by node membership, widening dedup here without tightening
    // there would break the monotonicity invariant.
    if target.iter().any(|o| o.node == new.node) {
        return true;
@ -814,7 +814,7 @@ pub(crate) fn push_origin_bounded(
        target.insert(pos, new);
        true
    } else {
-        // `new` itself is the worst — drop it instead of the survivor.
+        // `new` itself is the worst, drop it instead of the survivor.
        false
    }
 }
@ -829,7 +829,7 @@ pub(super) fn merge_origins(
    a: &SmallVec<[TaintOrigin; 2]>,
    b: &SmallVec<[TaintOrigin; 2]>,
 ) -> SmallVec<[TaintOrigin; 2]> {
-    // Seed the result with `a` — but re-sort defensively in case the
+    // Seed the result with `a`, but re-sort defensively in case the
    // caller constructed `a` through non-bounded paths.  Historically
    // every write goes through `push_origin_bounded` (or `merge_origins`
    // itself), so this resort is a no-op on the steady state but costs
@ -911,7 +911,7 @@ pub(super) fn merge_join_ssa_predicates(
 mod origin_cap_tests {
    //! Tests for the deterministic, config-driven origin cap.  These
    //! cover the behavior at the `push_origin_bounded` / `merge_origins`
-    //! boundary — the end-to-end engine-note signal is exercised in
+    //! boundary, the end-to-end engine-note signal is exercised in
    //! `tests/engine_notes_tests.rs`.

    use super::*;
@ -1037,7 +1037,7 @@ mod origin_cap_tests {
    fn effective_cap_reads_runtime_config_when_override_zero() {
        // Override takes priority; override=0 falls through to config.
        // `current()` returns the default (32) when no runtime is
-        // installed — which is the state the rest of the test suite runs
+        // installed, which is the state the rest of the test suite runs
        // under.  Guard that the fallback path reaches 32.
        let _g = TEST_GUARD.lock().unwrap_or_else(|e| e.into_inner());
        set_max_origins_override(0);
@ -1053,7 +1053,7 @@ mod origin_cap_tests {

 #[cfg(test)]
 mod field_taint_tests {
-    //! Pointer-Phase 3: tests for the heap-field taint cells on
+    //! Tests for the heap-field taint cells on
    //! [`SsaTaintState`].  Cover get/add round-trip, lattice join
    //! (cap union + origin merge), and `leq` convergence semantics.
    use super::*;
@ -1202,7 +1202,7 @@ mod field_taint_tests {
        assert!(cell.validated_must, "a.must AND b.must = true");
        assert!(cell.validated_may);

-        // Now make `b`'s validated_must false — must should drop to
+        // Now make `b`'s validated_must false, must should drop to
        // false on the join, may stays at OR.
        let mut c = SsaTaintState::initial();
        c.add_field(k, taint(Cap::ENV_VAR), false, true);
@ -1213,7 +1213,7 @@ mod field_taint_tests {
    }

    /// W4 audit: `merge_join_field_taint` OR-unions `validated_may`
-    /// — any path's may-validation contributes to the joined cell.
+    /// (any path's may-validation contributes to the joined cell).
    #[test]
    fn lattice_validated_may_unions_on_join() {
        let k = key(1, 7);
@ -1275,7 +1275,7 @@ mod field_taint_tests {
            a.leq(&b),
            "must super-state and equal caps: a ≤ b should hold"
        );
-        // Reverse: b.must=false, a.must=true — for b ≤ a, we need
+        // Reverse: b.must=false, a.must=true, for b ≤ a, we need
        // b.must ⊇ a.must which is false ⊇ true = false.  So b ≤ a
        // must fail.
        assert!(!b.leq(&a), "b lacks the must invariant a holds");
@ -1289,7 +1289,7 @@ mod field_taint_tests {
        assert!(!a2.leq(&b2), "a.may=true is NOT ⊆ b.may=false");
    }

-    /// Pointer-Phase 3 / A8 audit: the field_taint lattice is monotone
+    /// The field_taint lattice is monotone
    /// and converges under a deterministic enumeration of inputs.
    /// Caps grow (OR), `uses_summary` grows (OR), origins grow modulo
    /// the cap (merge_origins is bounded).  Joins must:
@ -1409,7 +1409,7 @@ mod field_taint_tests {

    /// `field_taint_leq` is the soundness gate for worklist
    /// convergence: once `next ≤ acc`, the worklist halts.  Pin that
-    /// `leq` is consistent with `join` — i.e. `s.leq(s.join(t))` holds
+    /// `leq` is consistent with `join`, i.e. `s.leq(s.join(t))` holds
    /// for any `s, t`.  Without this, the worklist could loop
    /// indefinitely on inputs whose join produces a state not
    /// dominated by both inputs.
--- a/src/taint/ssa_transfer/summary_extract.rs
+++ b/src/taint/ssa_transfer/summary_extract.rs
@ -1,11 +1,11 @@
 //! SSA function-summary and container-flow extraction.
 //!
 //! Extracted from the monolithic `ssa_transfer.rs`.  Contains:
-//! * [`extract_ssa_func_summary`] — runs per-parameter taint probes and
+//! * [`extract_ssa_func_summary`], runs per-parameter taint probes and
 //!   synthesises an [`crate::summary::ssa_summary::SsaFuncSummary`] with
 //!   source caps, return transforms, per-path transforms, and sink site
 //!   attribution.
-//! * [`extract_container_flow_summary`] — structural scan for
+//! * [`extract_container_flow_summary`], structural scan for
 //!   `param_container_to_return` + `param_to_container_store` pairs.
 //! * Private helpers for predicate-hash summarisation, abstract-transfer
 //!   derivation, callback source detection, and return-type inference.
@ -123,15 +123,15 @@ pub fn extract_ssa_func_summary_full(
        .collect();

    // Collect all param SSA values to exclude from return cap collection.
-    // Param values persist with their seeded taint throughout the function —
+    // Param values persist with their seeded taint throughout the function;
    // we only want caps on derived values (call results, assigns) at return.
    let all_param_values: std::collections::HashSet<SsaValue> =
        param_info.iter().map(|(_, _, v)| *v).collect();

    // Per-return-block observation captured alongside the aggregate return
-    // caps.  Each entry records one return block's exit state — caps
+    // caps.  Each entry records one return block's exit state, caps
    // contributed on that path, path-predicate hash, known_true/false bits,
-    // and the return SSA value's abstract fact — so the per-param loop can
+    // and the return SSA value's abstract fact, so the per-param loop can
    // emit one [`ReturnPathTransform`] per distinct predicate gate.
    struct ReturnBlockObs {
        /// Caps at the return SSA value (or joined live values for
@ -141,7 +141,7 @@ pub fn extract_ssa_func_summary_full(
        /// (passthrough fallback).
        param_caps: Cap,
        /// Deterministic hash of the predicate gate at this return.
-        /// `0` means "no predicate gate" — an unguarded return.
+        /// `0` means "no predicate gate", an unguarded return.
        predicate_hash: u64,
        /// `PredicateSummary::known_true` bits intersected across all
        /// tracked variables at this return.  Encoded via
@ -268,7 +268,7 @@ pub fn extract_ssa_func_summary_full(
                        }
                    }
                } else {
-                    // Return(None): implicit return — fall back to all live values.
+                    // Return(None): implicit return, fall back to all live values.
                    for (val, taint) in &exit.values {
                        if all_param_values.contains(val) {
                            block_param_caps |= taint.caps;
@ -348,7 +348,7 @@ pub fn extract_ssa_func_summary_full(

    // Per-return-path PathFact decomposition derived from the baseline
    // probe (no seeded taint).  Abstract facts on the return rv are
-    // independent of taint seeding — they describe the function's
+    // independent of taint seeding, they describe the function's
    // intrinsic narrowing, so the baseline run captures them without
    // per-param noise.
    //
@ -388,7 +388,7 @@ pub fn extract_ssa_func_summary_full(
    let mut param_to_sink: Vec<(usize, SmallVec<[SinkSite; 1]>)> = Vec::new();
    let mut param_to_sink_param = Vec::new();
    // Per-param return-path decomposition.  Populated only when the param
-    // has ≥2 distinct return-block predicate hashes — a single-return-path
+    // has ≥2 distinct return-block predicate hashes, a single-return-path
    // callee is already precise via `param_to_return`.
    let mut param_return_paths: Vec<(
        usize,
@ -417,7 +417,7 @@ pub fn extract_ssa_func_summary_full(
        // expressions (e.g. `file._source.uri`) as their own
        // [`SsaOp::Param`] ops with composite `var_name`s like
        // `"file._source.uri"`.  These phantom Params are the values
-        // actually used as call arguments — not the formal-param SSA
+        // actually used as call arguments, not the formal-param SSA
        // value the seed targets.  Without this, the per-param probe
        // misses cross-call sinks because the call's arg SSA value is
        // a phantom Param with no seed entry, so `transfer_inst::Param`
@ -447,7 +447,7 @@ pub fn extract_ssa_func_summary_full(

        let (return_caps, events, _, per_return_obs) = run_probe(seed);

-        // Subtract baseline source_caps — we only want param-contributed caps
+        // Subtract baseline source_caps, we only want param-contributed caps
        let param_return_caps = return_caps & !source_caps;

        if !param_return_caps.is_empty() {
@ -464,7 +464,7 @@ pub fn extract_ssa_func_summary_full(
        // observed return block, derive a `ReturnPathTransform` mirroring
        // the aggregate logic (prefer derived caps, fall back to param
        // caps, strip baseline source caps).  Only emit when ≥2 distinct
-        // predicate hashes are present — a single-hash summary adds no
+        // predicate hashes are present, a single-hash summary adds no
        // signal over the aggregate `param_to_return`.
        if per_return_obs.len() >= 2 {
            let mut per_path: SmallVec<[crate::summary::ssa_summary::ReturnPathTransform; 2]> =
@ -477,7 +477,7 @@ pub fn extract_ssa_func_summary_full(
                };
                let block_contributed = block_return_caps & !source_caps;
                let transform_kind = if block_contributed.is_empty() {
-                    // No caps on this path — param does not reach return
+                    // No caps on this path, param does not reach return
                    // under this predicate.  A `StripBits(all)` records
                    // "all bits cleared" so downstream join preserves the
                    // disparity with other paths.
@ -513,9 +513,31 @@ pub fn extract_ssa_func_summary_full(
            }
        }

-        // Collect sink caps + primary-location sites from events + per-arg-position detail
+        // Collect sink caps + primary-location sites from events + per-arg-position detail.
+        //
+        // Skip events flagged `all_validated`: every tainted SSA value
+        // that reached the sink was already proved validated by a
+        // dominating predicate (AllowlistCheck / TypeCheck /
+        // ValidationCall, including the indirect-validator branch
+        // narrowing for `validate*` / `is_valid*` callees).  Those
+        // events would have been dropped by `ssa_events_to_findings` at
+        // the per-file finding step; carrying them into
+        // `param_to_sink` / `param_to_sink_param` re-publishes a sink
+        // attribution callers can no longer suppress, because the
+        // caller can't see the validator that lives inside the
+        // callee body.
+        //
+        // Strict-additive: `all_validated` is set only when every
+        // tainted operand at the sink has its `var_name` in
+        // `state.validated_may`: single-path single-validator helpers
+        // cleanly skip; mixed-tainted-with-some-unvalidated events
+        // still propagate.  Closes the helper-summary precision gap
+        // surfaced by Novu CVE GHSA-4x48-cgf9-q33f.
        let mut param_sites: SmallVec<[SinkSite; 1]> = SmallVec::new();
        for event in &events {
+            if event.all_validated {
+                continue;
+            }
            for pos in extract_sink_arg_positions(event, ssa) {
                param_to_sink_param.push((idx, pos, event.sink_caps));
            }
@ -601,14 +623,14 @@ pub fn extract_ssa_func_summary_full(

    // Per-parameter abstract-domain transfers.
    //
-    // Derived structurally from the SSA body — no additional taint probes.
+    // Derived structurally from the SSA body, no additional taint probes.
    // Three-step inference per parameter:
    //   1. Identity: return SSA value at every return block traces back to
    //      this parameter (possibly through assigns / phi merges all feeding
    //      from the same param).
    //   2. Callee-intrinsic bound: baseline `return_abstract` carries a
    //      concrete fact (bounded interval or known prefix) that holds
-    //      regardless of caller input — record it once per parameter as
+    //      regardless of caller input, record it once per parameter as
    //      `Clamped` / `LiteralPrefix` so the caller sees the bound even
    //      when it has no abstract info on its own argument.
    //   3. Top: default; the entry is omitted (empty transfer is meaningless).
@ -630,14 +652,14 @@ pub fn extract_ssa_func_summary_full(
        param_return_paths,
        return_path_facts,
        points_to,
-        // Pointer-Phase 5 extension — empty until the field-granularity
+        // Left empty until the field-granularity
        // extractor is wired (`NYX_POINTER_ANALYSIS=1` only).  Default
        // path stays bit-identical to today.
        field_points_to: crate::summary::points_to::FieldPointsToSummary::empty(),
        // Populated post-extraction in
        // `taint::lower_all_functions_from_bodies` once SSA optimisation
        // has computed `opt.type_facts`.  Empty here means the
-        // extractor itself doesn't carry receiver-type info — the
+        // extractor itself doesn't carry receiver-type info, the
        // caller patches it in.
        typed_call_receivers: Vec::new(),
    }
@ -699,14 +721,14 @@ pub(super) fn summarise_return_predicates(state: &SsaTaintState) -> (u64, u8, u8
 ///
 /// `return_abstract` is the callee's intrinsic baseline (from the no-seed
 /// probe).  When present, it describes a fact that holds for the return
-/// regardless of parameter input — so it can be attached as a
+/// regardless of parameter input, so it can be attached as a
 /// `Clamped` / `LiteralPrefix` transform to every parameter that flows to
 /// the return.
 ///
 /// Identity detection is structural: walk the return values back through
 /// [`SsaOp::Assign`] / [`SsaOp::Phi`] chains (bounded) and check whether
 /// every leaf resolves to the same [`SsaOp::Param`].  The trace is cheap
-/// and can only produce `Identity` for passthrough callees — anything
+/// and can only produce `Identity` for passthrough callees, anything
 /// more complex degrades to the baseline fact or `Top`.
 fn derive_abstract_transfer(
    ssa: &SsaBody,
@ -780,7 +802,7 @@ fn derive_abstract_transfer(
    }

    // Derive a baseline-invariant transform from `return_abstract`.  This is
-    // the "callee intrinsic" fact that always holds — each parameter that
+    // the "callee intrinsic" fact that always holds, each parameter that
    // flows to the return gets it attached as the conservative transfer.
    let baseline_invariant: Option<AbstractTransfer> = return_abstract.map(|av| {
        let interval = match (av.interval.lo, av.interval.hi) {
@ -805,7 +827,7 @@ fn derive_abstract_transfer(
        } else if let Some(base) = baseline_invariant.as_ref() {
            // Baseline intrinsic bound applies to every parameter that could
            // reach the return.  We conservatively attach it to all params
-            // — at apply time the caller meets it with the real return
+            // regardless: at apply time the caller meets it with the real return
            // abstract (also from this same summary), so double-counting
            // would collapse to the tighter of the two.
            transfer = base.clone();
@ -879,7 +901,7 @@ fn infer_summary_return_type(
    lang: Lang,
 ) -> Option<crate::ssa::type_facts::TypeKind> {
    // Find blocks with Return terminators, then look at the last defined value
-    // in those blocks — if it's a Call with a known constructor, that's our type.
+    // in those blocks, if it's a Call with a known constructor, that's our type.
    for block in &ssa.blocks {
        if !matches!(block.terminator, Terminator::Return(_)) {
            continue;
@ -965,7 +987,7 @@ pub(crate) fn extract_container_flow_summary(
    // `trace_to_param` will happily return any `SsaOp::Param { index }`, but
    // scoped lowering synthesises `Param` ops for external captures (module
    // imports, free identifiers) at indices beyond the formal parameter count.
-    // Those must not enter the summary — the key's arity only covers formal
+    // Those must not enter the summary, the key's arity only covers formal
    // params, and an out-of-range index trips `ssa_summary_fits_arity`, forcing
    // the reconciliation probe to generate a synthetic disambiguator that no
    // caller will ever look up.
@ -1035,7 +1057,7 @@ pub(crate) fn extract_container_flow_summary(
                };

                // Trace container to positional param (SelfParam → None, so
-                // when the container is the receiver we skip — the caller
+                // when the container is the receiver we skip, the caller
                // tracks that via `receiver_to_container_store` if needed).
                // Same arity filter as above: reject synthetic Param ops that
                // were injected for free captures.
--- a/src/taint/ssa_transfer/tests.rs
+++ b/src/taint/ssa_transfer/tests.rs
@ -221,7 +221,7 @@ mod cross_file_tests {
 mod inline_cache_epoch_tests {
    //! Hooks for cross-file SCC joint fixed-point iteration.
    //!
-    //! These do not exercise the full inline pipeline — they lock down the
+    //! These do not exercise the full inline pipeline; they lock down the
    //! semantic contract of [`inline_cache_clear_epoch`] and
    //! [`inline_cache_fingerprint`] so the SCC orchestrator can rely on:
    //!
@ -229,7 +229,7 @@ mod inline_cache_epoch_tests {
    //! * `fingerprint` is deterministic across equivalent caches (same
    //!   keys → same bytes).  Two caches with identical entries produce
    //!   identical fingerprints regardless of insertion order.
-    //! * `fingerprint` changes when return caps change — the signal the
+    //! * `fingerprint` changes when return caps change, the signal the
    //!   orchestrator will use to detect inline-cache convergence.

    use super::super::*;
@ -675,7 +675,7 @@ mod worklist_tests {

    #[test]
    fn dense_successors_no_duplicates() {
-        // Many successors, some repeated — old O(n) contains() would be slow here
+        // Many successors, some repeated; the old O(n) contains() would be slow here
        let mut wl = VecDeque::new();
        let mut in_wl = HashSet::new();

@ -735,8 +735,8 @@ mod primary_sink_location_tests {
    //! [`SsaTaintEvent::primary_sink_site`] →
    //! [`crate::taint::Finding::primary_location`].
    //!
-    //! The test is deliberately low-level — it wires up synthetic SSA and
-    //! drives the three emission stages directly — so any future refactor
+    //! The test is deliberately low-level: it wires up synthetic SSA and
+    //! drives the three emission stages directly, so any future refactor
    //! that drops the site on the floor between stages fails here rather
    //! than only at the corpus/benchmark layer.
    use super::super::*;
@ -841,7 +841,7 @@ mod primary_sink_location_tests {
    /// If this fails, something on the summary→event→finding path
    /// (`pick_primary_sink_sites`, `emit_ssa_taint_events`, or
    /// `ssa_events_to_findings`) has silently stopped forwarding
-    /// coordinates.  Fixing that path — not this test — is the right
+    /// coordinates.  Fixing that path, not this test, is the right
    /// response.
    #[test]
    fn ssa_summary_sinksite_surfaces_as_finding_primary_location() {
@ -863,7 +863,7 @@ mod primary_sink_location_tests {
        };

        // Drive the three emission stages with the summary's own
-        // `param_to_sink` — that is what summary resolution feeds in the
+        // `param_to_sink`; that is what summary resolution feeds in the
        // real pipeline.
        let tainted: Vec<(SsaValue, Cap, SmallVec<[TaintOrigin; 2]>)> = vec![(
            SsaValue(0),
@ -944,7 +944,7 @@ mod goto_succ_propagation_tests {

    #[test]
    fn goto_propagates_to_every_succ_on_three_way_collapse() {
-        // Build a block with Terminator::Goto(1) but succs = [1, 2, 3] — the
+        // Build a block with Terminator::Goto(1) but succs = [1, 2, 3], the
        // shape lowering emits for a 3-way fanout.
        let block = SsaBlock {
            id: BlockId(0),
@ -1001,7 +1001,7 @@ mod goto_succ_propagation_tests {
            pointer_facts: None,
        };

-        // A non-bottom exit state — the test only cares that *every* succ
+        // A non-bottom exit state; the test only cares that *every* succ
        // receives a clone of it, so any distinguishable state works.
        let mut exit_state = SsaTaintState::initial();
        exit_state.values.push((
@ -1259,7 +1259,7 @@ mod goto_succ_propagation_tests {
    fn is_path_safe_for_sink_unknown_axis_returns_false() {
        use crate::abstract_interp::PathFact;

-        // Only dotdot is cleared — absolute stays Maybe → not path-safe.
+        // Only dotdot is cleared; absolute stays Maybe → not path-safe.
        let half_fact = PathFact::default().with_dotdot_cleared();
        assert!(!half_fact.is_path_safe());
    }
@ -1328,9 +1328,9 @@ mod goto_succ_propagation_tests {
    }
 }

-// ── Phase 4.2: receiver_candidates_for_type_lookup walks FieldProj ──────
+// ── receiver_candidates_for_type_lookup walks FieldProj ──────
 //
-// After Phase 2 SSA decomposition, `c.client.send(req)` lowers to
+// After SSA decomposition, `c.client.send(req)` lowers to
 //   v_c      = Param("c", 0)
 //   v_client = FieldProj(v_c, "client")
 //   v_call   = Call("send", receiver: v_client, args: [v_req])
@ -1430,7 +1430,7 @@ mod receiver_candidates_field_proj_tests {
    fn field_proj_receiver_walks_to_typed_root_in_go() {
        // Go is not Rust, so pre-Phase-4 the candidate walk would have
        // returned ONLY the immediate receiver (v2 = FieldProj). With
-        // Phase 4 we walk through FieldProj.receiver to recover v0 (the
+        // the FieldProj walk, we follow FieldProj.receiver to recover v0 (the
        // typed root `c`).
        let body = body_with_field_proj_chain();
        let cands =
@ -1516,7 +1516,7 @@ mod receiver_candidates_field_proj_tests {
    }
 }

-// ── Phase 6 hierarchy fan-out: ResolvedSummary union semantics ──────────
+// ── Hierarchy: ResolvedSummary union semantics ──────────
 //
 // `merge_resolved_summaries_fanout` is invoked at virtual-dispatch call
 // sites where the receiver's static type has multiple concrete
@ -1553,7 +1553,7 @@ mod fanout_merge_tests {
        }
    }

-    /// B1 — caps that grow taint signal (source/sink/receiver_to_sink)
+    /// B1: caps that grow taint signal (source/sink/receiver_to_sink)
    /// are unioned.  sanitizer_caps are intersected so only bits
    /// stripped by EVERY implementer count as cleared at the call site.
    #[test]
@ -1581,7 +1581,7 @@ mod fanout_merge_tests {
        );
    }

-    /// B2 — propagates_taint is OR'd; propagating_params is the union
+    /// B2: propagates_taint is OR'd; propagating_params is the union
    /// (any implementer's propagator counts).
    #[test]
    fn merge_propagation_unions() {
@ -1600,7 +1600,7 @@ mod fanout_merge_tests {
        assert_eq!(params, vec![0, 1, 2]);
    }

-    /// B3 — param_to_sink merges per-parameter caps (OR).  An impl
+    /// B3: param_to_sink merges per-parameter caps (OR).  An impl
    /// that adds a sink at param N composes with another impl that
    /// adds a different cap at the same N.
    #[test]
@ -1630,7 +1630,7 @@ mod fanout_merge_tests {
        );
    }

-    /// B4 — param_to_sink_sites merges per-parameter site lists with
+    /// B4: param_to_sink_sites merges per-parameter site lists with
    /// PartialEq dedup.  The same site appearing in both impls (e.g.
    /// inherited definition) must not be reported twice.
    #[test]
@ -1675,7 +1675,7 @@ mod fanout_merge_tests {
        assert!(sites.iter().any(|s| s == &unique_b));
    }

-    /// B5 — SSA-precision fields are dropped on disagreement.  Two
+    /// B5: SSA-precision fields are dropped on disagreement.  Two
    /// summaries with different `return_type` collapse to None;
    /// agreement is preserved.
    #[test]
@ -1704,7 +1704,7 @@ mod fanout_merge_tests {
        );
    }

-    /// B6 — abstract_transfer + param_return_paths drop on
+    /// B6: abstract_transfer + param_return_paths drop on
    /// disagreement (precise predicate-path data is not safely
    /// composable across distinct function bodies).
    #[test]
@ -1737,7 +1737,7 @@ mod fanout_merge_tests {
        );
    }

-    /// B7 — empty + empty = empty (no panic on degenerate inputs).
+    /// B7: empty + empty = empty (no panic on degenerate inputs).
    #[test]
    fn merge_empties_is_identity() {
        let m = merge_resolved_summaries_fanout(empty(), empty());
@ -1748,7 +1748,7 @@ mod fanout_merge_tests {
    }
 }

-// ── Pointer-Phase 3 / W1: synthetic field-WRITE round-trip ──────────────
+// ── synthetic field-WRITE round-trip ──────────────
 //
 // SSA lowering populates `SsaBody.field_writes` with entries that lift a
 // synthetic base-update Assign (`obj.f = rhs`) into a structural field
@ -1918,8 +1918,8 @@ mod field_write_tests {
        crate::pointer::analyse_body(body, crate::cfg::BodyId(7))
    }

-    /// Reuse `make_cfg`'s nodes — the body's instructions all reference
-    /// them — so `transfer_inst` can index `cfg[cfg_node]`.
+    /// Reuse `make_cfg`'s nodes (the body's instructions all reference
+    /// them) so `transfer_inst` can index `cfg[cfg_node]`.
    fn drive(body: &SsaBody, pf: &PointsToFacts) -> SsaTaintState {
        // We need a CFG that contains the bodies' cfg_nodes.
        let (cfg, _, _, _, _) = make_cfg();
@ -1998,7 +1998,7 @@ mod field_write_tests {

    /// Pointer-disabled run (`pointer_facts: None`): no field cell is
    /// recorded, no taint flows through the `obj.cache` projection.  The
-    /// strict-additive contract — pointer-disabled behaviour is the
+    /// strict-additive contract: pointer-disabled behaviour is the
    /// pre-W1 baseline.
    #[test]
    fn pointer_disabled_run_produces_no_field_taint() {
@ -2047,8 +2047,8 @@ mod field_write_tests {
            state.field_taint.is_empty(),
            "pointer-disabled run must not populate field_taint",
        );
-        // FieldProj reads still produce the receiver's existing taint —
-        // none — so no entry for SsaValue(3) either.
+        // FieldProj reads still produce the receiver's existing taint
+        // (none), so no entry for SsaValue(3) either.
        assert!(state.get(SsaValue(3)).is_none());
        let _ = cache_id;
    }
@ -2059,7 +2059,7 @@ mod field_write_tests {
    /// projected value's symbol-level `validated_must` from the cell.
    ///
    /// This is the key invariant: validation flows *through* abstract
-    /// field identity — the read recovers what the write recorded.
+    /// field identity; the read recovers what the write recorded.
    #[test]
    fn write_then_read_preserves_validated_must() {
        let (body, cache_id) = make_body();
@ -2208,7 +2208,7 @@ mod field_write_tests {
            },
        };
        let pf = crate::pointer::analyse_body(&body, crate::cfg::BodyId(0));
-        // v0 is Const → empty pt — the hook should not insert anything.
+        // v0 is Const → empty pt; the hook should not insert anything.
        assert!(
            pf.pt(SsaValue(0)).is_empty(),
            "Const value should have empty pt set",
@ -2259,7 +2259,7 @@ mod field_write_tests {
    }
 }

-// ── Pointer-Phase 4 / W2: container ELEM write/read round-trip ──────────
+// ── container ELEM write/read round-trip ──────────
 //
 // Container methods like `arr.push(v)` / `arr.shift()` flow per-element
 // taint through the `Field(_, ELEM)` cells on `SsaTaintState`.  These
@ -2351,7 +2351,7 @@ mod container_elem_tests {
        state
    }

-    /// `arr.push(source()); arr.shift()` — the read picks the source's
+    /// `arr.push(source()); arr.shift()`: the read picks the source's
    /// caps up via the ELEM cell.
    #[test]
    fn container_write_then_read_round_trips_taint() {
@ -2456,7 +2456,7 @@ mod container_elem_tests {
        );

        // Drive the transfer.  `e := arr.shift()` goes through the
-        // existing Call arm — the W2 path is the *write* on `push`.
+        // existing Call arm; the W2 path is the *write* on `push`.
        // The element-read side already exists on `analyse_body`; the
        // taint engine doesn't yet read field cells through call-result
        // paths (Call args are walked by Call's own argument-taint
@ -2482,7 +2482,7 @@ mod container_elem_tests {
        }
    }

-    /// W4: `arr.push(validate(src)); arr.shift()` — the push records
+    /// W4: `arr.push(validate(src)); arr.shift()`: the push records
    /// `validated_must = true` on the ELEM cell because the pushed
    /// value's symbol carried `validated_must`.  The shift call result
    /// reads through the cell and seeds the result symbol's
@ -2761,7 +2761,7 @@ mod container_elem_tests {
    }
 }

-// ── Pointer-Phase 5 / W3: cross-call field-points-to application ────────
+// ── cross-call field-points-to application ────────
 //
 // `apply_field_points_to_writes` is the resolver-side hook that turns
 // callee-summary `field_points_to.param_field_writes` into caller-side
@ -2783,7 +2783,7 @@ mod cross_call_field_tests {
    use smallvec::smallvec;
    use std::collections::HashMap;

-    /// W3 / W4: shared empty interner — these unit tests don't seed
+    /// W3 / W4: shared empty interner; these unit tests don't seed
    /// validation bits, so a fresh interner is sufficient for the
    /// `interner` parameter on `apply_field_points_to_writes`.
    fn empty_interner() -> SymbolInterner {
@ -2861,23 +2861,23 @@ mod cross_call_field_tests {
        state
    }

-    /// Callee summary with `param_field_writes[(0, ["cache"])]` —
+    /// Callee summary with `param_field_writes[(0, ["cache"])]`:
    /// "callee writes cache field on parameter 0 (obj)".
-    /// Caller passes `(obj, source)` to this callee — `arg 0 = obj`,
+    /// Caller passes `(obj, source)` to this callee, `arg 0 = obj`,
    /// but the W3 hook resolves the *value at arg position 0* as the
    /// receiver of the field write, populating its pt's cells.
    ///
    /// We model the caller as `callee(obj, source)` with arg 0 = obj
    /// (the receiver) and arg 1 = source (the value being written).
    /// The callee's signature is `fn store(obj, value) { obj.cache = value; }`
-    /// — so the field write on param 0 is keyed by `pt(obj)` and the
+    /// so the field write on param 0 is keyed by `pt(obj)` and the
    /// taint comes from arg 1's caps.  Our helper conservatively unions
-    /// every arg's taint into the cell — which over-tints (for this
+    /// every arg's taint into the cell, which over-tints (for this
    /// shape, arg 0's pt member becomes the loc, with arg 0's own taint
    /// applied), but is sound.
    ///
    /// To make the test precise, we model the simpler shape `fn store(obj)
-    /// { obj.cache = source(); }` — callee writes a literal source into
+    /// { obj.cache = source(); }`: callee writes a literal source into
    /// `obj.cache`, with no value parameter.  Then the caller-side hook
    /// only sees param 0's taint (zero), so the cell is empty and the
    /// test fails.
@ -2886,7 +2886,7 @@ mod cross_call_field_tests {
    /// at the call site arg 0 carries source taint.  The hook then
    /// records (pt(arg0_value), cache) ← arg0_value's taint.  In a
    /// real callee this corresponds to "callee writes its parameter
-    /// value into a self.cache field internally" — but the spread we
+    /// value into a self.cache field internally", but the spread we
    /// validate is just substitute-and-mirror.
    #[test]
    fn cross_call_writes_into_param_field_cell() {
@ -2947,7 +2947,7 @@ mod cross_call_field_tests {
    fn cross_call_receiver_field_uses_max_sentinel() {
        let (body, cache_id, pf) = caller_body();
        let mut state = SsaTaintState::initial();
-        // Seed receiver with taint — SsaValue(0) is the param/receiver.
+        // Seed receiver with taint, SsaValue(0) is the param/receiver.
        state.set(
            SsaValue(0),
            VarTaint {
@ -3026,7 +3026,7 @@ mod cross_call_field_tests {
        );
    }

-    /// Field names the caller never interned are skipped silently —
+    /// Field names the caller never interned are skipped silently;
    /// no FieldProj read in the caller could observe such a cell.
    #[test]
    fn cross_call_unknown_field_name_skipped() {
@ -3062,7 +3062,7 @@ mod cross_call_field_tests {
        );
    }

-    /// Overflow summary is treated conservatively as no-op — the
+    /// Overflow summary is treated conservatively as no-op; the
    /// engine cannot soundly cell-flood, so it skips entirely.
    #[test]
    fn cross_call_overflow_summary_is_noop() {
@ -3117,7 +3117,7 @@ mod cross_call_field_tests {
 //
 // `SsaTaintState.add_field` already routes through `merge_origins`, but
 // the FieldProj READ path used to walk the cell's origins inline,
-// deduping by node only — meaning a cell with N>cap origins surfaced
+// deduping by node only, meaning a cell with N>cap origins surfaced
 // all N to the projected SSA value.  After A7, the read path uses
 // `push_origin_bounded`, ensuring the cap-driven survivor selection
 // applies on read too.
@ -3225,7 +3225,7 @@ mod field_taint_origin_cap_tests {
        let (body, cache_id, cfg, _n_proj) = build_body();
        let pf = crate::pointer::analyse_body(&body, crate::cfg::BodyId(0));

-        // Pre-populate the (Param, cache) cell with 4 origins —
+        // Pre-populate the (Param, cache) cell with 4 origins,
        // 2× the cap.  The `add_field` path already truncates via
        // `merge_origins`, so we go through it 4 times to grow.
        let mut state = SsaTaintState::initial();
@ -3326,14 +3326,14 @@ mod field_taint_origin_cap_tests {
 // the field_taint cells.
 //
 // Two scenarios:
-// 1. `must_validated_flows_through_join` — both predecessor blocks
+// 1. `must_validated_flows_through_join`: both predecessor blocks
 //    write the cell with `validated_must = true`.  After the join, the
 //    cell at the read site retains `validated_must = true` (AND
 //    intersection of two `true`s).
-// 2. `early_exit_branch_drops_validated_must` — only one predecessor
+// 2. `early_exit_branch_drops_validated_must`: only one predecessor
 //    writes; the other reaches the read block via an empty branch.
 //    After the join, the cell has `validated_must = false`,
-//    `validated_may = true` — W4's must/may intersection in action.
+//    `validated_may = true`, W4's must/may intersection in action.
 #[cfg(test)]
 mod pointer_lattice_worklist_tests {
    use super::super::*;
@ -3425,7 +3425,7 @@ mod pointer_lattice_worklist_tests {
            succs: smallvec![BlockId(1), BlockId(2)],
        };

-        // Block 1: synth `obj.cache = src` — field_writes[v2] = (v0, cache_id)
+        // Block 1: synth `obj.cache = src`, field_writes[v2] = (v0, cache_id)
        let block1 = SsaBlock {
            id: BlockId(1),
            phis: vec![],
@ -3441,7 +3441,7 @@ mod pointer_lattice_worklist_tests {
            succs: smallvec![BlockId(3)],
        };

-        // Block 2: identical synth write — keeps both branches
+        // Block 2: identical synth write, keeps both branches
        // contributing the same cell so AND-intersection of must
        // preserves true on the join.
        let block2 = SsaBlock {
@ -3459,7 +3459,7 @@ mod pointer_lattice_worklist_tests {
            succs: smallvec![BlockId(3)],
        };

-        // Block 3: read — FieldProj uses obj from a phi between B1 and B2.
+        // Block 3: read, FieldProj uses obj from a phi between B1 and B2.
        let block3 = SsaBlock {
            id: BlockId(3),
            phis: vec![SsaInst {
@ -3634,7 +3634,7 @@ mod pointer_lattice_worklist_tests {
        );
    }

-    /// A2.b: early-exit branch — only B1 writes, B2 reaches B3 via
+    /// A2.b: early-exit branch, only B1 writes, B2 reaches B3 via
    /// an empty body.  After the join, the cell exists (B1 wrote
    /// it), but `validated_must` is `false` (B2 didn't write, the
    /// orphan-side merge clears `must` per the W4 lattice rule);
@ -3642,7 +3642,7 @@ mod pointer_lattice_worklist_tests {
    ///
    /// To exercise the validation channels we synthesise the cell
    /// directly at the appropriate exit state, then run the
-    /// worklist's join via two `SsaTaintState::join()` calls — the
+    /// worklist's join via two `SsaTaintState::join()` calls; the
    /// body's worklist itself doesn't seed `validated_must` on the
    /// rhs of an Assign, so we model the "writer recorded must=true"
    /// scenario at the lattice level rather than driving it through
--- a/src/taint/tests.rs
+++ b/src/taint/tests.rs
@ -698,7 +698,7 @@ fn cross_file_sink_finding_carries_primary_location() {
    );
    let finding = &findings[0];
    // Note: `uses_summary == false` here because the source (env::var) is
-    // local — only the *sink* was summary-resolved.  That's the case the
+    // local; only the *sink* was summary-resolved.  That's the case the
    // `primary_location` / `uses_summary` independence comment on
    // [`super::Finding::primary_location`] documents.
    let loc = finding
@ -925,7 +925,7 @@ fn multi_file_sink_in_another_file() {
        }
    "#;

-    // File B: env::var → exec_cmd() — sink is cross-file.
+    // File B: env::var → exec_cmd(), sink is cross-file.
    let caller_src = br#"
        use std::env;
        fn main() {
@ -956,7 +956,7 @@ fn multi_file_sink_in_another_file() {
 fn multi_file_passthrough_preserves_taint() {
    use crate::summary::FuncSummary;

-    // identity() just returns its argument — it propagates taint but has no
+    // identity() just returns its argument; it propagates taint but has no
    // source/sanitizer/sink caps of its own.
    let mut global = GlobalSummaries::new();
    let key = FuncKey {
@ -1071,7 +1071,7 @@ fn multi_file_chain_source_sanitize_sink_across_files() {
 fn sanitizer_strips_only_matching_bits() {
    // Source(ALL) → shell_escape → sink_html (HTML sink).
    // shell_escape strips SHELL_ESCAPE but not HTML_ESCAPE.
-    // sink_html is an HTML sink — HTML_ESCAPE bit is still set → 1 finding.
+    // sink_html is an HTML sink; HTML_ESCAPE bit is still set → 1 finding.
    let src = br#"
        use std::env;
        fn sink_html(s: &str) {}
@ -1142,7 +1142,7 @@ fn taint_through_variable_reassignment() {

 #[test]
 fn untainted_variable_at_sink_is_safe() {
-    // A string literal (not from a source) passed to Command — no finding.
+    // A string literal (not from a source) passed to Command, no finding.
    let src = br#"
        use std::process::Command;
        fn main() {
@ -1585,7 +1585,7 @@ fn cpp_source_to_sink() {
    );
 }

-/// Phase 2 (cpp-precision): `c_str()` is a const accessor on `std::string`
+/// `c_str()` is a const accessor on `std::string`
 /// that returns a pointer to the same buffer.  It must propagate taint from
 /// the receiver to the result so the downstream sink fires.
 #[test]
@ -1597,12 +1597,12 @@ fn cpp_c_str_propagates_taint() {
    let findings = analyse_file(&file_cfg, summaries, None, Lang::Cpp, "test.cpp", &[], None);
    assert!(
        !findings.is_empty(),
-        "C++: tainted s.c_str() into system() must fire (Phase 2 c_str passthrough)",
+        "C++: tainted s.c_str() into system() must fire",
    );
 }

-/// Phase 2: `std::move(x)` returns its argument unchanged in terms of
-/// data flow — the rvalue cast is a representation move, not a sanitiser.
+/// `std::move(x)` returns its argument unchanged in terms of
+/// data flow; the rvalue cast is a representation move, not a sanitiser.
 /// Default propagation collects argument taint into the result.
 #[test]
 fn cpp_std_move_propagates_taint() {
@ -1617,7 +1617,7 @@ fn cpp_std_move_propagates_taint() {
    );
 }

-/// Phase 2: `static_cast<T>(x)` is parsed as a call expression by
+/// `static_cast<T>(x)` is parsed as a call expression by
 /// tree-sitter-cpp; default propagation transports taint from the casted
 /// argument to the result.
 #[test]
@ -1633,7 +1633,7 @@ fn cpp_static_cast_propagates_taint() {
    );
 }

-/// Phase 5 (cpp-precision): a fluent builder chain whose host
+/// A fluent builder chain whose host
 /// argument is tainted should fire on the terminal `.connect()`
 /// SSRF sink.  The chained `.host(...)` / `.port(...)` calls return
 /// the receiver, and default Call-arg propagation puts the tainted
@ -1647,12 +1647,12 @@ fn cpp_builder_chain_user_host_fires() {
    let findings = analyse_file(&file_cfg, summaries, None, Lang::Cpp, "test.cpp", &[], None);
    assert!(
        !findings.is_empty(),
-        "C++: tainted host through fluent builder chain must reach terminal connect() (Phase 5)",
+        "C++: tainted host through fluent builder chain must reach terminal connect()",
    );
 }

-/// Phase 5: a fluent builder chain with a hardcoded host literal
-/// must NOT fire on the terminal connect() sink — the chain carries
+/// A fluent builder chain with a hardcoded host literal
+/// must NOT fire on the terminal connect() sink; the chain carries
 /// no taint.
 #[test]
 fn cpp_builder_chain_const_host_silent() {
@ -1663,11 +1663,11 @@ fn cpp_builder_chain_const_host_silent() {
    let findings = analyse_file(&file_cfg, summaries, None, Lang::Cpp, "test.cpp", &[], None);
    assert!(
        findings.is_empty(),
-        "C++: builder chain with literal host must NOT fire (Phase 5 negative)",
+        "C++: builder chain with literal host must NOT fire (Negative)",
    );
 }

-/// Phase 4 (cpp-precision): inline member-function bodies inside a
+/// Inline member-function bodies inside a
 /// `class_specifier` must be extracted as separate functions and
 /// intra-file calls must resolve to their bodies. Pre-Phase-4, the
 /// `class_specifier` AST kind was unmapped in cpp KINDS, so the CFG
@ -1682,11 +1682,11 @@ fn cpp_inline_class_method_resolves() {
    let findings = analyse_file(&file_cfg, summaries, None, Lang::Cpp, "test.cpp", &[], None);
    assert!(
        !findings.is_empty(),
-        "C++: tainted arg through inline class method must reach system() (Phase 4)",
+        "C++: tainted arg through inline class method must reach system()",
    );
 }

-/// Phase 3 (cpp-precision): a tainted argument passed through an
+/// A tainted argument passed through an
 /// identity-style lambda (`auto echo = [](const char* s) { return s; }`)
 /// must reach the downstream sink. This is handled by the same default
 /// Call-arg propagation as `std::move`/`static_cast`; pinning the
@ -1705,7 +1705,7 @@ fn cpp_identity_lambda_propagates_taint() {
    );
 }

-/// Phase 2: `std::vector<char>::data()` is a Load-style container op that
+/// `std::vector<char>::data()` is a Load-style container op that
 /// returns a pointer to the underlying buffer; `system(v.data())` should
 /// fire when `v` is tainted.
 #[test]
@ -1801,7 +1801,7 @@ fn ruby_source_to_sink() {
 // ─────────────────────────────────────────────────────────────────────────────
 //
 // Cross-language resolution now requires explicit InteropEdge declarations.
-// Without an edge, functions from different languages are never resolved —
+// Without an edge, functions from different languages are never resolved;
 // this prevents false positives from name collisions across languages.

 /// Extract cross-file summaries from any language's source bytes.
@ -1984,7 +1984,7 @@ fn cross_lang_rust_sanitizer_in_js_via_interop() {
        None,
    );
    // eval uses Cap::all(), so a SHELL_ESCAPE sanitizer alone does NOT
-    // neutralise taint — shell-escape is semantically wrong for code injection.
+    // neutralise taint; shell-escape is semantically wrong for code injection.
    // The finding should still be reported.
    assert!(
        !findings.is_empty(),
@ -2481,7 +2481,7 @@ fn cross_lang_summary_preserves_lang_metadata() {

    let global = merge_summaries(vec![py_summary, js_summary], None);

-    // They are now separate entries — not merged
+    // They are now separate entries, not merged
    let py_matches = global.lookup_same_lang(Lang::Python, "helper");
    let js_matches = global.lookup_same_lang(Lang::JavaScript, "helper");

@ -2609,7 +2609,7 @@ fn ambiguous_resolution_returns_none() {
        );
    }

-    // Caller from c.rs calls helper() — ambiguous (two matches, neither is caller's namespace)
+    // Caller from c.rs calls helper(), ambiguous (two matches, neither is caller's namespace)
    let src = br#"
        use std::process::Command;
        fn main() {
@ -2855,7 +2855,7 @@ fn validate_and_early_return() {
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(&file_cfg, summaries, None, Lang::Rust, "test.rs", &[], None);

-    // Validated findings are now suppressed — validate() guard means the
+    // Validated findings are now suppressed; validate() guard means the
    // sink is on the safe path, so no finding should be emitted.
    assert_eq!(findings.len(), 0, "validated finding should be suppressed");
 }
@ -2888,7 +2888,7 @@ fn validate_in_if_else_path_validated() {
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(&file_cfg, summaries, None, Lang::Rust, "test.rs", &[], None);

-    // Validated findings are now suppressed — sink is in the validated
+    // Validated findings are now suppressed; sink is in the validated
    // branch, so no finding should be emitted.
    assert_eq!(findings.len(), 0, "validated finding should be suppressed");
 }
@ -2932,7 +2932,7 @@ fn contradictory_null_check_pruned() {

    // Inner branch is infeasible: if x.is_none() then x cannot also be is_none().
    // After early return on is_none(), the fall-through path has polarity=false
-    // for NullCheck. The inner `if x.is_none()` True branch has polarity=true —
+    // for NullCheck. The inner `if x.is_none()` True branch has polarity=true, a
    // contradiction.
    let src = br#"
        use std::env; use std::process::Command;
@ -3045,7 +3045,7 @@ fn path_state_budget_graceful() {
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(&file_cfg, summaries, None, Lang::Rust, "test.rs", &[], None);

-    // Should still detect the flow — truncation shouldn't cause false negatives.
+    // Should still detect the flow; truncation shouldn't cause false negatives.
    assert_eq!(
        findings.len(),
        1,
@ -3080,7 +3080,7 @@ fn unknown_predicate_not_pruned() {
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(&file_cfg, summaries, None, Lang::Rust, "test.rs", &[], None);

-    // Comparison is not in the whitelist — the path should NOT be pruned.
+    // Comparison is not in the whitelist; the path should NOT be pruned.
    assert_eq!(
        findings.len(),
        1,
@ -3096,7 +3096,7 @@ fn duplicate_null_guard_prunes_unreachable_sink() {
    // After `if y.is_none() { return; }`, the false arm proves
    // `y.is_none() == false` on the only surviving path.  A second
    // `if y.is_none() { sink }` then adds `y.is_none() == true` on the
-    // body's True arm — a per-symbol PredicateSummary contradiction
+    // body's True arm, a per-symbol PredicateSummary contradiction
    // (known_true & known_false on bit NullCheck).  The body is
    // structurally unreachable; the sink must not fire.
    //
@ -3573,7 +3573,7 @@ fn js_two_level_converges_no_mutation() {

 #[test]
 fn catch_param_to_sink_has_caught_exception_source_kind() {
-    // Catch param flows to a sink — the finding source_kind must be
+    // Catch param flows to a sink; the finding source_kind must be
    // CaughtException, not Unknown.
    let src = b"
        const { exec } = require('child_process');
@ -3743,7 +3743,7 @@ fn assert_ssa_integration(src: &[u8]) {
    // High-level path (per-body analysis)
    let high_level = analyse_file(&file_cfg, summaries, None, Lang::Rust, "test.rs", &[], None);

-    // Direct SSA path — use the first function body (fn main), not top-level
+    // Direct SSA path, use the first function body (fn main), not top-level
    let body = if file_cfg.bodies.len() > 1 {
        &file_cfg.bodies[1]
    } else {
@ -4654,7 +4654,7 @@ fn ssa_induction_var_no_taint() {

 #[test]
 fn ssa_loop_tainted_var_not_induction() {
-    // `x` is tainted and transformed in a loop — NOT an induction variable
+    // `x` is tainted and transformed in a loop, NOT an induction variable
    let src = br#"
        use std::{env, process::Command};
        fn main() {
@ -4766,7 +4766,7 @@ fn ssa_phi_path_sensitive_both_branches_validated() {
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(&file_cfg, summaries, None, Lang::Rust, "test.rs", &[], None);

-    // Validated findings are now suppressed — sink is in the validated
+    // Validated findings are now suppressed; sink is in the validated
    // branch, so no finding should be emitted.
    assert_eq!(findings.len(), 0, "validated finding should be suppressed");
 }
@ -5116,7 +5116,7 @@ fn abstract_ssrf_prefix_linear_suppression() {
 /// Two predecessor blocks produce string concat values with different safe
 /// prefixes ("https://api.example.com/users/" and "https://api.example.com/admins/").
 /// A phi merges them. The LCP of the prefixes is "https://api.example.com/" which
-/// still has scheme://host/ — so SSRF suppression should fire.
+/// still has scheme://host/, so SSRF suppression should fire.
 ///
 /// Before the phi replay fix, collect_block_events did NOT replay abstract phis,
 /// leaving the phi result's abstract value as Top (stale). The SSRF suppression
@ -5255,7 +5255,7 @@ fn phi_validated_must_requires_all_paths() {
    use tree_sitter::Language;

    // Path A validates x, path B does NOT validate x.
-    // The phi for x after the merge must NOT get validated_must — only
+    // The phi for x after the merge must NOT get validated_must, only
    // validated_may (since at least one path validated). The sink after
    // the merge must still fire because the must-analysis says "not
    // definitely validated on all paths".
@ -5324,7 +5324,7 @@ fn inline_return_constant_with_internal_source_produces_no_finding() {
        None,
    );

-    // transform() returns a constant — no taint should leak to caller
+    // transform() returns a constant, so no taint should leak to the caller
    assert_eq!(
        findings.len(),
        0,
@ -5386,7 +5386,7 @@ fn inline_return_taint_internal_source_does_not_widen_caps() {
    // Callee has an internal source (document.location) alongside a tainted
    // param. The explicit return value is the param. Without the C-1 fix,
    // extract_inline_return_taint would union ALL live tainted values' caps
-    // — the internal source's derived-caps would override the param-caps
+    // so the internal source's derived-caps would override the param-caps
    // (derived takes priority in the extraction logic). With the fix, only
    // the return value's taint is collected, so param taint is returned
    // correctly.
@ -5420,7 +5420,7 @@ fn inline_return_taint_internal_source_does_not_widen_caps() {
        None,
    );

-    // The callee returns cmd (tainted param) — 1 finding expected.
+    // The callee returns cmd (tainted param), so 1 finding is expected.
    // The internal document.location() should NOT widen the return taint.
    assert_eq!(
        findings.len(),
@ -5435,7 +5435,7 @@ fn inline_return_taint_internal_source_does_not_widen_caps() {
 ///
 /// Two class methods share the leaf name `process` in the same file.  If the
 /// summary map were keyed by bare name (or raw file-path namespace), the
-/// second lowering would overwrite the first — both methods would end up
+/// second lowering would overwrite the first, and both methods would end up
 /// pointing at whichever summary was extracted last.
 ///
 /// With canonical `FuncKey` identity (`container` discriminates them) both
@ -5483,7 +5483,7 @@ class Worker {
        summaries.keys().collect::<Vec<_>>(),
    );

-    // Same invariant on the cached-bodies map — inline analysis depends on
+    // Same invariant on the cached-bodies map: inline analysis depends on
    // being able to fetch the correct body by full FuncKey.
    let mut body_containers: Vec<String> = bodies
        .iter()
@ -5593,6 +5593,7 @@ fn make_finding_for_link_test(
        path_hash,
        finding_id: String::new(),
        alternative_finding_ids: smallvec::SmallVec::new(),
+        effective_sink_caps: crate::labels::Cap::empty(),
    }
 }

@ -5628,7 +5629,7 @@ fn finding_id_encodes_validation_and_path_hash() {
    );

    // Differing path_hash produces a different ID even with the same
-    // (body, source, sink, validated) — the whole point of the path
+    // (body, source, sink, validated), the whole point of the path
    // component in the dedup key.
    let mut u2 = make_finding_for_link_test(1, 3, 7, 0xdead_beef_0000_0002, false);
    u2.finding_id = super::make_finding_id(&u2);
@ -5639,7 +5640,7 @@ fn finding_id_encodes_validation_and_path_hash() {
 }

 /// `link_alternative_paths` must cross-link findings that share
-/// `(body_id, sink, source)` — so a validated flow and an unvalidated
+/// `(body_id, sink, source)`, so a validated flow and an unvalidated
 /// flow on the same source/sink pair each list the other's ID.
 #[test]
 fn link_alternative_paths_cross_references_same_body_sink_source() {
@ -5668,18 +5669,18 @@ fn link_alternative_paths_cross_references_same_body_sink_source() {
 }

 /// Findings that differ on `(body_id, sink, source)` are independent
-/// vulnerabilities — they must **not** end up cross-linked as
+/// vulnerabilities; they must **not** end up cross-linked as
 /// alternatives, otherwise the "alternative path" framing becomes
 /// noise.
 #[test]
 fn link_alternative_paths_does_not_link_distinct_sink_source() {
    let mut findings = vec![
        make_finding_for_link_test(1, 3, 7, 0x1111, false),
-        // Different sink — independent finding, not an alternative.
+        // Different sink: independent finding, not an alternative.
        make_finding_for_link_test(1, 3, 8, 0x1111, false),
-        // Different source — also independent.
+        // Different source: also independent.
        make_finding_for_link_test(1, 4, 7, 0x1111, false),
-        // Different body — also independent.
+        // Different body: also independent.
        make_finding_for_link_test(2, 3, 7, 0x1111, false),
    ];
    for f in &mut findings {
@ -5697,7 +5698,7 @@ fn link_alternative_paths_does_not_link_distinct_sink_source() {

 /// When the same `(body, sink, source)` has three sibling findings
 /// (e.g. validated, unvalidated-path-A, unvalidated-path-B), each
-/// finding must list the other two — the group is symmetric and
+/// finding must list the other two; the group is symmetric and
 /// complete rather than a chain.
 #[test]
 fn link_alternative_paths_three_way_group() {
@ -5726,14 +5727,14 @@ fn link_alternative_paths_three_way_group() {
 }

 // ─────────────────────────────────────────────────────────────────────────────
-//  Typed call-graph devirtualisation — Phase 2 (typed_call_receivers)
+//  Typed call-graph devirtualisation (typed_call_receivers)
 // ─────────────────────────────────────────────────────────────────────────────

-/// Phase 2: when a method call's receiver was constructed from a known
+/// When a method call's receiver was constructed from a known
 /// constructor (`File::open` → `FileHandle`), the SSA-extraction
 /// pipeline must record `(call_ordinal, "FileHandle")` on the
 /// caller's [`crate::summary::ssa_summary::SsaFuncSummary::typed_call_receivers`]
-/// so Phase 3 can devirtualise the cross-file edge.
+/// so build_call_graph can devirtualise the cross-file edge.
 ///
 /// Uses Java because `FileInputStream` / `FileOutputStream` are part
 /// of the [`crate::ssa::type_facts::constructor_type`] table for Java
@ -5779,14 +5780,14 @@ class Reader {
    );
 }

-/// Phase 2 negative control: free-function calls (no receiver) must
+/// Negative control: free-function calls (no receiver) must
 /// never appear in `typed_call_receivers`.  Even when the callee is a
 /// known type-producing constructor, it sits in the body as a Call
 /// with `receiver = None` and is not a candidate for devirtualisation.
 #[test]
 fn typed_call_receivers_skips_free_function_calls() {
    // `new FileInputStream(...)` is a constructor invocation with no
-    // receiver — exactly the shape we want to ignore.
+    // receiver, exactly the shape we want to ignore.
    let src = br#"
 class Maker {
    void make() {
@ -5808,10 +5809,10 @@ class Maker {

    // make() has zero parameters and no fresh-allocation return, so the
    // generic insertion gate skips it.  The phase-2 patch only force-
-    // inserts when `typed_call_receivers` is non-empty — which it
+    // inserts when `typed_call_receivers` is non-empty, which it
    // isn't here, since `new FileInputStream(...)` is a free-function-
    // shaped constructor call (no SSA receiver).  So either the
-    // summary is absent, or — if some other side effect inserted it —
+    // summary is absent, or, if some other side effect inserted it,
    // its `typed_call_receivers` is empty.  Both forms prove no
    // spurious typed entry was recorded.
    let typed = summaries
@ -5829,7 +5830,7 @@ class Maker {
 /// Regression: nested arrow functions inside `return new Promise((res,rej)
 /// => { ... })` must be lifted as separate bodies. Before the Kind::Return
 /// arm in cfg/mod.rs called `collect_nested_function_nodes`, only the
-/// outer function (`downloadFromUri`) was extracted — the executor and
+/// outer function (`downloadFromUri`) was extracted; the executor and
 /// its inner callbacks were silently swallowed, hiding the inner gated
 /// http.get sink from classification. Motivated by CVE-2025-64430.
 #[test]
@ -5972,7 +5973,7 @@ const handler = (req) => {
 /// The augment pass populates `downloadFromUri.summary.param_to_sink:
 /// [(0, SSRF)]` (single-hop closure-capture lift). For the handler's
 /// `helper(req.body)` call to fire, `helper.summary.param_to_sink` must
-/// also contain `[(0, SSRF)]` — but that requires `helper`'s probe to
+/// also contain `[(0, SSRF)]`, but that requires `helper`'s probe to
 /// see `downloadFromUri`'s augmented summary at resolution time.
 ///
 /// Because the probe currently runs with `ssa_summaries=None`,
@ -6065,11 +6066,198 @@ const handler = (req) => {
 /// `middle.summary.param_to_sink`, then handler's call site picks it up.
 ///
 /// Today the second-pass runs only once (no fixed-point), so depth-3+
-/// is expected to NOT fire — guards against accidental fixed-point
+/// is expected to NOT fire; this guards against accidental fixed-point
 /// regression that would mask an over-eager rewrite.  Marked
 /// `#[ignore]` so it documents the depth limit without breaking CI.
 /// Motivated by CVE-2025-64430 corner case; remove the `#[ignore]` and
 /// any guarding `assert!` polarity if a fixed-point is added later.
+/// Indirect-validator branch narrowing: when an if-condition is a
+/// bare result variable whose reaching SSA def is a Call to a
+/// callee classified by `classify_input_validator_callee` (e.g.
+/// `validateUrlSsrf`, `verifyToken`, `isValidUrl`), the validator's
+/// argument is treated as validated on the success branch.
+///
+/// This exercises the SSA-level
+/// `apply_input_validator_branch_narrowing` end to end: the test runs
+/// `analyse_file` on a direct source-to-sink flow guarded by a
+/// validator call and asserts that no finding is reported for the
+/// `axios.get` sink.
+///
+/// Direct-flow shape (no helper indirection); the helper-summary
+/// case still has open architectural gaps: `validated_must` doesn't
+/// propagate through `param_to_sink` summaries, and the same gap blocks
+/// AllowlistCheck-in-helper (see CVE_DEFERRED.md, GHSA-4x48-cgf9-q33f).
+///
+/// Motivated by Novu CVE GHSA-4x48-cgf9-q33f
+/// (`const ssrfError = await validateUrlSsrf(child.webhookUrl); if (ssrfError) throw …;`).
+#[test]
+fn indirect_validator_narrowing_marks_arg_validated() {
+    let src = br#"
+async function handler(req) {
+  const target = req.query.url;
+  const ssrfError = await validateUrlSsrf(target);
+  if (ssrfError) {
+    throw new Error('blocked');
+  }
+  await axios.get(target);
+}
+"#;
+    let lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
+    let file_cfg = parse_lang(src, "javascript", lang);
+    let summaries = &file_cfg.summaries;
+    let findings = analyse_file(
+        &file_cfg,
+        summaries,
+        None,
+        Lang::JavaScript,
+        "test.js",
+        &[],
+        None,
+    );
+    // Direct-flow: validator narrowing should clear axios.get's taint event.
+    assert!(
+        findings.is_empty(),
+        "validator narrowing should suppress direct-flow SSRF; got {} finding(s)",
+        findings.len()
+    );
+}
+
+/// Regression: `extract_ssa_func_summary` must skip `all_validated`
+/// events when populating `param_to_sink` / `param_to_sink_param`.
+///
+/// Helper bodies whose validator-call branch narrowing fired produce
+/// per-param probe events flagged `all_validated=true`.  Without
+/// summary-extract suppression, callers would still see the helper
+/// in their summary's sink set and refire on `helper(taintedArg)`
+/// even though the validator inside the helper proved the path
+/// safe.  The caller can't see the validator (it's behind the
+/// summary), so the gap manifests as a precision miss only when
+/// helper + caller are in the same file.
+///
+/// Closes the helper-summary half of Novu CVE GHSA-4x48-cgf9-q33f.
+#[test]
+fn helper_with_validator_does_not_propagate_to_caller_via_summary() {
+    let src = br#"
+async function getWebhookResponse(child) {
+    const ssrfError = await validateUrlSsrf(child.webhookUrl);
+    if (ssrfError) {
+        throw new Error('blocked');
+    }
+    return await axios.post(child.webhookUrl, {});
+}
+
+async function handler(req) {
+    const child = req.body.filter;
+    const r = await getWebhookResponse(child);
+    return r;
+}
+"#;
+    let lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
+    let file_cfg = parse_lang(src, "javascript", lang);
+    let summaries = &file_cfg.summaries;
+    let findings = analyse_file(
+        &file_cfg,
+        summaries,
+        None,
+        Lang::JavaScript,
+        "test.js",
+        &[],
+        None,
+    );
+    assert!(
+        findings.is_empty(),
+        "helper-with-validator should not propagate sink via summary; got {} finding(s)",
+        findings.len()
+    );
+}
+
+/// Companion: same shape WITHOUT the validator inside the helper
+/// must still fire so the precision gain is targeted.  Asserts
+/// `all_validated` skip doesn't accidentally suppress unsafe helpers.
+#[test]
+fn helper_without_validator_still_propagates_to_caller_via_summary() {
+    let src = br#"
+async function getWebhookResponse(child) {
+    return await axios.post(child.webhookUrl, {});
+}
+
+async function handler(req) {
+    const child = req.body.filter;
+    const r = await getWebhookResponse(child);
+    return r;
+}
+"#;
+    let lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
+    let file_cfg = parse_lang(src, "javascript", lang);
+    let summaries = &file_cfg.summaries;
+    let findings = analyse_file(
+        &file_cfg,
+        summaries,
+        None,
+        Lang::JavaScript,
+        "test.js",
+        &[],
+        None,
+    );
+    assert!(
+        !findings.is_empty(),
+        "helper-without-validator must still flag the cross-fn SSRF path",
+    );
+}
+
+/// Regression: `validate*`-named callees (and `verifyToken`) classify as
+/// `InputValidatorPolarity::ErrorReturning`, where a bare `if (err) throw`
+/// guards the success branch (false branch).  `is_valid*`/`is_safe*`
+/// callees classify as `InputValidatorPolarity::BooleanTrueIsValid`, where
+/// a bare `if (!ok) throw` guards the success branch (true branch via
+/// `condition_negated`).
+#[test]
+fn classify_input_validator_callee_polarity_buckets() {
+    use crate::ssa::type_facts::{InputValidatorPolarity, classify_input_validator_callee};
+
+    // ErrorReturning bucket
+    assert_eq!(
+        classify_input_validator_callee("validateUrlSsrf"),
+        Some(InputValidatorPolarity::ErrorReturning)
+    );
+    assert_eq!(
+        classify_input_validator_callee("verifyToken"),
+        Some(InputValidatorPolarity::ErrorReturning)
+    );
+    assert_eq!(
+        classify_input_validator_callee("validate_url"),
+        Some(InputValidatorPolarity::ErrorReturning)
+    );
+
+    // BooleanTrueIsValid bucket
+    assert_eq!(
+        classify_input_validator_callee("isValidUrl"),
+        Some(InputValidatorPolarity::BooleanTrueIsValid)
+    );
+    assert_eq!(
+        classify_input_validator_callee("is_valid_email"),
+        Some(InputValidatorPolarity::BooleanTrueIsValid)
+    );
+    assert_eq!(
+        classify_input_validator_callee("isSafe"),
+        Some(InputValidatorPolarity::BooleanTrueIsValid)
+    );
+
+    // Negative: names that look like validators but are auth-flavored
+    // (`checkPermissions`, `is_authorized`) are intentionally not
+    // matched here; they have separate semantics in the auth pipeline.
+    assert_eq!(classify_input_validator_callee("checkPermissions"), None);
+    assert_eq!(classify_input_validator_callee("is_authorized"), None);
+    assert_eq!(classify_input_validator_callee("randomThing"), None);
+
+    // Path-prefix peeling: `obj.validateXxx` should classify the same
+    // as the bare callee.
+    assert_eq!(
+        classify_input_validator_callee("validator.validateUrlSsrf"),
+        Some(InputValidatorPolarity::ErrorReturning)
+    );
+}
+
 #[test]
 #[ignore]
 fn cve_2025_64430_three_hop_transitive_documents_depth_limit() {