mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-24 20:28:06 +02:00
Python fp and docs updates (#58)
* refactor: Update comments for clarity and add expectations.json files for performance metrics * feat: Implement FP guard for JS/TS local-collection receivers to suppress missing ownership checks * feat: Enhance Rust parameter handling to classify local collections and prevent false ownership checks * refactor: Simplify code formatting for better readability in multiple files * refactor: Improve UTF-8 sequence length handling and enhance clarity in loop iteration * feat: Update Java and Python patterns to include new security rules * refactor: Improve comment clarity and consistency across multiple Rust files * refactor: Simplify code formatting for improved readability in integration tests and module files * refactor: Improve comment formatting and enhance clarity in assertions across multiple files
This commit is contained in:
parent
4db0805de6
commit
a438886217
291 changed files with 9485 additions and 3851 deletions
|
|
@ -3,7 +3,7 @@
|
|||
//! The forward taint engine (`ssa_transfer.rs`) proceeds source-to-sink,
|
||||
//! spending analysis budget on every function the source might touch. Its
|
||||
//! precision ceiling is fixed by what summaries + inline re-analysis can
|
||||
//! preserve on every edge of a flow — a single lossy edge drops the finding.
|
||||
//! preserve on every edge of a flow; a single lossy edge drops the finding.
|
||||
//!
|
||||
//! This module implements the opposite direction: start at each sink value,
|
||||
//! walk *reverse* SSA edges and (when needed) cross-file callee bodies on
|
||||
|
|
@ -16,7 +16,7 @@
|
|||
//! reaches a matching source, we append `backwards-confirmed` to the
|
||||
//! finding's evidence notes.
|
||||
//! * When the backwards walk proves the flow infeasible via accumulated
|
||||
//! path predicates, we append `backwards-infeasible` — consumed by the
|
||||
//! path predicates, we append `backwards-infeasible`, consumed by the
|
||||
//! confidence scorer as a cap-to-Low signal.
|
||||
//! * Backward flows that reach a source with no matching forward finding
|
||||
//! become standalone `taint-backwards-flow` diags (a separate rule id so
|
||||
|
|
@ -63,7 +63,7 @@ pub const MAX_BACKWARDS_CALLEE_BLOCKS: usize = 500;
|
|||
/// the finding, and which predicate evidence (if any) has been gathered so
|
||||
/// far.
|
||||
///
|
||||
/// `caps` is monotone — the walk can only narrow the demand (by proving
|
||||
/// `caps` is monotone: the walk can only narrow the demand (by proving
|
||||
/// operands validated or sanitized against specific capability bits), never
|
||||
/// widen it. This keeps backwards composition with summary-derived
|
||||
/// transforms sound.
|
||||
|
|
@ -140,7 +140,7 @@ pub const MAX_CHAIN_LEN: usize = 16;
|
|||
/// The context is intentionally narrow: it borrows from whatever analysis
|
||||
/// objects the caller has already prepared (summaries, the current body,
|
||||
/// cross-file body maps) and does not build its own. This keeps the
|
||||
/// backwards pass cheap to enable — when off, none of this code is touched.
|
||||
/// backwards pass cheap to enable: when off, none of this code is touched.
|
||||
pub struct BackwardsCtx<'a> {
|
||||
/// Callee's SSA body.
|
||||
pub ssa: &'a SsaBody,
|
||||
|
|
@ -178,7 +178,7 @@ impl<'a> BackwardsCtx<'a> {
|
|||
|
||||
/// One step of the backwards transfer: given a demand on `value`, compute
|
||||
/// the demand on its immediate SSA operands. Returns the list of
|
||||
/// `(operand, demand)` pairs — possibly empty if the defining op terminates
|
||||
/// `(operand, demand)` pairs, possibly empty if the defining op terminates
|
||||
/// the walk (Source/Const/Param).
|
||||
///
|
||||
/// This is a pure function over the op and demand; cycle detection and
|
||||
|
|
@ -224,7 +224,7 @@ pub fn backward_transfer(
|
|||
SsaOp::CatchParam => (BackwardStep::ReachedCatchParam, SmallVec::new()),
|
||||
SsaOp::Nop => (BackwardStep::Unknown, SmallVec::new()),
|
||||
// Undef is a phi-operand sentinel on edges with no reaching
|
||||
// definition — nothing to trace backwards through.
|
||||
// definition; there is nothing to trace backwards through.
|
||||
SsaOp::Undef => (BackwardStep::ReachedConst, SmallVec::new()),
|
||||
SsaOp::Phi(operands) => {
|
||||
// Demand fans out to every incoming value: the runtime value of
|
||||
|
|
@ -254,7 +254,7 @@ pub fn backward_transfer(
|
|||
..
|
||||
} => {
|
||||
// For Call ops the full demand transfer depends on callee
|
||||
// metadata (summary or body). The driver handles that —
|
||||
// metadata (summary or body). The driver handles that;
|
||||
// return a `BackwardStep::Call` carrying the receiver + args
|
||||
// so the driver can consult [`GlobalSummaries`] / bodies_by_key.
|
||||
let mut flat: SmallVec<[(SsaValue, DemandState); 4]> = SmallVec::new();
|
||||
|
|
@ -276,7 +276,7 @@ pub fn backward_transfer(
|
|||
SsaOp::FieldProj { receiver, .. } => {
|
||||
// Field projection: demand for `obj.f` flows to `obj`. Treated
|
||||
// structurally like a single-operand Assign for the backwards
|
||||
// walk — sufficient until Phase 4 introduces field-sensitive
|
||||
// walk, sufficient until future passes introduce field-sensitive
|
||||
// demand discrimination.
|
||||
let mut next: SmallVec<[(SsaValue, DemandState); 4]> = SmallVec::new();
|
||||
next.push((*receiver, demand.clone()));
|
||||
|
|
@ -290,12 +290,12 @@ pub fn backward_transfer(
|
|||
/// resolution.
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub enum BackwardStep {
|
||||
/// Defining op is a tainted [`SsaOp::Source`] — walk terminates with a
|
||||
/// Defining op is a tainted [`SsaOp::Source`], walk terminates with a
|
||||
/// confirmed flow.
|
||||
ReachedSource(NodeIndex),
|
||||
/// Defining op is a [`SsaOp::Const`] — walk terminates without a source.
|
||||
/// Defining op is a [`SsaOp::Const`], walk terminates without a source.
|
||||
ReachedConst,
|
||||
/// Defining op is an [`SsaOp::Param`] / [`SsaOp::SelfParam`] — walk may
|
||||
/// Defining op is an [`SsaOp::Param`] / [`SsaOp::SelfParam`], walk may
|
||||
/// continue by resolving the parameter against the caller's arguments
|
||||
/// (requires reverse call-graph expansion, which is out of scope for
|
||||
/// the current cut and is handled as a terminal step).
|
||||
|
|
@ -305,13 +305,13 @@ pub enum BackwardStep {
|
|||
/// the actual exception source requires exception-edge traversal not
|
||||
/// performed here.
|
||||
ReachedCatchParam,
|
||||
/// Phi node — driver fans out to predecessors.
|
||||
/// Phi node, driver fans out to predecessors.
|
||||
Phi,
|
||||
/// Arithmetic / copy / cast — driver fans out to operands.
|
||||
/// Arithmetic / copy / cast, driver fans out to operands.
|
||||
Assign,
|
||||
/// Call op — driver consults summaries and/or callee bodies.
|
||||
/// Call op, driver consults summaries and/or callee bodies.
|
||||
Call { callee: String },
|
||||
/// Defining op could not be located or was a [`SsaOp::Nop`] — walk
|
||||
/// Defining op could not be located or was a [`SsaOp::Nop`], walk
|
||||
/// terminates as inconclusive.
|
||||
Unknown,
|
||||
}
|
||||
|
|
@ -321,7 +321,7 @@ pub enum BackwardStep {
|
|||
/// Walk backwards from `sink_value` in `ctx.ssa`, producing at most one
|
||||
/// [`BackwardFlow`] per reached source (phi fan-outs can produce multiple).
|
||||
///
|
||||
/// Does not consult forward findings — the caller is responsible for
|
||||
/// Does not consult forward findings, the caller is responsible for
|
||||
/// matching the returned flows against its finding set.
|
||||
pub fn analyse_sink_backwards(
|
||||
ctx: &BackwardsCtx<'_>,
|
||||
|
|
@ -385,7 +385,7 @@ fn walk_dfs(
|
|||
// Before dispatching on the SSA op kind, consult the defining CFG node's
|
||||
// label set. Many Source-labelled callables in the CFG lower to an
|
||||
// `SsaOp::Call` rather than `SsaOp::Source` (request.args.get,
|
||||
// os.getenv, …) — recognising the label here keeps the walk in
|
||||
// os.getenv, …), recognising the label here keeps the walk in
|
||||
// sync with the forward engine's source model.
|
||||
let def_cfg_node = ctx.ssa.def_of(value).cfg_node;
|
||||
if def_cfg_node.index() < ctx.cfg.node_count() {
|
||||
|
|
@ -429,7 +429,7 @@ fn walk_dfs(
|
|||
});
|
||||
}
|
||||
BackwardStep::ReachedConst => {
|
||||
// Constants never supply taint — treat as a silent prune.
|
||||
// Constants never supply taint; treat as a silent prune.
|
||||
}
|
||||
BackwardStep::ReachedParam { index: _, node } => {
|
||||
// Reverse-call-graph expansion is intentionally left out of the
|
||||
|
|
@ -452,7 +452,7 @@ fn walk_dfs(
|
|||
});
|
||||
}
|
||||
BackwardStep::ReachedCatchParam => {
|
||||
// Exception-borne taint — record but don't confirm. Marked
|
||||
// Exception-borne taint, record but don't confirm. Marked
|
||||
// non-confirmatory so unit tests can distinguish "walk reached
|
||||
// catch-param" from "walk reached source".
|
||||
}
|
||||
|
|
@ -514,7 +514,7 @@ fn walk_dfs(
|
|||
}
|
||||
}
|
||||
// Prevent an unused-variable warning while still accepting
|
||||
// the key in the matcher — the key is useful for debug
|
||||
// the key in the matcher, the key is useful for debug
|
||||
// logging in bigger expansions.
|
||||
let _ = callee_key;
|
||||
return;
|
||||
|
|
@ -539,7 +539,7 @@ fn walk_dfs(
|
|||
}
|
||||
}
|
||||
BackwardStep::Unknown => {
|
||||
// No information — terminate silently.
|
||||
// No information, terminate silently.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -632,12 +632,12 @@ pub const NOTE_BUDGET: &str = "backwards-budget-exhausted";
|
|||
/// Classification for a forward finding after backwards post-processing.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub enum FindingVerdict {
|
||||
/// Backwards reached a matching source — finding corroborated.
|
||||
/// Backwards reached a matching source, finding corroborated.
|
||||
Confirmed,
|
||||
/// Backwards was inconclusive (no source, not infeasible). Finding
|
||||
/// keeps its forward-assigned confidence.
|
||||
Inconclusive,
|
||||
/// Backwards proved the flow infeasible — finding confidence must drop.
|
||||
/// Backwards proved the flow infeasible, finding confidence must drop.
|
||||
Infeasible,
|
||||
/// Budget exhausted before a verdict was reached.
|
||||
BudgetExhausted,
|
||||
|
|
@ -658,7 +658,7 @@ pub fn aggregate_verdict(flows: &[BackwardFlow]) -> FindingVerdict {
|
|||
}
|
||||
|
||||
/// Apply a verdict as a note on a [`Finding`]. No-ops when the verdict is
|
||||
/// [`FindingVerdict::Inconclusive`] — the forward finding retains its
|
||||
/// [`FindingVerdict::Inconclusive`], the forward finding retains its
|
||||
/// original metadata.
|
||||
pub fn annotate_finding(finding: &mut Finding, verdict: FindingVerdict) {
|
||||
// `Finding` does not own an Evidence struct directly (that lives on
|
||||
|
|
@ -1079,6 +1079,7 @@ mod tests {
|
|||
path_hash: 0,
|
||||
finding_id: String::new(),
|
||||
alternative_finding_ids: smallvec::SmallVec::new(),
|
||||
effective_sink_caps: crate::labels::Cap::empty(),
|
||||
};
|
||||
annotate_finding(&mut f, FindingVerdict::Confirmed);
|
||||
let sv = f.symbolic.as_ref().expect("symbolic verdict created");
|
||||
|
|
@ -1116,6 +1117,7 @@ mod tests {
|
|||
path_hash: 0,
|
||||
finding_id: String::new(),
|
||||
alternative_finding_ids: smallvec::SmallVec::new(),
|
||||
effective_sink_caps: crate::labels::Cap::empty(),
|
||||
};
|
||||
annotate_finding(&mut f, FindingVerdict::Inconclusive);
|
||||
assert!(f.symbolic.is_none());
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ pub struct VarTaint {
|
|||
pub uses_summary: bool,
|
||||
}
|
||||
|
||||
/// A single taint origin — the node and classification of where taint came from.
|
||||
/// A single taint origin, the node and classification of where taint came from.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
|
||||
pub struct TaintOrigin {
|
||||
pub node: NodeIndex,
|
||||
|
|
@ -30,7 +30,7 @@ pub struct TaintOrigin {
|
|||
/// # Capacity limit
|
||||
///
|
||||
/// `SmallBitSet` is a fixed-size 64-slot bitset backed by a single `u64`.
|
||||
/// Inserting a `SymbolId` with ordinal ≥ 64 is a no-op — the bit is silently
|
||||
/// Inserting a `SymbolId` with ordinal ≥ 64 is a no-op, the bit is silently
|
||||
/// dropped. This is a deliberate precision-over-completeness trade: the
|
||||
/// bitset underpins predicate / validation tracking in the SSA taint engine,
|
||||
/// and functions with more than 64 distinct predicate-relevant variables are
|
||||
|
|
|
|||
128
src/taint/mod.rs
128
src/taint/mod.rs
|
|
@ -1,4 +1,5 @@
|
|||
#![allow(clippy::collapsible_if, clippy::too_many_arguments)]
|
||||
#![doc = include_str!(concat!(env!("OUT_DIR"), "/taint.md"))]
|
||||
|
||||
pub mod backwards;
|
||||
pub mod domain;
|
||||
|
|
@ -84,7 +85,7 @@ fn js_ts_pass2_cap() -> usize {
|
|||
//
|
||||
// Active only when the slot is `Some`. Production code path leaves it
|
||||
// `None`, making instrumentation cost a single thread-local borrow + a
|
||||
// `match Option::None` per measured chunk — sub-nanosecond.
|
||||
// `match Option::None` per measured chunk, sub-nanosecond.
|
||||
thread_local! {
|
||||
static PERF_LOWER_TIMINGS: std::cell::Cell<Option<[u128; 7]>> =
|
||||
const { std::cell::Cell::new(None) };
|
||||
|
|
@ -112,10 +113,10 @@ fn perf_lower_record(slot: usize, micros: u128) {
|
|||
|
||||
/// Test-only override for the Gauss-Seidel toggle. Values:
|
||||
///
|
||||
/// * `0` — respect `NYX_JS_GAUSS_SEIDEL` env var (default production
|
||||
/// * `0`: respect `NYX_JS_GAUSS_SEIDEL` env var (default production
|
||||
/// behaviour).
|
||||
/// * `1` — force Jacobi (env ignored).
|
||||
/// * `2` — force Gauss-Seidel (env ignored).
|
||||
/// * `1`: force Jacobi (env ignored).
|
||||
/// * `2`: force Gauss-Seidel (env ignored).
|
||||
///
|
||||
/// Used exclusively by integration tests that need to assert both
|
||||
/// variants produce equal findings without per-test process isolation.
|
||||
|
|
@ -209,7 +210,7 @@ pub struct Finding {
|
|||
/// The kind of source that originated the taint.
|
||||
pub source_kind: SourceKind,
|
||||
/// Whether all tainted sink variables are guarded by a validation
|
||||
/// predicate on this path (metadata only — does not change severity).
|
||||
/// predicate on this path (metadata only, does not change severity).
|
||||
pub path_validated: bool,
|
||||
/// The kind of validation guard protecting this path, if any.
|
||||
pub guard_kind: Option<PredicateKind>,
|
||||
|
|
@ -233,7 +234,7 @@ pub struct Finding {
|
|||
/// sink was resolved via a function summary carrying a
|
||||
/// [`crate::summary::SinkSite`] with concrete coordinates for primary
|
||||
/// sink-location attribution. `None` for:
|
||||
/// * intra-procedural / label-based sinks — the caller's `cfg[sink]`
|
||||
/// * intra-procedural / label-based sinks, the caller's `cfg[sink]`
|
||||
/// span already names the dangerous instruction;
|
||||
/// * summary-resolved sinks whose `SinkSite` was cap-only (no tree or
|
||||
/// bytes context at extraction time).
|
||||
|
|
@ -245,7 +246,7 @@ pub struct Finding {
|
|||
/// the scan root is the file itself (every namespace normalizes to
|
||||
/// `""`); consumers resolve empty `file_rel` against the file under
|
||||
/// analysis. Enforced at `ssa_events_to_findings` by a
|
||||
/// `debug_assert!` — upstream filters drop cap-only sites before
|
||||
/// `debug_assert!`, upstream filters drop cap-only sites before
|
||||
/// they reach this field.
|
||||
///
|
||||
/// Deliberately independent of `uses_summary`: that flag tracks whether
|
||||
|
|
@ -255,13 +256,13 @@ pub struct Finding {
|
|||
/// `primary_location`.
|
||||
pub primary_location: Option<SinkLocation>,
|
||||
/// Engine provenance notes recorded during the analysis that produced
|
||||
/// this finding. Populated when an internal budget/cap was hit — see
|
||||
/// this finding. Populated when an internal budget/cap was hit, see
|
||||
/// [`crate::engine_notes::EngineNote`]. Empty for the typical
|
||||
/// under-budget finding.
|
||||
pub engine_notes: SmallVec<[EngineNote; 2]>,
|
||||
/// Stable hash of the intermediate-variable sequence between `source`
|
||||
/// and `sink`. Used to keep distinct paths through different
|
||||
/// variables as separate findings during deduplication — two
|
||||
/// variables as separate findings during deduplication, two
|
||||
/// `(body_id, sink, source)` siblings with different `path_hash`
|
||||
/// values represent flows along different data paths and are
|
||||
/// preserved as alternatives rather than collapsed.
|
||||
|
|
@ -289,6 +290,13 @@ pub struct Finding {
|
|||
/// formatters can present them as "this flow … and N alternative
|
||||
/// path(s)" rather than silently dropping one.
|
||||
pub alternative_finding_ids: SmallVec<[String; 2]>,
|
||||
/// Sink-cap mask that this specific finding fired against. Carries the
|
||||
/// per-event `sink_caps` from the multi-gate dispatch (e.g.
|
||||
/// `Cap::SSRF` for a URL-flow finding on `fetch`, `Cap::DATA_EXFIL`
|
||||
/// for a body-flow finding on the same call). Used by `ast.rs` to
|
||||
/// route the finding to a cap-specific rule id rather than the
|
||||
/// generic `taint-unsanitised-flow` bucket.
|
||||
pub effective_sink_caps: crate::labels::Cap,
|
||||
}
|
||||
|
||||
impl Finding {
|
||||
|
|
@ -425,7 +433,7 @@ pub(crate) fn analyse_file_with_lowered(
|
|||
|
||||
// 3. Unified multi-body analysis with lexical containment propagation.
|
||||
//
|
||||
// `max_iterations` is the safety cap, not an expected depth — the
|
||||
// `max_iterations` is the safety cap, not an expected depth, the
|
||||
// pass-2 loop breaks on seed equality (monotone lattice, finite
|
||||
// height) and only rides the cap when convergence legitimately
|
||||
// needs more rounds than the cap allows. See
|
||||
|
|
@ -481,7 +489,7 @@ pub(crate) fn analyse_file_with_lowered(
|
|||
// dedup_by_key(|f| (body_id, sink, source));
|
||||
//
|
||||
// which silently collapsed an *unguarded* flow reaching the same
|
||||
// `(sink, source)` as a guarded flow — the `!path_validated` sort
|
||||
// `(sink, source)` as a guarded flow, the `!path_validated` sort
|
||||
// ordered `path_validated == true` first, so the exploitable
|
||||
// branch was the one that got dropped.
|
||||
//
|
||||
|
|
@ -541,7 +549,7 @@ fn make_finding_id(f: &Finding) -> String {
|
|||
/// Cross-link findings that share `(body_id, sink, source)` but differ
|
||||
/// on `path_validated` or `path_hash`. After this call each such
|
||||
/// finding's `alternative_finding_ids` lists every sibling's
|
||||
/// [`Finding::finding_id`] — so a guarded flow links to the unguarded
|
||||
/// [`Finding::finding_id`], so a guarded flow links to the unguarded
|
||||
/// sibling and vice versa. Isolated findings (no sibling) get an
|
||||
/// empty list.
|
||||
fn link_alternative_paths(findings: &mut [Finding]) {
|
||||
|
|
@ -576,7 +584,7 @@ fn link_alternative_paths(findings: &mut [Finding]) {
|
|||
/// Compute containment-topological order: parent bodies before children.
|
||||
///
|
||||
/// Uses BFS from roots (bodies with no parent), ensuring a body is always
|
||||
/// processed after its parent — required for lexical seed propagation.
|
||||
/// processed after its parent, required for lexical seed propagation.
|
||||
/// Returns indices into `file_cfg.bodies` in processing order.
|
||||
fn containment_order(bodies: &[BodyCfg]) -> Vec<usize> {
|
||||
let mut children: HashMap<BodyId, Vec<usize>> = HashMap::new();
|
||||
|
|
@ -637,7 +645,7 @@ fn analyse_body_with_seed(
|
|||
// Per-body graphs contain only the body's own nodes.
|
||||
// For non-toplevel bodies, use lower_to_ssa_with_params with scope to
|
||||
// create SsaOp::Param ops for external/captured variables and formal
|
||||
// parameters — required for global_seed to inject taint from the parent.
|
||||
// parameters, required for global_seed to inject taint from the parent.
|
||||
// Top-level bodies use lower_to_ssa with scope_all=true (no Param ops).
|
||||
let is_toplevel = body.meta.parent_body_id.is_none();
|
||||
// JS/TS function bodies always use scoped lowering to create Param ops
|
||||
|
|
@ -708,12 +716,9 @@ fn analyse_body_with_seed(
|
|||
} else {
|
||||
Some(static_map)
|
||||
};
|
||||
// Pointer-Phase 3 / W1+W2+W3: per-body field-sensitive points-to
|
||||
// facts. Computed only when `NYX_POINTER_ANALYSIS=1`; the
|
||||
// per-body `analyse_body` cost is amortised across the three
|
||||
// hooks (W1 field-write read-back, W2 container ELEM cells,
|
||||
// W3 cross-call resolver). Strict-additive: `None` keeps
|
||||
// pointer-disabled behaviour bit-identical.
|
||||
// Per-body field-sensitive points-to facts. Cost is
|
||||
// amortised across field-write read-back, container ELEM
|
||||
// cells, and the cross-call resolver.
|
||||
let pointer_facts = if crate::pointer::is_enabled() {
|
||||
Some(crate::pointer::analyse_body(&ssa_body, body.meta.id))
|
||||
} else {
|
||||
|
|
@ -836,7 +841,7 @@ fn analyse_body_with_seed(
|
|||
Err(e) => {
|
||||
// SSA lowering produced no analyzable body. We still surface
|
||||
// the event so downstream tooling can tell "we tried and gave
|
||||
// up" from "we ran clean" — a TRACE-level log records the
|
||||
// up" from "we ran clean", a TRACE-level log records the
|
||||
// reason (no synthetic Finding is manufactured because a
|
||||
// diag pointing at no source location would be misleading).
|
||||
tracing::trace!(
|
||||
|
|
@ -948,7 +953,7 @@ fn analyse_multi_body(
|
|||
let top_cfg = &top.graph;
|
||||
|
||||
// Collect top-level binding keys for seed filtering. Always
|
||||
// keyed under `BodyId(0)` — `filter_seed_to_toplevel` matches
|
||||
// keyed under `BodyId(0)`, `filter_seed_to_toplevel` matches
|
||||
// by name and re-keys every surviving entry to `BodyId(0)`
|
||||
// anyway, so the body_id on the probe keys is informational.
|
||||
let toplevel_keys: HashSet<ssa_transfer::BindingKey> = {
|
||||
|
|
@ -969,7 +974,7 @@ fn analyse_multi_body(
|
|||
// re-analysis when a name it reads via Param or via the
|
||||
// global_seed ancestor-lookup path has actually changed in
|
||||
// the combined seed. `reads` is a superset of the body's
|
||||
// top-level dependencies — we err on the side of over-running
|
||||
// top-level dependencies, we err on the side of over-running
|
||||
// (false dirty) rather than missing a dependency.
|
||||
let body_reads: HashMap<BodyId, HashSet<String>> = {
|
||||
let mut m: HashMap<BodyId, HashSet<String>> = HashMap::new();
|
||||
|
|
@ -1060,7 +1065,7 @@ fn analyse_multi_body(
|
|||
|
||||
// Re-run non-toplevel bodies with updated seed.
|
||||
body_exit_states.insert(BodyId(0), current_seed.clone());
|
||||
// Phase-C: Gauss-Seidel variant — as each body is
|
||||
// Phase-C: Gauss-Seidel variant, as each body is
|
||||
// re-analysed, merge its new exit into `current_seed`
|
||||
// immediately so subsequent bodies in the same round see
|
||||
// the fresh value. Order matters here; we pin to
|
||||
|
|
@ -1137,7 +1142,7 @@ fn analyse_multi_body(
|
|||
|
||||
// Record observability counter. `iters_used == 0` covers the
|
||||
// non-JS/TS path (`max_iterations == 1`) and the JS/TS case where
|
||||
// the convergence loop did not enter — report `1` so the counter
|
||||
// the convergence loop did not enter, report `1` so the counter
|
||||
// always reflects "at least the lexical-containment pass ran".
|
||||
let reported_iters = if iters_used == 0 { 1 } else { iters_used };
|
||||
LAST_JS_TS_PASS2_ITERATIONS.store(reported_iters, Ordering::Relaxed);
|
||||
|
|
@ -1287,7 +1292,7 @@ fn lookup_formal_params(local_summaries: &FuncSummaries, func_name: &str) -> Vec
|
|||
/// When exactly one `(name, arity)`-matching entry exists we use its full
|
||||
/// identity (container / disambig / kind preserved). When zero or multiple
|
||||
/// match we fall back to a free-function key so the caller still has a
|
||||
/// well-formed key — this can only happen in legacy discovery paths that
|
||||
/// well-formed key, this can only happen in legacy discovery paths that
|
||||
/// cannot see through same-name siblings, and those paths were already
|
||||
/// collision-prone before this refactor. New intra-file analysis code
|
||||
/// should prefer [`BodyMeta::func_key`].
|
||||
|
|
@ -1300,7 +1305,7 @@ fn lookup_canonical_func_key(
|
|||
) -> FuncKey {
|
||||
// `local_summaries` is file-local, so every entry's namespace agrees with
|
||||
// whatever `build_cfg` wrote (raw file path). We match by lang + name +
|
||||
// arity and fall back to name-only — the caller's `namespace` argument is
|
||||
// arity and fall back to name-only, the caller's `namespace` argument is
|
||||
// only used when we have to synthesise a key as a last resort.
|
||||
let mut matches = local_summaries
|
||||
.keys()
|
||||
|
|
@ -1372,7 +1377,7 @@ pub(crate) fn extract_intra_file_ssa_summaries(
|
|||
.count()
|
||||
};
|
||||
|
||||
// Zero-param helpers are normally elided — a fixture with no
|
||||
// Zero-param helpers are normally elided, a fixture with no
|
||||
// parameters cannot carry per-parameter taint transforms. But
|
||||
// zero-arg factories (`function makeBag() { return []; }`) do
|
||||
// have one observable cross-file effect: the return is a fresh
|
||||
|
|
@ -1409,7 +1414,7 @@ pub(crate) fn extract_intra_file_ssa_summaries(
|
|||
// must survive this filter so summary application at cross-file
|
||||
// call sites can replay the alias edges. Zero-param factories
|
||||
// are kept via the `returns_fresh_alloc` leg of
|
||||
// `points_to.is_empty()` — `is_empty()` returns false when the
|
||||
// `points_to.is_empty()`, `is_empty()` returns false when the
|
||||
// fresh-alloc flag is set.
|
||||
if !summary.param_to_return.is_empty()
|
||||
|| !summary.param_to_sink.is_empty()
|
||||
|
|
@ -1436,7 +1441,7 @@ pub(crate) fn extract_intra_file_ssa_summaries(
|
|||
}
|
||||
|
||||
/// Lower all function bodies from `FileCfg` to produce SSA summaries + cached
|
||||
/// bodies. Each body's own graph is used directly — no scope filtering needed.
|
||||
/// bodies. Each body's own graph is used directly, no scope filtering needed.
|
||||
///
|
||||
/// Both returned maps are keyed by each body's canonical [`FuncKey`] (carried
|
||||
/// on [`crate::cfg::BodyMeta::func_key`]). This is the most collision-
|
||||
|
|
@ -1503,7 +1508,7 @@ pub(crate) fn lower_all_functions_from_bodies(
|
|||
// `build_cfg` wrote. The caller passes `namespace` already normalized
|
||||
// against `scan_root`, which is what FuncSummary keys use on the
|
||||
// cross-file side (`FuncSummary::func_key`). Overriding the namespace
|
||||
// here keeps both sides of `GlobalSummaries` agreement — otherwise
|
||||
// here keeps both sides of `GlobalSummaries` agreement, otherwise
|
||||
// `resolve_callee` resolves to the normalized FuncSummary key and
|
||||
// misses the raw-path SSA entry.
|
||||
let mut key = body.meta.func_key.clone().unwrap_or_else(|| {
|
||||
|
|
@ -1542,7 +1547,7 @@ pub(crate) fn lower_all_functions_from_bodies(
|
|||
|
||||
// Always insert the summary, even when all fields are empty/default.
|
||||
// An empty summary tells resolve_callee "this function exists and has
|
||||
// no taint effects" — preventing fallthrough to the less precise old
|
||||
// no taint effects", preventing fallthrough to the less precise old
|
||||
// FuncSummary which may report false source_caps from internal sources.
|
||||
// For zero-param functions we only insert when the summary carries
|
||||
// the fresh-container signal (the only observable effect worth
|
||||
|
|
@ -1563,34 +1568,23 @@ pub(crate) fn lower_all_functions_from_bodies(
|
|||
perf_lower_record(2, _t_opt.elapsed().as_micros());
|
||||
|
||||
let _t_typed = std::time::Instant::now();
|
||||
// Phase 2 (typed call-graph devirtualisation): walk every SSA
|
||||
// method call in this body, look up the receiver SSA value's
|
||||
// [`crate::ssa::type_facts::TypeKind`] in the just-computed
|
||||
// `opt.type_facts`, and record `(call_ordinal, container_name)`
|
||||
// on the matching summary so Phase 3 in `build_call_graph` can
|
||||
// narrow the indirect-method-call edge to the receiver-typed
|
||||
// container. Free-function calls (`receiver: None`) and
|
||||
// unknown receiver types are silently skipped — the bare-name
|
||||
// resolution path applies unchanged in that case.
|
||||
// For every SSA method call, look up the receiver's TypeKind
|
||||
// and record `(call_ordinal, container_name)` so devirtualisation
|
||||
// in `build_call_graph` can narrow the edge to the receiver-typed
|
||||
// container. Free-function calls and unknown types fall back to
|
||||
// bare-name resolution.
|
||||
let typed_receivers = collect_typed_call_receivers(&func_ssa, &body.graph, &opt.type_facts);
|
||||
if !typed_receivers.is_empty() {
|
||||
// The summary may not have been inserted above (zero-param,
|
||||
// no-fresh-alloc bodies are skipped). Force-insert in that
|
||||
// case so the receiver-type info reaches Phase 3 — without
|
||||
// it, the cross-file devirtualisation signal would be lost
|
||||
// for any method invoked inside a parameterless caller.
|
||||
// Zero-param/no-fresh-alloc bodies are skipped above;
|
||||
// force-insert so receiver-type info still reaches
|
||||
// build_call_graph.
|
||||
let entry = summaries.entry(key.clone()).or_default();
|
||||
entry.typed_call_receivers = typed_receivers;
|
||||
}
|
||||
|
||||
// Pointer-Phase 5 / W3: populate `field_points_to` from the
|
||||
// body's pointer facts when the analysis is enabled. Strict
|
||||
// opt-in via `NYX_POINTER_ANALYSIS=1`; off-by-default keeps
|
||||
// bit-for-bit identity with the pre-W3 behaviour.
|
||||
//
|
||||
// `extract_field_points_to` covers both reads (via
|
||||
// `SsaOp::FieldProj` walks) and writes (via the W1
|
||||
// `field_writes` side-table on the body) in a single pass.
|
||||
// Populate `field_points_to` from the body's pointer facts.
|
||||
// `extract_field_points_to` covers both reads (FieldProj walks)
|
||||
// and writes (`field_writes` side-table) in one pass.
|
||||
if crate::pointer::is_enabled() {
|
||||
let facts = crate::pointer::analyse_body(&func_ssa, body.meta.id);
|
||||
let fpt = crate::pointer::extract_field_points_to(&func_ssa, &facts);
|
||||
|
|
@ -1621,7 +1615,7 @@ pub(crate) fn lower_all_functions_from_bodies(
|
|||
// Lift child-body sinks into the parent's `param_to_sink` for
|
||||
// every parent body with lexically contained children. This
|
||||
// handles the direct-wrapper case
|
||||
// `f(x) { return new Promise((res, rej) => sink(x)) }` — the
|
||||
// `f(x) { return new Promise((res, rej) => sink(x)) }`, the
|
||||
// executor's gated http.get sink becomes visible to callers of
|
||||
// `f` via `f.summary.param_to_sink`.
|
||||
//
|
||||
|
|
@ -1635,8 +1629,8 @@ pub(crate) fn lower_all_functions_from_bodies(
|
|||
// propagation at summary-extraction time so cross-call
|
||||
// resolution sees the sink at every caller of `f`.
|
||||
//
|
||||
// Strict-additive: only ADDs `param_to_sink` entries — never
|
||||
// removes or modifies existing data — so it cannot regress
|
||||
// Strict-additive: only ADDs `param_to_sink` entries, never
|
||||
// removes or modifies existing data, so it cannot regress
|
||||
// detection. Bounded: each parent-param probe runs each child
|
||||
// body's analysis exactly once.
|
||||
let _t_aug = std::time::Instant::now();
|
||||
|
|
@ -1665,7 +1659,7 @@ pub(crate) fn lower_all_functions_from_bodies(
|
|||
// OR-merge: only adds `param_to_sink` / `param_to_sink_param`
|
||||
// entries to existing summaries. Existing entries (return
|
||||
// transforms, source caps, augment-populated sinks, etc.) are
|
||||
// preserved. Strict-additive — cannot regress detection.
|
||||
// preserved. Strict-additive, cannot regress detection.
|
||||
let _t_rerun = std::time::Instant::now();
|
||||
rerun_extraction_with_augmented_summaries(
|
||||
file_cfg,
|
||||
|
|
@ -1919,7 +1913,7 @@ fn augment_summaries_with_child_sinks(
|
|||
let parent_interner = crate::state::symbol::SymbolInterner::from_cfg(parent_cfg);
|
||||
|
||||
// Collect (formal_param_idx, var_name, ssa_value) for the parent's
|
||||
// formal params — mirrors `extract_ssa_func_summary`'s param scan.
|
||||
// formal params, mirrors `extract_ssa_func_summary`'s param scan.
|
||||
let mut parent_param_info: Vec<(usize, String)> = Vec::new();
|
||||
for block in &parent_ssa.blocks {
|
||||
for inst in block.phis.iter().chain(block.body.iter()) {
|
||||
|
|
@ -2055,7 +2049,7 @@ fn augment_summaries_with_child_sinks(
|
|||
}
|
||||
|
||||
// Aggregate sink caps across all child events into one
|
||||
// entry per parent param (cap-only SinkSite — the
|
||||
// entry per parent param (cap-only SinkSite, the
|
||||
// exact location lives in the child body's CFG and is
|
||||
// not directly addressable from the parent's summary).
|
||||
let mut union_caps = Cap::empty();
|
||||
|
|
@ -2088,7 +2082,7 @@ fn augment_summaries_with_child_sinks(
|
|||
// engine's primary sink-site picker uses
|
||||
// `param_to_sink_param` for arg-position filtering)
|
||||
// sees this captured-flow sink. Position 0 is a
|
||||
// best-effort placeholder — the actual filtering at
|
||||
// best-effort placeholder, the actual filtering at
|
||||
// the caller is by SSRF cap, not arg position, when
|
||||
// the wrapper is itself non-gated.
|
||||
if !entry
|
||||
|
|
@ -2109,7 +2103,7 @@ fn augment_summaries_with_child_sinks(
|
|||
/// non-empty [`crate::ssa::type_facts::TypeKind::container_name`].
|
||||
///
|
||||
/// Free-function calls (`receiver: None`) and unknown receiver types
|
||||
/// are skipped — the cross-file call-graph builder will fall back to
|
||||
/// are skipped, the cross-file call-graph builder will fall back to
|
||||
/// today's name-only resolution for those, preserving the
|
||||
/// "subset of today's targets, never a superset" invariant from
|
||||
/// `docs/typed-call-graph-prompt.md`.
|
||||
|
|
@ -2135,13 +2129,13 @@ fn collect_typed_call_receivers(
|
|||
continue;
|
||||
};
|
||||
let Some(receiver_val) = receiver else {
|
||||
continue; // free-function call — no devirtualisation possible
|
||||
continue; // free-function call, no devirtualisation possible
|
||||
};
|
||||
let Some(kind) = type_facts.get_type(*receiver_val) else {
|
||||
continue; // type unknown — fall back to name-only resolution
|
||||
continue; // type unknown, fall back to name-only resolution
|
||||
};
|
||||
let Some(container) = kind.container_name() else {
|
||||
continue; // scalar/unknown type — no useful container
|
||||
continue; // scalar/unknown type, no useful container
|
||||
};
|
||||
let Some(node_info) = cfg.node_weight(inst.cfg_node) else {
|
||||
continue;
|
||||
|
|
@ -2150,7 +2144,7 @@ fn collect_typed_call_receivers(
|
|||
// A single SSA call instruction maps 1:1 with a CFG call
|
||||
// node, so each ordinal should appear at most once. The
|
||||
// dedup guard exists in case lowering ever introduces a
|
||||
// second SSA Call sharing a cfg_node — first wins.
|
||||
// second SSA Call sharing a cfg_node, first wins.
|
||||
if !seen.insert(ordinal) {
|
||||
continue;
|
||||
}
|
||||
|
|
@ -2211,7 +2205,7 @@ pub(crate) fn build_eligible_bodies(
|
|||
continue;
|
||||
}
|
||||
// Populate node metadata against the per-body graph whose NodeIndex
|
||||
// space the SSA was produced on — otherwise cross-file replay can't
|
||||
// space the SSA was produced on, otherwise cross-file replay can't
|
||||
// find the original CFG nodes.
|
||||
//
|
||||
// `key.namespace` was already normalised against `scan_root` in
|
||||
|
|
|
|||
|
|
@ -35,13 +35,13 @@ pub enum PredicateKind {
|
|||
/// Commonly paired with [`ShellMetaValidated`] in OR-chain rejection
|
||||
/// idioms (`if x.len() > MAX || x.contains(";") { reject }`). Counts as
|
||||
/// a dominator guard for `cfg-unguarded-sink` purposes, but intentionally
|
||||
/// does **not** mark variables as validated — the rejection direction is
|
||||
/// does **not** mark variables as validated, the rejection direction is
|
||||
/// ambiguous from the condition alone (a `.len() > 5 { sink(x) }`
|
||||
/// gate is a precondition, not a rejection).
|
||||
BoundedLength,
|
||||
/// Comparison operators: `x == 5`, `x > threshold`
|
||||
Comparison,
|
||||
/// Generic boolean test — cannot classify further.
|
||||
/// Generic boolean test, cannot classify further.
|
||||
Unknown,
|
||||
}
|
||||
|
||||
|
|
@ -50,7 +50,7 @@ pub enum PredicateKind {
|
|||
///
|
||||
/// Presence of any of these in user input is sufficient to enable shell
|
||||
/// injection, so rejecting input that contains them is a real sanitizer.
|
||||
/// `"foo"` or other non-metachar needles don't qualify — a rejection of
|
||||
/// `"foo"` or other non-metachar needles don't qualify, a rejection of
|
||||
/// those is business logic, not security.
|
||||
const SHELL_METACHARS: &[&str] = &[";", "|", "&", "`", "$", ">", "<", "\n", "\r", "\0"];
|
||||
|
||||
|
|
@ -65,7 +65,7 @@ const SHELL_METACHARS: &[&str] = &[";", "|", "&", "`", "$", ">", "<", "\n", "\r"
|
|||
/// character class containing only metacharacters.
|
||||
///
|
||||
/// Returns `false` if the needle is a non-metachar literal or cannot be
|
||||
/// extracted — falls through to broader classification.
|
||||
/// extracted, falls through to broader classification.
|
||||
fn is_shell_metachar_rejection(text: &str) -> bool {
|
||||
// Method-call form: `.contains(…)` / `.includes(…)` / `.include?(…)`
|
||||
for method in [".contains(", ".includes(", ".include?("] {
|
||||
|
|
@ -134,7 +134,7 @@ fn extract_first_string_arg(after_open: &str) -> Option<String> {
|
|||
}
|
||||
|
||||
/// For Python `"<METACHAR>" in x` (needle on the left side of ` in `), return
|
||||
/// the needle. Returns `None` for `x in ALLOWED` (identifier on the left) —
|
||||
/// the needle. Returns `None` for `x in ALLOWED` (identifier on the left);
|
||||
/// that is an allowlist check, not a rejection.
|
||||
fn extract_python_in_needle(text: &str) -> Option<String> {
|
||||
let pos = text.find(" in ")?;
|
||||
|
|
@ -155,7 +155,7 @@ fn extract_python_in_needle(text: &str) -> Option<String> {
|
|||
|
||||
/// Detect regex character classes that contain only shell metacharacters:
|
||||
/// `[;|&]`, `[;&`$]`, etc. Missing: escape-class metacharacters inside the
|
||||
/// class (e.g. `[\n]`) — conservative, returns false there.
|
||||
/// class (e.g. `[\n]`), conservative, returns false there.
|
||||
fn is_metachar_regex_class(text: &str) -> bool {
|
||||
// Find `[` followed by content and `]`, anywhere in the text.
|
||||
let mut rest = text;
|
||||
|
|
@ -180,7 +180,7 @@ fn is_metachar_regex_class(text: &str) -> bool {
|
|||
|
||||
/// Check whether `text` looks like a bounded-length rejection:
|
||||
/// `x.len() > N`, `x.len() < N`, `x.length >= N`, etc. where `N` is an
|
||||
/// integer literal >= 2. Excludes `> 0` / `>= 1` / `< 1` — those are
|
||||
/// integer literal >= 2. Excludes `> 0` / `>= 1` / `< 1`, those are
|
||||
/// non-empty checks, which are not length-bound validations.
|
||||
fn is_bounded_length_check(lower: &str) -> bool {
|
||||
const PROBES: &[&str] = &[
|
||||
|
|
@ -290,7 +290,7 @@ pub fn classify_condition(text: &str) -> PredicateKind {
|
|||
// Matched BEFORE AllowlistCheck so that `x.contains(";")` is recognized
|
||||
// as a rejection idiom rather than a membership test. Checked on the
|
||||
// raw (non-lowercased) text so metacharacter comparisons stay
|
||||
// case-accurate — `;` / `|` / `&` have no case.
|
||||
// case-accurate, `;` / `|` / `&` have no case.
|
||||
if is_shell_metachar_rejection(text) {
|
||||
return PredicateKind::ShellMetaValidated;
|
||||
}
|
||||
|
|
@ -409,7 +409,7 @@ pub fn classify_condition(text: &str) -> PredicateKind {
|
|||
/// validator's effect is opaque: we can't tell which argument is being
|
||||
/// checked. Returning the original kind with `None` target would cause
|
||||
/// upstream code to over-validate (mark every `condition_var` as validated).
|
||||
/// Instead, we fall back to `PredicateKind::Unknown` — safer to assume the
|
||||
/// Instead, we fall back to `PredicateKind::Unknown`, safer to assume the
|
||||
/// validator did nothing than to assume it validated every variable in the
|
||||
/// condition. Single-argument calls retain `(kind, None)` so downstream code
|
||||
/// can still use the predicate-summary bit tracking.
|
||||
|
|
@ -442,7 +442,7 @@ pub fn classify_condition_with_target(text: &str) -> (PredicateKind, Option<Stri
|
|||
(kind, target)
|
||||
}
|
||||
PredicateKind::Comparison => {
|
||||
// `x === '/login'`, `x == 5`, `null != obj` — when exactly one
|
||||
// `x === '/login'`, `x == 5`, `null != obj`, when exactly one
|
||||
// side is a literal, extract the identifier side as the target.
|
||||
// Downstream `apply_branch_predicates` uses this to mark the
|
||||
// variable as `validated_may` on the true (equal) branch.
|
||||
|
|
@ -464,7 +464,7 @@ pub fn classify_condition_with_target(text: &str) -> (PredicateKind, Option<Stri
|
|||
/// - `'a' == 'b'` → `None` (both sides are literals)
|
||||
/// - `obj.field == 3` → `None` (not a bare identifier)
|
||||
///
|
||||
/// Best-effort text analysis — kept conservative to avoid false validation.
|
||||
/// Best-effort text analysis, kept conservative to avoid false validation.
|
||||
fn extract_comparison_target(text: &str) -> Option<String> {
|
||||
let trimmed = text.trim();
|
||||
|
||||
|
|
@ -537,7 +537,7 @@ fn is_comparison_literal(s: &str) -> bool {
|
|||
/// `Some(0)` for a call with empty argument list. Respects paren/bracket/brace
|
||||
/// nesting so `f(g(a, b), c)` counts as 2 top-level args.
|
||||
///
|
||||
/// Best-effort — operates on source text, not an AST. Used by
|
||||
/// Best-effort, operates on source text, not an AST. Used by
|
||||
/// `classify_condition_with_target` to distinguish single-arg vs multi-arg
|
||||
/// validator calls when target extraction fails.
|
||||
fn count_call_args(text: &str) -> Option<usize> {
|
||||
|
|
@ -592,7 +592,7 @@ fn extract_validation_target(text: &str) -> Option<String> {
|
|||
}
|
||||
}
|
||||
|
||||
// Function call pattern: `func(x, ...)` — extract first argument
|
||||
// Function call pattern: `func(x, ...)`, extract first argument
|
||||
// Strip closing paren if present
|
||||
let args_inner = args_part.trim_end().strip_suffix(')').unwrap_or(args_part);
|
||||
// Take text up to first comma (first argument)
|
||||
|
|
@ -653,7 +653,7 @@ fn extract_allowlist_target(text: &str) -> Option<String> {
|
|||
|
||||
// Python `in` operator: `cmd in ALLOWED` / `cmd not in ALLOWED`
|
||||
if lower.contains(" in ") {
|
||||
// Find the leftmost ` in ` — everything before it is the target expression
|
||||
// Find the leftmost ` in `, everything before it is the target expression
|
||||
// Handle `not in` by looking for ` not in ` first
|
||||
let target_part = if let Some(pos) = lower.find(" not in ") {
|
||||
&trimmed[..pos]
|
||||
|
|
@ -857,7 +857,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn classify_validation_requires_paren() {
|
||||
// `x_valid == true` should NOT be ValidationCall — no `(` call syntax.
|
||||
// `x_valid == true` should NOT be ValidationCall, no `(` call syntax.
|
||||
assert_eq!(
|
||||
classify_condition("x_valid == true"),
|
||||
PredicateKind::Comparison
|
||||
|
|
@ -978,7 +978,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn target_multi_arg_fallback_opaque_expr_is_unknown() {
|
||||
// `validate(x + 1, y)` — first arg is an expression, not an identifier.
|
||||
// `validate(x + 1, y)`, first arg is an expression, not an identifier.
|
||||
// Target extraction fails. Multi-arg call, so fall back to Unknown
|
||||
// rather than letting upstream validate every condition var.
|
||||
let (kind, target) = classify_condition_with_target("validate(x + 1, y)");
|
||||
|
|
|
|||
|
|
@ -1,9 +1,9 @@
|
|||
//! Taint event emission and conversion to [`crate::taint::Finding`].
|
||||
//!
|
||||
//! Extracted from the monolithic `ssa_transfer.rs`. Contains:
|
||||
//! * [`SsaTaintEvent`] — the raw event struct produced by the block-level
|
||||
//! * [`SsaTaintEvent`], the raw event struct produced by the block-level
|
||||
//! worklist each time a tainted value reaches a sink.
|
||||
//! * [`ssa_events_to_findings`] — event → `Finding` conversion with the
|
||||
//! * [`ssa_events_to_findings`], event → `Finding` conversion with the
|
||||
//! `primary_location` invariant and dedup.
|
||||
//! * Flow-path reconstruction helpers ([`reconstruct_flow_path`] and
|
||||
//! operand pickers).
|
||||
|
|
@ -38,14 +38,14 @@ pub struct SsaTaintEvent {
|
|||
/// `sink_caps`. When multiple [`SinkSite`]s for the same `(param_idx,
|
||||
/// cap mask)` match, the emission site produces one event per
|
||||
/// [`SinkSite`] so each downstream [`crate::taint::Finding`] carries a
|
||||
/// single primary attribution — the multi-primary case collapses to
|
||||
/// single primary attribution, the multi-primary case collapses to
|
||||
/// multiple single-primary events.
|
||||
///
|
||||
/// `None` for:
|
||||
/// * intra-procedural sinks (`uses_summary == false`), where the
|
||||
/// caller's sink span already names the dangerous instruction;
|
||||
/// * summary-resolved sinks whose callee summary carried only cap-only
|
||||
/// [`SinkSite`]s (no source coordinates — e.g. pass-2 transient
|
||||
/// [`SinkSite`]s (no source coordinates, e.g. pass-2 transient
|
||||
/// summaries or local `LocalFuncSummary`-only callees).
|
||||
pub primary_sink_site: Option<SinkSite>,
|
||||
}
|
||||
|
|
@ -79,7 +79,7 @@ pub(super) fn block_distance(ssa: &SsaBody, source_node: NodeIndex, sink_node: N
|
|||
}
|
||||
}
|
||||
}
|
||||
0 // unreachable or not connected — conservative default
|
||||
0 // unreachable or not connected, conservative default
|
||||
}
|
||||
|
||||
// ── Flow Path Reconstruction ─────────────────────────────────────────────
|
||||
|
|
@ -204,7 +204,7 @@ pub(super) fn reconstruct_flow_path(
|
|||
SsaOp::FieldProj { receiver, .. } => {
|
||||
// Treat field projection as a one-step assignment for
|
||||
// flow-step reconstruction: taint reaching `obj.f` came
|
||||
// from `obj`. Phase 4 will refine the witness rendering
|
||||
// from `obj`. The analysis may refine the witness rendering
|
||||
// to include the field name in the step.
|
||||
steps.push(FlowStepRaw {
|
||||
cfg_node: inst.cfg_node,
|
||||
|
|
@ -270,7 +270,7 @@ fn pick_tainted_operand_call(
|
|||
///
|
||||
/// Note: this invariant is intentionally independent of `uses_summary`.
|
||||
/// The taint-chain flag tracks summary-propagated *taint*, not summary-
|
||||
/// resolved *sinks* — a local source can reach a cross-file sink, so
|
||||
/// resolved *sinks*, a local source can reach a cross-file sink, so
|
||||
/// `primary_location.is_some()` does not imply `uses_summary == true`.
|
||||
pub fn ssa_events_to_findings(
|
||||
events: &[SsaTaintEvent],
|
||||
|
|
@ -329,7 +329,7 @@ pub fn ssa_events_to_findings(
|
|||
|
||||
// Data-integrity invariant: a populated primary_location must at least
|
||||
// carry resolved line coordinates. `file_rel` may legitimately be
|
||||
// empty — when the scan root is the caller file itself (single-file
|
||||
// empty, when the scan root is the caller file itself (single-file
|
||||
// scans), every namespace normalizes to `""` and the callee's site
|
||||
// inherits that empty path; consumers resolve it against the file
|
||||
// under analysis. Line==0 is the only filter-worthy invariant.
|
||||
|
|
@ -340,7 +340,7 @@ pub fn ssa_events_to_findings(
|
|||
|
||||
// Dedup key includes primary location so multi-site events that
|
||||
// share a single (source, sink) pair still produce distinct findings
|
||||
// — one per resolved callee-internal site.
|
||||
// (one per resolved callee-internal site).
|
||||
let loc_key = primary_location
|
||||
.as_ref()
|
||||
.map(|l| (l.file_rel.clone(), l.line, l.col));
|
||||
|
|
@ -374,6 +374,11 @@ pub fn ssa_events_to_findings(
|
|||
path_hash,
|
||||
finding_id: String::new(),
|
||||
alternative_finding_ids: smallvec::SmallVec::new(),
|
||||
// Per-event mask from the multi-gate dispatch, picks
|
||||
// exactly the cap that fired (e.g. `Cap::DATA_EXFIL`
|
||||
// for a `fetch` body-flow finding versus `Cap::SSRF`
|
||||
// for a URL-flow finding on the same call).
|
||||
effective_sink_caps: event.sink_caps & *caps,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,34 +1,10 @@
|
|||
//! Context-sensitive inline analysis — cache, body, and attribution types.
|
||||
//! Context-sensitive inline analysis: cache, body, and attribution types.
|
||||
//!
|
||||
//! Extracted from the monolithic `ssa_transfer.rs`. Contains:
|
||||
//! * [`ArgTaintSig`] — compact per-arg cap signature used as a cache key.
|
||||
//! * [`InlineResult`] / [`CachedInlineShape`] / [`ReturnShape`] — the
|
||||
//! callsite-adapted and callsite-agnostic inline-analysis result types.
|
||||
//! * [`InlineCache`] — the shared cache map keyed by
|
||||
//! `(FuncKey, ArgTaintSig)`.
|
||||
//! * [`CrossFileNodeMeta`] / [`CalleeSsaBody`] — the serde-able bodies
|
||||
//! persisted to SQLite for cross-file context-sensitive analysis.
|
||||
//! * [`populate_node_meta`] / [`rebuild_body_graph`] — bookkeeping for
|
||||
//! cross-file body proxy CFGs.
|
||||
//!
|
||||
//! The implementation functions (`inline_analyse_callee`,
|
||||
//! `apply_cached_shape`, `extract_inline_return_taint`) remain in the
|
||||
//! parent `mod.rs` because they depend tightly on the block worklist, the
|
||||
//! `run_ssa_taint_full` entry point, and the callee-resolution pipeline.
|
||||
//!
|
||||
//! # Cache key scope and origin attribution
|
||||
//!
|
||||
//! The inline-analysis cache below ([`InlineCache`]) is keyed by
|
||||
//! `(FuncKey, ArgTaintSig)`, where [`ArgTaintSig`] encodes **per-arg
|
||||
//! capability bits only** — not the identity of the source
|
||||
//! [`crate::taint::domain::TaintOrigin`]s that produced those caps. The
|
||||
//! stored value ([`CachedInlineShape`]) captures **only the structural**
|
||||
//! shape of the callee's return taint: return caps, callee-internal
|
||||
//! origins (from `Source` ops inside the callee body), and per-parameter
|
||||
//! provenance flags that record which formal parameters contributed to
|
||||
//! the return. Caller-specific origin identity is *not* stored — it is
|
||||
//! re-attributed at cache-apply time from the current call site's
|
||||
//! argument taint.
|
||||
//! The cache ([`InlineCache`]) is keyed by `(FuncKey, ArgTaintSig)`,
|
||||
//! where [`ArgTaintSig`] is per-arg cap bits only (not origin identity).
|
||||
//! Stored values ([`CachedInlineShape`]) capture the structural shape of
|
||||
//! the callee's return taint; caller-specific origins are re-attributed
|
||||
//! at apply time.
|
||||
|
||||
use crate::labels::Cap;
|
||||
use crate::ssa::ir::{SsaBody, Terminator};
|
||||
|
|
@ -42,61 +18,30 @@ use std::collections::HashMap;
|
|||
/// Maximum SSA blocks in a callee body before skipping inline analysis.
|
||||
pub(super) const MAX_INLINE_BLOCKS: usize = 500;
|
||||
|
||||
/// Compact cache key: per-arg-position cap bits (sorted, non-empty only).
|
||||
///
|
||||
/// Two calls with identical `ArgTaintSig` produce identical inline results
|
||||
/// for soundness purposes (return caps, callee-internal sink activations).
|
||||
/// Origin identity is **not** part of the key — see the module-level note
|
||||
/// above on origin-attribution non-determinism.
|
||||
/// Compact cache key: per-arg-position cap bits (sorted, non-empty
|
||||
/// only). Origin identity is not part of the key.
|
||||
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
|
||||
pub(crate) struct ArgTaintSig(pub(super) SmallVec<[(usize, u16); 4]>);
|
||||
|
||||
/// Call-site-adapted result of inline-analyzing a callee.
|
||||
///
|
||||
/// Constructed fresh per call site by `apply_cached_shape` from a stored
|
||||
/// [`CachedInlineShape`]; carries origins that point to the *current*
|
||||
/// caller's source chain, not to whichever caller first populated the
|
||||
/// cache entry.
|
||||
/// Call-site-adapted result of inline-analyzing a callee. Built fresh
|
||||
/// per call site so origins point to the current caller's chain.
|
||||
#[derive(Clone, Debug)]
|
||||
pub(crate) struct InlineResult {
|
||||
/// Taint on the return value after inline analysis.
|
||||
pub(super) return_taint: Option<VarTaint>,
|
||||
/// PathFact on the return value after inline analysis.
|
||||
///
|
||||
/// Non-top when the callee's body provably narrows the
|
||||
/// [`crate::abstract_interp::PathFact`] of the value it returns (for
|
||||
/// example, a `sanitize_path(s) -> Option<String>` helper that
|
||||
/// early-returns on `s.contains("..")` / `s.starts_with('/')`). At
|
||||
/// apply time the caller sets its call-result SSA value's PathFact to
|
||||
/// this narrowed fact, so downstream FILE_IO sinks see the sanitised
|
||||
/// axis regardless of whether a named label-rule exists for the
|
||||
/// helper. Top when the callee produces no narrowing — matches
|
||||
/// pre-PathFact behaviour exactly.
|
||||
/// PathFact on the return value. Non-top when the callee body
|
||||
/// provably narrows it (e.g. a `sanitize_path` early-returning on
|
||||
/// `s.contains("..")`).
|
||||
pub(super) return_path_fact: crate::abstract_interp::PathFact,
|
||||
/// Per-return-path decomposition of [`Self::return_path_fact`].
|
||||
///
|
||||
/// Non-empty when the callee has ≥2 distinct return blocks whose
|
||||
/// predicate gates differ. Match-arm-sensitive callers pick the
|
||||
/// entry whose `variant_inner_fact` matches the arm binding's
|
||||
/// variant; path-resolvable callers may refuse infeasible entries.
|
||||
/// Callers unable to distinguish paths still consult
|
||||
/// [`Self::return_path_fact`] (the join of all entries) and see
|
||||
/// pre-decomposition behaviour.
|
||||
/// Per-return-path decomposition of `return_path_fact`. Non-empty
|
||||
/// when the callee has ≥2 return blocks with different predicate
|
||||
/// gates.
|
||||
#[allow(dead_code)]
|
||||
pub(super) return_path_facts: SmallVec<[PathFactReturnEntry; 2]>,
|
||||
}
|
||||
|
||||
/// Structural (callsite-agnostic) summary of an inline-analyzed callee.
|
||||
///
|
||||
/// Stored in [`InlineCache`] in place of a fully-attributed `InlineResult`.
|
||||
/// Origin-identity information that depends on the caller's argument chain
|
||||
/// is *not* kept here; instead, [`ReturnShape::param_provenance`]
|
||||
/// records which callee parameter positions contributed seed taint to the
|
||||
/// return, and the actual caller origins are re-unioned in at apply time.
|
||||
///
|
||||
/// `None` means "this callee produced no return taint for the given
|
||||
/// argument shape". A cached `None` is still a meaningful result — it
|
||||
/// short-circuits re-analysis on subsequent calls with matching caps.
|
||||
/// Structural (callsite-agnostic) summary of an inline-analyzed
|
||||
/// callee. `None` means "no return taint for this arg shape", still
|
||||
/// meaningful, short-circuits subsequent calls with matching caps.
|
||||
#[derive(Clone, Debug)]
|
||||
pub(crate) struct CachedInlineShape(pub(super) Option<ReturnShape>);
|
||||
|
||||
|
|
@ -107,7 +52,7 @@ pub(crate) struct CachedInlineShape(pub(super) Option<ReturnShape>);
|
|||
/// origins. See the module-level note above on origin attribution.
|
||||
#[derive(Clone, Debug)]
|
||||
pub(crate) struct ReturnShape {
|
||||
/// Return value caps (cap bits only — structural).
|
||||
/// Return value caps (cap bits only, structural).
|
||||
pub(super) caps: Cap,
|
||||
/// Origins produced **inside the callee body** (e.g. `Source` op fired
|
||||
/// in the callee). `node` is set to a placeholder; at apply time the
|
||||
|
|
@ -115,31 +60,19 @@ pub(crate) struct ReturnShape {
|
|||
/// stable (from the callee CFG) and preserved as-is.
|
||||
pub(super) internal_origins: SmallVec<[TaintOrigin; 2]>,
|
||||
/// Bit i set = callee's `Param(i)` seed taint reached the return value.
|
||||
/// At apply time, caller's argument origins at matching positions are
|
||||
/// unioned into the applied `VarTaint`. Params beyond index 63 are
|
||||
/// dropped (matching `SmallBitSet` semantics); the capped case is rare
|
||||
/// and still yields cap-correct results.
|
||||
/// At apply time, caller arg origins at matching positions are
|
||||
/// unioned into the applied `VarTaint`. Params beyond 63 are
|
||||
/// dropped (matches `SmallBitSet`); rare and still cap-correct.
|
||||
pub(super) param_provenance: u64,
|
||||
/// Whether the receiver (`SelfParam`) seed taint flowed to the return.
|
||||
/// Whether the receiver (`SelfParam`) seed taint flowed to return.
|
||||
pub(super) receiver_provenance: bool,
|
||||
/// Whether the applied `VarTaint` should be tagged `uses_summary`.
|
||||
pub(super) uses_summary: bool,
|
||||
/// PathFact of the return value observed from the callee's exit
|
||||
/// abstract state. Cache-safe because the callee is inline-analysed
|
||||
/// with [`crate::abstract_interp::PathFact::top`] Param seeds — the
|
||||
/// resulting fact describes the callee's intrinsic narrowing (e.g.
|
||||
/// the `Some` arm of a `sanitize(..) -> Option<String>` body
|
||||
/// proves `dotdot = No`) and does not depend on caller-side
|
||||
/// narrowing of the argument's PathFact. Top when the callee does
|
||||
/// not narrow.
|
||||
/// PathFact of the return value, observed from the callee exit
|
||||
/// state under Top-seeded Params. Describes the callee's intrinsic
|
||||
/// narrowing.
|
||||
pub(super) return_path_fact: crate::abstract_interp::PathFact,
|
||||
/// Per-return-path [`PathFact`] decomposition of the return value.
|
||||
///
|
||||
/// Populated alongside [`Self::return_path_fact`] when the callee
|
||||
/// has ≥2 distinct return blocks with different predicate gates.
|
||||
/// Cache-safe for the same reason as `return_path_fact`: entries
|
||||
/// describe callee-intrinsic narrowing under Top-seeded Params.
|
||||
/// Empty when no per-path distinction was observed.
|
||||
/// Per-return-path decomposition of the return value. Populated
|
||||
/// when the callee has ≥2 return blocks with different predicates.
|
||||
pub(super) return_path_facts: SmallVec<[PathFactReturnEntry; 2]>,
|
||||
}
|
||||
|
||||
|
|
@ -151,50 +84,21 @@ impl CachedInlineShape {
|
|||
}
|
||||
}
|
||||
|
||||
/// Cache for context-sensitive inline analysis results.
|
||||
///
|
||||
/// Keyed by the callee's canonical [`FuncKey`] rather than a bare function
|
||||
/// name so that same-name definitions (e.g. two `process/1` methods on
|
||||
/// different classes in the same file) never share or overwrite each
|
||||
/// other's cache entries. Values are stored as [`CachedInlineShape`]; see
|
||||
/// the module-level note above for why origins are stripped from the
|
||||
/// cache value and re-attributed at apply time.
|
||||
/// Cache for context-sensitive inline analysis results, keyed by
|
||||
/// canonical [`FuncKey`] so same-name definitions in different scopes
|
||||
/// never collide.
|
||||
pub(crate) type InlineCache = HashMap<(FuncKey, ArgTaintSig), CachedInlineShape>;
|
||||
|
||||
/// Drop every entry from an inline cache, marking the start of a new
|
||||
/// convergence epoch.
|
||||
///
|
||||
/// Cross-file SCC fixed-point iteration runs pass 2 repeatedly until the
|
||||
/// merged summaries stop changing. Between iterations the callee-summary
|
||||
/// inputs to inline analysis may have changed, so results cached under a
|
||||
/// stale snapshot must not leak into the next iteration — otherwise the
|
||||
/// engine could converge to a non-fixed-point (reporting a taint result
|
||||
/// that would not reproduce on a fresh run of the same file order).
|
||||
///
|
||||
/// The per-file inline cache is already reconstructed fresh at the top of
|
||||
/// each [`crate::taint::analyse_file`] call, so in the current code this
|
||||
/// call is effectively a no-op plumbing hook. Keeping the method (instead
|
||||
/// of relying on ambient re-construction) makes the lifecycle explicit for
|
||||
/// any future refactor that moves the cache up into the SCC orchestrator.
|
||||
#[allow(dead_code)] // semantic hook; used by tests and future shared-cache refactor
|
||||
/// Drop every entry from the inline cache between SCC fixpoint
|
||||
/// iterations so stale results don't leak forward.
|
||||
#[allow(dead_code)]
|
||||
pub(crate) fn inline_cache_clear_epoch(cache: &mut InlineCache) {
|
||||
cache.clear();
|
||||
}
|
||||
|
||||
/// Set-equal fingerprint of an inline cache, used by the SCC orchestrator
|
||||
/// to detect when cross-file inline analysis has reached a fixed point
|
||||
/// alongside summary convergence.
|
||||
///
|
||||
/// Returns a `HashMap` mapping each `(FuncKey, ArgTaintSig)` cache key to
|
||||
/// the return-value capability bits of its inline result. `HashMap`
|
||||
/// equality is set-equal (unordered), so two caches with the same entries
|
||||
/// compare equal regardless of insertion order.
|
||||
///
|
||||
/// Origins are intentionally omitted — they are non-deterministic across
|
||||
/// callers with identical caps (see the module-level note on origin
|
||||
/// attribution) and would cause the fingerprint to oscillate without
|
||||
/// reflecting a real precision change.
|
||||
#[allow(dead_code)] // observability hook; used by tests and future shared-cache refactor
|
||||
/// Set-equal fingerprint of the inline cache, used by the SCC
|
||||
/// orchestrator to detect convergence.
|
||||
#[allow(dead_code)]
|
||||
pub(crate) fn inline_cache_fingerprint(
|
||||
cache: &InlineCache,
|
||||
) -> HashMap<(FuncKey, ArgTaintSig), u16> {
|
||||
|
|
@ -206,24 +110,11 @@ pub(crate) fn inline_cache_fingerprint(
|
|||
|
||||
/// CFG node metadata embedded in cross-file callee bodies.
|
||||
///
|
||||
/// ## Why a full [`crate::cfg::NodeInfo`] lives here
|
||||
///
|
||||
/// An earlier variant carried only the two fields the symex executor reads
|
||||
/// (`bin_op`, `labels`). That was sufficient for symex but not for the
|
||||
/// taint engine, which reads ~20 fields off `cfg[inst.cfg_node]` across
|
||||
/// `transfer_inst`, `collect_block_events`, `compute_succ_states`, and
|
||||
/// helpers (callee name, `arg_uses`, `arg_callees`, `call_ordinal`,
|
||||
/// `outer_callee`, `kwargs`, `arg_string_literals`, `ast.span`,
|
||||
/// `ast.enclosing_func`, `condition_*`, `all_args_literal`, `catch_param`,
|
||||
/// `parameterized_query`, `in_defer`, `cast_target_type`, `string_prefix`,
|
||||
/// `taint.uses`, `taint.defines`, `taint.extra_defines`,
|
||||
/// `taint.const_text`, …). Rather than shuttling each of those through a
|
||||
/// `CfgView` accessor at every callsite, we store a full serde-able
|
||||
/// [`crate::cfg::NodeInfo`] snapshot here so the indexed-scan path can
|
||||
/// rehydrate an equivalent `Cfg` on load (see [`rebuild_body_graph`]).
|
||||
/// Both scan paths then feed the same `&Cfg` into the taint engine, and
|
||||
/// cross-file inline fires regardless of whether the body came from pass
|
||||
/// 1 or from SQLite.
|
||||
/// Stores a full serde-able [`crate::cfg::NodeInfo`] snapshot rather
|
||||
/// than projecting individual fields, so the indexed-scan path can
|
||||
/// rehydrate an equivalent `Cfg` (see [`rebuild_body_graph`]) and feed
|
||||
/// the same `&Cfg` into the taint engine regardless of whether the
|
||||
/// body came from pass 1 or SQLite.
|
||||
#[derive(Clone, Debug, Default, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
|
||||
pub struct CrossFileNodeMeta {
|
||||
/// Full `NodeInfo` snapshot for this body-local NodeIndex.
|
||||
|
|
@ -268,7 +159,7 @@ pub fn populate_node_meta(body: &mut CalleeSsaBody, cfg: &crate::cfg::Cfg) -> bo
|
|||
// `compute_succ_states` via `cfg[*cond]`, so without it the synthesized
|
||||
// cross-file proxy CFG (`rebuild_body_graph`) ends up too small whenever
|
||||
// the callee body has any conditional branch whose `cond` index sits
|
||||
// past the maximum `inst.cfg_node` index — inline analysis then panics
|
||||
// past the maximum `inst.cfg_node` index, inline analysis then panics
|
||||
// with an out-of-bounds index.
|
||||
let mut referenced: Vec<NodeIndex> = Vec::new();
|
||||
for block in &body.ssa.blocks {
|
||||
|
|
@ -320,7 +211,7 @@ pub fn rebuild_body_graph(body: &mut CalleeSsaBody) -> bool {
|
|||
// index. We fill any unreferenced intermediate indices with
|
||||
// `NodeInfo::default()`.
|
||||
//
|
||||
// Walks both instruction `cfg_node`s and `Terminator::Branch.cond` —
|
||||
// Walks both instruction `cfg_node`s and `Terminator::Branch.cond`;
|
||||
// the latter is read by `compute_succ_states` via `cfg[*cond]`, so
|
||||
// missing it produces an OOB panic when a conditional branch's cond
|
||||
// node has a higher index than any `inst.cfg_node` in the body.
|
||||
|
|
@ -339,7 +230,7 @@ pub fn rebuild_body_graph(body: &mut CalleeSsaBody) -> bool {
|
|||
}
|
||||
}
|
||||
}
|
||||
// Also consider node_meta keys — they should be a subset of the
|
||||
// Also consider node_meta keys, they should be a subset of the
|
||||
// SSA-referenced indices, but be defensive.
|
||||
for &k in body.node_meta.keys() {
|
||||
if k > max_idx {
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -2,7 +2,7 @@
|
|||
//! the original monolithic `ssa_transfer.rs`.
|
||||
//!
|
||||
//! Contains:
|
||||
//! * [`SsaTaintState`] — the per-block lattice value with `values`,
|
||||
//! * [`SsaTaintState`], the per-block lattice value with `values`,
|
||||
//! `validated_must`/`validated_may`, `predicates`, `heap`, `path_env`,
|
||||
//! `abstract_state`.
|
||||
//! * [`BindingKey`] / [`seed_lookup`] for cross-body taint seeding.
|
||||
|
|
@ -25,7 +25,7 @@ use std::collections::HashMap;
|
|||
|
||||
// NOTE: The per-SSA-value origin cap used to be a hardcoded
|
||||
// `MAX_ORIGINS: usize = 4`. It is now governed by the stable
|
||||
// `analysis.engine.max_origins` option (default `32`) — see
|
||||
// `analysis.engine.max_origins` option (default `32`), see
|
||||
// `crate::utils::analysis_options` and [`effective_max_origins`]. The
|
||||
// test-only override below still short-circuits the config read so
|
||||
// `engine_notes_tests.rs` can force a tiny cap to trigger truncation
|
||||
|
|
@ -42,7 +42,7 @@ static WORKLIST_CAP_OVERRIDE: std::sync::atomic::AtomicUsize =
|
|||
std::sync::atomic::AtomicUsize::new(0);
|
||||
/// Records the MAX iteration count observed across every
|
||||
/// `run_ssa_taint_full` call since the most recent reset. Cheaper and
|
||||
/// more useful for regression tests than the last-call value — a cap
|
||||
/// more useful for regression tests than the last-call value, a cap
|
||||
/// hit anywhere in the scan is remembered.
|
||||
pub(super) static MAX_WORKLIST_ITERATIONS: std::sync::atomic::AtomicUsize =
|
||||
std::sync::atomic::AtomicUsize::new(0);
|
||||
|
|
@ -90,7 +90,7 @@ pub fn reset_worklist_observability() {
|
|||
/// force `OriginsTruncated` emission on small fixtures.
|
||||
static MAX_ORIGINS_OVERRIDE: std::sync::atomic::AtomicUsize =
|
||||
std::sync::atomic::AtomicUsize::new(0);
|
||||
/// Total number of origins dropped since the most recent reset — captured
|
||||
/// Total number of origins dropped since the most recent reset, captured
|
||||
/// from `merge_origins` and the post-hoc saturation scan. Used by tests
|
||||
/// to detect truncation events that don't propagate to a finding (e.g.
|
||||
/// when the cap is so tight no taint flow survives to emit a sink event).
|
||||
|
|
@ -136,7 +136,7 @@ pub fn reset_origins_observability() {
|
|||
thread_local! {
|
||||
/// Per-body engine-note collector. Cleared at the start of each
|
||||
/// `analyse_body_with_seed` invocation and drained after
|
||||
/// `run_ssa_taint_full` returns — notes are then attached to every
|
||||
/// `run_ssa_taint_full` returns, notes are then attached to every
|
||||
/// finding emitted from that body. Living as a thread-local avoids
|
||||
/// threading a `&RefCell` through the nearly-10-argument transfer
|
||||
/// struct; inline analysis recursion is intentionally allowed to
|
||||
|
|
@ -148,7 +148,7 @@ thread_local! {
|
|||
/// was suppressed by an SSA-engine path-safety proof (PathFact
|
||||
/// `dotdot=No && absolute=No`). Populated by `is_path_safe_for_sink`
|
||||
/// and consumed by the state-analysis pass to suppress
|
||||
/// `state-unauthed-access` on the same sink — when the taint engine
|
||||
/// `state-unauthed-access` on the same sink, when the taint engine
|
||||
/// has already proved the user-controlled input cannot escape into a
|
||||
/// privileged location, the auth concern on that sink is reduced.
|
||||
/// Reset at start of `analyse_file`, drained before state analysis.
|
||||
|
|
@ -156,7 +156,7 @@ thread_local! {
|
|||
RefCell::new(std::collections::HashSet::new());
|
||||
|
||||
/// File-level set of CFG sink spans where the SSA engine emitted an
|
||||
/// `all_validated` event — every tainted input to the sink passed
|
||||
/// `all_validated` event, every tainted input to the sink passed
|
||||
/// through a recognised validation/sanitisation predicate before
|
||||
/// reaching it. Distinct from `PATH_SAFE_SUPPRESSED_SPANS`, which
|
||||
/// is FILE_IO-scoped and feeds state analysis: this set is
|
||||
|
|
@ -167,7 +167,7 @@ thread_local! {
|
|||
///
|
||||
/// Without this signal the suppression gate has to fall back to
|
||||
/// "function emitted at least one taint-unsanitised-flow finding"
|
||||
/// or "function contains a labelled Sanitizer node" — both of
|
||||
/// or "function contains a labelled Sanitizer node", both of
|
||||
/// which miss validated/dominated/early-return safety where the
|
||||
/// engine cleared the flow without firing or hitting an explicit
|
||||
/// sanitiser.
|
||||
|
|
@ -227,7 +227,7 @@ pub fn take_path_safe_suppressed_spans() -> std::collections::HashSet<(usize, us
|
|||
|
||||
/// Record a sink CFG-node span where the SSA engine proved every
|
||||
/// tainted input was validated (`SsaTaintEvent::all_validated`).
|
||||
/// Cap-agnostic — fires for any sink the engine evaluated and cleared.
|
||||
/// Cap-agnostic, fires for any sink the engine evaluated and cleared.
|
||||
/// Consumed by `TaintSuppressionCtx::build` as positive evidence that
|
||||
/// taint analysis reached this line and proved safety, so AST-pattern
|
||||
/// findings on the same line can be suppressed without misclassifying
|
||||
|
|
@ -263,7 +263,7 @@ pub fn take_all_validated_spans() -> std::collections::HashSet<(usize, usize)> {
|
|||
/// into the seed map always specify the owning body's id; readers look
|
||||
/// up by the scope they know they want (typically their own
|
||||
/// `parent_body_id`, with a fallback to `BodyId(0)` for entries that
|
||||
/// the JS/TS two-level solve has re-keyed onto the top-level scope —
|
||||
/// the JS/TS two-level solve has re-keyed onto the top-level scope;
|
||||
/// see [`crate::taint::ssa_transfer::filter_seed_to_toplevel`]).
|
||||
#[derive(Debug, Clone, Hash, Eq, PartialEq)]
|
||||
pub struct BindingKey {
|
||||
|
|
@ -284,7 +284,7 @@ impl BindingKey {
|
|||
/// Look up a binding in a seed map.
|
||||
///
|
||||
/// Thin wrapper over [`HashMap::get`] retained for call-site readability
|
||||
/// — every seed entry is now exactly scoped to a single `(name,
|
||||
///, every seed entry is now exactly scoped to a single `(name,
|
||||
/// BodyId)`, so the lookup is O(1) with no fallback. Writers that want
|
||||
/// cross-scope reachability must explicitly re-key their entries (see
|
||||
/// [`crate::taint::ssa_transfer::filter_seed_to_toplevel`]).
|
||||
|
|
@ -299,7 +299,7 @@ pub fn seed_lookup<'a>(
|
|||
|
||||
/// Compact key for a heap-field taint cell.
|
||||
///
|
||||
/// `(loc, field)` — `loc` is the abstract location of the *parent*
|
||||
/// `(loc, field)`, `loc` is the abstract location of the *parent*
|
||||
/// (interned by the body's [`crate::pointer::LocInterner`]), `field`
|
||||
/// is the [`FieldId`] of the projected field. The pair survives lattice
|
||||
/// joins / leq comparisons by `Ord`-derived sort.
|
||||
|
|
@ -309,16 +309,16 @@ pub struct FieldTaintKey {
|
|||
pub field: FieldId,
|
||||
}
|
||||
|
||||
/// Pointer-Phase 4 / W4: per-field-cell taint record.
|
||||
/// Per-field-cell taint record.
|
||||
///
|
||||
/// Carries the union of writers' taint for the abstract field cell plus
|
||||
/// two validation channels:
|
||||
/// * `validated_must` — set when *every* writer recorded a value that was
|
||||
/// * `validated_must`, set when *every* writer recorded a value that was
|
||||
/// `validated_must` in its own SSA scope. Lattice join intersects
|
||||
/// (`AND`) — matching the symbol-keyed [`SsaTaintState::validated_must`]
|
||||
/// (`AND`), matching the symbol-keyed [`SsaTaintState::validated_must`]
|
||||
/// semantics for "validated on every path".
|
||||
/// * `validated_may` — set when *any* writer recorded a `validated_may`
|
||||
/// value. Lattice join unions (`OR`) — matching the symbol-keyed
|
||||
/// * `validated_may`, set when *any* writer recorded a `validated_may`
|
||||
/// value. Lattice join unions (`OR`), matching the symbol-keyed
|
||||
/// [`SsaTaintState::validated_may`] semantics for "validated on some
|
||||
/// path".
|
||||
///
|
||||
|
|
@ -332,7 +332,7 @@ pub struct FieldCell {
|
|||
}
|
||||
|
||||
impl FieldCell {
|
||||
/// Construct a cell with no validation bits — convenience for the
|
||||
/// Construct a cell with no validation bits, convenience for the
|
||||
/// pre-W4 callers that don't propagate symbol-level validation.
|
||||
pub fn unvalidated(taint: VarTaint) -> Self {
|
||||
Self {
|
||||
|
|
@ -365,17 +365,17 @@ pub struct SsaTaintState {
|
|||
/// interpretation is disabled (`analysis.engine.abstract_interpretation
|
||||
/// = false`).
|
||||
pub abstract_state: Option<AbstractState>,
|
||||
/// Pointer-Phase 3: per-heap-field taint cells, keyed by
|
||||
/// Per-heap-field taint cells, keyed by
|
||||
/// `(parent_loc, field)`. Sorted by [`FieldTaintKey`] for O(n)
|
||||
/// merge-join. Populated only when the body's
|
||||
/// [`crate::pointer::PointsToFacts`] is available
|
||||
/// (`NYX_POINTER_ANALYSIS=1`); empty otherwise so the lattice join
|
||||
/// is a strict no-op for pointer-disabled runs. Field reads
|
||||
/// (`SsaOp::FieldProj`) consult the cells; field writes record into
|
||||
/// them. Cross-call propagation lands in Phase 5 via the
|
||||
/// them. Cross-call propagation lands during lowering via the
|
||||
/// field-granularity `PointsToSummary`.
|
||||
///
|
||||
/// Cell shape (Phase 4 / W4): [`FieldCell`] carries `taint` plus
|
||||
/// Cell shape: [`FieldCell`] carries `taint` plus
|
||||
/// `validated_must` / `validated_may` flags so validation flows
|
||||
/// through abstract field / element identity.
|
||||
pub field_taint: SmallVec<[(FieldTaintKey, FieldCell); 4]>,
|
||||
|
|
@ -403,7 +403,7 @@ impl SsaTaintState {
|
|||
}
|
||||
}
|
||||
|
||||
/// Pointer-Phase 3: read the field cell at `key`. Returns `None`
|
||||
/// Read the field cell at `key`. Returns `None`
|
||||
/// when no cell has been recorded (caller should treat as
|
||||
/// untainted). O(log n) on the sorted [`field_taint`] list.
|
||||
pub fn get_field(&self, key: FieldTaintKey) -> Option<&FieldCell> {
|
||||
|
|
@ -413,13 +413,13 @@ impl SsaTaintState {
|
|||
.map(|idx| &self.field_taint[idx].1)
|
||||
}
|
||||
|
||||
/// Pointer-Phase 3 / W4: union `t` into the field cell at `key`,
|
||||
/// Union `t` into the field cell at `key`,
|
||||
/// recording per-write `validated_must` / `validated_may` channels.
|
||||
///
|
||||
/// Maintains sorted invariant. No-op when `t.caps` is empty (so the
|
||||
/// lattice bottom stays `[]`). When the cell already exists, the
|
||||
/// validation channels merge with the lattice-join semantics —
|
||||
/// `must` AND-intersects, `may` OR-unions — matching the symbol-
|
||||
/// validation channels merge with the lattice-join semantics:
|
||||
/// `must` AND-intersects, `may` OR-unions, matching the symbol-
|
||||
/// keyed [`SsaTaintState::validated_must`] / `validated_may`
|
||||
/// semantics so a write coming through a non-validated path tears
|
||||
/// down `must` while preserving `may` of any earlier validated path.
|
||||
|
|
@ -563,15 +563,15 @@ impl Lattice for SsaTaintState {
|
|||
}
|
||||
}
|
||||
|
||||
/// Pointer-Phase 3 / W4: merge-join two sorted `field_taint` lists.
|
||||
/// Merge-join two sorted `field_taint` lists.
|
||||
/// Same shape as [`merge_join_ssa_vars`] but keyed on [`FieldTaintKey`]:
|
||||
/// * `taint.caps` — OR-union
|
||||
/// * `taint.origins` — merged with cap-respecting de-dup
|
||||
/// * `taint.uses_summary` — OR-union
|
||||
/// * `validated_must` — AND-intersect (matches the symbol-keyed
|
||||
/// * `taint.caps`, OR-union
|
||||
/// * `taint.origins`, merged with cap-respecting de-dup
|
||||
/// * `taint.uses_summary`, OR-union
|
||||
/// * `validated_must`, AND-intersect (matches the symbol-keyed
|
||||
/// `validated_must` lattice: a path that didn't validate this cell
|
||||
/// breaks the invariant)
|
||||
/// * `validated_may` — OR-union (any path's validation contributes)
|
||||
/// * `validated_may`, OR-union (any path's validation contributes)
|
||||
pub(super) fn merge_join_field_taint(
|
||||
a: &[(FieldTaintKey, FieldCell)],
|
||||
b: &[(FieldTaintKey, FieldCell)],
|
||||
|
|
@ -581,7 +581,7 @@ pub(super) fn merge_join_field_taint(
|
|||
while i < a.len() && j < b.len() {
|
||||
match a[i].0.cmp(&b[j].0) {
|
||||
std::cmp::Ordering::Less => {
|
||||
// Cell present only in `a` — counterpart in `b` is the
|
||||
// Cell present only in `a`, counterpart in `b` is the
|
||||
// lattice bottom (no validation, no taint), so:
|
||||
// must = a.must AND false = false
|
||||
// may = a.may OR false = a.may
|
||||
|
|
@ -637,11 +637,11 @@ pub(super) fn merge_join_field_taint(
|
|||
/// `a ≤ b` for sorted `field_taint` lists. Used by the convergence
|
||||
/// check in [`Lattice::leq`]. Per-cell criteria:
|
||||
///
|
||||
/// * `taint.caps` — `a ⊆ b` (sub-state on caps; matches per-SSA-value
|
||||
/// * `taint.caps`, `a ⊆ b` (sub-state on caps; matches per-SSA-value
|
||||
/// `ssa_vars_leq`).
|
||||
/// * `validated_must` — `a.must ⊇ b.must` (super-state on must; same
|
||||
/// * `validated_must`, `a.must ⊇ b.must` (super-state on must; same
|
||||
/// shape as the symbol-keyed `validated_must` leq).
|
||||
/// * `validated_may` — `a.may ⊆ b.may` (sub-state on may).
|
||||
/// * `validated_may`, `a.may ⊆ b.may` (sub-state on may).
|
||||
///
|
||||
/// When `b` lacks a key present in `a`, `b`'s side is the lattice
|
||||
/// bottom: no caps, no validation. `a`'s caps must also be empty
|
||||
|
|
@ -669,12 +669,12 @@ pub(super) fn field_taint_leq(
|
|||
if (ca.taint.caps - cb.taint.caps).bits() != 0 {
|
||||
return false;
|
||||
}
|
||||
// Must: a ⊇ b — every must-validated key in b is must-validated
|
||||
// Must: a ⊇ b, every must-validated key in b is must-validated
|
||||
// in a. Equivalently: !cb.must OR ca.must.
|
||||
if cb.validated_must && !ca.validated_must {
|
||||
return false;
|
||||
}
|
||||
// May: a ⊆ b — every may-validated key in a is may-validated
|
||||
// May: a ⊆ b, every may-validated key in a is may-validated
|
||||
// in b. Equivalently: !ca.may OR cb.may.
|
||||
if ca.validated_may && !cb.validated_may {
|
||||
return false;
|
||||
|
|
@ -735,7 +735,7 @@ pub(super) fn merge_join_ssa_vars(
|
|||
///
|
||||
/// Ordering is lexicographic over
|
||||
/// `(source_span_start, source_span_end, source_kind_tag, node_index)`.
|
||||
/// `source_span` is the most stable component across bodies — cross-body
|
||||
/// `source_span` is the most stable component across bodies, cross-body
|
||||
/// remapped origins carry the original byte span explicitly; intra-body
|
||||
/// origins default to `(0, 0)` and fall through to the secondary keys.
|
||||
///
|
||||
|
|
@ -760,7 +760,7 @@ fn origin_sort_key(o: &TaintOrigin) -> (usize, usize, u8, usize) {
|
|||
/// Bounded, deterministic insertion of an origin into a sorted origin
|
||||
/// set. Returns `true` when `new` was admitted (or de-duplicated against
|
||||
/// an existing entry), `false` when the cap forced a drop. On drop,
|
||||
/// the origin with the *largest* sort key is evicted first — the caller
|
||||
/// the origin with the *largest* sort key is evicted first, the caller
|
||||
/// sees a survivor set that depends only on the input multiset and
|
||||
/// [`effective_max_origins`], not on insertion order.
|
||||
///
|
||||
|
|
@ -774,7 +774,7 @@ pub(crate) fn push_origin_bounded(
|
|||
) -> bool {
|
||||
// Identity check: same node counts as the same origin. We keep
|
||||
// node-only dedup to match [`ssa_vars_leq`], which compares origin
|
||||
// sets by node membership — widening dedup here without tightening
|
||||
// sets by node membership, widening dedup here without tightening
|
||||
// there would break the monotonicity invariant.
|
||||
if target.iter().any(|o| o.node == new.node) {
|
||||
return true;
|
||||
|
|
@ -814,7 +814,7 @@ pub(crate) fn push_origin_bounded(
|
|||
target.insert(pos, new);
|
||||
true
|
||||
} else {
|
||||
// `new` itself is the worst — drop it instead of the survivor.
|
||||
// `new` itself is the worst, drop it instead of the survivor.
|
||||
false
|
||||
}
|
||||
}
|
||||
|
|
@ -829,7 +829,7 @@ pub(super) fn merge_origins(
|
|||
a: &SmallVec<[TaintOrigin; 2]>,
|
||||
b: &SmallVec<[TaintOrigin; 2]>,
|
||||
) -> SmallVec<[TaintOrigin; 2]> {
|
||||
// Seed the result with `a` — but re-sort defensively in case the
|
||||
// Seed the result with `a`, but re-sort defensively in case the
|
||||
// caller constructed `a` through non-bounded paths. Historically
|
||||
// every write goes through `push_origin_bounded` (or `merge_origins`
|
||||
// itself), so this resort is a no-op on the steady state but costs
|
||||
|
|
@ -911,7 +911,7 @@ pub(super) fn merge_join_ssa_predicates(
|
|||
mod origin_cap_tests {
|
||||
//! Tests for the deterministic, config-driven origin cap. These
|
||||
//! cover the behavior at the `push_origin_bounded` / `merge_origins`
|
||||
//! boundary — the end-to-end engine-note signal is exercised in
|
||||
//! boundary, the end-to-end engine-note signal is exercised in
|
||||
//! `tests/engine_notes_tests.rs`.
|
||||
|
||||
use super::*;
|
||||
|
|
@ -1037,7 +1037,7 @@ mod origin_cap_tests {
|
|||
fn effective_cap_reads_runtime_config_when_override_zero() {
|
||||
// Override takes priority; override=0 falls through to config.
|
||||
// `current()` returns the default (32) when no runtime is
|
||||
// installed — which is the state the rest of the test suite runs
|
||||
// installed, which is the state the rest of the test suite runs
|
||||
// under. Guard that the fallback path reaches 32.
|
||||
let _g = TEST_GUARD.lock().unwrap_or_else(|e| e.into_inner());
|
||||
set_max_origins_override(0);
|
||||
|
|
@ -1053,7 +1053,7 @@ mod origin_cap_tests {
|
|||
|
||||
#[cfg(test)]
|
||||
mod field_taint_tests {
|
||||
//! Pointer-Phase 3: tests for the heap-field taint cells on
|
||||
//! Tests for the heap-field taint cells on
|
||||
//! [`SsaTaintState`]. Cover get/add round-trip, lattice join
|
||||
//! (cap union + origin merge), and `leq` convergence semantics.
|
||||
use super::*;
|
||||
|
|
@ -1202,7 +1202,7 @@ mod field_taint_tests {
|
|||
assert!(cell.validated_must, "a.must AND b.must = true");
|
||||
assert!(cell.validated_may);
|
||||
|
||||
// Now make `b`'s validated_must false — must should drop to
|
||||
// Now make `b`'s validated_must false, must should drop to
|
||||
// false on the join, may stays at OR.
|
||||
let mut c = SsaTaintState::initial();
|
||||
c.add_field(k, taint(Cap::ENV_VAR), false, true);
|
||||
|
|
@ -1213,7 +1213,7 @@ mod field_taint_tests {
|
|||
}
|
||||
|
||||
/// W4 audit: `merge_join_field_taint` OR-unions `validated_may`
|
||||
/// — any path's may-validation contributes to the joined cell.
|
||||
/// (i.e. any path's may-validation contributes to the joined cell).
|
||||
#[test]
|
||||
fn lattice_validated_may_unions_on_join() {
|
||||
let k = key(1, 7);
|
||||
|
|
@ -1275,7 +1275,7 @@ mod field_taint_tests {
|
|||
a.leq(&b),
|
||||
"must super-state and equal caps: a ≤ b should hold"
|
||||
);
|
||||
// Reverse: b.must=false, a.must=true — for b ≤ a, we need
|
||||
// Reverse: b.must=false, a.must=true, for b ≤ a, we need
|
||||
// b.must ⊇ a.must which is false ⊇ true = false. So b ≤ a
|
||||
// must fail.
|
||||
assert!(!b.leq(&a), "b lacks the must invariant a holds");
|
||||
|
|
@ -1289,7 +1289,7 @@ mod field_taint_tests {
|
|||
assert!(!a2.leq(&b2), "a.may=true is NOT ⊆ b.may=false");
|
||||
}
|
||||
|
||||
/// Pointer-Phase 3 / A8 audit: the field_taint lattice is monotone
|
||||
/// The field_taint lattice is monotone
|
||||
/// and converges under a deterministic enumeration of inputs.
|
||||
/// Caps grow (OR), `uses_summary` grows (OR), origins grow modulo
|
||||
/// the cap (merge_origins is bounded). Joins must:
|
||||
|
|
@ -1409,7 +1409,7 @@ mod field_taint_tests {
|
|||
|
||||
/// `field_taint_leq` is the soundness gate for worklist
|
||||
/// convergence: once `next ≤ acc`, the worklist halts. Pin that
|
||||
/// `leq` is consistent with `join` — i.e. `s.leq(s.join(t))` holds
|
||||
/// `leq` is consistent with `join`, i.e. `s.leq(s.join(t))` holds
|
||||
/// for any `s, t`. Without this, the worklist could loop
|
||||
/// indefinitely on inputs whose join produces a state not
|
||||
/// dominated by both inputs.
|
||||
|
|
|
|||
|
|
@ -1,11 +1,11 @@
|
|||
//! SSA function-summary and container-flow extraction.
|
||||
//!
|
||||
//! Extracted from the monolithic `ssa_transfer.rs`. Contains:
|
||||
//! * [`extract_ssa_func_summary`] — runs per-parameter taint probes and
|
||||
//! * [`extract_ssa_func_summary`], runs per-parameter taint probes and
|
||||
//! synthesises an [`crate::summary::ssa_summary::SsaFuncSummary`] with
|
||||
//! source caps, return transforms, per-path transforms, and sink site
|
||||
//! attribution.
|
||||
//! * [`extract_container_flow_summary`] — structural scan for
|
||||
//! * [`extract_container_flow_summary`], structural scan for
|
||||
//! `param_container_to_return` + `param_to_container_store` pairs.
|
||||
//! * Private helpers for predicate-hash summarisation, abstract-transfer
|
||||
//! derivation, callback source detection, and return-type inference.
|
||||
|
|
@ -123,15 +123,15 @@ pub fn extract_ssa_func_summary_full(
|
|||
.collect();
|
||||
|
||||
// Collect all param SSA values to exclude from return cap collection.
|
||||
// Param values persist with their seeded taint throughout the function —
|
||||
// Param values persist with their seeded taint throughout the function;
|
||||
// we only want caps on derived values (call results, assigns) at return.
|
||||
let all_param_values: std::collections::HashSet<SsaValue> =
|
||||
param_info.iter().map(|(_, _, v)| *v).collect();
|
||||
|
||||
// Per-return-block observation captured alongside the aggregate return
|
||||
// caps. Each entry records one return block's exit state — caps
|
||||
// caps. Each entry records one return block's exit state, caps
|
||||
// contributed on that path, path-predicate hash, known_true/false bits,
|
||||
// and the return SSA value's abstract fact — so the per-param loop can
|
||||
// and the return SSA value's abstract fact, so the per-param loop can
|
||||
// emit one [`ReturnPathTransform`] per distinct predicate gate.
|
||||
struct ReturnBlockObs {
|
||||
/// Caps at the return SSA value (or joined live values for
|
||||
|
|
@ -141,7 +141,7 @@ pub fn extract_ssa_func_summary_full(
|
|||
/// (passthrough fallback).
|
||||
param_caps: Cap,
|
||||
/// Deterministic hash of the predicate gate at this return.
|
||||
/// `0` means "no predicate gate" — an unguarded return.
|
||||
/// `0` means "no predicate gate", an unguarded return.
|
||||
predicate_hash: u64,
|
||||
/// `PredicateSummary::known_true` bits intersected across all
|
||||
/// tracked variables at this return. Encoded via
|
||||
|
|
@ -268,7 +268,7 @@ pub fn extract_ssa_func_summary_full(
|
|||
}
|
||||
}
|
||||
} else {
|
||||
// Return(None): implicit return — fall back to all live values.
|
||||
// Return(None): implicit return, fall back to all live values.
|
||||
for (val, taint) in &exit.values {
|
||||
if all_param_values.contains(val) {
|
||||
block_param_caps |= taint.caps;
|
||||
|
|
@ -348,7 +348,7 @@ pub fn extract_ssa_func_summary_full(
|
|||
|
||||
// Per-return-path PathFact decomposition derived from the baseline
|
||||
// probe (no seeded taint). Abstract facts on the return rv are
|
||||
// independent of taint seeding — they describe the function's
|
||||
// independent of taint seeding, they describe the function's
|
||||
// intrinsic narrowing, so the baseline run captures them without
|
||||
// per-param noise.
|
||||
//
|
||||
|
|
@ -388,7 +388,7 @@ pub fn extract_ssa_func_summary_full(
|
|||
let mut param_to_sink: Vec<(usize, SmallVec<[SinkSite; 1]>)> = Vec::new();
|
||||
let mut param_to_sink_param = Vec::new();
|
||||
// Per-param return-path decomposition. Populated only when the param
|
||||
// has ≥2 distinct return-block predicate hashes — a single-return-path
|
||||
// has ≥2 distinct return-block predicate hashes, a single-return-path
|
||||
// callee is already precise via `param_to_return`.
|
||||
let mut param_return_paths: Vec<(
|
||||
usize,
|
||||
|
|
@ -417,7 +417,7 @@ pub fn extract_ssa_func_summary_full(
|
|||
// expressions (e.g. `file._source.uri`) as their own
|
||||
// [`SsaOp::Param`] ops with composite `var_name`s like
|
||||
// `"file._source.uri"`. These phantom Params are the values
|
||||
// actually used as call arguments — not the formal-param SSA
|
||||
// actually used as call arguments, not the formal-param SSA
|
||||
// value the seed targets. Without this, the per-param probe
|
||||
// misses cross-call sinks because the call's arg SSA value is
|
||||
// a phantom Param with no seed entry, so `transfer_inst::Param`
|
||||
|
|
@ -447,7 +447,7 @@ pub fn extract_ssa_func_summary_full(
|
|||
|
||||
let (return_caps, events, _, per_return_obs) = run_probe(seed);
|
||||
|
||||
// Subtract baseline source_caps — we only want param-contributed caps
|
||||
// Subtract baseline source_caps, we only want param-contributed caps
|
||||
let param_return_caps = return_caps & !source_caps;
|
||||
|
||||
if !param_return_caps.is_empty() {
|
||||
|
|
@ -464,7 +464,7 @@ pub fn extract_ssa_func_summary_full(
|
|||
// observed return block, derive a `ReturnPathTransform` mirroring
|
||||
// the aggregate logic (prefer derived caps, fall back to param
|
||||
// caps, strip baseline source caps). Only emit when ≥2 distinct
|
||||
// predicate hashes are present — a single-hash summary adds no
|
||||
// predicate hashes are present, a single-hash summary adds no
|
||||
// signal over the aggregate `param_to_return`.
|
||||
if per_return_obs.len() >= 2 {
|
||||
let mut per_path: SmallVec<[crate::summary::ssa_summary::ReturnPathTransform; 2]> =
|
||||
|
|
@ -477,7 +477,7 @@ pub fn extract_ssa_func_summary_full(
|
|||
};
|
||||
let block_contributed = block_return_caps & !source_caps;
|
||||
let transform_kind = if block_contributed.is_empty() {
|
||||
// No caps on this path — param does not reach return
|
||||
// No caps on this path, param does not reach return
|
||||
// under this predicate. A `StripBits(all)` records
|
||||
// "all bits cleared" so downstream join preserves the
|
||||
// disparity with other paths.
|
||||
|
|
@ -513,9 +513,31 @@ pub fn extract_ssa_func_summary_full(
|
|||
}
|
||||
}
|
||||
|
||||
// Collect sink caps + primary-location sites from events + per-arg-position detail
|
||||
// Collect sink caps + primary-location sites from events + per-arg-position detail.
|
||||
//
|
||||
// Skip events flagged `all_validated`: every tainted SSA value
|
||||
// that reached the sink was already proved validated by a
|
||||
// dominating predicate (AllowlistCheck / TypeCheck /
|
||||
// ValidationCall, including the indirect-validator branch
|
||||
// narrowing for `validate*` / `is_valid*` callees). Those
|
||||
// events would have been dropped by `ssa_events_to_findings` at
|
||||
// the per-file finding step; carrying them into
|
||||
// `param_to_sink` / `param_to_sink_param` re-publishes a sink
|
||||
// attribution callers can no longer suppress, because the
|
||||
// caller can't see the validator that lives inside the
|
||||
// callee body.
|
||||
//
|
||||
// Strict-additive: `all_validated` is set only when every
|
||||
// tainted operand at the sink has its `var_name` in
|
||||
// `state.validated_may`, single-path single-validator helpers
|
||||
// cleanly skip; mixed-tainted-with-some-unvalidated events
|
||||
// still propagate. Closes the helper-summary precision gap
|
||||
// surfaced by Novu CVE GHSA-4x48-cgf9-q33f.
|
||||
let mut param_sites: SmallVec<[SinkSite; 1]> = SmallVec::new();
|
||||
for event in &events {
|
||||
if event.all_validated {
|
||||
continue;
|
||||
}
|
||||
for pos in extract_sink_arg_positions(event, ssa) {
|
||||
param_to_sink_param.push((idx, pos, event.sink_caps));
|
||||
}
|
||||
|
|
@ -601,14 +623,14 @@ pub fn extract_ssa_func_summary_full(
|
|||
|
||||
// Per-parameter abstract-domain transfers.
|
||||
//
|
||||
// Derived structurally from the SSA body — no additional taint probes.
|
||||
// Derived structurally from the SSA body, no additional taint probes.
|
||||
// Three-step inference per parameter:
|
||||
// 1. Identity: return SSA value at every return block traces back to
|
||||
// this parameter (possibly through assigns / phi merges all feeding
|
||||
// from the same param).
|
||||
// 2. Callee-intrinsic bound: baseline `return_abstract` carries a
|
||||
// concrete fact (bounded interval or known prefix) that holds
|
||||
// regardless of caller input — record it once per parameter as
|
||||
// regardless of caller input, record it once per parameter as
|
||||
// `Clamped` / `LiteralPrefix` so the caller sees the bound even
|
||||
// when it has no abstract info on its own argument.
|
||||
// 3. Top: default; the entry is omitted (empty transfer is meaningless).
|
||||
|
|
@ -630,14 +652,14 @@ pub fn extract_ssa_func_summary_full(
|
|||
param_return_paths,
|
||||
return_path_facts,
|
||||
points_to,
|
||||
// Pointer-Phase 5 extension — empty until the field-granularity
|
||||
// extension, empty until the field-granularity
|
||||
// extractor is wired (`NYX_POINTER_ANALYSIS=1` only). Default
|
||||
// path stays bit-identical to today.
|
||||
field_points_to: crate::summary::points_to::FieldPointsToSummary::empty(),
|
||||
// Populated post-extraction in
|
||||
// `taint::lower_all_functions_from_bodies` once SSA optimisation
|
||||
// has computed `opt.type_facts`. Empty here means the
|
||||
// extractor itself doesn't carry receiver-type info — the
|
||||
// extractor itself doesn't carry receiver-type info, the
|
||||
// caller patches it in.
|
||||
typed_call_receivers: Vec::new(),
|
||||
}
|
||||
|
|
@ -699,14 +721,14 @@ pub(super) fn summarise_return_predicates(state: &SsaTaintState) -> (u64, u8, u8
|
|||
///
|
||||
/// `return_abstract` is the callee's intrinsic baseline (from the no-seed
|
||||
/// probe). When present, it describes a fact that holds for the return
|
||||
/// regardless of parameter input — so it can be attached as a
|
||||
/// regardless of parameter input, so it can be attached as a
|
||||
/// `Clamped` / `LiteralPrefix` transform to every parameter that flows to
|
||||
/// the return.
|
||||
///
|
||||
/// Identity detection is structural: walk the return values back through
|
||||
/// [`SsaOp::Assign`] / [`SsaOp::Phi`] chains (bounded) and check whether
|
||||
/// every leaf resolves to the same [`SsaOp::Param`]. The trace is cheap
|
||||
/// and can only produce `Identity` for passthrough callees — anything
|
||||
/// and can only produce `Identity` for passthrough callees, anything
|
||||
/// more complex degrades to the baseline fact or `Top`.
|
||||
fn derive_abstract_transfer(
|
||||
ssa: &SsaBody,
|
||||
|
|
@ -780,7 +802,7 @@ fn derive_abstract_transfer(
|
|||
}
|
||||
|
||||
// Derive a baseline-invariant transform from `return_abstract`. This is
|
||||
// the "callee intrinsic" fact that always holds — each parameter that
|
||||
// the "callee intrinsic" fact that always holds, each parameter that
|
||||
// flows to the return gets it attached as the conservative transfer.
|
||||
let baseline_invariant: Option<AbstractTransfer> = return_abstract.map(|av| {
|
||||
let interval = match (av.interval.lo, av.interval.hi) {
|
||||
|
|
@ -805,7 +827,7 @@ fn derive_abstract_transfer(
|
|||
} else if let Some(base) = baseline_invariant.as_ref() {
|
||||
// Baseline intrinsic bound applies to every parameter that could
|
||||
// reach the return. We conservatively attach it to all params
|
||||
// — at apply time the caller meets it with the real return
|
||||
// here; at apply time the caller meets it with the real return
|
||||
// abstract (also from this same summary), so double-counting
|
||||
// would collapse to the tighter of the two.
|
||||
transfer = base.clone();
|
||||
|
|
@ -879,7 +901,7 @@ fn infer_summary_return_type(
|
|||
lang: Lang,
|
||||
) -> Option<crate::ssa::type_facts::TypeKind> {
|
||||
// Find blocks with Return terminators, then look at the last defined value
|
||||
// in those blocks — if it's a Call with a known constructor, that's our type.
|
||||
// in those blocks, if it's a Call with a known constructor, that's our type.
|
||||
for block in &ssa.blocks {
|
||||
if !matches!(block.terminator, Terminator::Return(_)) {
|
||||
continue;
|
||||
|
|
@ -965,7 +987,7 @@ pub(crate) fn extract_container_flow_summary(
|
|||
// `trace_to_param` will happily return any `SsaOp::Param { index }`, but
|
||||
// scoped lowering synthesises `Param` ops for external captures (module
|
||||
// imports, free identifiers) at indices beyond the formal parameter count.
|
||||
// Those must not enter the summary — the key's arity only covers formal
|
||||
// Those must not enter the summary, the key's arity only covers formal
|
||||
// params, and an out-of-range index trips `ssa_summary_fits_arity`, forcing
|
||||
// the reconciliation probe to generate a synthetic disambiguator that no
|
||||
// caller will ever look up.
|
||||
|
|
@ -1035,7 +1057,7 @@ pub(crate) fn extract_container_flow_summary(
|
|||
};
|
||||
|
||||
// Trace container to positional param (SelfParam → None, so
|
||||
// when the container is the receiver we skip — the caller
|
||||
// when the container is the receiver we skip, the caller
|
||||
// tracks that via `receiver_to_container_store` if needed).
|
||||
// Same arity filter as above: reject synthetic Param ops that
|
||||
// were injected for free captures.
|
||||
|
|
|
|||
|
|
@ -221,7 +221,7 @@ mod cross_file_tests {
|
|||
mod inline_cache_epoch_tests {
|
||||
//! Hooks for cross-file SCC joint fixed-point iteration.
|
||||
//!
|
||||
//! These do not exercise the full inline pipeline — they lock down the
|
||||
//! These do not exercise the full inline pipeline, they lock down the
|
||||
//! semantic contract of [`inline_cache_clear_epoch`] and
|
||||
//! [`inline_cache_fingerprint`] so the SCC orchestrator can rely on:
|
||||
//!
|
||||
|
|
@ -229,7 +229,7 @@ mod inline_cache_epoch_tests {
|
|||
//! * `fingerprint` is deterministic across equivalent caches (same
|
||||
//! keys → same bytes). Two caches with identical entries produce
|
||||
//! identical fingerprints regardless of insertion order.
|
||||
//! * `fingerprint` changes when return caps change — the signal the
|
||||
//! * `fingerprint` changes when return caps change, the signal the
|
||||
//! orchestrator will use to detect inline-cache convergence.
|
||||
|
||||
use super::super::*;
|
||||
|
|
@ -675,7 +675,7 @@ mod worklist_tests {
|
|||
|
||||
#[test]
|
||||
fn dense_successors_no_duplicates() {
|
||||
// Many successors, some repeated — old O(n) contains() would be slow here
|
||||
// Many successors, some repeated, old O(n) contains() would be slow here
|
||||
let mut wl = VecDeque::new();
|
||||
let mut in_wl = HashSet::new();
|
||||
|
||||
|
|
@ -735,8 +735,8 @@ mod primary_sink_location_tests {
|
|||
//! [`SsaTaintEvent::primary_sink_site`] →
|
||||
//! [`crate::taint::Finding::primary_location`].
|
||||
//!
|
||||
//! The test is deliberately low-level — it wires up synthetic SSA and
|
||||
//! drives the three emission stages directly — so any future refactor
|
||||
//! The test is deliberately low-level, it wires up synthetic SSA and
|
||||
//! drives the three emission stages directly, so any future refactor
|
||||
//! that drops the site on the floor between stages fails here rather
|
||||
//! than only at the corpus/benchmark layer.
|
||||
use super::super::*;
|
||||
|
|
@ -841,7 +841,7 @@ mod primary_sink_location_tests {
|
|||
/// If this fails, something on the summary→event→finding path
|
||||
/// (`pick_primary_sink_sites`, `emit_ssa_taint_events`, or
|
||||
/// `ssa_events_to_findings`) has silently stopped forwarding
|
||||
/// coordinates. Fixing that path — not this test — is the right
|
||||
/// coordinates. Fixing that path, not this test, is the right
|
||||
/// response.
|
||||
#[test]
|
||||
fn ssa_summary_sinksite_surfaces_as_finding_primary_location() {
|
||||
|
|
@ -863,7 +863,7 @@ mod primary_sink_location_tests {
|
|||
};
|
||||
|
||||
// Drive the three emission stages with the summary's own
|
||||
// `param_to_sink` — that is what summary resolution feeds in the
|
||||
// `param_to_sink`, that is what summary resolution feeds in the
|
||||
// real pipeline.
|
||||
let tainted: Vec<(SsaValue, Cap, SmallVec<[TaintOrigin; 2]>)> = vec![(
|
||||
SsaValue(0),
|
||||
|
|
@ -944,7 +944,7 @@ mod goto_succ_propagation_tests {
|
|||
|
||||
#[test]
|
||||
fn goto_propagates_to_every_succ_on_three_way_collapse() {
|
||||
// Build a block with Terminator::Goto(1) but succs = [1, 2, 3] — the
|
||||
// Build a block with Terminator::Goto(1) but succs = [1, 2, 3], the
|
||||
// shape lowering emits for a 3-way fanout.
|
||||
let block = SsaBlock {
|
||||
id: BlockId(0),
|
||||
|
|
@ -1001,7 +1001,7 @@ mod goto_succ_propagation_tests {
|
|||
pointer_facts: None,
|
||||
};
|
||||
|
||||
// A non-bottom exit state — the test only cares that *every* succ
|
||||
// A non-bottom exit state, the test only cares that *every* succ
|
||||
// receives a clone of it, so any distinguishable state works.
|
||||
let mut exit_state = SsaTaintState::initial();
|
||||
exit_state.values.push((
|
||||
|
|
@ -1259,7 +1259,7 @@ mod goto_succ_propagation_tests {
|
|||
fn is_path_safe_for_sink_unknown_axis_returns_false() {
|
||||
use crate::abstract_interp::PathFact;
|
||||
|
||||
// Only dotdot is cleared — absolute stays Maybe → not path-safe.
|
||||
// Only dotdot is cleared, absolute stays Maybe → not path-safe.
|
||||
let half_fact = PathFact::default().with_dotdot_cleared();
|
||||
assert!(!half_fact.is_path_safe());
|
||||
}
|
||||
|
|
@ -1328,9 +1328,9 @@ mod goto_succ_propagation_tests {
|
|||
}
|
||||
}
|
||||
|
||||
// ── Phase 4.2: receiver_candidates_for_type_lookup walks FieldProj ──────
|
||||
// ── receiver_candidates_for_type_lookup walks FieldProj ──────
|
||||
//
|
||||
// After Phase 2 SSA decomposition, `c.client.send(req)` lowers to
|
||||
// After SSA decomposition, `c.client.send(req)` lowers to
|
||||
// v_c = Param("c", 0)
|
||||
// v_client = FieldProj(v_c, "client")
|
||||
// v_call = Call("send", receiver: v_client, args: [v_req])
|
||||
|
|
@ -1430,7 +1430,7 @@ mod receiver_candidates_field_proj_tests {
|
|||
fn field_proj_receiver_walks_to_typed_root_in_go() {
|
||||
// Go is not Rust, so pre-Phase-4 the candidate walk would have
|
||||
// returned ONLY the immediate receiver (v2 = FieldProj). With
|
||||
// Phase 4 we walk through FieldProj.receiver to recover v0 (the
|
||||
// the FieldProj walk we go through FieldProj.receiver to recover v0 (the
|
||||
// typed root `c`).
|
||||
let body = body_with_field_proj_chain();
|
||||
let cands =
|
||||
|
|
@ -1516,7 +1516,7 @@ mod receiver_candidates_field_proj_tests {
|
|||
}
|
||||
}
|
||||
|
||||
// ── Phase 6 hierarchy fan-out: ResolvedSummary union semantics ──────────
|
||||
// ── Hierarchy: ResolvedSummary union semantics ──────────
|
||||
//
|
||||
// `merge_resolved_summaries_fanout` is invoked at virtual-dispatch call
|
||||
// sites where the receiver's static type has multiple concrete
|
||||
|
|
@ -1553,7 +1553,7 @@ mod fanout_merge_tests {
|
|||
}
|
||||
}
|
||||
|
||||
/// B1 — caps that grow taint signal (source/sink/receiver_to_sink)
|
||||
/// B1, caps that grow taint signal (source/sink/receiver_to_sink)
|
||||
/// are unioned. sanitizer_caps are intersected so only bits
|
||||
/// stripped by EVERY implementer count as cleared at the call site.
|
||||
#[test]
|
||||
|
|
@ -1581,7 +1581,7 @@ mod fanout_merge_tests {
|
|||
);
|
||||
}
|
||||
|
||||
/// B2 — propagates_taint is OR'd; propagating_params is the union
|
||||
/// B2, propagates_taint is OR'd; propagating_params is the union
|
||||
/// (any implementer's propagator counts).
|
||||
#[test]
|
||||
fn merge_propagation_unions() {
|
||||
|
|
@ -1600,7 +1600,7 @@ mod fanout_merge_tests {
|
|||
assert_eq!(params, vec![0, 1, 2]);
|
||||
}
|
||||
|
||||
/// B3 — param_to_sink merges per-parameter caps (OR). An impl
|
||||
/// B3, param_to_sink merges per-parameter caps (OR). An impl
|
||||
/// that adds a sink at param N composes with another impl that
|
||||
/// adds a different cap at the same N.
|
||||
#[test]
|
||||
|
|
@ -1630,7 +1630,7 @@ mod fanout_merge_tests {
|
|||
);
|
||||
}
|
||||
|
||||
/// B4 — param_to_sink_sites merges per-parameter site lists with
|
||||
/// B4, param_to_sink_sites merges per-parameter site lists with
|
||||
/// PartialEq dedup. The same site appearing in both impls (e.g.
|
||||
/// inherited definition) must not be reported twice.
|
||||
#[test]
|
||||
|
|
@ -1675,7 +1675,7 @@ mod fanout_merge_tests {
|
|||
assert!(sites.iter().any(|s| s == &unique_b));
|
||||
}
|
||||
|
||||
/// B5 — SSA-precision fields are dropped on disagreement. Two
|
||||
/// B5, SSA-precision fields are dropped on disagreement. Two
|
||||
/// summaries with different `return_type` collapse to None;
|
||||
/// agreement is preserved.
|
||||
#[test]
|
||||
|
|
@ -1704,7 +1704,7 @@ mod fanout_merge_tests {
|
|||
);
|
||||
}
|
||||
|
||||
/// B6 — abstract_transfer + param_return_paths drop on
|
||||
/// B6, abstract_transfer + param_return_paths drop on
|
||||
/// disagreement (precise predicate-path data is not safely
|
||||
/// composable across distinct function bodies).
|
||||
#[test]
|
||||
|
|
@ -1737,7 +1737,7 @@ mod fanout_merge_tests {
|
|||
);
|
||||
}
|
||||
|
||||
/// B7 — empty + empty = empty (no panic on degenerate inputs).
|
||||
/// B7, empty + empty = empty (no panic on degenerate inputs).
|
||||
#[test]
|
||||
fn merge_empties_is_identity() {
|
||||
let m = merge_resolved_summaries_fanout(empty(), empty());
|
||||
|
|
@ -1748,7 +1748,7 @@ mod fanout_merge_tests {
|
|||
}
|
||||
}
|
||||
|
||||
// ── Pointer-Phase 3 / W1: synthetic field-WRITE round-trip ──────────────
|
||||
// ── Synthetic field-WRITE round-trip ──────────────
|
||||
//
|
||||
// SSA lowering populates `SsaBody.field_writes` with entries that lift a
|
||||
// synthetic base-update Assign (`obj.f = rhs`) into a structural field
|
||||
|
|
@ -1918,8 +1918,8 @@ mod field_write_tests {
|
|||
crate::pointer::analyse_body(body, crate::cfg::BodyId(7))
|
||||
}
|
||||
|
||||
/// Reuse `make_cfg`'s nodes — the body's instructions all reference
|
||||
/// them — so `transfer_inst` can index `cfg[cfg_node]`.
|
||||
/// Reuse `make_cfg`'s nodes, the body's instructions all reference
|
||||
/// them, so `transfer_inst` can index `cfg[cfg_node]`.
|
||||
fn drive(body: &SsaBody, pf: &PointsToFacts) -> SsaTaintState {
|
||||
// We need a CFG that contains the bodies' cfg_nodes.
|
||||
let (cfg, _, _, _, _) = make_cfg();
|
||||
|
|
@ -1998,7 +1998,7 @@ mod field_write_tests {
|
|||
|
||||
/// Pointer-disabled run (`pointer_facts: None`): no field cell is
|
||||
/// recorded, no taint flows through the `obj.cache` projection. The
|
||||
/// strict-additive contract — pointer-disabled behaviour is the
|
||||
/// strict-additive contract, pointer-disabled behaviour is the
|
||||
/// pre-W1 baseline.
|
||||
#[test]
|
||||
fn pointer_disabled_run_produces_no_field_taint() {
|
||||
|
|
@ -2047,8 +2047,8 @@ mod field_write_tests {
|
|||
state.field_taint.is_empty(),
|
||||
"pointer-disabled run must not populate field_taint",
|
||||
);
|
||||
// FieldProj reads still produce the receiver's existing taint —
|
||||
// none — so no entry for SsaValue(3) either.
|
||||
// FieldProj reads still produce the receiver's existing taint
|
||||
// (none), so no entry for SsaValue(3) either.
|
||||
assert!(state.get(SsaValue(3)).is_none());
|
||||
let _ = cache_id;
|
||||
}
|
||||
|
|
@ -2059,7 +2059,7 @@ mod field_write_tests {
|
|||
/// projected value's symbol-level `validated_must` from the cell.
|
||||
///
|
||||
/// This is the key invariant: validation flows *through* abstract
|
||||
/// field identity — the read recovers what the write recorded.
|
||||
/// field identity, the read recovers what the write recorded.
|
||||
#[test]
|
||||
fn write_then_read_preserves_validated_must() {
|
||||
let (body, cache_id) = make_body();
|
||||
|
|
@ -2208,7 +2208,7 @@ mod field_write_tests {
|
|||
},
|
||||
};
|
||||
let pf = crate::pointer::analyse_body(&body, crate::cfg::BodyId(0));
|
||||
// v0 is Const → empty pt — the hook should not insert anything.
|
||||
// v0 is Const → empty pt, the hook should not insert anything.
|
||||
assert!(
|
||||
pf.pt(SsaValue(0)).is_empty(),
|
||||
"Const value should have empty pt set",
|
||||
|
|
@ -2259,7 +2259,7 @@ mod field_write_tests {
|
|||
}
|
||||
}
|
||||
|
||||
// ── Pointer-Phase 4 / W2: container ELEM write/read round-trip ──────────
|
||||
// ── Container ELEM write/read round-trip ──────────
|
||||
//
|
||||
// Container methods like `arr.push(v)` / `arr.shift()` flow per-element
|
||||
// taint through the `Field(_, ELEM)` cells on `SsaTaintState`. These
|
||||
|
|
@ -2351,7 +2351,7 @@ mod container_elem_tests {
|
|||
state
|
||||
}
|
||||
|
||||
/// `arr.push(source()); arr.shift()` — the read picks the source's
|
||||
/// `arr.push(source()); arr.shift()`, the read picks the source's
|
||||
/// caps up via the ELEM cell.
|
||||
#[test]
|
||||
fn container_write_then_read_round_trips_taint() {
|
||||
|
|
@ -2456,7 +2456,7 @@ mod container_elem_tests {
|
|||
);
|
||||
|
||||
// Drive the transfer. `e := arr.shift()` goes through the
|
||||
// existing Call arm — the W2 path is the *write* on `push`.
|
||||
// existing Call arm, the W2 path is the *write* on `push`.
|
||||
// The element-read side already exists on `analyse_body`; the
|
||||
// taint engine doesn't yet read field cells through call-result
|
||||
// paths (Call args are walked by Call's own argument-taint
|
||||
|
|
@ -2482,7 +2482,7 @@ mod container_elem_tests {
|
|||
}
|
||||
}
|
||||
|
||||
/// W4: `arr.push(validate(src)); arr.shift()` — the push records
|
||||
/// W4: `arr.push(validate(src)); arr.shift()`, the push records
|
||||
/// `validated_must = true` on the ELEM cell because the pushed
|
||||
/// value's symbol carried `validated_must`. The shift call result
|
||||
/// reads through the cell and seeds the result symbol's
|
||||
|
|
@ -2761,7 +2761,7 @@ mod container_elem_tests {
|
|||
}
|
||||
}
|
||||
|
||||
// ── Pointer-Phase 5 / W3: cross-call field-points-to application ────────
|
||||
// ── Cross-call field-points-to application ────────
|
||||
//
|
||||
// `apply_field_points_to_writes` is the resolver-side hook that turns
|
||||
// callee-summary `field_points_to.param_field_writes` into caller-side
|
||||
|
|
@ -2783,7 +2783,7 @@ mod cross_call_field_tests {
|
|||
use smallvec::smallvec;
|
||||
use std::collections::HashMap;
|
||||
|
||||
/// W3 / W4: shared empty interner — these unit tests don't seed
|
||||
/// W3 / W4: shared empty interner, these unit tests don't seed
|
||||
/// validation bits, so a fresh interner is sufficient for the
|
||||
/// `interner` parameter on `apply_field_points_to_writes`.
|
||||
fn empty_interner() -> SymbolInterner {
|
||||
|
|
@ -2861,23 +2861,23 @@ mod cross_call_field_tests {
|
|||
state
|
||||
}
|
||||
|
||||
/// Callee summary with `param_field_writes[(0, ["cache"])]` —
|
||||
/// Callee summary with `param_field_writes[(0, ["cache"])]`:
|
||||
/// "callee writes cache field on parameter 0 (obj)".
|
||||
/// Caller passes `(obj, source)` to this callee — `arg 0 = obj`,
|
||||
/// Caller passes `(obj, source)` to this callee, `arg 0 = obj`,
|
||||
/// but the W3 hook resolves the *value at arg position 0* as the
|
||||
/// receiver of the field write, populating its pt's cells.
|
||||
///
|
||||
/// We model the caller as `callee(obj, source)` with arg 0 = obj
|
||||
/// (the receiver) and arg 1 = source (the value being written).
|
||||
/// The callee's signature is `fn store(obj, value) { obj.cache = value; }`
|
||||
/// — so the field write on param 0 is keyed by `pt(obj)` and the
|
||||
/// so the field write on param 0 is keyed by `pt(obj)` and the
|
||||
/// taint comes from arg 1's caps. Our helper conservatively unions
|
||||
/// every arg's taint into the cell — which over-tints (for this
|
||||
/// every arg's taint into the cell, which over-tints (for this
|
||||
/// shape, arg 0's pt member becomes the loc, with arg 0's own taint
|
||||
/// applied), but is sound.
|
||||
///
|
||||
/// To make the test precise, we model the simpler shape `fn store(obj)
|
||||
/// { obj.cache = source(); }` — callee writes a literal source into
|
||||
/// { obj.cache = source(); }`, callee writes a literal source into
|
||||
/// `obj.cache`, with no value parameter. Then the caller-side hook
|
||||
/// only sees param 0's taint (zero), so the cell is empty and the
|
||||
/// test fails.
|
||||
|
|
@ -2886,7 +2886,7 @@ mod cross_call_field_tests {
|
|||
/// at the call site arg 0 carries source taint. The hook then
|
||||
/// records (pt(arg0_value), cache) ← arg0_value's taint. In a
|
||||
/// real callee this corresponds to "callee writes its parameter
|
||||
/// value into a self.cache field internally" — but the spread we
|
||||
/// value into a self.cache field internally", but the spread we
|
||||
/// validate is just substitute-and-mirror.
|
||||
#[test]
|
||||
fn cross_call_writes_into_param_field_cell() {
|
||||
|
|
@ -2947,7 +2947,7 @@ mod cross_call_field_tests {
|
|||
fn cross_call_receiver_field_uses_max_sentinel() {
|
||||
let (body, cache_id, pf) = caller_body();
|
||||
let mut state = SsaTaintState::initial();
|
||||
// Seed receiver with taint — SsaValue(0) is the param/receiver.
|
||||
// Seed receiver with taint, SsaValue(0) is the param/receiver.
|
||||
state.set(
|
||||
SsaValue(0),
|
||||
VarTaint {
|
||||
|
|
@ -3026,7 +3026,7 @@ mod cross_call_field_tests {
|
|||
);
|
||||
}
|
||||
|
||||
/// Field names the caller never interned are skipped silently —
|
||||
/// Field names the caller never interned are skipped silently:
|
||||
/// no FieldProj read in the caller could observe such a cell.
|
||||
#[test]
|
||||
fn cross_call_unknown_field_name_skipped() {
|
||||
|
|
@ -3062,7 +3062,7 @@ mod cross_call_field_tests {
|
|||
);
|
||||
}
|
||||
|
||||
/// Overflow summary is treated conservatively as no-op — the
|
||||
/// Overflow summary is treated conservatively as no-op, the
|
||||
/// engine cannot soundly cell-flood, so it skips entirely.
|
||||
#[test]
|
||||
fn cross_call_overflow_summary_is_noop() {
|
||||
|
|
@ -3117,7 +3117,7 @@ mod cross_call_field_tests {
|
|||
//
|
||||
// `SsaTaintState.add_field` already routes through `merge_origins`, but
|
||||
// the FieldProj READ path used to walk the cell's origins inline,
|
||||
// deduping by node only — meaning a cell with N>cap origins surfaced
|
||||
// deduping by node only, meaning a cell with N>cap origins surfaced
|
||||
// all N to the projected SSA value. After A7, the read path uses
|
||||
// `push_origin_bounded`, ensuring the cap-driven survivor selection
|
||||
// applies on read too.
|
||||
|
|
@ -3225,7 +3225,7 @@ mod field_taint_origin_cap_tests {
|
|||
let (body, cache_id, cfg, _n_proj) = build_body();
|
||||
let pf = crate::pointer::analyse_body(&body, crate::cfg::BodyId(0));
|
||||
|
||||
// Pre-populate the (Param, cache) cell with 4 origins —
|
||||
// Pre-populate the (Param, cache) cell with 4 origins,
|
||||
// 2× the cap. The `add_field` path already truncates via
|
||||
// `merge_origins`, so we go through it 4 times to grow.
|
||||
let mut state = SsaTaintState::initial();
|
||||
|
|
@ -3326,14 +3326,14 @@ mod field_taint_origin_cap_tests {
|
|||
// the field_taint cells.
|
||||
//
|
||||
// Two scenarios:
|
||||
// 1. `must_validated_flows_through_join` — both predecessor blocks
|
||||
// 1. `must_validated_flows_through_join`, both predecessor blocks
|
||||
// write the cell with `validated_must = true`. After the join, the
|
||||
// cell at the read site retains `validated_must = true` (AND
|
||||
// intersection of two `true`s).
|
||||
// 2. `early_exit_branch_drops_validated_must` — only one predecessor
|
||||
// 2. `early_exit_branch_drops_validated_must`, only one predecessor
|
||||
// writes; the other reaches the read block via an empty branch.
|
||||
// After the join, the cell has `validated_must = false`,
|
||||
// `validated_may = true` — W4's must/may intersection in action.
|
||||
// `validated_may = true`, W4's must/may intersection in action.
|
||||
#[cfg(test)]
|
||||
mod pointer_lattice_worklist_tests {
|
||||
use super::super::*;
|
||||
|
|
@ -3425,7 +3425,7 @@ mod pointer_lattice_worklist_tests {
|
|||
succs: smallvec![BlockId(1), BlockId(2)],
|
||||
};
|
||||
|
||||
// Block 1: synth `obj.cache = src` — field_writes[v2] = (v0, cache_id)
|
||||
// Block 1: synth `obj.cache = src`, field_writes[v2] = (v0, cache_id)
|
||||
let block1 = SsaBlock {
|
||||
id: BlockId(1),
|
||||
phis: vec![],
|
||||
|
|
@ -3441,7 +3441,7 @@ mod pointer_lattice_worklist_tests {
|
|||
succs: smallvec![BlockId(3)],
|
||||
};
|
||||
|
||||
// Block 2: identical synth write — keeps both branches
|
||||
// Block 2: identical synth write, keeps both branches
|
||||
// contributing the same cell so AND-intersection of must
|
||||
// preserves true on the join.
|
||||
let block2 = SsaBlock {
|
||||
|
|
@ -3459,7 +3459,7 @@ mod pointer_lattice_worklist_tests {
|
|||
succs: smallvec![BlockId(3)],
|
||||
};
|
||||
|
||||
// Block 3: read — FieldProj uses obj from a phi between B1 and B2.
|
||||
// Block 3: read, FieldProj uses obj from a phi between B1 and B2.
|
||||
let block3 = SsaBlock {
|
||||
id: BlockId(3),
|
||||
phis: vec![SsaInst {
|
||||
|
|
@ -3634,7 +3634,7 @@ mod pointer_lattice_worklist_tests {
|
|||
);
|
||||
}
|
||||
|
||||
/// A2.b: early-exit branch — only B1 writes, B2 reaches B3 via
|
||||
/// A2.b: early-exit branch, only B1 writes, B2 reaches B3 via
|
||||
/// an empty body. After the join, the cell exists (B1 wrote
|
||||
/// it), but `validated_must` is `false` (B2 didn't write, the
|
||||
/// orphan-side merge clears `must` per the W4 lattice rule);
|
||||
|
|
@ -3642,7 +3642,7 @@ mod pointer_lattice_worklist_tests {
|
|||
///
|
||||
/// To exercise the validation channels we synthesise the cell
|
||||
/// directly at the appropriate exit state, then run the
|
||||
/// worklist's join via two `SsaTaintState::join()` calls — the
|
||||
/// worklist's join via two `SsaTaintState::join()` calls, the
|
||||
/// body's worklist itself doesn't seed `validated_must` on the
|
||||
/// rhs of an Assign, so we model the "writer recorded must=true"
|
||||
/// scenario at the lattice level rather than driving it through
|
||||
|
|
|
|||
|
|
@ -698,7 +698,7 @@ fn cross_file_sink_finding_carries_primary_location() {
|
|||
);
|
||||
let finding = &findings[0];
|
||||
// Note: `uses_summary == false` here because the source (env::var) is
|
||||
// local — only the *sink* was summary-resolved. That's the case the
|
||||
// local, only the *sink* was summary-resolved. That's the case the
|
||||
// `primary_location` / `uses_summary` independence comment on
|
||||
// [`super::Finding::primary_location`] documents.
|
||||
let loc = finding
|
||||
|
|
@ -925,7 +925,7 @@ fn multi_file_sink_in_another_file() {
|
|||
}
|
||||
"#;
|
||||
|
||||
// File B: env::var → exec_cmd() — sink is cross-file.
|
||||
// File B: env::var → exec_cmd(), sink is cross-file.
|
||||
let caller_src = br#"
|
||||
use std::env;
|
||||
fn main() {
|
||||
|
|
@ -956,7 +956,7 @@ fn multi_file_sink_in_another_file() {
|
|||
fn multi_file_passthrough_preserves_taint() {
|
||||
use crate::summary::FuncSummary;
|
||||
|
||||
// identity() just returns its argument — it propagates taint but has no
|
||||
// identity() just returns its argument, it propagates taint but has no
|
||||
// source/sanitizer/sink caps of its own.
|
||||
let mut global = GlobalSummaries::new();
|
||||
let key = FuncKey {
|
||||
|
|
@ -1071,7 +1071,7 @@ fn multi_file_chain_source_sanitize_sink_across_files() {
|
|||
fn sanitizer_strips_only_matching_bits() {
|
||||
// Source(ALL) → shell_escape → sink_html (HTML sink).
|
||||
// shell_escape strips SHELL_ESCAPE but not HTML_ESCAPE.
|
||||
// sink_html is an HTML sink — HTML_ESCAPE bit is still set → 1 finding.
|
||||
// sink_html is an HTML sink, HTML_ESCAPE bit is still set → 1 finding.
|
||||
let src = br#"
|
||||
use std::env;
|
||||
fn sink_html(s: &str) {}
|
||||
|
|
@ -1142,7 +1142,7 @@ fn taint_through_variable_reassignment() {
|
|||
|
||||
#[test]
|
||||
fn untainted_variable_at_sink_is_safe() {
|
||||
// A string literal (not from a source) passed to Command — no finding.
|
||||
// A string literal (not from a source) passed to Command, no finding.
|
||||
let src = br#"
|
||||
use std::process::Command;
|
||||
fn main() {
|
||||
|
|
@ -1585,7 +1585,7 @@ fn cpp_source_to_sink() {
|
|||
);
|
||||
}
|
||||
|
||||
/// Phase 2 (cpp-precision): `c_str()` is a const accessor on `std::string`
|
||||
/// `c_str()` is a const accessor on `std::string`
|
||||
/// that returns a pointer to the same buffer. It must propagate taint from
|
||||
/// the receiver to the result so the downstream sink fires.
|
||||
#[test]
|
||||
|
|
@ -1597,12 +1597,12 @@ fn cpp_c_str_propagates_taint() {
|
|||
let findings = analyse_file(&file_cfg, summaries, None, Lang::Cpp, "test.cpp", &[], None);
|
||||
assert!(
|
||||
!findings.is_empty(),
|
||||
"C++: tainted s.c_str() into system() must fire (Phase 2 c_str passthrough)",
|
||||
"C++: tainted s.c_str() into system() must fire",
|
||||
);
|
||||
}
|
||||
|
||||
/// Phase 2: `std::move(x)` returns its argument unchanged in terms of
|
||||
/// data flow — the rvalue cast is a representation move, not a sanitiser.
|
||||
/// `std::move(x)` returns its argument unchanged in terms of
|
||||
/// data flow, the rvalue cast is a representation move, not a sanitiser.
|
||||
/// Default propagation collects argument taint into the result.
|
||||
#[test]
|
||||
fn cpp_std_move_propagates_taint() {
|
||||
|
|
@ -1617,7 +1617,7 @@ fn cpp_std_move_propagates_taint() {
|
|||
);
|
||||
}
|
||||
|
||||
/// Phase 2: `static_cast<T>(x)` is parsed as a call expression by
|
||||
/// `static_cast<T>(x)` is parsed as a call expression by
|
||||
/// tree-sitter-cpp; default propagation transports taint from the casted
|
||||
/// argument to the result.
|
||||
#[test]
|
||||
|
|
@ -1633,7 +1633,7 @@ fn cpp_static_cast_propagates_taint() {
|
|||
);
|
||||
}
|
||||
|
||||
/// Phase 5 (cpp-precision): a fluent builder chain whose host
|
||||
/// A fluent builder chain whose host
|
||||
/// argument is tainted should fire on the terminal `.connect()`
|
||||
/// SSRF sink. The chained `.host(...)` / `.port(...)` calls return
|
||||
/// the receiver, and default Call-arg propagation puts the tainted
|
||||
|
|
@ -1647,12 +1647,12 @@ fn cpp_builder_chain_user_host_fires() {
|
|||
let findings = analyse_file(&file_cfg, summaries, None, Lang::Cpp, "test.cpp", &[], None);
|
||||
assert!(
|
||||
!findings.is_empty(),
|
||||
"C++: tainted host through fluent builder chain must reach terminal connect() (Phase 5)",
|
||||
"C++: tainted host through fluent builder chain must reach terminal connect()",
|
||||
);
|
||||
}
|
||||
|
||||
/// Phase 5: a fluent builder chain with a hardcoded host literal
|
||||
/// must NOT fire on the terminal connect() sink — the chain carries
|
||||
/// A fluent builder chain with a hardcoded host literal
|
||||
/// must NOT fire on the terminal connect() sink, the chain carries
|
||||
/// no taint.
|
||||
#[test]
|
||||
fn cpp_builder_chain_const_host_silent() {
|
||||
|
|
@ -1663,11 +1663,11 @@ fn cpp_builder_chain_const_host_silent() {
|
|||
let findings = analyse_file(&file_cfg, summaries, None, Lang::Cpp, "test.cpp", &[], None);
|
||||
assert!(
|
||||
findings.is_empty(),
|
||||
"C++: builder chain with literal host must NOT fire (Phase 5 negative)",
|
||||
"C++: builder chain with literal host must NOT fire (Negative)",
|
||||
);
|
||||
}
|
||||
|
||||
/// Phase 4 (cpp-precision): inline member-function bodies inside a
|
||||
/// Inline member-function bodies inside a
|
||||
/// `class_specifier` must be extracted as separate functions and
|
||||
/// intra-file calls must resolve to their bodies. Pre-Phase-4, the
|
||||
/// `class_specifier` AST kind was unmapped in cpp KINDS, so the CFG
|
||||
|
|
@ -1682,11 +1682,11 @@ fn cpp_inline_class_method_resolves() {
|
|||
let findings = analyse_file(&file_cfg, summaries, None, Lang::Cpp, "test.cpp", &[], None);
|
||||
assert!(
|
||||
!findings.is_empty(),
|
||||
"C++: tainted arg through inline class method must reach system() (Phase 4)",
|
||||
"C++: tainted arg through inline class method must reach system()",
|
||||
);
|
||||
}
|
||||
|
||||
/// Phase 3 (cpp-precision): a tainted argument passed through an
|
||||
/// A tainted argument passed through an
|
||||
/// identity-style lambda (`auto echo = [](const char* s) { return s; }`)
|
||||
/// must reach the downstream sink. This is handled by the same default
|
||||
/// Call-arg propagation as `std::move`/`static_cast`; pinning the
|
||||
|
|
@ -1705,7 +1705,7 @@ fn cpp_identity_lambda_propagates_taint() {
|
|||
);
|
||||
}
|
||||
|
||||
/// Phase 2: `std::vector<char>::data()` is a Load-style container op that
|
||||
/// `std::vector<char>::data()` is a Load-style container op that
|
||||
/// returns a pointer to the underlying buffer; `system(v.data())` should
|
||||
/// fire when `v` is tainted.
|
||||
#[test]
|
||||
|
|
@ -1801,7 +1801,7 @@ fn ruby_source_to_sink() {
|
|||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
//
|
||||
// Cross-language resolution now requires explicit InteropEdge declarations.
|
||||
// Without an edge, functions from different languages are never resolved —
|
||||
// Without an edge, functions from different languages are never resolved;
|
||||
// this prevents false positives from name collisions across languages.
|
||||
|
||||
/// Extract cross-file summaries from any language's source bytes.
|
||||
|
|
@ -1984,7 +1984,7 @@ fn cross_lang_rust_sanitizer_in_js_via_interop() {
|
|||
None,
|
||||
);
|
||||
// eval uses Cap::all(), so a SHELL_ESCAPE sanitizer alone does NOT
|
||||
// neutralise taint — shell-escape is semantically wrong for code injection.
|
||||
// neutralise taint, shell-escape is semantically wrong for code injection.
|
||||
// The finding should still be reported.
|
||||
assert!(
|
||||
!findings.is_empty(),
|
||||
|
|
@ -2481,7 +2481,7 @@ fn cross_lang_summary_preserves_lang_metadata() {
|
|||
|
||||
let global = merge_summaries(vec![py_summary, js_summary], None);
|
||||
|
||||
// They are now separate entries — not merged
|
||||
// They are now separate entries, not merged
|
||||
let py_matches = global.lookup_same_lang(Lang::Python, "helper");
|
||||
let js_matches = global.lookup_same_lang(Lang::JavaScript, "helper");
|
||||
|
||||
|
|
@ -2609,7 +2609,7 @@ fn ambiguous_resolution_returns_none() {
|
|||
);
|
||||
}
|
||||
|
||||
// Caller from c.rs calls helper() — ambiguous (two matches, neither is caller's namespace)
|
||||
// Caller from c.rs calls helper(): ambiguous (two matches, neither is caller's namespace)
|
||||
let src = br#"
|
||||
use std::process::Command;
|
||||
fn main() {
|
||||
|
|
@ -2855,7 +2855,7 @@ fn validate_and_early_return() {
|
|||
let summaries = &file_cfg.summaries;
|
||||
let findings = analyse_file(&file_cfg, summaries, None, Lang::Rust, "test.rs", &[], None);
|
||||
|
||||
// Validated findings are now suppressed — validate() guard means the
|
||||
// Validated findings are now suppressed, validate() guard means the
|
||||
// sink is on the safe path, so no finding should be emitted.
|
||||
assert_eq!(findings.len(), 0, "validated finding should be suppressed");
|
||||
}
|
||||
|
|
@ -2888,7 +2888,7 @@ fn validate_in_if_else_path_validated() {
|
|||
let summaries = &file_cfg.summaries;
|
||||
let findings = analyse_file(&file_cfg, summaries, None, Lang::Rust, "test.rs", &[], None);
|
||||
|
||||
// Validated findings are now suppressed — sink is in the validated
|
||||
// Validated findings are now suppressed, sink is in the validated
|
||||
// branch, so no finding should be emitted.
|
||||
assert_eq!(findings.len(), 0, "validated finding should be suppressed");
|
||||
}
|
||||
|
|
@ -2932,7 +2932,7 @@ fn contradictory_null_check_pruned() {
|
|||
|
||||
// Inner branch is infeasible: if x.is_none() then x cannot also be is_none().
|
||||
// After early return on is_none(), the fall-through path has polarity=false
|
||||
// for NullCheck. The inner `if x.is_none()` True branch has polarity=true —
|
||||
// for NullCheck. The inner `if x.is_none()` True branch has polarity=true, a
|
||||
// contradiction.
|
||||
let src = br#"
|
||||
use std::env; use std::process::Command;
|
||||
|
|
@ -3045,7 +3045,7 @@ fn path_state_budget_graceful() {
|
|||
let summaries = &file_cfg.summaries;
|
||||
let findings = analyse_file(&file_cfg, summaries, None, Lang::Rust, "test.rs", &[], None);
|
||||
|
||||
// Should still detect the flow — truncation shouldn't cause false negatives.
|
||||
// Should still detect the flow, truncation shouldn't cause false negatives.
|
||||
assert_eq!(
|
||||
findings.len(),
|
||||
1,
|
||||
|
|
@ -3080,7 +3080,7 @@ fn unknown_predicate_not_pruned() {
|
|||
let summaries = &file_cfg.summaries;
|
||||
let findings = analyse_file(&file_cfg, summaries, None, Lang::Rust, "test.rs", &[], None);
|
||||
|
||||
// Comparison is not in the whitelist — the path should NOT be pruned.
|
||||
// Comparison is not in the whitelist, the path should NOT be pruned.
|
||||
assert_eq!(
|
||||
findings.len(),
|
||||
1,
|
||||
|
|
@ -3096,7 +3096,7 @@ fn duplicate_null_guard_prunes_unreachable_sink() {
|
|||
// After `if y.is_none() { return; }`, the false arm proves
|
||||
// `y.is_none() == false` on the only surviving path. A second
|
||||
// `if y.is_none() { sink }` then adds `y.is_none() == true` on the
|
||||
// body's True arm — a per-symbol PredicateSummary contradiction
|
||||
// body's True arm, a per-symbol PredicateSummary contradiction
|
||||
// (known_true & known_false on bit NullCheck). The body is
|
||||
// structurally unreachable; the sink must not fire.
|
||||
//
|
||||
|
|
@ -3573,7 +3573,7 @@ fn js_two_level_converges_no_mutation() {
|
|||
|
||||
#[test]
|
||||
fn catch_param_to_sink_has_caught_exception_source_kind() {
|
||||
// Catch param flows to a sink — the finding source_kind must be
|
||||
// Catch param flows to a sink, the finding source_kind must be
|
||||
// CaughtException, not Unknown.
|
||||
let src = b"
|
||||
const { exec } = require('child_process');
|
||||
|
|
@ -3743,7 +3743,7 @@ fn assert_ssa_integration(src: &[u8]) {
|
|||
// High-level path (per-body analysis)
|
||||
let high_level = analyse_file(&file_cfg, summaries, None, Lang::Rust, "test.rs", &[], None);
|
||||
|
||||
// Direct SSA path — use the first function body (fn main), not top-level
|
||||
// Direct SSA path, use the first function body (fn main), not top-level
|
||||
let body = if file_cfg.bodies.len() > 1 {
|
||||
&file_cfg.bodies[1]
|
||||
} else {
|
||||
|
|
@ -4654,7 +4654,7 @@ fn ssa_induction_var_no_taint() {
|
|||
|
||||
#[test]
|
||||
fn ssa_loop_tainted_var_not_induction() {
|
||||
// `x` is tainted and transformed in a loop — NOT an induction variable
|
||||
// `x` is tainted and transformed in a loop, NOT an induction variable
|
||||
let src = br#"
|
||||
use std::{env, process::Command};
|
||||
fn main() {
|
||||
|
|
@ -4766,7 +4766,7 @@ fn ssa_phi_path_sensitive_both_branches_validated() {
|
|||
let summaries = &file_cfg.summaries;
|
||||
let findings = analyse_file(&file_cfg, summaries, None, Lang::Rust, "test.rs", &[], None);
|
||||
|
||||
// Validated findings are now suppressed — sink is in the validated
|
||||
// Validated findings are now suppressed, sink is in the validated
|
||||
// branch, so no finding should be emitted.
|
||||
assert_eq!(findings.len(), 0, "validated finding should be suppressed");
|
||||
}
|
||||
|
|
@ -5116,7 +5116,7 @@ fn abstract_ssrf_prefix_linear_suppression() {
|
|||
/// Two predecessor blocks produce string concat values with different safe
|
||||
/// prefixes ("https://api.example.com/users/" and "https://api.example.com/admins/").
|
||||
/// A phi merges them. The LCP of the prefixes is "https://api.example.com/" which
|
||||
/// still has scheme://host/ — so SSRF suppression should fire.
|
||||
/// still has scheme://host/, so SSRF suppression should fire.
|
||||
///
|
||||
/// Before the phi replay fix, collect_block_events did NOT replay abstract phis,
|
||||
/// leaving the phi result's abstract value as Top (stale). The SSRF suppression
|
||||
|
|
@ -5255,7 +5255,7 @@ fn phi_validated_must_requires_all_paths() {
|
|||
use tree_sitter::Language;
|
||||
|
||||
// Path A validates x, path B does NOT validate x.
|
||||
// The phi for x after the merge must NOT get validated_must — only
|
||||
// The phi for x after the merge must NOT get validated_must, only
|
||||
// validated_may (since at least one path validated). The sink after
|
||||
// the merge must still fire because the must-analysis says "not
|
||||
// definitely validated on all paths".
|
||||
|
|
@ -5324,7 +5324,7 @@ fn inline_return_constant_with_internal_source_produces_no_finding() {
|
|||
None,
|
||||
);
|
||||
|
||||
// transform() returns a constant — no taint should leak to caller
|
||||
// transform() returns a constant, no taint should leak to caller
|
||||
assert_eq!(
|
||||
findings.len(),
|
||||
0,
|
||||
|
|
@ -5386,7 +5386,7 @@ fn inline_return_taint_internal_source_does_not_widen_caps() {
|
|||
// Callee has an internal source (document.location) alongside a tainted
|
||||
// param. The explicit return value is the param. Without the C-1 fix,
|
||||
// extract_inline_return_taint would union ALL live tainted values' caps
|
||||
// — the internal source's derived-caps would override the param-caps
|
||||
// so the internal source's derived-caps would override the param-caps
|
||||
// (derived takes priority in the extraction logic). With the fix, only
|
||||
// the return value's taint is collected, so param taint is returned
|
||||
// correctly.
|
||||
|
|
@ -5420,7 +5420,7 @@ fn inline_return_taint_internal_source_does_not_widen_caps() {
|
|||
None,
|
||||
);
|
||||
|
||||
// The callee returns cmd (tainted param) — 1 finding expected.
|
||||
// The callee returns cmd (tainted param), 1 finding expected.
|
||||
// The internal document.location() should NOT widen the return taint.
|
||||
assert_eq!(
|
||||
findings.len(),
|
||||
|
|
@ -5435,7 +5435,7 @@ fn inline_return_taint_internal_source_does_not_widen_caps() {
|
|||
///
|
||||
/// Two class methods share the leaf name `process` in the same file. If the
|
||||
/// summary map were keyed by bare name (or raw file-path namespace), the
|
||||
/// second lowering would overwrite the first — both methods would end up
|
||||
/// second lowering would overwrite the first, both methods would end up
|
||||
/// pointing at whichever summary was extracted last.
|
||||
///
|
||||
/// With canonical `FuncKey` identity (`container` discriminates them) both
|
||||
|
|
@ -5483,7 +5483,7 @@ class Worker {
|
|||
summaries.keys().collect::<Vec<_>>(),
|
||||
);
|
||||
|
||||
// Same invariant on the cached-bodies map — inline analysis depends on
|
||||
// Same invariant on the cached-bodies map, inline analysis depends on
|
||||
// being able to fetch the correct body by full FuncKey.
|
||||
let mut body_containers: Vec<String> = bodies
|
||||
.iter()
|
||||
|
|
@ -5593,6 +5593,7 @@ fn make_finding_for_link_test(
|
|||
path_hash,
|
||||
finding_id: String::new(),
|
||||
alternative_finding_ids: smallvec::SmallVec::new(),
|
||||
effective_sink_caps: crate::labels::Cap::empty(),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -5628,7 +5629,7 @@ fn finding_id_encodes_validation_and_path_hash() {
|
|||
);
|
||||
|
||||
// Differing path_hash produces a different ID even with the same
|
||||
// (body, source, sink, validated) — the whole point of the path
|
||||
// (body, source, sink, validated), the whole point of the path
|
||||
// component in the dedup key.
|
||||
let mut u2 = make_finding_for_link_test(1, 3, 7, 0xdead_beef_0000_0002, false);
|
||||
u2.finding_id = super::make_finding_id(&u2);
|
||||
|
|
@ -5639,7 +5640,7 @@ fn finding_id_encodes_validation_and_path_hash() {
|
|||
}
|
||||
|
||||
/// `link_alternative_paths` must cross-link findings that share
|
||||
/// `(body_id, sink, source)` — so a validated flow and an unvalidated
|
||||
/// `(body_id, sink, source)`, so a validated flow and an unvalidated
|
||||
/// flow on the same source/sink pair each list the other's ID.
|
||||
#[test]
|
||||
fn link_alternative_paths_cross_references_same_body_sink_source() {
|
||||
|
|
@ -5668,18 +5669,18 @@ fn link_alternative_paths_cross_references_same_body_sink_source() {
|
|||
}
|
||||
|
||||
/// Findings that differ on `(body_id, sink, source)` are independent
|
||||
/// vulnerabilities — they must **not** end up cross-linked as
|
||||
/// vulnerabilities, they must **not** end up cross-linked as
|
||||
/// alternatives, otherwise the "alternative path" framing becomes
|
||||
/// noise.
|
||||
#[test]
|
||||
fn link_alternative_paths_does_not_link_distinct_sink_source() {
|
||||
let mut findings = vec![
|
||||
make_finding_for_link_test(1, 3, 7, 0x1111, false),
|
||||
// Different sink — independent finding, not an alternative.
|
||||
// Different sink: independent finding, not an alternative.
|
||||
make_finding_for_link_test(1, 3, 8, 0x1111, false),
|
||||
// Different source — also independent.
|
||||
// Different source: also independent.
|
||||
make_finding_for_link_test(1, 4, 7, 0x1111, false),
|
||||
// Different body — also independent.
|
||||
// Different body: also independent.
|
||||
make_finding_for_link_test(2, 3, 7, 0x1111, false),
|
||||
];
|
||||
for f in &mut findings {
|
||||
|
|
@ -5697,7 +5698,7 @@ fn link_alternative_paths_does_not_link_distinct_sink_source() {
|
|||
|
||||
/// When the same `(body, sink, source)` has three sibling findings
|
||||
/// (e.g. validated, unvalidated-path-A, unvalidated-path-B), each
|
||||
/// finding must list the other two — the group is symmetric and
|
||||
/// finding must list the other two, the group is symmetric and
|
||||
/// complete rather than a chain.
|
||||
#[test]
|
||||
fn link_alternative_paths_three_way_group() {
|
||||
|
|
@ -5726,14 +5727,14 @@ fn link_alternative_paths_three_way_group() {
|
|||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Typed call-graph devirtualisation — Phase 2 (typed_call_receivers)
|
||||
// Typed call-graph devirtualisation (typed_call_receivers)
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Phase 2: when a method call's receiver was constructed from a known
|
||||
/// When a method call's receiver was constructed from a known
|
||||
/// constructor (`File::open` → `FileHandle`), the SSA-extraction
|
||||
/// pipeline must record `(call_ordinal, "FileHandle")` on the
|
||||
/// caller's [`crate::summary::ssa_summary::SsaFuncSummary::typed_call_receivers`]
|
||||
/// so Phase 3 can devirtualise the cross-file edge.
|
||||
/// so build_call_graph can devirtualise the cross-file edge.
|
||||
///
|
||||
/// Uses Java because `FileInputStream` / `FileOutputStream` are part
|
||||
/// of the [`crate::ssa::type_facts::constructor_type`] table for Java
|
||||
|
|
@ -5779,14 +5780,14 @@ class Reader {
|
|||
);
|
||||
}
|
||||
|
||||
/// Phase 2 negative control: free-function calls (no receiver) must
|
||||
/// Negative control: free-function calls (no receiver) must
|
||||
/// never appear in `typed_call_receivers`. Even when the callee is a
|
||||
/// known type-producing constructor, it sits in the body as a Call
|
||||
/// with `receiver = None` and is not a candidate for devirtualisation.
|
||||
#[test]
|
||||
fn typed_call_receivers_skips_free_function_calls() {
|
||||
// `new FileInputStream(...)` is a constructor invocation with no
|
||||
// receiver — exactly the shape we want to ignore.
|
||||
// receiver, exactly the shape we want to ignore.
|
||||
let src = br#"
|
||||
class Maker {
|
||||
void make() {
|
||||
|
|
@ -5808,10 +5809,10 @@ class Maker {
|
|||
|
||||
// make() has zero parameters and no fresh-allocation return, so the
|
||||
// generic insertion gate skips it. The phase-2 patch only force-
|
||||
// inserts when `typed_call_receivers` is non-empty — which it
|
||||
// inserts when `typed_call_receivers` is non-empty, which it
|
||||
// isn't here, since `new FileInputStream(...)` is a free-function-
|
||||
// shaped constructor call (no SSA receiver). So either the
|
||||
// summary is absent, or — if some other side effect inserted it —
|
||||
// summary is absent or, if some other side effect inserted it,
|
||||
// its `typed_call_receivers` is empty. Both forms prove no
|
||||
// spurious typed entry was recorded.
|
||||
let typed = summaries
|
||||
|
|
@ -5829,7 +5830,7 @@ class Maker {
|
|||
/// Regression: nested arrow functions inside `return new Promise((res,rej)
|
||||
/// => { ... })` must be lifted as separate bodies. Before the Kind::Return
|
||||
/// arm in cfg/mod.rs called `collect_nested_function_nodes`, only the
|
||||
/// outer function (`downloadFromUri`) was extracted — the executor and
|
||||
/// outer function (`downloadFromUri`) was extracted, the executor and
|
||||
/// its inner callbacks were silently swallowed, hiding the inner gated
|
||||
/// http.get sink from classification. Motivated by CVE-2025-64430.
|
||||
#[test]
|
||||
|
|
@ -5972,7 +5973,7 @@ const handler = (req) => {
|
|||
/// The augment pass populates `downloadFromUri.summary.param_to_sink:
|
||||
/// [(0, SSRF)]` (single-hop closure-capture lift). For the handler's
|
||||
/// `helper(req.body)` call to fire, `helper.summary.param_to_sink` must
|
||||
/// also contain `[(0, SSRF)]` — but that requires `helper`'s probe to
|
||||
/// also contain `[(0, SSRF)]`, but that requires `helper`'s probe to
|
||||
/// see `downloadFromUri`'s augmented summary at resolution time.
|
||||
///
|
||||
/// Because the probe currently runs with `ssa_summaries=None`,
|
||||
|
|
@ -6065,11 +6066,198 @@ const handler = (req) => {
|
|||
/// `middle.summary.param_to_sink`, then handler's call site picks it up.
|
||||
///
|
||||
/// Today the second-pass runs only once (no fixed-point), so depth-3+
|
||||
/// is expected to NOT fire — guards against accidental fixed-point
|
||||
/// is expected to NOT fire; this guards against accidental fixed-point
|
||||
/// regression that would mask an over-eager rewrite. Marked
|
||||
/// `#[ignore]` so it documents the depth limit without breaking CI.
|
||||
/// Motivated by CVE-2025-64430 corner case; remove the `#[ignore]` and
|
||||
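/// any guarding `assert!` polarity if a fixed-point is added later.
//
// NOTE(review): the doc paragraph above describes
// `cve_2025_64430_three_hop_transitive_documents_depth_limit`. Other tests now
// sit between it and that function, so rustdoc attaches it to the wrong item;
// it should be moved back next to the `#[ignore]`d three-hop test.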
|
||||
/// Indirect-validator branch narrowing, direct-flow shape (no helper
/// indirection).
///
/// The JavaScript fixture stores the result of `await validateUrlSsrf(target)`
/// in `ssrfError`, throws when that bare variable is truthy, and only
/// afterwards calls `axios.get(target)`. The test asserts that `analyse_file`
/// reports no findings for this file, i.e. the sink reached after the guard
/// is not flagged.
///
/// Only the end-to-end finding count is checked; no SSA state is inspected
/// directly. Per the original note this is meant to pin
/// `apply_input_validator_branch_narrowing` for callees recognised by
/// `classify_input_validator_callee` (e.g. `validateUrlSsrf`, `verifyToken`,
/// `isValidUrl`).
///
/// Motivated by Novu CVE GHSA-4x48-cgf9-q33f
/// (`const ssrfError = await validateUrlSsrf(child.webhookUrl); if (ssrfError) throw …;`).
#[test]
fn indirect_validator_narrowing_marks_arg_validated() {
    let js_source = br#"
async function handler(req) {
    const target = req.query.url;
    const ssrfError = await validateUrlSsrf(target);
    if (ssrfError) {
        throw new Error('blocked');
    }
    await axios.get(target);
}
"#;
    let file_cfg = parse_lang(
        js_source,
        "javascript",
        tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE),
    );
    let findings = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );

    // Direct flow: the guard on `ssrfError` must leave `axios.get(target)`
    // without a reported finding.
    let finding_count = findings.len();
    assert!(
        finding_count == 0,
        "validator narrowing should suppress direct-flow SSRF; got {} finding(s)",
        finding_count
    );
}
|
||||
|
||||
/// Regression: a helper that validates its argument before the sink must not
/// make its caller fire.
///
/// In the fixture, `getWebhookResponse(child)` runs
/// `validateUrlSsrf(child.webhookUrl)`, throws when the result is truthy, and
/// then calls `axios.post(child.webhookUrl, {})`. `handler` passes
/// `req.body.filter` into that helper. The test asserts that `analyse_file`
/// reports no findings for the file.
///
/// Per the original note, the mechanism under test is that
/// `extract_ssa_func_summary` skips `all_validated` events when populating
/// `param_to_sink` / `param_to_sink_param`, so the helper's guarded sink is
/// not advertised to callers through its summary. This test only observes
/// the resulting finding count.
///
/// Closes the helper-summary half of Novu CVE GHSA-4x48-cgf9-q33f.
#[test]
fn helper_with_validator_does_not_propagate_to_caller_via_summary() {
    let js_source = br#"
async function getWebhookResponse(child) {
    const ssrfError = await validateUrlSsrf(child.webhookUrl);
    if (ssrfError) {
        throw new Error('blocked');
    }
    return await axios.post(child.webhookUrl, {});
}

async function handler(req) {
    const child = req.body.filter;
    const r = await getWebhookResponse(child);
    return r;
}
"#;
    let file_cfg = parse_lang(
        js_source,
        "javascript",
        tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE),
    );
    let findings = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );

    // The validator lives inside the helper; the caller must stay silent.
    let finding_count = findings.len();
    assert!(
        finding_count == 0,
        "helper-with-validator should not propagate sink via summary; got {} finding(s)",
        finding_count
    );
}
|
||||
|
||||
/// Companion control: the same helper/caller shape, but the helper has no
/// validator in front of `axios.post(child.webhookUrl, {})`.
///
/// `handler` passes `req.body.filter` into `getWebhookResponse`, which
/// forwards `child.webhookUrl` straight to the sink. The test asserts that
/// `analyse_file` reports at least one finding, so suppression of validated
/// helpers does not also silence unguarded ones.
#[test]
fn helper_without_validator_still_propagates_to_caller_via_summary() {
    let js_source = br#"
async function getWebhookResponse(child) {
    return await axios.post(child.webhookUrl, {});
}

async function handler(req) {
    const child = req.body.filter;
    const r = await getWebhookResponse(child);
    return r;
}
"#;
    let file_cfg = parse_lang(
        js_source,
        "javascript",
        tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE),
    );
    let findings = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );

    // No guard anywhere on the path: the cross-function flow must be reported.
    let reported_any = !findings.is_empty();
    assert!(
        reported_any,
        "helper-without-validator must still flag the cross-fn SSRF path",
    );
}
|
||||
|
||||
/// Regression: pins the polarity bucket `classify_input_validator_callee`
/// assigns to a set of representative callee names.
///
/// * `validateUrlSsrf`, `verifyToken`, `validate_url` →
///   `InputValidatorPolarity::ErrorReturning` (per the original note, a bare
///   `if (err) throw` guards the success branch, which is the false branch).
/// * `isValidUrl`, `is_valid_email`, `isSafe` →
///   `InputValidatorPolarity::BooleanTrueIsValid` (per the original note, a
///   bare `if (!ok) throw` guards the success branch, which is the true
///   branch via `condition_negated`).
/// * `checkPermissions`, `is_authorized`, `randomThing` → `None`.
/// * `validator.validateUrlSsrf` classifies like the bare callee.
#[test]
fn classify_input_validator_callee_polarity_buckets() {
    use crate::ssa::type_facts::{InputValidatorPolarity, classify_input_validator_callee};

    // Checked in order; the first mismatch fails the test.
    let expectations = [
        // ErrorReturning bucket
        ("validateUrlSsrf", Some(InputValidatorPolarity::ErrorReturning)),
        ("verifyToken", Some(InputValidatorPolarity::ErrorReturning)),
        ("validate_url", Some(InputValidatorPolarity::ErrorReturning)),
        // BooleanTrueIsValid bucket
        ("isValidUrl", Some(InputValidatorPolarity::BooleanTrueIsValid)),
        ("is_valid_email", Some(InputValidatorPolarity::BooleanTrueIsValid)),
        ("isSafe", Some(InputValidatorPolarity::BooleanTrueIsValid)),
        // Negative: names that look like validators but are auth-flavored
        // (`checkPermissions`, `is_authorized`) are intentionally not matched
        // here; they have separate semantics in the auth pipeline.
        ("checkPermissions", None),
        ("is_authorized", None),
        ("randomThing", None),
        // Path-prefix peeling: `obj.validateXxx` should classify the same as
        // the bare callee.
        (
            "validator.validateUrlSsrf",
            Some(InputValidatorPolarity::ErrorReturning),
        ),
    ];

    for (callee, expected) in expectations {
        assert_eq!(classify_input_validator_callee(callee), expected);
    }
}
|
||||
|
||||
#[test]
|
||||
#[ignore]
|
||||
fn cve_2025_64430_three_hop_transitive_documents_depth_limit() {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue