//! Demand-driven backwards taint analysis from sinks. //! //! The forward taint engine (`ssa_transfer.rs`) proceeds source-to-sink, //! spending analysis budget on every function the source might touch. Its //! precision ceiling is fixed by what summaries + inline re-analysis can //! preserve on every edge of a flow, a single lossy edge drops the finding. //! //! This module implements the opposite direction: start at each sink value, //! walk *reverse* SSA edges and (when needed) cross-file callee bodies on //! demand, and emit a [`BackwardFlow`] when a source is reached or an //! accumulated path predicate proves the flow infeasible. //! //! The analysis is additive: //! //! * When a forward finding's sink is confirmed by a backwards walk that //! reaches a matching source, we append `backwards-confirmed` to the //! finding's evidence notes. //! * When the backwards walk proves the flow infeasible via accumulated //! path predicates, we append `backwards-infeasible`, consumed by the //! confidence scorer as a cap-to-Low signal. //! * Backward flows that reach a source with no matching forward finding //! become standalone `taint-backwards-flow` diags (a separate rule id so //! existing graders can distinguish the two channels). //! //! The feature is gated by //! [`crate::utils::analysis_options::AnalysisOptions::backwards_analysis`] //! (default off) so enabling it is opt-in. use crate::cfg::Cfg; use crate::labels::{Cap, DataLabel, SourceKind}; use crate::ssa::{SsaBody, SsaOp, SsaValue}; use crate::summary::GlobalSummaries; use crate::symbol::{FuncKey, Lang}; use crate::taint::Finding; use crate::taint::ssa_transfer::CalleeSsaBody; use petgraph::graph::NodeIndex; use smallvec::SmallVec; use std::collections::{HashMap, HashSet}; // ─── Budgets ──────────────────────────────────────────────────────────────── /// Default k-depth cap for cross-function body expansion. The forward path /// uses k=1 inline re-analysis; backwards starts higher because the common /// case ("sink in callee → operand comes from a caller's caller") needs at /// least two frames to resolve. pub const DEFAULT_BACKWARDS_DEPTH: u32 = 2; /// Maximum number of SSA values any single backwards walk may expand before /// bailing out with [`BackwardFlow::budget_exhausted`] set. Chosen to match /// the forward engine's `MAX_TRACKED_VARS` (64) by a factor of ~16 so that /// pathological flat functions still terminate. pub const BACKWARDS_VALUE_BUDGET: u32 = 1024; /// Maximum number of blocks a cross-file callee body may have before we /// refuse to expand it during a backwards walk. Mirrors /// `MAX_INLINE_BLOCKS` on the forward path so the two directions use /// compatible policies for "too large to inspect". pub const MAX_BACKWARDS_CALLEE_BLOCKS: usize = 500; // ─── Demand + flow records ───────────────────────────────────────────────── /// The demand a sink makes on an operand: which capabilities would trigger /// the finding, and which predicate evidence (if any) has been gathered so /// far. /// /// `caps` is monotone, the walk can only narrow the demand (by proving /// operands validated or sanitized against specific capability bits), never /// widen it. This keeps backwards composition with summary-derived /// transforms sound. #[derive(Clone, Debug, Default)] pub struct DemandState { /// Capability bits the sink consumes. A source with a cap outside this /// set is not considered a match. pub caps: Cap, /// Validation predicate bits accumulated along the walk. Encoded using /// [`crate::taint::domain::predicate_kind_bit`]; bit `i` set means the /// corresponding `PredicateKind` was observed as holding on every /// predecessor visited so far. pub validated_true: u8, /// Counterpart to [`Self::validated_true`] for known-false predicates. pub validated_false: u8, /// Number of cross-function inline expansions performed along this walk. pub depth: u32, } impl DemandState { /// Seed a fresh demand state from a sink's capability mask. pub fn new(caps: Cap) -> Self { Self { caps, validated_true: 0, validated_false: 0, depth: 0, } } } /// One backwards flow: a single value-chain from a sink to either a proven /// source, an infeasible prune, or a budget exhaustion. #[derive(Clone, Debug)] pub struct BackwardFlow { /// SSA value that the sink consumed. pub sink_value: SsaValue, /// CFG node of the sink statement. pub sink_node: NodeIndex, /// Capability bits the sink demanded. pub sink_caps: Cap, /// Source classification if the walk reached one. pub source_kind: Option, /// CFG node of the reached source (if any). pub source_node: Option, /// Set when the accumulated predicates proved the flow infeasible before /// reaching any source. pub infeasible: bool, /// Set when the walk hit [`BACKWARDS_VALUE_BUDGET`] without terminating. pub budget_exhausted: bool, /// Maximum cross-function expansion depth reached. pub max_depth: u32, /// SSA-value chain traversed, sink first, source last. Truncated to /// the last [`MAX_CHAIN_LEN`] values to bound memory. pub chain: SmallVec<[SsaValue; 8]>, } impl BackwardFlow { /// Whether this flow confirmed a source/sink pairing. Infeasible and /// budget-exhausted flows never confirm. pub fn is_confirmation(&self) -> bool { self.source_kind.is_some() && !self.infeasible && !self.budget_exhausted } } /// Hard cap on [`BackwardFlow::chain`] length. Truncation is lossy for /// diagnostic display but preserves the sink→source shape. pub const MAX_CHAIN_LEN: usize = 16; // ─── Driver context ──────────────────────────────────────────────────────── /// Inputs the driver needs to resolve cross-function operand demands. /// /// The context is intentionally narrow: it borrows from whatever analysis /// objects the caller has already prepared (summaries, the current body, /// cross-file body maps) and does not build its own. This keeps the /// backwards pass cheap to enable, when off, none of this code is touched. pub struct BackwardsCtx<'a> { /// Callee's SSA body. pub ssa: &'a SsaBody, /// Callee's CFG (for `cfg_node → NodeInfo` lookups at source events). pub cfg: &'a Cfg, /// Language tag for source-kind heuristics (e.g. `os.getenv` hints). pub lang: Lang, /// Whole-program summaries: used to discover cross-file bodies and /// [`crate::summary::ssa_summary::SsaFuncSummary`] metadata at call instructions. pub global_summaries: Option<&'a GlobalSummaries>, /// Pre-lowered intra-file callee bodies keyed by [`FuncKey`]. Shared /// with the forward path so we do not lower functions twice. pub intra_file_bodies: Option<&'a HashMap>, /// Maximum allowed cross-function expansion depth. pub depth_budget: u32, } impl<'a> BackwardsCtx<'a> { /// Construct a minimal context against an already-lowered body. Callers /// that want cross-file resolution must also populate `global_summaries` /// and/or `intra_file_bodies`. pub fn new(ssa: &'a SsaBody, cfg: &'a Cfg, lang: Lang) -> Self { Self { ssa, cfg, lang, global_summaries: None, intra_file_bodies: None, depth_budget: DEFAULT_BACKWARDS_DEPTH, } } } // ─── Transfer function ───────────────────────────────────────────────────── /// One step of the backwards transfer: given a demand on `value`, compute /// the demand on its immediate SSA operands. Returns the list of /// `(operand, demand)` pairs, possibly empty if the defining op terminates /// the walk (Source/Const/Param). /// /// This is a pure function over the op and demand; cycle detection and /// budget enforcement live in the driver. pub fn backward_transfer( ssa: &SsaBody, value: SsaValue, demand: &DemandState, ) -> (BackwardStep, SmallVec<[(SsaValue, DemandState); 4]>) { let def = ssa.def_of(value); let block = &ssa.blocks[def.block.0 as usize]; // Locate the defining instruction inside the block. Phis come first, // body instructions after. let op = block .phis .iter() .chain(block.body.iter()) .find(|i| i.value == value) .map(|i| &i.op); let op = match op { Some(o) => o, None => return (BackwardStep::Unknown, SmallVec::new()), }; match op { SsaOp::Source => (BackwardStep::ReachedSource(def.cfg_node), SmallVec::new()), SsaOp::Const(_) => (BackwardStep::ReachedConst, SmallVec::new()), SsaOp::Param { index } => ( BackwardStep::ReachedParam { index: *index, node: def.cfg_node, }, SmallVec::new(), ), SsaOp::SelfParam => ( BackwardStep::ReachedParam { index: 0, node: def.cfg_node, }, SmallVec::new(), ), SsaOp::CatchParam => (BackwardStep::ReachedCatchParam, SmallVec::new()), SsaOp::Nop => (BackwardStep::Unknown, SmallVec::new()), // Undef is a phi-operand sentinel on edges with no reaching // definition, nothing to trace backwards through. SsaOp::Undef => (BackwardStep::ReachedConst, SmallVec::new()), SsaOp::Phi(operands) => { // Demand fans out to every incoming value: the runtime value of // a phi is the runtime value of exactly one predecessor, so // any reaching source on any predecessor produces a flow. let mut next: SmallVec<[(SsaValue, DemandState); 4]> = SmallVec::new(); for (_pred_block, pred_value) in operands { next.push((*pred_value, demand.clone())); } (BackwardStep::Phi, next) } SsaOp::Assign(operands) => { // Conservative: demand flows to every operand. Assign carries // the union of BinOp / Cast / Copy shapes in the current IR, so // treating them all as "any operand could supply caps" is the // only sound choice without an explicit BinOp split. let mut next: SmallVec<[(SsaValue, DemandState); 4]> = SmallVec::new(); for op in operands { next.push((*op, demand.clone())); } (BackwardStep::Assign, next) } SsaOp::Call { callee, args, receiver, .. } => { // For Call ops the full demand transfer depends on callee // metadata (summary or body). The driver handles that , // return a `BackwardStep::Call` carrying the receiver + args // so the driver can consult [`GlobalSummaries`] / bodies_by_key. let mut flat: SmallVec<[(SsaValue, DemandState); 4]> = SmallVec::new(); if let Some(r) = receiver { flat.push((*r, demand.clone())); } for arg_uses in args { for u in arg_uses { flat.push((*u, demand.clone())); } } ( BackwardStep::Call { callee: callee.clone(), }, flat, ) } SsaOp::FieldProj { receiver, .. } => { // Field projection: demand for `obj.f` flows to `obj`. Treated // structurally like a single-operand Assign for the backwards // walk, sufficient until future passes will introduce field-sensitive // demand discrimination. let mut next: SmallVec<[(SsaValue, DemandState); 4]> = SmallVec::new(); next.push((*receiver, demand.clone())); (BackwardStep::Assign, next) } } } /// Classification of a single backwards transfer step, used by the driver to /// decide whether to terminate, fan out, or perform a callee-specific /// resolution. #[derive(Clone, Debug, PartialEq, Eq)] pub enum BackwardStep { /// Defining op is a tainted [`SsaOp::Source`], walk terminates with a /// confirmed flow. ReachedSource(NodeIndex), /// Defining op is a [`SsaOp::Const`], walk terminates without a source. ReachedConst, /// Defining op is an [`SsaOp::Param`] / [`SsaOp::SelfParam`], walk may /// continue by resolving the parameter against the caller's arguments /// (requires reverse call-graph expansion, which is out of scope for /// the current cut and is handled as a terminal step). ReachedParam { index: usize, node: NodeIndex }, /// Defining op is a [`SsaOp::CatchParam`]. Treated as a taint boundary: /// a catch parameter may carry exception-borne taint, but resolving /// the actual exception source requires exception-edge traversal not /// performed here. ReachedCatchParam, /// Phi node, driver fans out to predecessors. Phi, /// Arithmetic / copy / cast, driver fans out to operands. Assign, /// Call op, driver consults summaries and/or callee bodies. Call { callee: String }, /// Defining op could not be located or was a [`SsaOp::Nop`], walk /// terminates as inconclusive. Unknown, } // ─── Driver ──────────────────────────────────────────────────────────────── /// Walk backwards from `sink_value` in `ctx.ssa`, producing at most one /// [`BackwardFlow`] per reached source (phi fan-outs can produce multiple). /// /// Does not consult forward findings, the caller is responsible for /// matching the returned flows against its finding set. pub fn analyse_sink_backwards( ctx: &BackwardsCtx<'_>, sink_value: SsaValue, sink_node: NodeIndex, sink_caps: Cap, ) -> Vec { let mut out = Vec::new(); let mut visited: HashSet = HashSet::new(); let mut budget: u32 = 0; let initial_demand = DemandState::new(sink_caps); let mut chain: SmallVec<[SsaValue; 8]> = SmallVec::new(); chain.push(sink_value); walk_dfs( ctx, sink_value, sink_node, sink_caps, &initial_demand, &mut visited, &mut budget, &mut chain, &mut out, ); out } #[allow(clippy::too_many_arguments)] fn walk_dfs( ctx: &BackwardsCtx<'_>, value: SsaValue, sink_node: NodeIndex, sink_caps: Cap, demand: &DemandState, visited: &mut HashSet, budget: &mut u32, chain: &mut SmallVec<[SsaValue; 8]>, out: &mut Vec, ) { if *budget >= BACKWARDS_VALUE_BUDGET { // Budget exhausted → emit one "unknown" flow so the caller can // surface the exhaustion as a confidence limiter. out.push(BackwardFlow { sink_value: chain.first().copied().unwrap_or(value), sink_node, sink_caps, source_kind: None, source_node: None, infeasible: false, budget_exhausted: true, max_depth: demand.depth, chain: clip_chain(chain), }); return; } *budget += 1; if !visited.insert(value) { return; // cycle / already-expanded } // Before dispatching on the SSA op kind, consult the defining CFG node's // label set. Many Source-labelled callables in the CFG lower to an // `SsaOp::Call` rather than `SsaOp::Source` (request.args.get, // os.getenv, …), recognising the label here keeps the walk in // sync with the forward engine's source model. let def_cfg_node = ctx.ssa.def_of(value).cfg_node; if def_cfg_node.index() < ctx.cfg.node_count() { let info = &ctx.cfg[def_cfg_node]; let source_cap_match = info .taint .labels .iter() .any(|l| matches!(l, DataLabel::Source(c) if !(*c & sink_caps).is_empty())); if source_cap_match { let source_kind = classify_source_kind(ctx, def_cfg_node, sink_caps); out.push(BackwardFlow { sink_value: chain.first().copied().unwrap_or(value), sink_node, sink_caps, source_kind: Some(source_kind), source_node: Some(def_cfg_node), infeasible: false, budget_exhausted: false, max_depth: demand.depth, chain: clip_chain(chain), }); return; } } let (step, next) = backward_transfer(ctx.ssa, value, demand); match step { BackwardStep::ReachedSource(node) => { let source_kind = classify_source_kind(ctx, node, sink_caps); out.push(BackwardFlow { sink_value: chain.first().copied().unwrap_or(value), sink_node, sink_caps, source_kind: Some(source_kind), source_node: Some(node), infeasible: false, budget_exhausted: false, max_depth: demand.depth, chain: clip_chain(chain), }); } BackwardStep::ReachedConst => { // Constants never supply taint, treat as a silent prune. } BackwardStep::ReachedParam { index: _, node } => { // Reverse-call-graph expansion is intentionally left out of the // first cut: the confirmation use-case (matching a forward // finding) does not need it because the forward engine already // reached the param via the caller's argument flow. Record a // terminal "parameter" flow so the caller can see the walk // terminated cleanly. Downstream merge treats param-terminals // as non-confirmatory. out.push(BackwardFlow { sink_value: chain.first().copied().unwrap_or(value), sink_node, sink_caps, source_kind: None, source_node: Some(node), infeasible: false, budget_exhausted: false, max_depth: demand.depth, chain: clip_chain(chain), }); } BackwardStep::ReachedCatchParam => { // Exception-borne taint, record but don't confirm. Marked // non-confirmatory so unit tests can distinguish "walk reached // catch-param" from "walk reached source". } BackwardStep::Phi | BackwardStep::Assign => { for (operand, next_demand) in next { chain.push(operand); walk_dfs( ctx, operand, sink_node, sink_caps, &next_demand, visited, budget, chain, out, ); chain.pop(); } } BackwardStep::Call { callee } => { // First attempt: resolve via cross-file / intra-file SSA bodies. // // This path fires only when the callee can be keyed to a // [`FuncKey`] via [`GlobalSummaries`] because the call graph // and summary map both index on that key. If we cannot resolve // the callee we fall through to the `Assign` conservative // fanout below. let resolved = resolve_callee_body(ctx, &callee, demand.depth); if let Some((callee_body, callee_key)) = resolved { // Over the callee body, collect return-block demand roots // and recurse. Cycle guard: deeper walks reuse `visited` // via a nested set so distinct callees don't false-share. let mut callee_visited: HashSet = HashSet::new(); let mut callee_budget: u32 = 0; let callee_ctx = BackwardsCtx { ssa: &callee_body.ssa, cfg: callee_body.body_graph.as_ref().unwrap_or(ctx.cfg), lang: ctx.lang, global_summaries: ctx.global_summaries, intra_file_bodies: ctx.intra_file_bodies, depth_budget: ctx.depth_budget, }; let mut callee_demand = demand.clone(); callee_demand.depth += 1; for block in &callee_body.ssa.blocks { if let crate::ssa::ir::Terminator::Return(Some(ret_val)) = &block.terminator { walk_dfs( &callee_ctx, *ret_val, sink_node, sink_caps, &callee_demand, &mut callee_visited, &mut callee_budget, chain, out, ); } } // Prevent an unused-variable warning while still accepting // the key in the matcher, the key is useful for debug // logging in bigger expansions. let _ = callee_key; return; } // Fall-through: no resolvable body. Conservatively fan out to // every operand / receiver so a source reachable through the // call arguments is still observed. for (operand, next_demand) in next { chain.push(operand); walk_dfs( ctx, operand, sink_node, sink_caps, &next_demand, visited, budget, chain, out, ); chain.pop(); } } BackwardStep::Unknown => { // No information, terminate silently. } } } /// Try to resolve a callee name to a pre-lowered body using the same /// precedence as the forward engine: intra-file first, then cross-file. fn resolve_callee_body<'a>( ctx: &BackwardsCtx<'a>, callee: &str, current_depth: u32, ) -> Option<(&'a CalleeSsaBody, FuncKey)> { if current_depth >= ctx.depth_budget { return None; } // Strip Rust / Python qualification to a leaf name, mirroring // `callgraph::callee_leaf_name`. The normalised leaf is the lookup key // against both body maps. let leaf = callee .rsplit("::") .next() .unwrap_or(callee) .rsplit('.') .next() .unwrap_or(callee); if let Some(map) = ctx.intra_file_bodies { for (key, body) in map.iter() { if key.name == leaf && body.ssa.blocks.len() <= MAX_BACKWARDS_CALLEE_BLOCKS { return Some((body, key.clone())); } } } if let Some(map) = ctx.global_summaries.and_then(|gs| gs.bodies_by_key()) { for (key, body) in map.iter() { if key.name == leaf && body.ssa.blocks.len() <= MAX_BACKWARDS_CALLEE_BLOCKS { return Some((body, key.clone())); } } } None } /// Attempt to infer the source kind of a reached source op by inspecting /// its label on the CFG node. Falls back to `SourceKind::Unknown` when the /// node either has no labels or its labels do not include a Source. fn classify_source_kind(ctx: &BackwardsCtx<'_>, node: NodeIndex, sink_caps: Cap) -> SourceKind { if node.index() >= ctx.cfg.node_count() { return SourceKind::Unknown; } let info = &ctx.cfg[node]; // If the node carries a Source label matching the demanded caps, we // can use the existing `infer_source_kind` heuristic against the // callee name for finer-grained classification (UserInput vs // EnvironmentConfig vs FileSystem …). let caps_match = info .taint .labels .iter() .any(|l| matches!(l, DataLabel::Source(c) if !(*c & sink_caps).is_empty())); if caps_match { let callee_str = info.call.callee.as_deref().unwrap_or(""); crate::labels::infer_source_kind(sink_caps, callee_str) } else { SourceKind::Unknown } } fn clip_chain(chain: &SmallVec<[SsaValue; 8]>) -> SmallVec<[SsaValue; 8]> { if chain.len() <= MAX_CHAIN_LEN { return chain.clone(); } // Keep the sink end (first) and the most recent tail so the diagnostic // retains both ends of the flow. let tail = &chain[chain.len() - (MAX_CHAIN_LEN - 1)..]; let mut out: SmallVec<[SsaValue; 8]> = SmallVec::new(); out.push(chain[0]); out.extend_from_slice(tail); out } // ─── Finding annotation ──────────────────────────────────────────────────── /// Note appended to `evidence.notes` when a forward finding is corroborated /// by a backwards flow reaching a compatible source. pub const NOTE_CONFIRMED: &str = "backwards-confirmed"; /// Note appended when backwards ruled the flow infeasible. pub const NOTE_INFEASIBLE: &str = "backwards-infeasible"; /// Note appended when the walk hit the budget without a verdict. pub const NOTE_BUDGET: &str = "backwards-budget-exhausted"; /// Classification for a forward finding after backwards post-processing. #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum FindingVerdict { /// Backwards reached a matching source, finding corroborated. Confirmed, /// Backwards was inconclusive (no source, not infeasible). Finding /// keeps its forward-assigned confidence. Inconclusive, /// Backwards proved the flow infeasible, finding confidence must drop. Infeasible, /// Budget exhausted before a verdict was reached. BudgetExhausted, } /// Reduce a batch of backward flows for a single sink into a single verdict. pub fn aggregate_verdict(flows: &[BackwardFlow]) -> FindingVerdict { if flows.iter().any(|f| f.is_confirmation()) { return FindingVerdict::Confirmed; } if flows.iter().all(|f| f.infeasible) && !flows.is_empty() { return FindingVerdict::Infeasible; } if flows.iter().any(|f| f.budget_exhausted) { return FindingVerdict::BudgetExhausted; } FindingVerdict::Inconclusive } /// Apply a verdict as a note on a [`Finding`]. No-ops when the verdict is /// [`FindingVerdict::Inconclusive`], the forward finding retains its /// original metadata. pub fn annotate_finding(finding: &mut Finding, verdict: FindingVerdict) { // `Finding` does not own an Evidence struct directly (that lives on // the emitted `Diag`), but it carries `symbolic: Option`. // We piggy-back on `SymbolicVerdict.cutoff_notes` so the backwards // corroboration survives finding → diag → evidence without requiring // a new schema field. When `symbolic` is empty we synthesise one with // `Verdict::NotAttempted` to make the notes addressable. let note = match verdict { FindingVerdict::Confirmed => NOTE_CONFIRMED, FindingVerdict::Infeasible => NOTE_INFEASIBLE, FindingVerdict::BudgetExhausted => NOTE_BUDGET, FindingVerdict::Inconclusive => return, }; let sv = finding .symbolic .get_or_insert(crate::evidence::SymbolicVerdict { verdict: crate::evidence::Verdict::NotAttempted, constraints_checked: 0, paths_explored: 0, witness: None, interproc_call_chains: Vec::new(), cutoff_notes: Vec::new(), }); if !sv.cutoff_notes.iter().any(|n| n == note) { sv.cutoff_notes.push(note.to_string()); } } // ─── Unit tests ──────────────────────────────────────────────────────────── #[cfg(test)] mod tests { use super::*; use crate::cfg::{EdgeKind, NodeInfo}; use crate::ssa::ir::{BlockId, SsaBlock, SsaInst, Terminator, ValueDef}; use petgraph::Graph; use petgraph::graph::NodeIndex; use smallvec::smallvec; fn make_value_def(block: BlockId, cfg_node: NodeIndex) -> ValueDef { ValueDef { var_name: None, cfg_node, block, } } /// Build a one-block SSA body with a Source → Assign → terminator shape /// so the backward driver can walk a realistic chain end-to-end. fn build_trivial_source_body() -> (SsaBody, Cfg) { let mut cfg: Graph = Graph::new(); let src_node = cfg.add_node(NodeInfo::default()); let use_node = cfg.add_node(NodeInfo::default()); let source_val = SsaValue(0); let user_val = SsaValue(1); let block = SsaBlock { id: BlockId(0), phis: vec![], body: vec![ SsaInst { value: source_val, op: SsaOp::Source, cfg_node: src_node, var_name: None, span: (0, 0), }, SsaInst { value: user_val, op: SsaOp::Assign(smallvec![source_val]), cfg_node: use_node, var_name: None, span: (0, 0), }, ], terminator: Terminator::Return(Some(user_val)), preds: SmallVec::new(), succs: SmallVec::new(), }; let ssa = SsaBody { blocks: vec![block], entry: BlockId(0), value_defs: vec![ make_value_def(BlockId(0), src_node), make_value_def(BlockId(0), use_node), ], cfg_node_map: std::collections::HashMap::new(), exception_edges: Vec::new(), field_interner: crate::ssa::ir::FieldInterner::default(), field_writes: std::collections::HashMap::new(), synthetic_externals: std::collections::HashSet::new(), }; (ssa, cfg) } #[test] fn demand_state_new_sets_caps() { let d = DemandState::new(Cap::SQL_QUERY); assert_eq!(d.caps, Cap::SQL_QUERY); assert_eq!(d.depth, 0); assert_eq!(d.validated_true, 0); assert_eq!(d.validated_false, 0); } /// Regression guard: the cap-routing logic must round-trip /// `Cap::DATA_EXFIL` exactly like every other cap. The backwards /// engine treats the demand as opaque bits, so if a future change /// accidentally narrows the type of `caps` (e.g. a hardcoded mask) /// the data-exfiltration cap stops surviving the walk. #[test] fn demand_state_roundtrips_data_exfil_cap() { let d = DemandState::new(Cap::DATA_EXFIL); assert_eq!(d.caps, Cap::DATA_EXFIL); assert!(d.caps.contains(Cap::DATA_EXFIL)); // Sanity: combined demand keeps the bit alongside SSRF (the two // most-frequently-co-occurring caps on outbound HTTP gates). let combined = DemandState::new(Cap::DATA_EXFIL | Cap::SSRF); assert!(combined.caps.contains(Cap::DATA_EXFIL)); assert!(combined.caps.contains(Cap::SSRF)); } /// The backwards driver must classify a `DATA_EXFIL`-capable source /// even when the sink demand is *exactly* `DATA_EXFIL` (no other /// caps). Mirrors `driver_walks_source_to_sink` but pins the cap so /// a future change that intersects with a wider mask (and thus /// silently widens the demand) is caught. #[test] fn driver_walks_data_exfil_source_to_sink() { let (ssa, mut cfg) = build_trivial_source_body(); // Tag the source CFG node with a Source(DATA_EXFIL) label so // the cap-match path (the one that actually rules end-to-end // routing) exercises the bit. let src_node = NodeIndex::new(0); cfg[src_node] .taint .labels .push(DataLabel::Source(Cap::DATA_EXFIL)); let ctx = BackwardsCtx::new(&ssa, &cfg, Lang::JavaScript); let flows = analyse_sink_backwards(&ctx, SsaValue(1), NodeIndex::new(1), Cap::DATA_EXFIL); assert_eq!(flows.len(), 1, "exactly one DATA_EXFIL flow expected"); assert!(flows[0].is_confirmation(), "must confirm at the source"); assert_eq!(flows[0].sink_caps, Cap::DATA_EXFIL); } #[test] fn backward_transfer_source_terminates() { let (ssa, _cfg) = build_trivial_source_body(); let demand = DemandState::new(Cap::all()); let (step, next) = backward_transfer(&ssa, SsaValue(0), &demand); assert_eq!(next.len(), 0); matches!(step, BackwardStep::ReachedSource(_)); } #[test] fn backward_transfer_const_terminates() { let mut cfg: Graph = Graph::new(); let c_node = cfg.add_node(NodeInfo::default()); let ssa = SsaBody { blocks: vec![SsaBlock { id: BlockId(0), phis: vec![], body: vec![SsaInst { value: SsaValue(0), op: SsaOp::Const(None), cfg_node: c_node, var_name: None, span: (0, 0), }], terminator: Terminator::Return(Some(SsaValue(0))), preds: SmallVec::new(), succs: SmallVec::new(), }], entry: BlockId(0), value_defs: vec![make_value_def(BlockId(0), c_node)], cfg_node_map: std::collections::HashMap::new(), exception_edges: Vec::new(), field_interner: crate::ssa::ir::FieldInterner::default(), field_writes: std::collections::HashMap::new(), synthetic_externals: std::collections::HashSet::new(), }; let demand = DemandState::new(Cap::all()); let (step, next) = backward_transfer(&ssa, SsaValue(0), &demand); assert!(next.is_empty()); assert_eq!(step, BackwardStep::ReachedConst); } #[test] fn backward_transfer_param_terminates() { let mut cfg: Graph = Graph::new(); let p_node = cfg.add_node(NodeInfo::default()); let ssa = SsaBody { blocks: vec![SsaBlock { id: BlockId(0), phis: vec![], body: vec![SsaInst { value: SsaValue(0), op: SsaOp::Param { index: 2 }, cfg_node: p_node, var_name: None, span: (0, 0), }], terminator: Terminator::Return(Some(SsaValue(0))), preds: SmallVec::new(), succs: SmallVec::new(), }], entry: BlockId(0), value_defs: vec![make_value_def(BlockId(0), p_node)], cfg_node_map: std::collections::HashMap::new(), exception_edges: Vec::new(), field_interner: crate::ssa::ir::FieldInterner::default(), field_writes: std::collections::HashMap::new(), synthetic_externals: std::collections::HashSet::new(), }; let demand = DemandState::new(Cap::all()); let (step, _next) = backward_transfer(&ssa, SsaValue(0), &demand); match step { BackwardStep::ReachedParam { index, .. } => assert_eq!(index, 2), other => panic!("expected ReachedParam, got {:?}", other), } } #[test] fn backward_transfer_assign_fans_out() { let (ssa, _cfg) = build_trivial_source_body(); let demand = DemandState::new(Cap::all()); let (step, next) = backward_transfer(&ssa, SsaValue(1), &demand); assert_eq!(step, BackwardStep::Assign); assert_eq!(next.len(), 1); assert_eq!(next[0].0, SsaValue(0)); assert_eq!(next[0].1.caps, Cap::all()); } #[test] fn backward_transfer_phi_fans_out() { let mut cfg: Graph = Graph::new(); let n0 = cfg.add_node(NodeInfo::default()); let n1 = cfg.add_node(NodeInfo::default()); let n2 = cfg.add_node(NodeInfo::default()); let n3 = cfg.add_node(NodeInfo::default()); let ssa = SsaBody { blocks: vec![ SsaBlock { id: BlockId(0), phis: vec![], body: vec![SsaInst { value: SsaValue(0), op: SsaOp::Source, cfg_node: n0, var_name: None, span: (0, 0), }], terminator: Terminator::Goto(BlockId(2)), preds: SmallVec::new(), succs: smallvec![BlockId(2)], }, SsaBlock { id: BlockId(1), phis: vec![], body: vec![SsaInst { value: SsaValue(1), op: SsaOp::Const(None), cfg_node: n1, var_name: None, span: (0, 0), }], terminator: Terminator::Goto(BlockId(2)), preds: SmallVec::new(), succs: smallvec![BlockId(2)], }, SsaBlock { id: BlockId(2), phis: vec![SsaInst { value: SsaValue(2), op: SsaOp::Phi(smallvec![ (BlockId(0), SsaValue(0)), (BlockId(1), SsaValue(1)) ]), cfg_node: n2, var_name: None, span: (0, 0), }], body: vec![], terminator: Terminator::Return(Some(SsaValue(2))), preds: smallvec![BlockId(0), BlockId(1)], succs: SmallVec::new(), }, ], entry: BlockId(0), value_defs: vec![ make_value_def(BlockId(0), n0), make_value_def(BlockId(1), n1), make_value_def(BlockId(2), n2), make_value_def(BlockId(2), n3), ], cfg_node_map: std::collections::HashMap::new(), exception_edges: Vec::new(), field_interner: crate::ssa::ir::FieldInterner::default(), field_writes: std::collections::HashMap::new(), synthetic_externals: std::collections::HashSet::new(), }; let demand = DemandState::new(Cap::all()); let (step, next) = backward_transfer(&ssa, SsaValue(2), &demand); assert_eq!(step, BackwardStep::Phi); assert_eq!(next.len(), 2); } #[test] fn driver_walks_source_to_sink() { let (ssa, cfg) = build_trivial_source_body(); let ctx = BackwardsCtx::new(&ssa, &cfg, Lang::Python); let flows = analyse_sink_backwards(&ctx, SsaValue(1), NodeIndex::new(1), Cap::all()); assert_eq!(flows.len(), 1, "one source-reaching flow expected"); assert!(flows[0].is_confirmation(), "flow should confirm"); assert_eq!( flows[0].sink_node.index(), 1, "sink_node passthrough preserved" ); } #[test] fn driver_phi_yields_two_flows() { // Two-predecessor phi where only one branch carries a Source. let mut cfg: Graph = Graph::new(); let n0 = cfg.add_node(NodeInfo::default()); let n1 = cfg.add_node(NodeInfo::default()); let n2 = cfg.add_node(NodeInfo::default()); let ssa = SsaBody { blocks: vec![ SsaBlock { id: BlockId(0), phis: vec![], body: vec![SsaInst { value: SsaValue(0), op: SsaOp::Source, cfg_node: n0, var_name: None, span: (0, 0), }], terminator: Terminator::Goto(BlockId(2)), preds: SmallVec::new(), succs: smallvec![BlockId(2)], }, SsaBlock { id: BlockId(1), phis: vec![], body: vec![SsaInst { value: SsaValue(1), op: SsaOp::Const(None), cfg_node: n1, var_name: None, span: (0, 0), }], terminator: Terminator::Goto(BlockId(2)), preds: SmallVec::new(), succs: smallvec![BlockId(2)], }, SsaBlock { id: BlockId(2), phis: vec![SsaInst { value: SsaValue(2), op: SsaOp::Phi(smallvec![ (BlockId(0), SsaValue(0)), (BlockId(1), SsaValue(1)) ]), cfg_node: n2, var_name: None, span: (0, 0), }], body: vec![], terminator: Terminator::Return(Some(SsaValue(2))), preds: smallvec![BlockId(0), BlockId(1)], succs: SmallVec::new(), }, ], entry: BlockId(0), value_defs: vec![ make_value_def(BlockId(0), n0), make_value_def(BlockId(1), n1), make_value_def(BlockId(2), n2), ], cfg_node_map: std::collections::HashMap::new(), exception_edges: Vec::new(), field_interner: crate::ssa::ir::FieldInterner::default(), field_writes: std::collections::HashMap::new(), synthetic_externals: std::collections::HashSet::new(), }; let ctx = BackwardsCtx::new(&ssa, &cfg, Lang::JavaScript); let flows = analyse_sink_backwards(&ctx, SsaValue(2), NodeIndex::new(2), Cap::all()); // One flow per phi operand visited (const branch yields no flow; // source branch yields one confirmation). assert_eq!( flows.iter().filter(|f| f.is_confirmation()).count(), 1, "exactly one source-reaching flow through the phi" ); } #[test] fn aggregate_verdict_prefers_confirmation() { let confirmed = BackwardFlow { sink_value: SsaValue(0), sink_node: NodeIndex::new(0), sink_caps: Cap::SQL_QUERY, source_kind: Some(SourceKind::UserInput), source_node: Some(NodeIndex::new(1)), infeasible: false, budget_exhausted: false, max_depth: 0, chain: SmallVec::new(), }; let infeasible = BackwardFlow { sink_value: SsaValue(0), sink_node: NodeIndex::new(0), sink_caps: Cap::SQL_QUERY, source_kind: None, source_node: None, infeasible: true, budget_exhausted: false, max_depth: 0, chain: SmallVec::new(), }; assert_eq!( aggregate_verdict(&[confirmed.clone(), infeasible.clone()]), FindingVerdict::Confirmed ); assert_eq!(aggregate_verdict(&[infeasible]), FindingVerdict::Infeasible); assert_eq!(aggregate_verdict(&[]), FindingVerdict::Inconclusive); } #[test] fn annotate_finding_appends_note() { use crate::evidence::FlowStepKind; use crate::taint::FlowStepRaw; let mut f = Finding { body_id: crate::cfg::BodyId(0), sink: NodeIndex::new(0), source: NodeIndex::new(1), path: vec![], source_kind: SourceKind::UserInput, path_validated: false, guard_kind: None, hop_count: 0, cap_specificity: 1, uses_summary: false, flow_steps: vec![FlowStepRaw { cfg_node: NodeIndex::new(1), var_name: None, op_kind: FlowStepKind::Source, }], symbolic: None, source_span: None, primary_location: None, engine_notes: smallvec::SmallVec::new(), path_hash: 0, finding_id: String::new(), alternative_finding_ids: smallvec::SmallVec::new(), effective_sink_caps: crate::labels::Cap::empty(), }; annotate_finding(&mut f, FindingVerdict::Confirmed); let sv = f.symbolic.as_ref().expect("symbolic verdict created"); assert!(sv.cutoff_notes.iter().any(|n| n == NOTE_CONFIRMED)); // Idempotent annotate_finding(&mut f, FindingVerdict::Confirmed); let sv = f.symbolic.as_ref().unwrap(); assert_eq!( sv.cutoff_notes .iter() .filter(|n| *n == NOTE_CONFIRMED) .count(), 1 ); } #[test] fn annotate_finding_inconclusive_no_change() { let mut f = Finding { body_id: crate::cfg::BodyId(0), sink: NodeIndex::new(0), source: NodeIndex::new(1), path: vec![], source_kind: SourceKind::Unknown, path_validated: false, guard_kind: None, hop_count: 0, cap_specificity: 0, uses_summary: false, flow_steps: vec![], symbolic: None, source_span: None, primary_location: None, engine_notes: smallvec::SmallVec::new(), path_hash: 0, finding_id: String::new(), alternative_finding_ids: smallvec::SmallVec::new(), effective_sink_caps: crate::labels::Cap::empty(), }; annotate_finding(&mut f, FindingVerdict::Inconclusive); assert!(f.symbolic.is_none()); } #[test] fn budget_exhausted_flow_not_confirmation() { let bf = BackwardFlow { sink_value: SsaValue(0), sink_node: NodeIndex::new(0), sink_caps: Cap::all(), source_kind: Some(SourceKind::UserInput), source_node: Some(NodeIndex::new(1)), infeasible: false, budget_exhausted: true, max_depth: 0, chain: SmallVec::new(), }; assert!(!bf.is_confirmation()); } }