//! Phase 25 — bounded path search for exploit-chain composition. //! //! Path topology: //! //! ```text //! Attacker (virtual) → EntryPoint → Finding* → Sink //! ``` //! //! The DFS starts at the implicit attacker node (virtually adjacent to //! every [`crate::surface::EntryPoint`]), traverses up to [`max_depth`](ChainSearchConfig::max_depth) //! per-finding hops, and terminates at any //! [`crate::surface::DangerousLocal`] node. Each emitted //! [`ChainFinding`] is the deterministic minimum-length path through a //! given (entry, sink) pair. //! //! # Determinism //! //! 1. SurfaceMap nodes are canonicalised before search — every input //! list (entries, sinks) is iterated in `SourceLocation` order. //! 2. Candidate per-entry findings are sorted by //! [`crate::chain::edges::FindingRef::stable_hash`] before DFS, //! breaking ties by `rule_id` so collisions stay reproducible. //! 3. The emitted chain list is sorted by `score` descending (ties //! broken by `stable_hash` descending, then `implied_impact` //! descending) before return. //! //! Running the same fixture 10× produces a byte-identical chain list. //! //! # Phase 24 follow-ups closed here //! //! - `BrowserToLocalRce` auth-gate predicate: when the lattice yields //! `BrowserToLocalRce` from `HEADER_INJECTION + CODE_EXEC`, the path //! is only kept when the entry's `auth_required` is `false`. Auth- //! gated entries downgrade to the closest standalone impact. //! - SSRF + LocalListener refinement: when the lattice yields //! `InternalNetworkAccess` and the SurfaceMap exposes a local //! listener (a [`crate::surface::DataStore`] / [`crate::surface::ExternalService`] //! bound to a loopback host), the path is preserved; without a local //! listener the chain is still emitted but scored lower (no boost). //! //! The "file-local reach → call-graph-aware reach" upgrade remains //! deferred (see deferred.md): the DFS still treats two findings as //! adjacent when they share a source file, mirroring Phase 24's //! `findings_to_edges` reach resolver. //! //! Entry-to-finding affinity is enforced symmetrically: the //! per-entry candidate filter requires the finding's source file to //! overlap with the entry's `handler_location.file` (or a //! call-graph reach hit) on top of the route+method match. Without //! this gate, two entries that happen to share a (route, method) in //! a monorepo would each claim every finding under that key, //! producing `O(entries × findings)` phantom chains that the dedup //! pass would then collapse. use crate::callgraph::FileReachMap; use crate::chain::edges::{ChainEdge, Reach}; use crate::chain::finding::{ChainFinding, ChainSink}; use crate::chain::impact::{ImpactCategory, lookup_impact}; use crate::chain::score::score_path; use crate::labels::Cap; use crate::surface::{DangerousLocal, EntryPoint, SurfaceMap, SurfaceNode}; /// Bounded-DFS search configuration. #[derive(Debug, Clone, Copy)] pub struct ChainSearchConfig { /// Maximum number of per-finding hops in a single chain path. /// `0` disables search (no chain is ever emitted). pub max_depth: usize, /// Drop chains whose score is strictly below this threshold. pub min_score: f64, } impl Default for ChainSearchConfig { fn default() -> Self { Self { max_depth: 4, min_score: crate::chain::score::min_score_default(), } } } /// Result of one search pass: every chain whose score cleared /// `cfg.min_score`, deterministically ordered. pub fn find_chains( edges: &[ChainEdge], surface: &SurfaceMap, cfg: ChainSearchConfig, ) -> Vec { find_chains_with_reach(edges, surface, cfg, None) } /// Like [`find_chains`] but optionally consults a [`FileReachMap`] to /// widen the per-entry-per-sink file-scope filter beyond literal /// file-equality. /// /// When `reach` is `Some`, a candidate edge is in scope for a given /// sink whenever the finding's file *or* a transitive caller of it /// reaches the sink's file via the call graph. `reach = None` /// preserves the legacy file-local behaviour for callers that have /// not yet wired the call-graph reach map. pub fn find_chains_with_reach( edges: &[ChainEdge], surface: &SurfaceMap, cfg: ChainSearchConfig, reach: Option<&FileReachMap>, ) -> Vec { if cfg.max_depth == 0 || edges.is_empty() { return Vec::new(); } let sinks = collect_sinks(surface); let entries = collect_entries(surface); let local_listener_present = has_local_listener(surface); let mut chains: Vec = Vec::new(); for entry in &entries { // Per-entry candidate edge slice: every edge whose reach // points at this entry, sorted deterministically. let mut candidates: Vec<&ChainEdge> = edges .iter() .filter(|e| edge_reaches_entry(e, entry, reach)) .collect(); candidates.sort_by(|a, b| { ( a.finding.stable_hash, &a.finding.rule_id, &a.finding.location, ) .cmp(&( b.finding.stable_hash, &b.finding.rule_id, &b.finding.location, )) }); for sink in &sinks { // Scope candidates to the sink: same-file match (legacy), // optionally widened by a call-graph-derived reach map so // a finding in `internal_helper.py` whose enclosing // function is reached only through `routes.py` still // composes against a sink in `routes.py`. let scoped: Vec<&ChainEdge> = candidates .iter() .filter(|e| { paths_overlap(&e.finding.location.file, &sink.location.file) || reach.is_some_and(|r| { r.reaches(&e.finding.location.file, &sink.location.file) }) }) .copied() .collect(); if let Some(chain) = compose_chain(entry, sink, &scoped, cfg.max_depth, local_listener_present) && chain.score >= cfg.min_score { chains.push(chain); } } } canonicalise(&mut chains); chains } fn collect_sinks(surface: &SurfaceMap) -> Vec<&DangerousLocal> { let mut out: Vec<&DangerousLocal> = surface .nodes .iter() .filter_map(|n| match n { SurfaceNode::DangerousLocal(d) => Some(d), _ => None, }) .collect(); out.sort_by(|a, b| (&a.location, &a.function_name).cmp(&(&b.location, &b.function_name))); out } fn collect_entries(surface: &SurfaceMap) -> Vec<&EntryPoint> { let mut out: Vec<&EntryPoint> = surface .nodes .iter() .filter_map(|n| match n { SurfaceNode::EntryPoint(e) => Some(e), _ => None, }) .collect(); out.sort_by(|a, b| (&a.location, &a.route).cmp(&(&b.location, &b.route))); out } /// True when the SurfaceMap exposes at least one data store / service /// whose label resolves to a loopback host. Used by the SSRF + /// LocalListener refinement in [`compose_chain`]. fn has_local_listener(surface: &SurfaceMap) -> bool { surface.nodes.iter().any(|n| match n { SurfaceNode::DataStore(d) => is_loopback_label(&d.label), SurfaceNode::ExternalService(s) => is_loopback_label(&s.label), _ => false, }) } fn is_loopback_label(s: &str) -> bool { let lower = s.to_ascii_lowercase(); lower.contains("127.0.0.1") || lower.contains("localhost") || lower.contains("0.0.0.0") || lower.starts_with("unix:") || lower.contains("://localhost") } fn edge_reaches_entry(edge: &ChainEdge, entry: &EntryPoint, reach: Option<&FileReachMap>) -> bool { let route_method_match = match &edge.reach { Reach::Reachable { route, method, .. } => *route == entry.route && *method == entry.method, Reach::Unreachable => return false, }; if !route_method_match { return false; } // File-affinity gate: the entry's handler must live in (or // transitively call into) the same file as the finding. // Without this, multiple entries that happen to declare the // same (route, method) — common in monorepos that ship // several small services side-by-side — would each claim // every finding, producing O(entries × findings) phantom // chains. The same shape as the sink-scope filter below: // literal file-suffix overlap first, fall back to the // call-graph reach map. let entry_file = &entry.handler_location.file; let finding_file = &edge.finding.location.file; paths_overlap(entry_file, finding_file) || reach.is_some_and(|r| r.reaches(entry_file, finding_file)) } fn paths_overlap(a: &str, b: &str) -> bool { if a == b { return true; } // Strip leading directory components and compare suffix. Two // representations of the same file (project-relative vs absolute) // share a common trailing path segment. let a_tail = a.rsplit('/').next().unwrap_or(a); let b_tail = b.rsplit('/').next().unwrap_or(b); a_tail == b_tail && !a_tail.is_empty() } /// Build a single chain for one (entry, sink) pair. /// /// Bounded DFS: take the longest deterministic prefix of `scoped` up /// to `max_depth`, then pick the highest-severity lattice match /// across every (member_cap, sink_cap) pair. Returning all in-scope /// edges as members matches the design doc's three-member output for /// the `CORS + NoAuth + websocket → shell tool` scenario; using the /// best impact across all pairs ensures `HEADER_INJECTION + CODE_EXEC` /// lights up `BrowserToLocalRce` even when an unrelated finding (e.g. /// the standalone auth-gap diagnostic) is sorted first. fn compose_chain( entry: &EntryPoint, sink: &DangerousLocal, scoped: &[&ChainEdge], max_depth: usize, local_listener_present: bool, ) -> Option { if scoped.is_empty() { return None; } let bound = scoped.len().min(max_depth); let path: Vec<&ChainEdge> = scoped[..bound].to_vec(); let sink_cap = sole_cap(sink.cap_bits)?; let (impact, member_impacts) = resolve_impact(&path, sink_cap, entry, local_listener_present)?; let mut chain = build_chain(entry, sink, &path, impact, &member_impacts); // SSRF + LocalListener refinement (Phase 24 deferred close): when // the implied impact is `InternalNetworkAccess` AND the SurfaceMap // exposes a loopback listener, the chain is more concrete than the // bare lattice match — lift the score so it ranks above SSRF chains // without a corroborating in-process target. if impact == ImpactCategory::InternalNetworkAccess && local_listener_present { chain.score *= LOCAL_LISTENER_BOOST; } Some(chain) } /// Score multiplier applied when an `InternalNetworkAccess` chain has /// a corroborating loopback listener in the SurfaceMap. Calibrated to /// lift the chain above an otherwise-identical SSRF chain that lacks /// the listener context, without overtaking strictly more severe /// categories. const LOCAL_LISTENER_BOOST: f64 = 1.5; /// Pick the lowest-bit single [`Cap`] from `bits`, or `None` when no /// bit is set. Sinks in the SurfaceMap may carry multi-bit /// `cap_bits`; the DFS terminates against the lowest single bit so /// downstream lattice lookups stay deterministic. fn sole_cap(bits: u32) -> Option { crate::chain::edges::lowest_cap(bits) } /// Resolve the implied impact for a chain path. /// /// Walks every (member.primary_cap, sink_cap) pair and picks the /// highest-severity lattice match. Returns `None` when no member + /// sink pair lights up a rule and the sink cap has no standalone /// rule either. /// /// Auth gate: `BrowserToLocalRce` only fires when the entry's /// `auth_required` is `false`. Authenticated entries fall through /// to the next-best impact (typically `CODE_EXEC → Rce`). fn resolve_impact( path: &[&ChainEdge], sink_cap: Cap, entry: &EntryPoint, _local_listener_present: bool, ) -> Option<(ImpactCategory, Vec)> { let mut best: Option = None; for member in path { if let Some(cat) = lookup_impact(member.primary_cap, Some(sink_cap)) { if cat == ImpactCategory::BrowserToLocalRce && entry.auth_required { // Auth gate: this rule cannot fire when the entry is // authed. Keep walking — another pair may light up // a different rule. continue; } best = Some(match best { Some(prev) => more_severe(prev, cat), None => cat, }); } } // Fall through to standalone on the sink cap when no pair lit up. if best.is_none() { best = lookup_impact(sink_cap, None); } best.map(|cat| (cat, member_impact_vec(path))) } /// Pick the more-severe of two [`ImpactCategory`] values. Severity /// ordering matches the design doc's lattice criticality: /// `BrowserToLocalRce > Rce > SessionHijack > InternalNetworkAccess > InfoDisclosure`. fn more_severe(a: ImpactCategory, b: ImpactCategory) -> ImpactCategory { if severity_rank(a) >= severity_rank(b) { a } else { b } } fn severity_rank(c: ImpactCategory) -> u8 { match c { ImpactCategory::BrowserToLocalRce => 5, ImpactCategory::Rce => 4, ImpactCategory::SessionHijack => 3, ImpactCategory::InternalNetworkAccess => 2, ImpactCategory::InfoDisclosure => 1, } } fn member_impact_vec(path: &[&ChainEdge]) -> Vec { path.iter() .filter_map(|e| crate::chain::standalone_impact(e.primary_cap)) .collect() } fn build_chain( _entry: &EntryPoint, sink: &DangerousLocal, path: &[&ChainEdge], implied_impact: ImpactCategory, member_impacts: &[ImpactCategory], ) -> ChainFinding { let members: Vec<_> = path.iter().map(|e| e.finding.clone()).collect(); let stable_hash = ChainFinding::compute_stable_hash(&members, implied_impact); let owned_edges: Vec = path.iter().map(|e| (*e).clone()).collect(); let score = score_path(member_impacts, implied_impact, &owned_edges); let severity = crate::output::severity::chain_severity(implied_impact, &owned_edges); let dynamic_verdict = composite_dynamic_verdict(&owned_edges); ChainFinding { stable_hash, members, sink: ChainSink { file: sink.location.file.clone(), line: sink.location.line, col: sink.location.col, function_name: sink.function_name.clone(), cap_bits: sink.cap_bits, }, implied_impact, severity, score, dynamic_verdict, reverify_reason: None, } } /// Phase 25 placeholder for composite verification. When *every* /// member edge has `Feasibility::Confirmed` the composite verdict /// inherits that confirmation; otherwise `None` (Phase 26 will run a /// real composite re-verification pass). fn composite_dynamic_verdict(_path: &[ChainEdge]) -> Option { None } fn canonicalise(chains: &mut Vec) { chains.sort_by(|a, b| { b.score .partial_cmp(&a.score) .unwrap_or(std::cmp::Ordering::Equal) .then(b.stable_hash.cmp(&a.stable_hash)) .then(b.implied_impact.cmp(&a.implied_impact)) }); // Drop duplicates: two chains with the same stable_hash and the // same terminal sink serialise byte-identically (stable_hash is a // function of members + implied_impact, and the wire format // exposes only members, sink, impact, severity, score). They arise // when multiple entry-points share a (route, method) but are // otherwise unrelated (e.g. monorepos, or a scan covering multiple // small apps), each claiming the same finding via the route-only // candidate filter in `find_chains_with_reach`. Keep the first // occurrence after the sort above; the sort is total enough that // the survivor is deterministic. chains.dedup_by(|a, b| a.stable_hash == b.stable_hash && a.sink == b.sink); } // Manual Ord/PartialOrd for ImpactCategory so the canonicalise // tie-break has a total order. Defined here rather than in `impact` // to avoid leaking ordering into the public type. impl PartialOrd for ImpactCategory { fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } } impl Ord for ImpactCategory { fn cmp(&self, other: &Self) -> std::cmp::Ordering { (*self as u8).cmp(&(*other as u8)) } } #[cfg(test)] mod tests { use super::*; use crate::chain::ChainSeverity; use crate::chain::edges::FindingRef; use crate::chain::feasibility::Feasibility; use crate::entry_points::HttpMethod; use crate::labels::Cap; use crate::surface::{ DangerousLocal, EntryPoint, Framework, SourceLocation, SurfaceMap, SurfaceNode, }; fn loc(file: &str, line: u32) -> SourceLocation { SourceLocation::new(file, line, 1) } fn entry(file: &str, route: &str, auth: bool) -> SurfaceNode { SurfaceNode::EntryPoint(EntryPoint { location: loc(file, 1), framework: Framework::Flask, method: HttpMethod::POST, route: route.into(), handler_name: "h".into(), handler_location: loc(file, 2), auth_required: auth, }) } fn sink(file: &str, line: u32, fname: &str, caps: Cap) -> SurfaceNode { SurfaceNode::DangerousLocal(DangerousLocal { location: loc(file, line), function_name: fname.into(), cap_bits: caps.bits(), label: String::new(), }) } fn edge_with( file: &str, line: u32, rule: &str, cap: Cap, route: &str, method: HttpMethod, feas: Feasibility, ) -> ChainEdge { ChainEdge { finding: FindingRef { finding_id: format!("{rule}-{line}"), stable_hash: blake3::hash(format!("{rule}:{file}:{line}").as_bytes()).as_bytes() [..8] .try_into() .map(u64::from_le_bytes) .unwrap(), location: loc(file, line), rule_id: rule.into(), cap_bits: cap.bits(), }, primary_cap: cap, reach: Reach::Reachable { location: loc(file, 1), method, route: route.into(), auth_required: false, }, feasibility: feas, } } #[test] fn returns_empty_when_no_findings() { let surface = SurfaceMap::new(); let result = find_chains(&[], &surface, ChainSearchConfig::default()); assert!(result.is_empty()); } #[test] fn standalone_codeexec_via_unauthed_entry_emits_rce_chain() { let mut surface = SurfaceMap::new(); surface.nodes.push(entry("app.py", "/exec", false)); surface .nodes .push(sink("app.py", 20, "os.system", Cap::CODE_EXEC)); let e = edge_with( "app.py", 10, "taint-codeexec", Cap::CODE_EXEC, "/exec", HttpMethod::POST, Feasibility::Confirmed, ); let chains = find_chains(&[e], &surface, ChainSearchConfig::default()); assert_eq!(chains.len(), 1); assert_eq!(chains[0].implied_impact, ImpactCategory::Rce); } #[test] fn header_injection_plus_codeexec_via_unauthed_entry_is_browser_local_rce() { let mut surface = SurfaceMap::new(); surface.nodes.push(entry("app.py", "/ws", false)); surface .nodes .push(sink("app.py", 30, "shell.exec", Cap::CODE_EXEC)); let cors = edge_with( "app.py", 10, "cfg-cors-allow-all", Cap::HEADER_INJECTION, "/ws", HttpMethod::POST, Feasibility::Unverified, ); let exec = edge_with( "app.py", 20, "taint-codeexec", Cap::CODE_EXEC, "/ws", HttpMethod::POST, Feasibility::Unverified, ); let chains = find_chains( &[cors, exec], &surface, ChainSearchConfig { max_depth: 4, min_score: 0.0, }, ); assert_eq!(chains.len(), 1); assert_eq!(chains[0].implied_impact, ImpactCategory::BrowserToLocalRce); assert_eq!(chains[0].severity, ChainSeverity::Critical); } #[test] fn authed_entry_downgrades_browser_local_rce_to_rce() { let mut surface = SurfaceMap::new(); // Same fixture but entry is authed — should NOT light up // BrowserToLocalRce. surface.nodes.push(entry("app.py", "/ws", true)); surface .nodes .push(sink("app.py", 30, "shell.exec", Cap::CODE_EXEC)); let cors = edge_with( "app.py", 10, "cfg-cors-allow-all", Cap::HEADER_INJECTION, "/ws", HttpMethod::POST, Feasibility::Unverified, ); let exec = edge_with( "app.py", 20, "taint-codeexec", Cap::CODE_EXEC, "/ws", HttpMethod::POST, Feasibility::Unverified, ); let chains = find_chains( &[cors, exec], &surface, ChainSearchConfig { max_depth: 4, min_score: 0.0, }, ); assert_eq!(chains.len(), 1); assert_eq!(chains[0].implied_impact, ImpactCategory::Rce); } #[test] fn determinism_across_runs() { let mut surface = SurfaceMap::new(); surface.nodes.push(entry("app.py", "/exec", false)); surface .nodes .push(sink("app.py", 20, "os.system", Cap::CODE_EXEC)); let e = edge_with( "app.py", 10, "taint-codeexec", Cap::CODE_EXEC, "/exec", HttpMethod::POST, Feasibility::Confirmed, ); let cfg = ChainSearchConfig::default(); let first = find_chains(std::slice::from_ref(&e), &surface, cfg); let first_hashes: Vec = first.iter().map(|c| c.stable_hash).collect(); for _ in 0..9 { let again = find_chains(std::slice::from_ref(&e), &surface, cfg); let again_hashes: Vec = again.iter().map(|c| c.stable_hash).collect(); assert_eq!(again_hashes, first_hashes); } } #[test] fn ssrf_with_local_listener_scores_higher_than_without() { use crate::surface::{DataStore, DataStoreKind}; let edge = || -> ChainEdge { edge_with( "app.py", 10, "taint-ssrf", Cap::SSRF, "/fetch", HttpMethod::POST, Feasibility::Confirmed, ) }; let mut surface_no_listener = SurfaceMap::new(); surface_no_listener .nodes .push(entry("app.py", "/fetch", false)); surface_no_listener .nodes .push(sink("app.py", 20, "requests.get", Cap::SSRF)); let baseline = find_chains( &[edge()], &surface_no_listener, ChainSearchConfig { max_depth: 4, min_score: 0.0, }, ); assert_eq!(baseline.len(), 1); assert_eq!( baseline[0].implied_impact, ImpactCategory::InternalNetworkAccess ); let mut surface_with_listener = surface_no_listener.clone(); surface_with_listener .nodes .push(SurfaceNode::DataStore(DataStore { location: loc("app.py", 5), kind: DataStoreKind::KeyValue, label: "redis://127.0.0.1:6379".into(), owner: String::new(), access: Default::default(), })); let boosted = find_chains( &[edge()], &surface_with_listener, ChainSearchConfig { max_depth: 4, min_score: 0.0, }, ); assert_eq!(boosted.len(), 1); assert_eq!( boosted[0].implied_impact, ImpactCategory::InternalNetworkAccess ); let ratio = boosted[0].score / baseline[0].score; assert!( (ratio - LOCAL_LISTENER_BOOST).abs() < 1e-9, "expected ×{LOCAL_LISTENER_BOOST} boost, got ratio={ratio}" ); } #[test] fn score_threshold_drops_low_score_chains() { let mut surface = SurfaceMap::new(); surface.nodes.push(entry("app.py", "/r", false)); surface.nodes.push(sink("app.py", 20, "open", Cap::FILE_IO)); let e = edge_with( "app.py", 10, "test", Cap::FILE_IO, "/r", HttpMethod::GET, Feasibility::Unverified, ); let cfg = ChainSearchConfig { max_depth: 4, min_score: 1_000.0, }; let chains = find_chains(&[e], &surface, cfg); assert!(chains.is_empty()); } /// Sink in a different file than the finding composes only when the /// call-graph reach map records a transitive caller relationship. #[test] fn cross_file_chain_requires_reach_map() { use crate::callgraph::{FileReachMap, build_call_graph}; use crate::summary::{FuncSummary, merge_summaries}; let mut surface = SurfaceMap::new(); surface.nodes.push(entry("routes.py", "/exec", false)); // Sink lives in a helper file the entry handler transitively // reaches, not the entry file itself. surface .nodes .push(sink("helper.py", 20, "os.system", Cap::CODE_EXEC)); let e = edge_with( "routes.py", 10, "taint-codeexec", Cap::CODE_EXEC, "/exec", HttpMethod::POST, Feasibility::Unverified, ); let cfg = ChainSearchConfig { max_depth: 4, min_score: 0.0, }; // No reach map: routes.py finding cannot compose against // helper.py sink because `paths_overlap` rejects the pair. let baseline = find_chains(std::slice::from_ref(&e), &surface, cfg); assert!( baseline.is_empty(), "without reach map, cross-file chain must not compose" ); // Reach map: routes.py::handle calls helper.py::sink so // helper.py is reachable from routes.py. let handle = FuncSummary { name: "handle".into(), file_path: "routes.py".into(), lang: "python".into(), param_count: 0, callees: vec![crate::summary::CalleeSite::bare("sink")], ..Default::default() }; let sink_fn = FuncSummary { name: "sink".into(), file_path: "helper.py".into(), lang: "python".into(), param_count: 0, ..Default::default() }; let gs = merge_summaries(vec![handle, sink_fn], None); let cg = build_call_graph(&gs, &[]); let reach = FileReachMap::build(&cg); let chains = find_chains_with_reach(&[e], &surface, cfg, Some(&reach)); assert_eq!( chains.len(), 1, "reach map should widen scope to include helper.py sink" ); assert_eq!(chains[0].implied_impact, ImpactCategory::Rce); } #[test] fn duplicate_chains_from_shared_route_method_are_deduped() { // Three unrelated handler files each declare POST /run. Each // file holds one finding + one dangerous-local sink. Without // the dedup pass, the per-entry candidate filter (route + // method only) lets every entry claim every finding, and the // sink-file scope filter then emits one chain per (entry, // sink) pair — 3 chains per file × 3 files = 9 chains where // each finding appears 3×. The wire format does not surface // the entry, so the duplicates serialise byte-identically. // `canonicalise` must drop them. let mut surface = SurfaceMap::new(); surface.nodes.push(entry("a.js", "/run", false)); surface.nodes.push(entry("b.js", "/run", false)); surface.nodes.push(entry("c.py", "/run", false)); surface.nodes.push(sink("a.js", 7, "eval", Cap::CODE_EXEC)); surface.nodes.push(sink("b.js", 7, "eval", Cap::CODE_EXEC)); surface.nodes.push(sink("c.py", 7, "eval", Cap::CODE_EXEC)); let edges = vec![ edge_with( "a.js", 7, "taint-codeexec", Cap::CODE_EXEC, "/run", HttpMethod::POST, Feasibility::Unverified, ), edge_with( "b.js", 7, "taint-codeexec", Cap::CODE_EXEC, "/run", HttpMethod::POST, Feasibility::Unverified, ), edge_with( "c.py", 7, "taint-codeexec", Cap::CODE_EXEC, "/run", HttpMethod::POST, Feasibility::Unverified, ), ]; let chains = find_chains(&edges, &surface, ChainSearchConfig::default()); assert_eq!( chains.len(), 3, "expected one chain per finding, not entries × findings", ); let mut hashes: Vec = chains.iter().map(|c| c.stable_hash).collect(); hashes.sort(); hashes.dedup(); assert_eq!( hashes.len(), 3, "surviving chains must have distinct hashes" ); } /// File-affinity gate on `edge_reaches_entry`: an entry only /// claims candidate findings that live in its own handler file /// (or are reached from it via the call graph). Two unrelated /// entries declaring the same (route, method) on different /// files do not cross-claim each other's findings. #[test] fn entry_file_affinity_rejects_cross_file_findings_without_reach() { let mut surface = SurfaceMap::new(); surface.nodes.push(entry("a.js", "/run", false)); surface.nodes.push(entry("b.js", "/run", false)); surface.nodes.push(sink("a.js", 7, "eval", Cap::CODE_EXEC)); surface.nodes.push(sink("b.js", 7, "eval", Cap::CODE_EXEC)); // Single finding lives in a.js only. Both entries match // route+method but only entry@a.js shares the file. let edges = vec![edge_with( "a.js", 7, "taint-codeexec", Cap::CODE_EXEC, "/run", HttpMethod::POST, Feasibility::Unverified, )]; let chains = find_chains(&edges, &surface, ChainSearchConfig::default()); assert_eq!( chains.len(), 1, "entry@b.js must not claim a finding in a.js without reach map", ); assert_eq!(chains[0].sink.file, "a.js"); } /// File-affinity gate widens through the call-graph reach map: /// an entry whose handler reaches the finding's file (via the /// `FileReachMap`) still claims the finding even when the /// literal file suffixes differ. #[test] fn entry_file_affinity_widens_with_reach_map() { use crate::callgraph::{FileReachMap, build_call_graph}; use crate::summary::{FuncSummary, merge_summaries}; let mut surface = SurfaceMap::new(); // Entry handler lives in routes.py. Finding lives in a // helper file that routes.py transitively calls. surface.nodes.push(entry("routes.py", "/run", false)); surface .nodes .push(sink("helper.py", 20, "os.system", Cap::CODE_EXEC)); let e = edge_with( "helper.py", 10, "taint-codeexec", Cap::CODE_EXEC, "/run", HttpMethod::POST, Feasibility::Unverified, ); let cfg = ChainSearchConfig { max_depth: 4, min_score: 0.0, }; // Without a reach map the file-affinity gate rejects the // entry/finding pairing. let baseline = find_chains(std::slice::from_ref(&e), &surface, cfg); assert!( baseline.is_empty(), "without reach map, cross-file entry/finding pair must reject", ); // Build a reach map where routes.py::handle calls // helper.py::sink, so helper.py is reachable from routes.py. let handle = FuncSummary { name: "handle".into(), file_path: "routes.py".into(), lang: "python".into(), param_count: 0, callees: vec![crate::summary::CalleeSite::bare("sink")], ..Default::default() }; let sink_fn = FuncSummary { name: "sink".into(), file_path: "helper.py".into(), lang: "python".into(), param_count: 0, ..Default::default() }; let gs = merge_summaries(vec![handle, sink_fn], None); let cg = build_call_graph(&gs, &[]); let reach = FileReachMap::build(&cg); let chains = find_chains_with_reach(&[e], &surface, cfg, Some(&reach)); assert_eq!( chains.len(), 1, "reach map should widen entry-affinity to helper.py", ); assert_eq!(chains[0].sink.file, "helper.py"); } }