nyx/src/chain/search.rs

947 lines
34 KiB
Rust
Raw Normal View History

2026-06-05 10:16:30 -05:00
//! Phase 25 — bounded path search for exploit-chain composition.
//!
//! Path topology:
//!
//! ```text
//! Attacker (virtual) → EntryPoint → Finding* → Sink
//! ```
//!
//! The DFS starts at the implicit attacker node (virtually adjacent to
//! every [`crate::surface::EntryPoint`]), traverses up to [`max_depth`](ChainSearchConfig::max_depth)
//! per-finding hops, and terminates at any
//! [`crate::surface::DangerousLocal`] node. Each (entry, sink) pair
//! yields at most one [`ChainFinding`], built from the first
//! `max_depth` in-scope findings taken in canonical order.
//!
//! # Determinism
//!
//! 1. SurfaceMap nodes are canonicalised before search — every input
//! list (entries, sinks) is iterated in `SourceLocation` order.
//! 2. Candidate per-entry findings are sorted by
//! [`crate::chain::edges::FindingRef::stable_hash`] before DFS,
//! breaking ties by `rule_id`, then by `location`, so collisions stay
//! reproducible.
//! 3. The emitted chain list is sorted by `score` descending (ties
//! broken by `stable_hash` descending, then `implied_impact`
//! descending) before return.
//!
//! Running the same fixture 10× produces a byte-identical chain list.
//!
//! # Phase 24 follow-ups closed here
//!
//! - `BrowserToLocalRce` auth-gate predicate: when the lattice yields
//! `BrowserToLocalRce` from `HEADER_INJECTION + CODE_EXEC`, the path
//! is only kept when the entry's `auth_required` is `false`. Auth-
//! gated entries downgrade to the closest standalone impact.
//! - SSRF + LocalListener refinement: when the lattice yields
//! `InternalNetworkAccess` and the SurfaceMap exposes a local
//! listener (a [`crate::surface::DataStore`] / [`crate::surface::ExternalService`]
//! bound to a loopback host), the path is preserved; without a local
//! listener the chain is still emitted but scored lower (no boost).
//!
//! The "file-local reach → call-graph-aware reach" upgrade remains
//! deferred (see deferred.md): the DFS still treats two findings as
//! adjacent when they share a source file, mirroring Phase 24's
//! `findings_to_edges` reach resolver.
//!
//! Entry-to-finding affinity is enforced symmetrically: the
//! per-entry candidate filter requires the finding's source file to
//! overlap with the entry's `handler_location.file` (or a
//! call-graph reach hit) on top of the route+method match. Without
//! this gate, two entries that happen to share a (route, method) in
//! a monorepo would each claim every finding under that key,
//! producing `O(entries × findings)` phantom chains that the dedup
//! pass would then collapse.
use crate::callgraph::FileReachMap;
use crate::chain::edges::{ChainEdge, Reach};
use crate::chain::finding::{ChainFinding, ChainSink};
use crate::chain::impact::{ImpactCategory, lookup_impact};
use crate::chain::score::score_path;
use crate::labels::Cap;
use crate::surface::{DangerousLocal, EntryPoint, SurfaceMap, SurfaceNode};
/// Bounded-DFS search configuration.
///
/// Passed by value to [`find_chains`] and [`find_chains_with_reach`];
/// the type is `Copy`, so one value can be reused across searches.
#[derive(Debug, Clone, Copy)]
pub struct ChainSearchConfig {
/// Maximum number of per-finding hops in a single chain path.
/// `0` disables search (no chain is ever emitted).
pub max_depth: usize,
/// Drop chains whose score is strictly below this threshold; a chain
/// scoring exactly `min_score` is kept.
pub min_score: f64,
}
impl Default for ChainSearchConfig {
fn default() -> Self {
Self {
max_depth: 4,
min_score: crate::chain::score::min_score_default(),
}
}
}
/// Result of one search pass: every chain whose score cleared
/// `cfg.min_score`, deterministically ordered.
pub fn find_chains(
edges: &[ChainEdge],
surface: &SurfaceMap,
cfg: ChainSearchConfig,
) -> Vec<ChainFinding> {
find_chains_with_reach(edges, surface, cfg, None)
}
/// Like [`find_chains`] but optionally consults a [`FileReachMap`] to
/// widen the per-entry-per-sink file-scope filter beyond literal
/// file-equality.
///
/// When `reach` is `Some`, a candidate edge is in scope for a given
/// sink whenever the finding's file *or* a transitive caller of it
/// reaches the sink's file via the call graph. `reach = None`
/// preserves the legacy file-local behaviour for callers that have
/// not yet wired the call-graph reach map.
pub fn find_chains_with_reach(
edges: &[ChainEdge],
surface: &SurfaceMap,
cfg: ChainSearchConfig,
reach: Option<&FileReachMap>,
) -> Vec<ChainFinding> {
if cfg.max_depth == 0 || edges.is_empty() {
return Vec::new();
}
let sinks = collect_sinks(surface);
let entries = collect_entries(surface);
let local_listener_present = has_local_listener(surface);
let mut chains: Vec<ChainFinding> = Vec::new();
for entry in &entries {
// Per-entry candidate edge slice: every edge whose reach
// points at this entry, sorted deterministically.
let mut candidates: Vec<&ChainEdge> = edges
.iter()
.filter(|e| edge_reaches_entry(e, entry, reach))
.collect();
candidates.sort_by(|a, b| {
(
a.finding.stable_hash,
&a.finding.rule_id,
&a.finding.location,
)
.cmp(&(
b.finding.stable_hash,
&b.finding.rule_id,
&b.finding.location,
))
});
for sink in &sinks {
// Scope candidates to the sink: same-file match (legacy),
// optionally widened by a call-graph-derived reach map so
// a finding in `internal_helper.py` whose enclosing
// function is reached only through `routes.py` still
// composes against a sink in `routes.py`.
let scoped: Vec<&ChainEdge> = candidates
.iter()
.filter(|e| {
paths_overlap(&e.finding.location.file, &sink.location.file)
|| reach.is_some_and(|r| {
r.reaches(&e.finding.location.file, &sink.location.file)
})
})
.copied()
.collect();
if let Some(chain) =
compose_chain(entry, sink, &scoped, cfg.max_depth, local_listener_present)
&& chain.score >= cfg.min_score
{
chains.push(chain);
}
}
}
canonicalise(&mut chains);
chains
}
/// Gather every `DangerousLocal` node of the surface map, ordered by
/// `(location, function_name)` so iteration is reproducible.
fn collect_sinks(surface: &SurfaceMap) -> Vec<&DangerousLocal> {
    let mut sinks: Vec<&DangerousLocal> = Vec::new();
    for node in surface.nodes.iter() {
        if let SurfaceNode::DangerousLocal(local) = node {
            sinks.push(local);
        }
    }
    sinks.sort_by(|lhs, rhs| {
        lhs.location
            .cmp(&rhs.location)
            .then_with(|| lhs.function_name.cmp(&rhs.function_name))
    });
    sinks
}
/// Gather every `EntryPoint` node of the surface map, ordered by
/// `(location, route)` so iteration is reproducible.
fn collect_entries(surface: &SurfaceMap) -> Vec<&EntryPoint> {
    let mut entries: Vec<&EntryPoint> = Vec::new();
    for node in surface.nodes.iter() {
        if let SurfaceNode::EntryPoint(point) = node {
            entries.push(point);
        }
    }
    entries.sort_by(|lhs, rhs| {
        lhs.location
            .cmp(&rhs.location)
            .then_with(|| lhs.route.cmp(&rhs.route))
    });
    entries
}
/// Report whether any `DataStore` or `ExternalService` node in the
/// surface map carries a label that [`is_loopback_label`] accepts.
/// Other node kinds are ignored. Used by the SSRF + LocalListener
/// refinement in [`compose_chain`].
fn has_local_listener(surface: &SurfaceMap) -> bool {
    for node in surface.nodes.iter() {
        let label: &str = match node {
            SurfaceNode::DataStore(store) => &store.label,
            SurfaceNode::ExternalService(service) => &service.label,
            _ => continue,
        };
        if is_loopback_label(label) {
            return true;
        }
    }
    false
}
/// Heuristically decide whether a data-store / service label points at
/// a listener on the local machine.
///
/// The check is a case-insensitive substring match against well-known
/// local addresses (`127.0.0.1`, `localhost`, the wildcard bind address
/// `0.0.0.0`, and the bracketed IPv6 loopback `[::1]`), plus a `unix:`
/// prefix for Unix-domain sockets. Only the bracketed IPv6 form is
/// matched: a bare `::1` would also match unrelated addresses such as
/// `fe80::1`.
fn is_loopback_label(s: &str) -> bool {
    // `"://localhost"` is intentionally absent: it is already covered
    // by the plain `"localhost"` marker.
    const LOCAL_MARKERS: [&str; 4] = ["127.0.0.1", "localhost", "0.0.0.0", "[::1]"];

    let lower = s.to_ascii_lowercase();
    lower.starts_with("unix:") || LOCAL_MARKERS.iter().any(|marker| lower.contains(*marker))
}
/// Decide whether `edge` belongs to `entry`'s candidate set.
///
/// Two gates must both pass:
///
/// 1. The edge is `Reach::Reachable` and its `(route, method)` equals
///    the entry's.
/// 2. File affinity: the entry's handler file overlaps the finding's
///    file, or the optional reach map says the handler file reaches it.
///
/// The second gate stops several entries that declare the same
/// `(route, method)` (common when a monorepo ships several small
/// services side by side) from each claiming every finding.
fn edge_reaches_entry(edge: &ChainEdge, entry: &EntryPoint, reach: Option<&FileReachMap>) -> bool {
    let Reach::Reachable { route, method, .. } = &edge.reach else {
        return false;
    };
    if *route != entry.route || *method != entry.method {
        return false;
    }

    let handler_file = &entry.handler_location.file;
    let finding_file = &edge.finding.location.file;
    if paths_overlap(handler_file, finding_file) {
        return true;
    }
    // No literal overlap: fall back to the call-graph reach map.
    match reach {
        Some(map) => map.reaches(handler_file, finding_file),
        None => false,
    }
}
/// Decide whether two path strings plausibly name the same file.
///
/// Identical strings always match. Otherwise the paths are compared
/// component by component from the file name backwards: the file names
/// must be equal and non-empty, and every directory component present
/// in *both* paths must agree. This lets a project-relative path match
/// its absolute form (`src/app.py` vs `/home/u/proj/src/app.py`) while
/// rejecting unrelated files that merely share a basename
/// (`svc_a/index.js` vs `svc_b/index.js`).
///
/// Empty and `.` directory components are skipped, so a leading `/` or
/// `./` does not affect the comparison. Only `/` is treated as a
/// separator.
fn paths_overlap(a: &str, b: &str) -> bool {
    if a == b {
        return true;
    }

    let mut a_parts = a.rsplit('/');
    let mut b_parts = b.rsplit('/');

    // The trailing component is the file name; it must exist and match.
    let (Some(a_name), Some(b_name)) = (a_parts.next(), b_parts.next()) else {
        return false;
    };
    if a_name.is_empty() || a_name != b_name {
        return false;
    }

    // Walk the parent directories right-to-left until one side runs out.
    let is_real_dir = |part: &&str| !part.is_empty() && *part != ".";
    let mut a_dirs = a_parts.filter(is_real_dir);
    let mut b_dirs = b_parts.filter(is_real_dir);
    loop {
        match (a_dirs.next(), b_dirs.next()) {
            (Some(x), Some(y)) if x == y => continue,
            (Some(_), Some(_)) => return false,
            // The shorter path is a component-wise suffix of the longer.
            _ => return true,
        }
    }
}
/// Build at most one chain for a single (entry, sink) pair.
///
/// The chain's members are the first `max_depth` edges of `scoped`
/// (which the caller has already sorted deterministically). The implied
/// impact is the most severe lattice match over every
/// (member cap, sink cap) pair, as decided by [`resolve_impact`], so a
/// high-impact pairing is found even when an unrelated finding sorts
/// first.
///
/// Returns `None` when `scoped` is empty, when the sink carries no
/// capability bit, or when no impact can be resolved.
///
/// SSRF + LocalListener refinement: a chain whose impact is
/// `InternalNetworkAccess` has its score multiplied by
/// `LOCAL_LISTENER_BOOST` when `local_listener_present` is `true`.
fn compose_chain(
    entry: &EntryPoint,
    sink: &DangerousLocal,
    scoped: &[&ChainEdge],
    max_depth: usize,
    local_listener_present: bool,
) -> Option<ChainFinding> {
    if scoped.is_empty() {
        return None;
    }

    let depth = max_depth.min(scoped.len());
    let path = &scoped[..depth];

    let sink_cap = sole_cap(sink.cap_bits)?;
    let (impact, member_impacts) = resolve_impact(path, sink_cap, entry, local_listener_present)?;
    let mut chain = build_chain(entry, sink, path, impact, &member_impacts);

    let corroborated =
        local_listener_present && matches!(impact, ImpactCategory::InternalNetworkAccess);
    if corroborated {
        chain.score *= LOCAL_LISTENER_BOOST;
    }
    Some(chain)
}
/// Score multiplier applied when an `InternalNetworkAccess` chain has
/// a corroborating loopback listener in the SurfaceMap. Calibrated to
/// lift the chain above an otherwise-identical SSRF chain that lacks
/// the listener context, without overtaking strictly more severe
/// categories.
///
/// NOTE(review): the "without overtaking" property depends on the
/// score ranges produced by `score_path`, which are not visible in
/// this module; confirm before changing the value.
const LOCAL_LISTENER_BOOST: f64 = 1.5;
/// Reduce a sink's `cap_bits` to a single [`Cap`] by delegating to
/// `crate::chain::edges::lowest_cap`; `None` propagates when that
/// helper finds no capability. Sinks may carry multi-bit `cap_bits`,
/// and terminating against one bit keeps downstream lattice lookups
/// deterministic.
fn sole_cap(bits: u32) -> Option<Cap> {
    use crate::chain::edges::lowest_cap;

    lowest_cap(bits)
}
/// Resolve the implied impact for a chain path.
///
/// Walks every (member.primary_cap, sink_cap) pair and picks the
/// highest-severity lattice match. Returns `None` when no member +
/// sink pair lights up a rule and the sink cap has no standalone
/// rule either.
///
/// Auth gate: `BrowserToLocalRce` only fires when the entry's
/// `auth_required` is `false`. Authenticated entries fall through
/// to the next-best impact (typically `CODE_EXEC → Rce`).
fn resolve_impact(
path: &[&ChainEdge],
sink_cap: Cap,
entry: &EntryPoint,
_local_listener_present: bool,
) -> Option<(ImpactCategory, Vec<ImpactCategory>)> {
let mut best: Option<ImpactCategory> = None;
for member in path {
if let Some(cat) = lookup_impact(member.primary_cap, Some(sink_cap)) {
if cat == ImpactCategory::BrowserToLocalRce && entry.auth_required {
// Auth gate: this rule cannot fire when the entry is
// authed. Keep walking — another pair may light up
// a different rule.
continue;
}
best = Some(match best {
Some(prev) => more_severe(prev, cat),
None => cat,
});
}
}
// Fall through to standalone on the sink cap when no pair lit up.
if best.is_none() {
best = lookup_impact(sink_cap, None);
}
best.map(|cat| (cat, member_impact_vec(path)))
}
/// Return whichever of two [`ImpactCategory`] values ranks higher under
/// [`severity_rank`]; on a tie the first argument is returned. The
/// ranking is
/// `BrowserToLocalRce > Rce > SessionHijack > InternalNetworkAccess > InfoDisclosure`.
fn more_severe(a: ImpactCategory, b: ImpactCategory) -> ImpactCategory {
    use std::cmp::Ordering;

    match severity_rank(a).cmp(&severity_rank(b)) {
        Ordering::Less => b,
        Ordering::Equal | Ordering::Greater => a,
    }
}
fn severity_rank(c: ImpactCategory) -> u8 {
match c {
ImpactCategory::BrowserToLocalRce => 5,
ImpactCategory::Rce => 4,
ImpactCategory::SessionHijack => 3,
ImpactCategory::InternalNetworkAccess => 2,
ImpactCategory::InfoDisclosure => 1,
}
}
/// Collect the standalone impact of each path member, in path order.
/// Members whose `primary_cap` has no standalone impact are skipped, so
/// the result may be shorter than `path`.
fn member_impact_vec(path: &[&ChainEdge]) -> Vec<ImpactCategory> {
    let mut impacts = Vec::with_capacity(path.len());
    for edge in path {
        if let Some(impact) = crate::chain::standalone_impact(edge.primary_cap) {
            impacts.push(impact);
        }
    }
    impacts
}
/// Assemble the [`ChainFinding`] for an already-resolved path.
///
/// Clones each member's finding reference and edge, then derives the
/// stable hash (from members + implied impact), the score, the severity
/// and the composite dynamic verdict. The sink's location, function
/// name and `cap_bits` are copied into the chain's `ChainSink`.
/// `_entry` is currently unused; `reverify_reason` starts as `None`.
fn build_chain(
    _entry: &EntryPoint,
    sink: &DangerousLocal,
    path: &[&ChainEdge],
    implied_impact: ImpactCategory,
    member_impacts: &[ImpactCategory],
) -> ChainFinding {
    let mut members = Vec::with_capacity(path.len());
    let mut owned_edges: Vec<ChainEdge> = Vec::with_capacity(path.len());
    for &edge in path {
        members.push(edge.finding.clone());
        owned_edges.push(edge.clone());
    }

    let terminal = ChainSink {
        file: sink.location.file.clone(),
        line: sink.location.line,
        col: sink.location.col,
        function_name: sink.function_name.clone(),
        cap_bits: sink.cap_bits,
    };

    let stable_hash = ChainFinding::compute_stable_hash(&members, implied_impact);
    let score = score_path(member_impacts, implied_impact, &owned_edges);
    let severity = crate::output::severity::chain_severity(implied_impact, &owned_edges);
    let dynamic_verdict = composite_dynamic_verdict(&owned_edges);

    ChainFinding {
        stable_hash,
        members,
        sink: terminal,
        implied_impact,
        severity,
        score,
        dynamic_verdict,
        reverify_reason: None,
    }
}
/// Phase 25 placeholder for composite verification.
///
/// Always returns `None`: the member edges in `_path` are not
/// inspected yet. The intended behaviour (not implemented here) is for
/// the composite verdict to inherit confirmation when *every* member
/// edge has `Feasibility::Confirmed`; Phase 26 is expected to run a
/// real composite re-verification pass.
fn composite_dynamic_verdict(_path: &[ChainEdge]) -> Option<crate::evidence::VerifyResult> {
None
}
fn canonicalise(chains: &mut Vec<ChainFinding>) {
chains.sort_by(|a, b| {
b.score
.partial_cmp(&a.score)
.unwrap_or(std::cmp::Ordering::Equal)
.then(b.stable_hash.cmp(&a.stable_hash))
.then(b.implied_impact.cmp(&a.implied_impact))
});
// Drop duplicates: two chains with the same stable_hash and the
// same terminal sink serialise byte-identically (stable_hash is a
// function of members + implied_impact, and the wire format
// exposes only members, sink, impact, severity, score). They arise
// when multiple entry-points share a (route, method) but are
// otherwise unrelated (e.g. monorepos, or a scan covering multiple
// small apps), each claiming the same finding via the route-only
// candidate filter in `find_chains_with_reach`. Keep the first
// occurrence after the sort above; the sort is total enough that
// the survivor is deterministic.
chains.dedup_by(|a, b| a.stable_hash == b.stable_hash && a.sink == b.sink);
}
// Hand-written `Ord` / `PartialOrd` for `ImpactCategory`, giving the
// `canonicalise` tie-break a total order. Variants compare by their
// discriminant cast to `u8`. The impls are kept in this module rather
// than alongside the type in `impact`.
impl Ord for ImpactCategory {
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        let lhs = *self as u8;
        let rhs = *other as u8;
        lhs.cmp(&rhs)
    }
}

impl PartialOrd for ImpactCategory {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        // Delegate to `Ord` so the two orderings can never disagree.
        Some(self.cmp(other))
    }
}
#[cfg(test)]
mod tests {
use super::*;
use crate::chain::ChainSeverity;
use crate::chain::edges::FindingRef;
use crate::chain::feasibility::Feasibility;
use crate::entry_points::HttpMethod;
use crate::labels::Cap;
use crate::surface::{
DangerousLocal, EntryPoint, Framework, SourceLocation, SurfaceMap, SurfaceNode,
};
/// Fixture helper: a `SourceLocation` at column 1 of `line` in `file`.
fn loc(file: &str, line: u32) -> SourceLocation {
    let column = 1;
    SourceLocation::new(file, line, column)
}
/// Fixture helper: a Flask `POST` entry point declared on line 1 of
/// `file`, whose handler `h` sits on line 2 of the same file.
fn entry(file: &str, route: &str, auth: bool) -> SurfaceNode {
    let declared_at = loc(file, 1);
    let handler_location = loc(file, 2);
    let point = EntryPoint {
        location: declared_at,
        framework: Framework::Flask,
        method: HttpMethod::POST,
        route: route.into(),
        handler_name: "h".into(),
        handler_location,
        auth_required: auth,
    };
    SurfaceNode::EntryPoint(point)
}
/// Fixture helper: a dangerous-local sink node for `fname` at
/// `file:line`, carrying the capability bits of `caps` and an empty
/// label.
fn sink(file: &str, line: u32, fname: &str, caps: Cap) -> SurfaceNode {
    SurfaceNode::DangerousLocal(DangerousLocal {
        location: loc(file, line),
        function_name: fname.into(),
        cap_bits: caps.bits(),
        label: String::new(),
    })
}
/// Fixture helper: a reachable, unauthenticated `ChainEdge` for a
/// finding of `rule` at `file:line`.
///
/// The finding's `stable_hash` is the first eight bytes
/// (little-endian) of the BLAKE3 digest of `"{rule}:{file}:{line}"`, so
/// identical arguments always produce the same hash. The reach location
/// is line 1 of `file`.
fn edge_with(
    file: &str,
    line: u32,
    rule: &str,
    cap: Cap,
    route: &str,
    method: HttpMethod,
    feas: Feasibility,
) -> ChainEdge {
    let digest = blake3::hash(format!("{rule}:{file}:{line}").as_bytes());
    let prefix: [u8; 8] = digest.as_bytes()[..8].try_into().unwrap();
    let stable_hash = u64::from_le_bytes(prefix);

    let finding = FindingRef {
        finding_id: format!("{rule}-{line}"),
        stable_hash,
        location: loc(file, line),
        rule_id: rule.into(),
        cap_bits: cap.bits(),
    };
    let reach = Reach::Reachable {
        location: loc(file, 1),
        method,
        route: route.into(),
        auth_required: false,
    };

    ChainEdge {
        finding,
        primary_cap: cap,
        reach,
        feasibility: feas,
    }
}
/// An empty edge list yields no chains, whatever the surface holds.
#[test]
fn returns_empty_when_no_findings() {
    let no_edges: [ChainEdge; 0] = [];
    let surface = SurfaceMap::new();
    let chains = find_chains(&no_edges, &surface, ChainSearchConfig::default());
    assert!(chains.is_empty());
}
/// A single code-exec finding reaching a code-exec sink through an
/// unauthenticated entry composes into exactly one `Rce` chain.
#[test]
fn standalone_codeexec_via_unauthed_entry_emits_rce_chain() {
    let finding = edge_with(
        "app.py",
        10,
        "taint-codeexec",
        Cap::CODE_EXEC,
        "/exec",
        HttpMethod::POST,
        Feasibility::Confirmed,
    );

    let mut surface = SurfaceMap::new();
    surface.nodes.push(entry("app.py", "/exec", false));
    surface
        .nodes
        .push(sink("app.py", 20, "os.system", Cap::CODE_EXEC));

    let chains = find_chains(&[finding], &surface, ChainSearchConfig::default());
    assert_eq!(chains.len(), 1);
    assert_eq!(chains[0].implied_impact, ImpactCategory::Rce);
}
/// Header injection plus code exec behind an unauthenticated entry
/// lights up `BrowserToLocalRce` at critical severity.
#[test]
fn header_injection_plus_codeexec_via_unauthed_entry_is_browser_local_rce() {
    let cfg = ChainSearchConfig {
        max_depth: 4,
        min_score: 0.0,
    };
    let cors_finding = edge_with(
        "app.py",
        10,
        "cfg-cors-allow-all",
        Cap::HEADER_INJECTION,
        "/ws",
        HttpMethod::POST,
        Feasibility::Unverified,
    );
    let exec_finding = edge_with(
        "app.py",
        20,
        "taint-codeexec",
        Cap::CODE_EXEC,
        "/ws",
        HttpMethod::POST,
        Feasibility::Unverified,
    );

    let mut surface = SurfaceMap::new();
    surface.nodes.push(entry("app.py", "/ws", false));
    surface
        .nodes
        .push(sink("app.py", 30, "shell.exec", Cap::CODE_EXEC));

    let chains = find_chains(&[cors_finding, exec_finding], &surface, cfg);
    assert_eq!(chains.len(), 1);
    assert_eq!(chains[0].implied_impact, ImpactCategory::BrowserToLocalRce);
    assert_eq!(chains[0].severity, ChainSeverity::Critical);
}
/// Same fixture as the unauthenticated case, but the entry requires
/// auth: `BrowserToLocalRce` must not fire and the chain resolves to
/// plain `Rce`.
#[test]
fn authed_entry_downgrades_browser_local_rce_to_rce() {
    let cfg = ChainSearchConfig {
        max_depth: 4,
        min_score: 0.0,
    };
    let cors_finding = edge_with(
        "app.py",
        10,
        "cfg-cors-allow-all",
        Cap::HEADER_INJECTION,
        "/ws",
        HttpMethod::POST,
        Feasibility::Unverified,
    );
    let exec_finding = edge_with(
        "app.py",
        20,
        "taint-codeexec",
        Cap::CODE_EXEC,
        "/ws",
        HttpMethod::POST,
        Feasibility::Unverified,
    );

    let mut surface = SurfaceMap::new();
    // `true` marks the entry as authenticated.
    surface.nodes.push(entry("app.py", "/ws", true));
    surface
        .nodes
        .push(sink("app.py", 30, "shell.exec", Cap::CODE_EXEC));

    let chains = find_chains(&[cors_finding, exec_finding], &surface, cfg);
    assert_eq!(chains.len(), 1);
    assert_eq!(chains[0].implied_impact, ImpactCategory::Rce);
}
/// Ten runs over the same fixture produce the same sequence of chain
/// hashes.
#[test]
fn determinism_across_runs() {
    let finding = edge_with(
        "app.py",
        10,
        "taint-codeexec",
        Cap::CODE_EXEC,
        "/exec",
        HttpMethod::POST,
        Feasibility::Confirmed,
    );
    let mut surface = SurfaceMap::new();
    surface.nodes.push(entry("app.py", "/exec", false));
    surface
        .nodes
        .push(sink("app.py", 20, "os.system", Cap::CODE_EXEC));
    let cfg = ChainSearchConfig::default();

    let run = || -> Vec<u64> {
        find_chains(std::slice::from_ref(&finding), &surface, cfg)
            .iter()
            .map(|chain| chain.stable_hash)
            .collect()
    };

    let reference = run();
    for _ in 0..9 {
        assert_eq!(run(), reference);
    }
}
/// An SSRF chain is emitted with or without a loopback listener in the
/// surface map, but the listener multiplies its score by exactly
/// `LOCAL_LISTENER_BOOST`.
#[test]
fn ssrf_with_local_listener_scores_higher_than_without() {
    use crate::surface::{DataStore, DataStoreKind};

    let cfg = ChainSearchConfig {
        max_depth: 4,
        min_score: 0.0,
    };
    let ssrf_edge = || -> ChainEdge {
        edge_with(
            "app.py",
            10,
            "taint-ssrf",
            Cap::SSRF,
            "/fetch",
            HttpMethod::POST,
            Feasibility::Confirmed,
        )
    };

    let mut surface_no_listener = SurfaceMap::new();
    surface_no_listener
        .nodes
        .push(entry("app.py", "/fetch", false));
    surface_no_listener
        .nodes
        .push(sink("app.py", 20, "requests.get", Cap::SSRF));

    let baseline = find_chains(&[ssrf_edge()], &surface_no_listener, cfg);
    assert_eq!(baseline.len(), 1);
    assert_eq!(
        baseline[0].implied_impact,
        ImpactCategory::InternalNetworkAccess
    );

    // Same surface plus a key-value store bound to 127.0.0.1.
    let mut surface_with_listener = surface_no_listener.clone();
    surface_with_listener
        .nodes
        .push(SurfaceNode::DataStore(DataStore {
            location: loc("app.py", 5),
            kind: DataStoreKind::KeyValue,
            label: "redis://127.0.0.1:6379".into(),
            owner: String::new(),
            access: Default::default(),
        }));

    let boosted = find_chains(&[ssrf_edge()], &surface_with_listener, cfg);
    assert_eq!(boosted.len(), 1);
    assert_eq!(
        boosted[0].implied_impact,
        ImpactCategory::InternalNetworkAccess
    );

    let ratio = boosted[0].score / baseline[0].score;
    assert!(
        (ratio - LOCAL_LISTENER_BOOST).abs() < 1e-9,
        "expected ×{LOCAL_LISTENER_BOOST} boost, got ratio={ratio}"
    );
}
/// A chain whose score falls below `min_score` is dropped.
///
/// The edge uses `POST` so that it matches the `entry` fixture (which
/// is always `POST`). With a mismatched method the edge would never
/// reach the entry, and the test would pass without exercising the
/// threshold at all.
#[test]
fn score_threshold_drops_low_score_chains() {
    let mut surface = SurfaceMap::new();
    surface.nodes.push(entry("app.py", "/r", false));
    surface.nodes.push(sink("app.py", 20, "open", Cap::FILE_IO));

    let e = edge_with(
        "app.py",
        10,
        "test",
        Cap::FILE_IO,
        "/r",
        HttpMethod::POST,
        Feasibility::Unverified,
    );
    let cfg = ChainSearchConfig {
        max_depth: 4,
        min_score: 1_000.0,
    };

    let chains = find_chains(&[e], &surface, cfg);
    assert!(chains.is_empty());
}
/// A sink living in a different file than the finding composes only
/// when the call-graph reach map links the two files.
#[test]
fn cross_file_chain_requires_reach_map() {
    use crate::callgraph::{FileReachMap, build_call_graph};
    use crate::summary::{FuncSummary, merge_summaries};

    let cfg = ChainSearchConfig {
        max_depth: 4,
        min_score: 0.0,
    };
    let finding = edge_with(
        "routes.py",
        10,
        "taint-codeexec",
        Cap::CODE_EXEC,
        "/exec",
        HttpMethod::POST,
        Feasibility::Unverified,
    );

    // Entry and finding share routes.py; the sink sits in helper.py.
    let mut surface = SurfaceMap::new();
    surface.nodes.push(entry("routes.py", "/exec", false));
    surface
        .nodes
        .push(sink("helper.py", 20, "os.system", Cap::CODE_EXEC));

    // Without a reach map the routes.py finding is out of scope for
    // the helper.py sink.
    let without_reach = find_chains(std::slice::from_ref(&finding), &surface, cfg);
    assert!(
        without_reach.is_empty(),
        "without reach map, cross-file chain must not compose"
    );

    // Call graph: routes.py::handle -> helper.py::sink.
    let caller = FuncSummary {
        name: "handle".into(),
        file_path: "routes.py".into(),
        lang: "python".into(),
        param_count: 0,
        callees: vec![crate::summary::CalleeSite::bare("sink")],
        ..Default::default()
    };
    let callee = FuncSummary {
        name: "sink".into(),
        file_path: "helper.py".into(),
        lang: "python".into(),
        param_count: 0,
        ..Default::default()
    };
    let summaries = merge_summaries(vec![caller, callee], None);
    let call_graph = build_call_graph(&summaries, &[]);
    let reach = FileReachMap::build(&call_graph);

    let with_reach = find_chains_with_reach(&[finding], &surface, cfg, Some(&reach));
    assert_eq!(
        with_reach.len(),
        1,
        "reach map should widen scope to include helper.py sink"
    );
    assert_eq!(with_reach[0].implied_impact, ImpactCategory::Rce);
}
/// Three unrelated handler files each declare `POST /run`, and each
/// holds one finding plus one dangerous-local sink. The wire format
/// does not surface the entry, so any chain emitted more than once for
/// the same finding and sink would serialise byte-identically. The
/// result must contain exactly one chain per finding (never
/// entries × findings), and the survivors must carry distinct hashes.
#[test]
fn duplicate_chains_from_shared_route_method_are_deduped() {
    let files = ["a.js", "b.js", "c.py"];

    let mut surface = SurfaceMap::new();
    for file in files {
        surface.nodes.push(entry(file, "/run", false));
    }
    for file in files {
        surface.nodes.push(sink(file, 7, "eval", Cap::CODE_EXEC));
    }

    let edges = files.map(|file| {
        edge_with(
            file,
            7,
            "taint-codeexec",
            Cap::CODE_EXEC,
            "/run",
            HttpMethod::POST,
            Feasibility::Unverified,
        )
    });

    let chains = find_chains(&edges, &surface, ChainSearchConfig::default());
    assert_eq!(
        chains.len(),
        3,
        "expected one chain per finding, not entries × findings",
    );

    let distinct: std::collections::BTreeSet<u64> =
        chains.iter().map(|chain| chain.stable_hash).collect();
    assert_eq!(
        distinct.len(),
        3,
        "surviving chains must have distinct hashes"
    );
}
/// File-affinity gate in `edge_reaches_entry`: an entry only claims
/// findings that live in its own handler file (or that the reach map
/// links to it). Two entries declaring the same (route, method) in
/// different files must not cross-claim each other's findings.
#[test]
fn entry_file_affinity_rejects_cross_file_findings_without_reach() {
    // The only finding lives in a.js. Both entries match on
    // route + method, but only the a.js entry shares its file.
    let only_finding = edge_with(
        "a.js",
        7,
        "taint-codeexec",
        Cap::CODE_EXEC,
        "/run",
        HttpMethod::POST,
        Feasibility::Unverified,
    );

    let mut surface = SurfaceMap::new();
    surface.nodes.push(entry("a.js", "/run", false));
    surface.nodes.push(entry("b.js", "/run", false));
    surface.nodes.push(sink("a.js", 7, "eval", Cap::CODE_EXEC));
    surface.nodes.push(sink("b.js", 7, "eval", Cap::CODE_EXEC));

    let chains = find_chains(&[only_finding], &surface, ChainSearchConfig::default());
    assert_eq!(
        chains.len(),
        1,
        "entry@b.js must not claim a finding in a.js without reach map",
    );
    assert_eq!(chains[0].sink.file, "a.js");
}
/// The file-affinity gate widens through the call-graph reach map: an
/// entry whose handler file reaches the finding's file still claims the
/// finding even though the two paths do not overlap.
#[test]
fn entry_file_affinity_widens_with_reach_map() {
    use crate::callgraph::{FileReachMap, build_call_graph};
    use crate::summary::{FuncSummary, merge_summaries};

    let cfg = ChainSearchConfig {
        max_depth: 4,
        min_score: 0.0,
    };
    // Finding and sink both live in helper.py; the entry handler is
    // in routes.py.
    let finding = edge_with(
        "helper.py",
        10,
        "taint-codeexec",
        Cap::CODE_EXEC,
        "/run",
        HttpMethod::POST,
        Feasibility::Unverified,
    );
    let mut surface = SurfaceMap::new();
    surface.nodes.push(entry("routes.py", "/run", false));
    surface
        .nodes
        .push(sink("helper.py", 20, "os.system", Cap::CODE_EXEC));

    // No reach map: the gate rejects the entry/finding pairing.
    let without_reach = find_chains(std::slice::from_ref(&finding), &surface, cfg);
    assert!(
        without_reach.is_empty(),
        "without reach map, cross-file entry/finding pair must reject",
    );

    // Call graph: routes.py::handle -> helper.py::sink.
    let caller = FuncSummary {
        name: "handle".into(),
        file_path: "routes.py".into(),
        lang: "python".into(),
        param_count: 0,
        callees: vec![crate::summary::CalleeSite::bare("sink")],
        ..Default::default()
    };
    let callee = FuncSummary {
        name: "sink".into(),
        file_path: "helper.py".into(),
        lang: "python".into(),
        param_count: 0,
        ..Default::default()
    };
    let summaries = merge_summaries(vec![caller, callee], None);
    let call_graph = build_call_graph(&summaries, &[]);
    let reach = FileReachMap::build(&call_graph);

    let with_reach = find_chains_with_reach(&[finding], &surface, cfg, Some(&reach));
    assert_eq!(
        with_reach.len(),
        1,
        "reach map should widen entry-affinity to helper.py",
    );
    assert_eq!(with_reach[0].sink.file, "helper.py");
}
}