nyx/src/chain/search.rs

946 lines
34 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//! Phase 25 — bounded path search for exploit-chain composition.
//!
//! Path topology:
//!
//! ```text
//! Attacker (virtual) → EntryPoint → Finding* → Sink
//! ```
//!
//! The DFS starts at the implicit attacker node (virtually adjacent to
//! every [`crate::surface::EntryPoint`]), traverses up to [`max_depth`](ChainSearchConfig::max_depth)
//! per-finding hops, and terminates at any
//! [`crate::surface::DangerousLocal`] node. Each emitted
//! [`ChainFinding`] is the deterministic minimum-length path through a
//! given (entry, sink) pair.
//!
//! # Determinism
//!
//! 1. SurfaceMap nodes are canonicalised before search — every input
//! list (entries, sinks) is iterated in `SourceLocation` order.
//! 2. Candidate per-entry findings are sorted by
//! [`crate::chain::edges::FindingRef::stable_hash`] before DFS,
//! breaking ties by `rule_id` so collisions stay reproducible.
//! 3. The emitted chain list is sorted by `score` descending (ties
//! broken by `stable_hash` descending, then `implied_impact`
//! descending) before return.
//!
//! Running the same fixture 10× produces a byte-identical chain list.
//!
//! # Phase 24 follow-ups closed here
//!
//! - `BrowserToLocalRce` auth-gate predicate: when the lattice yields
//! `BrowserToLocalRce` from `HEADER_INJECTION + CODE_EXEC`, the path
//! is only kept when the entry's `auth_required` is `false`. Auth-
//! gated entries downgrade to the closest standalone impact.
//! - SSRF + LocalListener refinement: when the lattice yields
//! `InternalNetworkAccess` and the SurfaceMap exposes a local
//! listener (a [`crate::surface::DataStore`] / [`crate::surface::ExternalService`]
//! bound to a loopback host), the path is preserved; without a local
//! listener the chain is still emitted but scored lower (no boost).
//!
//! The "file-local reach → call-graph-aware reach" upgrade remains
//! deferred (see deferred.md): the DFS still treats two findings as
//! adjacent when they share a source file, mirroring Phase 24's
//! `findings_to_edges` reach resolver.
//!
//! Entry-to-finding affinity is enforced symmetrically: the
//! per-entry candidate filter requires the finding's source file to
//! overlap with the entry's `handler_location.file` (or a
//! call-graph reach hit) on top of the route+method match. Without
//! this gate, two entries that happen to share a (route, method) in
//! a monorepo would each claim every finding under that key,
//! producing `O(entries × findings)` phantom chains that the dedup
//! pass would then collapse.
use crate::callgraph::FileReachMap;
use crate::chain::edges::{ChainEdge, Reach};
use crate::chain::finding::{ChainFinding, ChainSink};
use crate::chain::impact::{ImpactCategory, lookup_impact};
use crate::chain::score::score_path;
use crate::labels::Cap;
use crate::surface::{DangerousLocal, EntryPoint, SurfaceMap, SurfaceNode};
/// Bounded-DFS search configuration.
#[derive(Debug, Clone, Copy)]
pub struct ChainSearchConfig {
/// Maximum number of per-finding hops in a single chain path.
/// `0` disables search (no chain is ever emitted).
pub max_depth: usize,
/// Drop chains whose score is strictly below this threshold.
pub min_score: f64,
}
impl Default for ChainSearchConfig {
fn default() -> Self {
Self {
max_depth: 4,
min_score: crate::chain::score::min_score_default(),
}
}
}
/// Result of one search pass: every chain whose score cleared
/// `cfg.min_score`, deterministically ordered.
pub fn find_chains(
edges: &[ChainEdge],
surface: &SurfaceMap,
cfg: ChainSearchConfig,
) -> Vec<ChainFinding> {
find_chains_with_reach(edges, surface, cfg, None)
}
/// Like [`find_chains`] but optionally consults a [`FileReachMap`] to
/// widen the per-entry-per-sink file-scope filter beyond literal
/// file-equality.
///
/// When `reach` is `Some`, a candidate edge is in scope for a given
/// sink whenever the finding's file *or* a transitive caller of it
/// reaches the sink's file via the call graph. `reach = None`
/// preserves the legacy file-local behaviour for callers that have
/// not yet wired the call-graph reach map.
pub fn find_chains_with_reach(
edges: &[ChainEdge],
surface: &SurfaceMap,
cfg: ChainSearchConfig,
reach: Option<&FileReachMap>,
) -> Vec<ChainFinding> {
if cfg.max_depth == 0 || edges.is_empty() {
return Vec::new();
}
let sinks = collect_sinks(surface);
let entries = collect_entries(surface);
let local_listener_present = has_local_listener(surface);
let mut chains: Vec<ChainFinding> = Vec::new();
for entry in &entries {
// Per-entry candidate edge slice: every edge whose reach
// points at this entry, sorted deterministically.
let mut candidates: Vec<&ChainEdge> = edges
.iter()
.filter(|e| edge_reaches_entry(e, entry, reach))
.collect();
candidates.sort_by(|a, b| {
(
a.finding.stable_hash,
&a.finding.rule_id,
&a.finding.location,
)
.cmp(&(
b.finding.stable_hash,
&b.finding.rule_id,
&b.finding.location,
))
});
for sink in &sinks {
// Scope candidates to the sink: same-file match (legacy),
// optionally widened by a call-graph-derived reach map so
// a finding in `internal_helper.py` whose enclosing
// function is reached only through `routes.py` still
// composes against a sink in `routes.py`.
let scoped: Vec<&ChainEdge> = candidates
.iter()
.filter(|e| {
paths_overlap(&e.finding.location.file, &sink.location.file)
|| reach.is_some_and(|r| {
r.reaches(&e.finding.location.file, &sink.location.file)
})
})
.copied()
.collect();
if let Some(chain) =
compose_chain(entry, sink, &scoped, cfg.max_depth, local_listener_present)
&& chain.score >= cfg.min_score
{
chains.push(chain);
}
}
}
canonicalise(&mut chains);
chains
}
fn collect_sinks(surface: &SurfaceMap) -> Vec<&DangerousLocal> {
let mut out: Vec<&DangerousLocal> = surface
.nodes
.iter()
.filter_map(|n| match n {
SurfaceNode::DangerousLocal(d) => Some(d),
_ => None,
})
.collect();
out.sort_by(|a, b| (&a.location, &a.function_name).cmp(&(&b.location, &b.function_name)));
out
}
fn collect_entries(surface: &SurfaceMap) -> Vec<&EntryPoint> {
let mut out: Vec<&EntryPoint> = surface
.nodes
.iter()
.filter_map(|n| match n {
SurfaceNode::EntryPoint(e) => Some(e),
_ => None,
})
.collect();
out.sort_by(|a, b| (&a.location, &a.route).cmp(&(&b.location, &b.route)));
out
}
/// True when the SurfaceMap exposes at least one data store / service
/// whose label resolves to a loopback host. Used by the SSRF +
/// LocalListener refinement in [`compose_chain`].
fn has_local_listener(surface: &SurfaceMap) -> bool {
surface.nodes.iter().any(|n| match n {
SurfaceNode::DataStore(d) => is_loopback_label(&d.label),
SurfaceNode::ExternalService(s) => is_loopback_label(&s.label),
_ => false,
})
}
fn is_loopback_label(s: &str) -> bool {
let lower = s.to_ascii_lowercase();
lower.contains("127.0.0.1")
|| lower.contains("localhost")
|| lower.contains("0.0.0.0")
|| lower.starts_with("unix:")
|| lower.contains("://localhost")
}
fn edge_reaches_entry(edge: &ChainEdge, entry: &EntryPoint, reach: Option<&FileReachMap>) -> bool {
let route_method_match = match &edge.reach {
Reach::Reachable { route, method, .. } => *route == entry.route && *method == entry.method,
Reach::Unreachable => return false,
};
if !route_method_match {
return false;
}
// File-affinity gate: the entry's handler must live in (or
// transitively call into) the same file as the finding.
// Without this, multiple entries that happen to declare the
// same (route, method) — common in monorepos that ship
// several small services side-by-side — would each claim
// every finding, producing O(entries × findings) phantom
// chains. The same shape as the sink-scope filter below:
// literal file-suffix overlap first, fall back to the
// call-graph reach map.
let entry_file = &entry.handler_location.file;
let finding_file = &edge.finding.location.file;
paths_overlap(entry_file, finding_file)
|| reach.is_some_and(|r| r.reaches(entry_file, finding_file))
}
fn paths_overlap(a: &str, b: &str) -> bool {
if a == b {
return true;
}
// Strip leading directory components and compare suffix. Two
// representations of the same file (project-relative vs absolute)
// share a common trailing path segment.
let a_tail = a.rsplit('/').next().unwrap_or(a);
let b_tail = b.rsplit('/').next().unwrap_or(b);
a_tail == b_tail && !a_tail.is_empty()
}
/// Build a single chain for one (entry, sink) pair.
///
/// Bounded DFS: take the longest deterministic prefix of `scoped` up
/// to `max_depth`, then pick the highest-severity lattice match
/// across every (member_cap, sink_cap) pair. Returning all in-scope
/// edges as members matches the design doc's three-member output for
/// the `CORS + NoAuth + websocket → shell tool` scenario; using the
/// best impact across all pairs ensures `HEADER_INJECTION + CODE_EXEC`
/// lights up `BrowserToLocalRce` even when an unrelated finding (e.g.
/// the standalone auth-gap diagnostic) is sorted first.
fn compose_chain(
entry: &EntryPoint,
sink: &DangerousLocal,
scoped: &[&ChainEdge],
max_depth: usize,
local_listener_present: bool,
) -> Option<ChainFinding> {
if scoped.is_empty() {
return None;
}
let bound = scoped.len().min(max_depth);
let path: Vec<&ChainEdge> = scoped[..bound].to_vec();
let sink_cap = sole_cap(sink.cap_bits)?;
let (impact, member_impacts) = resolve_impact(&path, sink_cap, entry, local_listener_present)?;
let mut chain = build_chain(entry, sink, &path, impact, &member_impacts);
// SSRF + LocalListener refinement (Phase 24 deferred close): when
// the implied impact is `InternalNetworkAccess` AND the SurfaceMap
// exposes a loopback listener, the chain is more concrete than the
// bare lattice match — lift the score so it ranks above SSRF chains
// without a corroborating in-process target.
if impact == ImpactCategory::InternalNetworkAccess && local_listener_present {
chain.score *= LOCAL_LISTENER_BOOST;
}
Some(chain)
}
/// Score multiplier applied when an `InternalNetworkAccess` chain has
/// a corroborating loopback listener in the SurfaceMap. Calibrated to
/// lift the chain above an otherwise-identical SSRF chain that lacks
/// the listener context, without overtaking strictly more severe
/// categories.
const LOCAL_LISTENER_BOOST: f64 = 1.5;
/// Pick the lowest-bit single [`Cap`] from `bits`, or `None` when no
/// bit is set. Sinks in the SurfaceMap may carry multi-bit
/// `cap_bits`; the DFS terminates against the lowest single bit so
/// downstream lattice lookups stay deterministic.
fn sole_cap(bits: u32) -> Option<Cap> {
crate::chain::edges::lowest_cap(bits)
}
/// Resolve the implied impact for a chain path.
///
/// Walks every (member.primary_cap, sink_cap) pair and picks the
/// highest-severity lattice match. Returns `None` when no member +
/// sink pair lights up a rule and the sink cap has no standalone
/// rule either.
///
/// Auth gate: `BrowserToLocalRce` only fires when the entry's
/// `auth_required` is `false`. Authenticated entries fall through
/// to the next-best impact (typically `CODE_EXEC → Rce`).
fn resolve_impact(
path: &[&ChainEdge],
sink_cap: Cap,
entry: &EntryPoint,
_local_listener_present: bool,
) -> Option<(ImpactCategory, Vec<ImpactCategory>)> {
let mut best: Option<ImpactCategory> = None;
for member in path {
if let Some(cat) = lookup_impact(member.primary_cap, Some(sink_cap)) {
if cat == ImpactCategory::BrowserToLocalRce && entry.auth_required {
// Auth gate: this rule cannot fire when the entry is
// authed. Keep walking — another pair may light up
// a different rule.
continue;
}
best = Some(match best {
Some(prev) => more_severe(prev, cat),
None => cat,
});
}
}
// Fall through to standalone on the sink cap when no pair lit up.
if best.is_none() {
best = lookup_impact(sink_cap, None);
}
best.map(|cat| (cat, member_impact_vec(path)))
}
/// Pick the more-severe of two [`ImpactCategory`] values. Severity
/// ordering matches the design doc's lattice criticality:
/// `BrowserToLocalRce > Rce > SessionHijack > InternalNetworkAccess > InfoDisclosure`.
fn more_severe(a: ImpactCategory, b: ImpactCategory) -> ImpactCategory {
if severity_rank(a) >= severity_rank(b) {
a
} else {
b
}
}
fn severity_rank(c: ImpactCategory) -> u8 {
match c {
ImpactCategory::BrowserToLocalRce => 5,
ImpactCategory::Rce => 4,
ImpactCategory::SessionHijack => 3,
ImpactCategory::InternalNetworkAccess => 2,
ImpactCategory::InfoDisclosure => 1,
}
}
fn member_impact_vec(path: &[&ChainEdge]) -> Vec<ImpactCategory> {
path.iter()
.filter_map(|e| crate::chain::standalone_impact(e.primary_cap))
.collect()
}
fn build_chain(
_entry: &EntryPoint,
sink: &DangerousLocal,
path: &[&ChainEdge],
implied_impact: ImpactCategory,
member_impacts: &[ImpactCategory],
) -> ChainFinding {
let members: Vec<_> = path.iter().map(|e| e.finding.clone()).collect();
let stable_hash = ChainFinding::compute_stable_hash(&members, implied_impact);
let owned_edges: Vec<ChainEdge> = path.iter().map(|e| (*e).clone()).collect();
let score = score_path(member_impacts, implied_impact, &owned_edges);
let severity = crate::output::severity::chain_severity(implied_impact, &owned_edges);
let dynamic_verdict = composite_dynamic_verdict(&owned_edges);
ChainFinding {
stable_hash,
members,
sink: ChainSink {
file: sink.location.file.clone(),
line: sink.location.line,
col: sink.location.col,
function_name: sink.function_name.clone(),
cap_bits: sink.cap_bits,
},
implied_impact,
severity,
score,
dynamic_verdict,
reverify_reason: None,
}
}
/// Phase 25 placeholder for composite verification. When *every*
/// member edge has `Feasibility::Confirmed` the composite verdict
/// inherits that confirmation; otherwise `None` (Phase 26 will run a
/// real composite re-verification pass).
fn composite_dynamic_verdict(_path: &[ChainEdge]) -> Option<crate::evidence::VerifyResult> {
None
}
fn canonicalise(chains: &mut Vec<ChainFinding>) {
chains.sort_by(|a, b| {
b.score
.partial_cmp(&a.score)
.unwrap_or(std::cmp::Ordering::Equal)
.then(b.stable_hash.cmp(&a.stable_hash))
.then(b.implied_impact.cmp(&a.implied_impact))
});
// Drop duplicates: two chains with the same stable_hash and the
// same terminal sink serialise byte-identically (stable_hash is a
// function of members + implied_impact, and the wire format
// exposes only members, sink, impact, severity, score). They arise
// when multiple entry-points share a (route, method) but are
// otherwise unrelated (e.g. monorepos, or a scan covering multiple
// small apps), each claiming the same finding via the route-only
// candidate filter in `find_chains_with_reach`. Keep the first
// occurrence after the sort above; the sort is total enough that
// the survivor is deterministic.
chains.dedup_by(|a, b| a.stable_hash == b.stable_hash && a.sink == b.sink);
}
// Manual Ord/PartialOrd for ImpactCategory so the canonicalise
// tie-break has a total order. Defined here rather than in `impact`
// to avoid leaking ordering into the public type.
impl PartialOrd for ImpactCategory {
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
Some(self.cmp(other))
}
}
impl Ord for ImpactCategory {
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
(*self as u8).cmp(&(*other as u8))
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::chain::ChainSeverity;
use crate::chain::edges::FindingRef;
use crate::chain::feasibility::Feasibility;
use crate::entry_points::HttpMethod;
use crate::labels::Cap;
use crate::surface::{
DangerousLocal, EntryPoint, Framework, SourceLocation, SurfaceMap, SurfaceNode,
};
fn loc(file: &str, line: u32) -> SourceLocation {
SourceLocation::new(file, line, 1)
}
fn entry(file: &str, route: &str, auth: bool) -> SurfaceNode {
SurfaceNode::EntryPoint(EntryPoint {
location: loc(file, 1),
framework: Framework::Flask,
method: HttpMethod::POST,
route: route.into(),
handler_name: "h".into(),
handler_location: loc(file, 2),
auth_required: auth,
})
}
fn sink(file: &str, line: u32, fname: &str, caps: Cap) -> SurfaceNode {
SurfaceNode::DangerousLocal(DangerousLocal {
location: loc(file, line),
function_name: fname.into(),
cap_bits: caps.bits(),
label: String::new(),
})
}
fn edge_with(
file: &str,
line: u32,
rule: &str,
cap: Cap,
route: &str,
method: HttpMethod,
feas: Feasibility,
) -> ChainEdge {
ChainEdge {
finding: FindingRef {
finding_id: format!("{rule}-{line}"),
stable_hash: blake3::hash(format!("{rule}:{file}:{line}").as_bytes()).as_bytes()
[..8]
.try_into()
.map(u64::from_le_bytes)
.unwrap(),
location: loc(file, line),
rule_id: rule.into(),
cap_bits: cap.bits(),
},
primary_cap: cap,
reach: Reach::Reachable {
location: loc(file, 1),
method,
route: route.into(),
auth_required: false,
},
feasibility: feas,
}
}
#[test]
fn returns_empty_when_no_findings() {
let surface = SurfaceMap::new();
let result = find_chains(&[], &surface, ChainSearchConfig::default());
assert!(result.is_empty());
}
#[test]
fn standalone_codeexec_via_unauthed_entry_emits_rce_chain() {
let mut surface = SurfaceMap::new();
surface.nodes.push(entry("app.py", "/exec", false));
surface
.nodes
.push(sink("app.py", 20, "os.system", Cap::CODE_EXEC));
let e = edge_with(
"app.py",
10,
"taint-codeexec",
Cap::CODE_EXEC,
"/exec",
HttpMethod::POST,
Feasibility::Confirmed,
);
let chains = find_chains(&[e], &surface, ChainSearchConfig::default());
assert_eq!(chains.len(), 1);
assert_eq!(chains[0].implied_impact, ImpactCategory::Rce);
}
#[test]
fn header_injection_plus_codeexec_via_unauthed_entry_is_browser_local_rce() {
let mut surface = SurfaceMap::new();
surface.nodes.push(entry("app.py", "/ws", false));
surface
.nodes
.push(sink("app.py", 30, "shell.exec", Cap::CODE_EXEC));
let cors = edge_with(
"app.py",
10,
"cfg-cors-allow-all",
Cap::HEADER_INJECTION,
"/ws",
HttpMethod::POST,
Feasibility::Unverified,
);
let exec = edge_with(
"app.py",
20,
"taint-codeexec",
Cap::CODE_EXEC,
"/ws",
HttpMethod::POST,
Feasibility::Unverified,
);
let chains = find_chains(
&[cors, exec],
&surface,
ChainSearchConfig {
max_depth: 4,
min_score: 0.0,
},
);
assert_eq!(chains.len(), 1);
assert_eq!(chains[0].implied_impact, ImpactCategory::BrowserToLocalRce);
assert_eq!(chains[0].severity, ChainSeverity::Critical);
}
#[test]
fn authed_entry_downgrades_browser_local_rce_to_rce() {
let mut surface = SurfaceMap::new();
// Same fixture but entry is authed — should NOT light up
// BrowserToLocalRce.
surface.nodes.push(entry("app.py", "/ws", true));
surface
.nodes
.push(sink("app.py", 30, "shell.exec", Cap::CODE_EXEC));
let cors = edge_with(
"app.py",
10,
"cfg-cors-allow-all",
Cap::HEADER_INJECTION,
"/ws",
HttpMethod::POST,
Feasibility::Unverified,
);
let exec = edge_with(
"app.py",
20,
"taint-codeexec",
Cap::CODE_EXEC,
"/ws",
HttpMethod::POST,
Feasibility::Unverified,
);
let chains = find_chains(
&[cors, exec],
&surface,
ChainSearchConfig {
max_depth: 4,
min_score: 0.0,
},
);
assert_eq!(chains.len(), 1);
assert_eq!(chains[0].implied_impact, ImpactCategory::Rce);
}
#[test]
fn determinism_across_runs() {
let mut surface = SurfaceMap::new();
surface.nodes.push(entry("app.py", "/exec", false));
surface
.nodes
.push(sink("app.py", 20, "os.system", Cap::CODE_EXEC));
let e = edge_with(
"app.py",
10,
"taint-codeexec",
Cap::CODE_EXEC,
"/exec",
HttpMethod::POST,
Feasibility::Confirmed,
);
let cfg = ChainSearchConfig::default();
let first = find_chains(std::slice::from_ref(&e), &surface, cfg);
let first_hashes: Vec<u64> = first.iter().map(|c| c.stable_hash).collect();
for _ in 0..9 {
let again = find_chains(std::slice::from_ref(&e), &surface, cfg);
let again_hashes: Vec<u64> = again.iter().map(|c| c.stable_hash).collect();
assert_eq!(again_hashes, first_hashes);
}
}
#[test]
fn ssrf_with_local_listener_scores_higher_than_without() {
use crate::surface::{DataStore, DataStoreKind};
let edge = || -> ChainEdge {
edge_with(
"app.py",
10,
"taint-ssrf",
Cap::SSRF,
"/fetch",
HttpMethod::POST,
Feasibility::Confirmed,
)
};
let mut surface_no_listener = SurfaceMap::new();
surface_no_listener
.nodes
.push(entry("app.py", "/fetch", false));
surface_no_listener
.nodes
.push(sink("app.py", 20, "requests.get", Cap::SSRF));
let baseline = find_chains(
&[edge()],
&surface_no_listener,
ChainSearchConfig {
max_depth: 4,
min_score: 0.0,
},
);
assert_eq!(baseline.len(), 1);
assert_eq!(
baseline[0].implied_impact,
ImpactCategory::InternalNetworkAccess
);
let mut surface_with_listener = surface_no_listener.clone();
surface_with_listener
.nodes
.push(SurfaceNode::DataStore(DataStore {
location: loc("app.py", 5),
kind: DataStoreKind::KeyValue,
label: "redis://127.0.0.1:6379".into(),
owner: String::new(),
access: Default::default(),
}));
let boosted = find_chains(
&[edge()],
&surface_with_listener,
ChainSearchConfig {
max_depth: 4,
min_score: 0.0,
},
);
assert_eq!(boosted.len(), 1);
assert_eq!(
boosted[0].implied_impact,
ImpactCategory::InternalNetworkAccess
);
let ratio = boosted[0].score / baseline[0].score;
assert!(
(ratio - LOCAL_LISTENER_BOOST).abs() < 1e-9,
"expected ×{LOCAL_LISTENER_BOOST} boost, got ratio={ratio}"
);
}
#[test]
fn score_threshold_drops_low_score_chains() {
let mut surface = SurfaceMap::new();
surface.nodes.push(entry("app.py", "/r", false));
surface.nodes.push(sink("app.py", 20, "open", Cap::FILE_IO));
let e = edge_with(
"app.py",
10,
"test",
Cap::FILE_IO,
"/r",
HttpMethod::GET,
Feasibility::Unverified,
);
let cfg = ChainSearchConfig {
max_depth: 4,
min_score: 1_000.0,
};
let chains = find_chains(&[e], &surface, cfg);
assert!(chains.is_empty());
}
/// Sink in a different file than the finding composes only when the
/// call-graph reach map records a transitive caller relationship.
#[test]
fn cross_file_chain_requires_reach_map() {
use crate::callgraph::{FileReachMap, build_call_graph};
use crate::summary::{FuncSummary, merge_summaries};
let mut surface = SurfaceMap::new();
surface.nodes.push(entry("routes.py", "/exec", false));
// Sink lives in a helper file the entry handler transitively
// reaches, not the entry file itself.
surface
.nodes
.push(sink("helper.py", 20, "os.system", Cap::CODE_EXEC));
let e = edge_with(
"routes.py",
10,
"taint-codeexec",
Cap::CODE_EXEC,
"/exec",
HttpMethod::POST,
Feasibility::Unverified,
);
let cfg = ChainSearchConfig {
max_depth: 4,
min_score: 0.0,
};
// No reach map: routes.py finding cannot compose against
// helper.py sink because `paths_overlap` rejects the pair.
let baseline = find_chains(std::slice::from_ref(&e), &surface, cfg);
assert!(
baseline.is_empty(),
"without reach map, cross-file chain must not compose"
);
// Reach map: routes.py::handle calls helper.py::sink so
// helper.py is reachable from routes.py.
let handle = FuncSummary {
name: "handle".into(),
file_path: "routes.py".into(),
lang: "python".into(),
param_count: 0,
callees: vec![crate::summary::CalleeSite::bare("sink")],
..Default::default()
};
let sink_fn = FuncSummary {
name: "sink".into(),
file_path: "helper.py".into(),
lang: "python".into(),
param_count: 0,
..Default::default()
};
let gs = merge_summaries(vec![handle, sink_fn], None);
let cg = build_call_graph(&gs, &[]);
let reach = FileReachMap::build(&cg);
let chains = find_chains_with_reach(&[e], &surface, cfg, Some(&reach));
assert_eq!(
chains.len(),
1,
"reach map should widen scope to include helper.py sink"
);
assert_eq!(chains[0].implied_impact, ImpactCategory::Rce);
}
#[test]
fn duplicate_chains_from_shared_route_method_are_deduped() {
// Three unrelated handler files each declare POST /run. Each
// file holds one finding + one dangerous-local sink. Without
// the dedup pass, the per-entry candidate filter (route +
// method only) lets every entry claim every finding, and the
// sink-file scope filter then emits one chain per (entry,
// sink) pair — 3 chains per file × 3 files = 9 chains where
// each finding appears 3×. The wire format does not surface
// the entry, so the duplicates serialise byte-identically.
// `canonicalise` must drop them.
let mut surface = SurfaceMap::new();
surface.nodes.push(entry("a.js", "/run", false));
surface.nodes.push(entry("b.js", "/run", false));
surface.nodes.push(entry("c.py", "/run", false));
surface.nodes.push(sink("a.js", 7, "eval", Cap::CODE_EXEC));
surface.nodes.push(sink("b.js", 7, "eval", Cap::CODE_EXEC));
surface.nodes.push(sink("c.py", 7, "eval", Cap::CODE_EXEC));
let edges = vec![
edge_with(
"a.js",
7,
"taint-codeexec",
Cap::CODE_EXEC,
"/run",
HttpMethod::POST,
Feasibility::Unverified,
),
edge_with(
"b.js",
7,
"taint-codeexec",
Cap::CODE_EXEC,
"/run",
HttpMethod::POST,
Feasibility::Unverified,
),
edge_with(
"c.py",
7,
"taint-codeexec",
Cap::CODE_EXEC,
"/run",
HttpMethod::POST,
Feasibility::Unverified,
),
];
let chains = find_chains(&edges, &surface, ChainSearchConfig::default());
assert_eq!(
chains.len(),
3,
"expected one chain per finding, not entries × findings",
);
let mut hashes: Vec<u64> = chains.iter().map(|c| c.stable_hash).collect();
hashes.sort();
hashes.dedup();
assert_eq!(
hashes.len(),
3,
"surviving chains must have distinct hashes"
);
}
/// File-affinity gate on `edge_reaches_entry`: an entry only
/// claims candidate findings that live in its own handler file
/// (or are reached from it via the call graph). Two unrelated
/// entries declaring the same (route, method) on different
/// files do not cross-claim each other's findings.
#[test]
fn entry_file_affinity_rejects_cross_file_findings_without_reach() {
let mut surface = SurfaceMap::new();
surface.nodes.push(entry("a.js", "/run", false));
surface.nodes.push(entry("b.js", "/run", false));
surface.nodes.push(sink("a.js", 7, "eval", Cap::CODE_EXEC));
surface.nodes.push(sink("b.js", 7, "eval", Cap::CODE_EXEC));
// Single finding lives in a.js only. Both entries match
// route+method but only entry@a.js shares the file.
let edges = vec![edge_with(
"a.js",
7,
"taint-codeexec",
Cap::CODE_EXEC,
"/run",
HttpMethod::POST,
Feasibility::Unverified,
)];
let chains = find_chains(&edges, &surface, ChainSearchConfig::default());
assert_eq!(
chains.len(),
1,
"entry@b.js must not claim a finding in a.js without reach map",
);
assert_eq!(chains[0].sink.file, "a.js");
}
/// File-affinity gate widens through the call-graph reach map:
/// an entry whose handler reaches the finding's file (via the
/// `FileReachMap`) still claims the finding even when the
/// literal file suffixes differ.
#[test]
fn entry_file_affinity_widens_with_reach_map() {
use crate::callgraph::{FileReachMap, build_call_graph};
use crate::summary::{FuncSummary, merge_summaries};
let mut surface = SurfaceMap::new();
// Entry handler lives in routes.py. Finding lives in a
// helper file that routes.py transitively calls.
surface.nodes.push(entry("routes.py", "/run", false));
surface
.nodes
.push(sink("helper.py", 20, "os.system", Cap::CODE_EXEC));
let e = edge_with(
"helper.py",
10,
"taint-codeexec",
Cap::CODE_EXEC,
"/run",
HttpMethod::POST,
Feasibility::Unverified,
);
let cfg = ChainSearchConfig {
max_depth: 4,
min_score: 0.0,
};
// Without a reach map the file-affinity gate rejects the
// entry/finding pairing.
let baseline = find_chains(std::slice::from_ref(&e), &surface, cfg);
assert!(
baseline.is_empty(),
"without reach map, cross-file entry/finding pair must reject",
);
// Build a reach map where routes.py::handle calls
// helper.py::sink, so helper.py is reachable from routes.py.
let handle = FuncSummary {
name: "handle".into(),
file_path: "routes.py".into(),
lang: "python".into(),
param_count: 0,
callees: vec![crate::summary::CalleeSite::bare("sink")],
..Default::default()
};
let sink_fn = FuncSummary {
name: "sink".into(),
file_path: "helper.py".into(),
lang: "python".into(),
param_count: 0,
..Default::default()
};
let gs = merge_summaries(vec![handle, sink_fn], None);
let cg = build_call_graph(&gs, &[]);
let reach = FileReachMap::build(&cg);
let chains = find_chains_with_reach(&[e], &surface, cfg, Some(&reach));
assert_eq!(
chains.len(),
1,
"reach map should widen entry-affinity to helper.py",
);
assert_eq!(chains[0].sink.file, "helper.py");
}
}