mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-09 19:45:13 +02:00
[pitboss/grind] deferred session-0005 (20260516T052512Z-20f8)
This commit is contained in:
parent
3e08382a3f
commit
bf8e61ffdb
6 changed files with 411 additions and 19 deletions
163
src/callgraph.rs
163
src/callgraph.rs
|
|
@ -863,6 +863,100 @@ pub fn callers_of(cg: &CallGraph, callee: &FuncKey) -> Vec<FuncKey> {
|
|||
.collect()
|
||||
}
|
||||
|
||||
/// Reverse-edge BFS: return every [`FuncKey`] that *transitively* calls
|
||||
/// `callee`, i.e. the union of [`callers_of`] applied recursively until
|
||||
/// the reverse frontier is exhausted.
|
||||
///
|
||||
/// Used by the chain composer to widen file-scoped reach: a sink inside
|
||||
/// `internal_helper.py` whose enclosing function is reached only through
|
||||
/// `routes.py` is *reachable* in the chain sense, but the file-local
|
||||
/// match in [`crate::chain::edges::locate_reach`] / [`crate::chain::search::compose_chain`]
|
||||
/// misses it. This helper produces the closure once so callers can
|
||||
/// resolve reach in O(1) afterwards.
|
||||
///
|
||||
/// Excludes `callee` itself from the returned set, matching the
|
||||
/// "strictly upstream" semantics callers want. Empty when `callee` is
|
||||
/// unknown to the graph.
|
||||
///
|
||||
/// Cost: O(V + E) BFS from `callee`'s reverse frontier; bounded by the
|
||||
/// connected component size.
|
||||
pub fn callers_transitive(cg: &CallGraph, callee: &FuncKey) -> std::collections::HashSet<FuncKey> {
|
||||
let mut seen: std::collections::HashSet<FuncKey> = std::collections::HashSet::new();
|
||||
let Some(&start) = cg.index.get(callee) else {
|
||||
return seen;
|
||||
};
|
||||
let mut frontier: Vec<NodeIndex> = cg
|
||||
.graph
|
||||
.neighbors_directed(start, petgraph::Direction::Incoming)
|
||||
.collect();
|
||||
while let Some(node) = frontier.pop() {
|
||||
let key = cg.graph[node].clone();
|
||||
if !seen.insert(key) {
|
||||
continue;
|
||||
}
|
||||
for next in cg
|
||||
.graph
|
||||
.neighbors_directed(node, petgraph::Direction::Incoming)
|
||||
{
|
||||
if !seen.contains(&cg.graph[next]) {
|
||||
frontier.push(next);
|
||||
}
|
||||
}
|
||||
}
|
||||
seen
|
||||
}
|
||||
|
||||
/// File-level transitive reach map built from a [`CallGraph`].
|
||||
///
|
||||
/// For each `namespace` (file path) in the graph, records every other
|
||||
/// namespace that contains at least one transitive caller. Built once
|
||||
/// per scan so the chain composer can widen a finding's
|
||||
/// `Reach::Reachable` decision beyond the file-local heuristic in
|
||||
/// [`crate::chain::edges::locate_reach`] without re-running BFS per
|
||||
/// finding.
|
||||
///
|
||||
/// Map shape: `callee_namespace → { caller_namespace, … }`. A file
|
||||
/// always appears in its own caller set so intra-file recursion stays
|
||||
/// reachable.
|
||||
#[derive(Debug, Default, Clone)]
|
||||
pub struct FileReachMap {
|
||||
by_callee_ns: HashMap<String, std::collections::HashSet<String>>,
|
||||
}
|
||||
|
||||
impl FileReachMap {
|
||||
/// Build the map from every function's reverse transitive closure.
|
||||
///
|
||||
/// O(V × (V + E)) worst case, but the per-function BFS is sparse on
|
||||
/// real call graphs (median in-degree < 4 on the eval corpus).
|
||||
pub fn build(cg: &CallGraph) -> Self {
|
||||
let mut by_callee_ns: HashMap<String, std::collections::HashSet<String>> = HashMap::new();
|
||||
for callee in cg.index.keys() {
|
||||
let entry = by_callee_ns.entry(callee.namespace.clone()).or_default();
|
||||
entry.insert(callee.namespace.clone());
|
||||
for caller in callers_transitive(cg, callee) {
|
||||
entry.insert(caller.namespace);
|
||||
}
|
||||
}
|
||||
FileReachMap { by_callee_ns }
|
||||
}
|
||||
|
||||
/// True when `caller_ns` transitively reaches at least one function
|
||||
/// defined in `callee_ns`. False when either namespace is unknown
|
||||
/// to the graph (conservative: chain composer falls back to the
|
||||
/// file-local heuristic).
|
||||
pub fn reaches(&self, caller_ns: &str, callee_ns: &str) -> bool {
|
||||
self.by_callee_ns
|
||||
.get(callee_ns)
|
||||
.is_some_and(|set| set.contains(caller_ns))
|
||||
}
|
||||
|
||||
/// Number of distinct callee namespaces tracked. Exposed for
|
||||
/// diagnostics / tests.
|
||||
pub fn callee_ns_len(&self) -> usize {
|
||||
self.by_callee_ns.len()
|
||||
}
|
||||
}
|
||||
|
||||
/// Compute the set of file namespaces that must be re-analysed when a
|
||||
/// given set of callee [`FuncKey`]s have had their summaries refined.
|
||||
///
|
||||
|
|
@ -2799,4 +2893,73 @@ mod tests {
|
|||
assert!(cg.unresolved_not_found.is_empty());
|
||||
assert!(cg.unresolved_ambiguous.is_empty());
|
||||
}
|
||||
|
||||
// ── callers_transitive + FileReachMap ───────────────────────────────
|
||||
|
||||
/// Two hops spread over three files:
/// `routes.py::handle -> service.py::process -> helper.py::sink`.
/// The transitive caller set of `sink` must be exactly
/// `{process, handle}`, and the derived `FileReachMap` must list
/// `routes.py` and `service.py` (plus `helper.py` itself) as reaching
/// `helper.py`, but not the reverse direction.
#[test]
fn callers_transitive_walks_multi_hop_chain() {
    let summaries = vec![
        make_summary("handle", "routes.py", "python", 0, vec!["process"]),
        make_summary("process", "service.py", "python", 0, vec!["sink"]),
        make_summary("sink", "helper.py", "python", 0, vec![]),
    ];
    let gs = merge_summaries(summaries, None);
    let cg = build_call_graph(&gs, &[]);

    let target = FuncKey {
        lang: Lang::Python,
        namespace: "helper.py".into(),
        name: "sink".into(),
        arity: Some(0),
        ..Default::default()
    };
    let upstream = callers_transitive(&cg, &target);
    let has_caller = |wanted: &str| upstream.iter().any(|key| key.name == wanted);
    assert!(has_caller("process"), "process should reach sink");
    assert!(has_caller("handle"), "handle should reach sink");
    assert_eq!(upstream.len(), 2, "sink itself must be excluded");

    let file_reach = FileReachMap::build(&cg);
    assert!(file_reach.reaches("routes.py", "helper.py"));
    assert!(file_reach.reaches("service.py", "helper.py"));
    assert!(file_reach.reaches("helper.py", "helper.py"), "self-reach");
    assert!(!file_reach.reaches("helper.py", "routes.py"));
}
|
||||
|
||||
/// A key that was never registered in the graph yields an empty caller
/// set rather than panicking.
#[test]
fn callers_transitive_empty_for_unknown_key() {
    let gs = merge_summaries(
        vec![make_summary("leaf", "a.py", "python", 0, vec![])],
        None,
    );
    let cg = build_call_graph(&gs, &[]);

    let missing = FuncKey {
        lang: Lang::Python,
        namespace: "nowhere.py".into(),
        name: "ghost".into(),
        arity: Some(0),
        ..Default::default()
    };
    let upstream = callers_transitive(&cg, &missing);
    assert!(upstream.is_empty());
}
|
||||
|
||||
/// Two files with purely intra-file calls: each reaches itself, neither
/// reaches the other, and exactly two callee namespaces are tracked.
#[test]
fn file_reach_map_handles_disconnected_components() {
    let summaries = vec![
        make_summary("a_caller", "a.py", "python", 0, vec!["a_sink"]),
        make_summary("a_sink", "a.py", "python", 0, vec![]),
        make_summary("b_caller", "b.py", "python", 0, vec!["b_sink"]),
        make_summary("b_sink", "b.py", "python", 0, vec![]),
    ];
    let gs = merge_summaries(summaries, None);
    let cg = build_call_graph(&gs, &[]);
    let file_reach = FileReachMap::build(&cg);

    // Every file reaches itself.
    assert!(file_reach.reaches("a.py", "a.py"));
    assert!(file_reach.reaches("b.py", "b.py"));
    // Disconnected: no reach in either direction across the two files.
    assert!(!file_reach.reaches("a.py", "b.py"));
    assert!(!file_reach.reaches("b.py", "a.py"));
    assert_eq!(file_reach.callee_ns_len(), 2);
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@
|
|||
//! search or do call-graph traversal: edges are emitted at finding
|
||||
//! granularity and carry only the file-local reach hint.
|
||||
|
||||
use crate::callgraph::FileReachMap;
|
||||
use crate::commands::scan::Diag;
|
||||
use crate::entry_points::HttpMethod;
|
||||
use crate::labels::Cap;
|
||||
|
|
@ -94,13 +95,39 @@ pub struct ChainEdge {
|
|||
/// The output order mirrors `findings`; the caller is responsible for
|
||||
/// any further canonicalisation.
|
||||
pub fn findings_to_edges(findings: &[Diag], surface: &SurfaceMap) -> Vec<ChainEdge> {
    // Delegate with no reach map, so only the file-local entry-point
    // match decides each edge's `Reach`.
    findings_to_edges_with_reach(findings, surface, None)
}
|
||||
|
||||
/// Like [`findings_to_edges`] but optionally consults a [`FileReachMap`]
|
||||
/// to widen `Reach::Reachable` beyond the file-local match.
|
||||
///
|
||||
/// When `reach` is `Some`, a finding's enclosing file is also considered
|
||||
/// `Reachable` whenever any [`SurfaceNode::EntryPoint`]'s
|
||||
/// `handler_location.file` transitively reaches the finding's file via
|
||||
/// the call graph. The first matching entry-point (surface-canonical
|
||||
/// order) is used to populate the `route` / `method` / `auth_required`
|
||||
/// fields.
|
||||
///
|
||||
/// `reach = None` is byte-identical to the legacy [`findings_to_edges`]
|
||||
/// behaviour. Path strings on both sides must use the same convention
|
||||
/// (project-relative POSIX) for the widening to fire; mismatched paths
|
||||
/// silently fall through to the file-local heuristic.
|
||||
pub fn findings_to_edges_with_reach(
|
||||
findings: &[Diag],
|
||||
surface: &SurfaceMap,
|
||||
reach: Option<&FileReachMap>,
|
||||
) -> Vec<ChainEdge> {
|
||||
findings
|
||||
.iter()
|
||||
.filter_map(|d| build_edge(d, surface))
|
||||
.filter_map(|d| build_edge(d, surface, reach))
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn build_edge(diag: &Diag, surface: &SurfaceMap) -> Option<ChainEdge> {
|
||||
fn build_edge(
|
||||
diag: &Diag,
|
||||
surface: &SurfaceMap,
|
||||
reach: Option<&FileReachMap>,
|
||||
) -> Option<ChainEdge> {
|
||||
let evidence = diag.evidence.as_ref()?;
|
||||
if evidence.sink_caps == 0 {
|
||||
return None;
|
||||
|
|
@ -108,7 +135,7 @@ fn build_edge(diag: &Diag, surface: &SurfaceMap) -> Option<ChainEdge> {
|
|||
let cap_bits = evidence.sink_caps;
|
||||
let primary_cap = pick_chain_cap(cap_bits)?;
|
||||
let location = SourceLocation::new(diag.path.clone(), diag.line as u32, diag.col as u32);
|
||||
let reach = locate_reach(&location, surface);
|
||||
let reach_kind = locate_reach(&location, surface, reach);
|
||||
let feasibility = Feasibility::for_finding(diag);
|
||||
let finding = FindingRef {
|
||||
finding_id: diag.finding_id.clone(),
|
||||
|
|
@ -120,7 +147,7 @@ fn build_edge(diag: &Diag, surface: &SurfaceMap) -> Option<ChainEdge> {
|
|||
Some(ChainEdge {
|
||||
finding,
|
||||
primary_cap,
|
||||
reach,
|
||||
reach: reach_kind,
|
||||
feasibility,
|
||||
})
|
||||
}
|
||||
|
|
@ -164,7 +191,12 @@ pub fn pick_chain_cap(bits: u32) -> Option<Cap> {
|
|||
lowest_cap(bits)
|
||||
}
|
||||
|
||||
fn locate_reach(loc: &SourceLocation, surface: &SurfaceMap) -> Reach {
|
||||
fn locate_reach(
|
||||
loc: &SourceLocation,
|
||||
surface: &SurfaceMap,
|
||||
reach: Option<&FileReachMap>,
|
||||
) -> Reach {
|
||||
// Pass 1: file-local match (legacy behaviour, always applies).
|
||||
for node in &surface.nodes {
|
||||
if let SurfaceNode::EntryPoint(ep) = node {
|
||||
if ep.handler_location.file == loc.file {
|
||||
|
|
@ -177,6 +209,23 @@ fn locate_reach(loc: &SourceLocation, surface: &SurfaceMap) -> Reach {
|
|||
}
|
||||
}
|
||||
}
|
||||
// Pass 2: transitive caller match via the call graph. Only fires
|
||||
// when `reach` is supplied — keeps the legacy file-local behaviour
|
||||
// for callers that have not yet wired the call-graph reach map.
|
||||
if let Some(reach) = reach {
|
||||
for node in &surface.nodes {
|
||||
if let SurfaceNode::EntryPoint(ep) = node {
|
||||
if reach.reaches(&ep.handler_location.file, &loc.file) {
|
||||
return Reach::Reachable {
|
||||
location: ep.location.clone(),
|
||||
method: ep.method,
|
||||
route: ep.route.clone(),
|
||||
auth_required: ep.auth_required,
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Reach::Unreachable
|
||||
}
|
||||
|
||||
|
|
@ -247,4 +296,61 @@ mod tests {
|
|||
assert_eq!(edges.len(), 1);
|
||||
assert!(matches!(edges[0].reach, Reach::Unreachable));
|
||||
}
|
||||
|
||||
/// A finding in a file with no entry point of its own is `Unreachable`
/// under the file-local lookup, and flips to `Reachable` once the
/// call-graph reach map shows the entry-point's file transitively
/// calling into the finding's file.
#[test]
fn reach_widens_with_file_reach_map() {
    use crate::callgraph::{FileReachMap, build_call_graph};
    use crate::entry_points::HttpMethod;
    use crate::summary::{FuncSummary, merge_summaries};
    use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode};

    // Surface: a single Flask GET "/" route handled in routes.py.
    let mut surface = SurfaceMap::new();
    surface.nodes.push(SurfaceNode::EntryPoint(EntryPoint {
        location: SourceLocation::new("routes.py", 1, 1),
        framework: Framework::Flask,
        method: HttpMethod::GET,
        route: "/".into(),
        handler_name: "handle".into(),
        handler_location: SourceLocation::new("routes.py", 2, 1),
        auth_required: false,
    }));

    // Call graph: routes.py::handle -> helper.py::sink
    let caller_summary = FuncSummary {
        name: "handle".into(),
        file_path: "routes.py".into(),
        lang: "python".into(),
        param_count: 0,
        callees: vec![crate::summary::CalleeSite::bare("sink")],
        ..Default::default()
    };
    let callee_summary = FuncSummary {
        name: "sink".into(),
        file_path: "helper.py".into(),
        lang: "python".into(),
        param_count: 0,
        ..Default::default()
    };
    let gs = merge_summaries(vec![caller_summary, callee_summary], None);
    let cg = build_call_graph(&gs, &[]);
    let file_reach = FileReachMap::build(&cg);

    let finding = diag_with_cap("helper.py", 10, Cap::CODE_EXEC);

    // Without reach: file-local lookup leaves the finding Unreachable.
    let local_only = findings_to_edges(&[finding.clone()], &surface);
    assert!(matches!(local_only[0].reach, Reach::Unreachable));

    // With reach: transitive caller in `routes.py` lifts to Reachable.
    let widened = findings_to_edges_with_reach(&[finding], &surface, Some(&file_reach));
    match &widened[0].reach {
        Reach::Reachable { route, method, .. } => {
            assert_eq!(route, "/");
            assert_eq!(*method, HttpMethod::GET);
        }
        other => panic!("expected Reachable, got {other:?}"),
    }
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -41,7 +41,7 @@ pub mod reverify;
|
|||
pub mod score;
|
||||
pub mod search;
|
||||
|
||||
pub use edges::{ChainEdge, FindingRef, findings_to_edges};
|
||||
pub use edges::{ChainEdge, FindingRef, findings_to_edges, findings_to_edges_with_reach};
|
||||
pub use feasibility::Feasibility;
|
||||
pub use finding::{ChainFinding, ChainMember, ChainSeverity, ChainSink};
|
||||
pub use impact::{IMPACT_LATTICE, ImpactCategory, ImpactRule, lookup_impact};
|
||||
|
|
@ -51,7 +51,7 @@ pub use reverify::{
|
|||
reverify_chain_with, reverify_top_chains, reverify_top_chains_with,
|
||||
};
|
||||
pub use score::{ChainScoreConfig, category_weight, min_score_default, score_path};
|
||||
pub use search::{ChainSearchConfig, find_chains};
|
||||
pub use search::{ChainSearchConfig, find_chains, find_chains_with_reach};
|
||||
|
||||
/// One node in a [`ChainGraph`].
|
||||
///
|
||||
|
|
|
|||
|
|
@ -43,6 +43,7 @@
|
|||
//! adjacent when they share a source file, mirroring Phase 24's
|
||||
//! `findings_to_edges` reach resolver.
|
||||
|
||||
use crate::callgraph::FileReachMap;
|
||||
use crate::chain::edges::{ChainEdge, Reach};
|
||||
use crate::chain::finding::{ChainFinding, ChainSink};
|
||||
use crate::chain::impact::{ImpactCategory, lookup_impact};
|
||||
|
|
@ -75,6 +76,24 @@ pub fn find_chains(
|
|||
edges: &[ChainEdge],
|
||||
surface: &SurfaceMap,
|
||||
cfg: ChainSearchConfig,
|
||||
) -> Vec<ChainFinding> {
|
||||
find_chains_with_reach(edges, surface, cfg, None)
|
||||
}
|
||||
|
||||
/// Like [`find_chains`] but optionally consults a [`FileReachMap`] to
|
||||
/// widen the per-entry-per-sink file-scope filter beyond literal
|
||||
/// file-equality.
|
||||
///
|
||||
/// When `reach` is `Some`, a candidate edge is in scope for a given
|
||||
/// sink whenever its file matches the sink's file *or* contains a
|
||||
/// transitive caller of a function in the sink's file. `reach = None`
|
||||
/// preserves the legacy file-local behaviour for callers that have
|
||||
/// not yet wired the call-graph reach map.
|
||||
pub fn find_chains_with_reach(
|
||||
edges: &[ChainEdge],
|
||||
surface: &SurfaceMap,
|
||||
cfg: ChainSearchConfig,
|
||||
reach: Option<&FileReachMap>,
|
||||
) -> Vec<ChainFinding> {
|
||||
if cfg.max_depth == 0 || edges.is_empty() {
|
||||
return Vec::new();
|
||||
|
|
@ -96,18 +115,18 @@ pub fn find_chains(
|
|||
.cmp(&(b.finding.stable_hash, &b.finding.rule_id, &b.finding.location))
|
||||
});
|
||||
for sink in &sinks {
|
||||
// Phase 25 limits per-entry-per-sink search to those
|
||||
// candidates that share a file with the sink. Phase 25's
|
||||
// deferred call-graph follow-up will widen this.
|
||||
// Scope candidates to the sink: same-file match (legacy),
|
||||
// optionally widened by a call-graph-derived reach map so
|
||||
// a finding in `routes.py`, whose functions transitively
|
||||
// call into `internal_helper.py`, still composes against a
|
||||
// sink in `internal_helper.py`.
|
||||
let scoped: Vec<&ChainEdge> = candidates
|
||||
.iter()
|
||||
.filter(|e| {
|
||||
// Surface DangerousLocal location uses POSIX path;
|
||||
// the per-finding location is whatever the analyser
|
||||
// recorded. Match on the trailing path segment so
|
||||
// a project-relative vs absolute mismatch does not
|
||||
// gate the chain.
|
||||
paths_overlap(&e.finding.location.file, &sink.location.file)
|
||||
|| reach.is_some_and(|r| {
|
||||
r.reaches(&e.finding.location.file, &sink.location.file)
|
||||
})
|
||||
})
|
||||
.copied()
|
||||
.collect();
|
||||
|
|
@ -651,4 +670,74 @@ mod tests {
|
|||
let chains = find_chains(&[e], &surface, cfg);
|
||||
assert!(chains.is_empty());
|
||||
}
|
||||
|
||||
/// A finding in `routes.py` and a sink in `helper.py` never compose
/// under the file-local scope filter; they compose only once the
/// call-graph reach map records that `routes.py` transitively calls
/// into `helper.py`.
#[test]
fn cross_file_chain_requires_reach_map() {
    use crate::callgraph::{FileReachMap, build_call_graph};
    use crate::summary::{FuncSummary, merge_summaries};

    // Surface: entry point in routes.py, dangerous sink in helper.py
    // (a file the entry handler transitively reaches, not the entry
    // file itself).
    let mut surface = SurfaceMap::new();
    surface.nodes.push(entry("routes.py", "/exec", false));
    surface
        .nodes
        .push(sink("helper.py", 20, "os.system", Cap::CODE_EXEC));

    let finding_edge = edge_with(
        "routes.py",
        10,
        "taint-codeexec",
        Cap::CODE_EXEC,
        "/exec",
        HttpMethod::POST,
        Feasibility::Unverified,
    );
    let cfg = ChainSearchConfig {
        max_depth: 4,
        min_score: 0.0,
    };

    // No reach map: routes.py finding cannot compose against
    // helper.py sink because `paths_overlap` rejects the pair.
    let without_reach = find_chains(std::slice::from_ref(&finding_edge), &surface, cfg);
    assert!(
        without_reach.is_empty(),
        "without reach map, cross-file chain must not compose"
    );

    // Reach map: routes.py::handle calls helper.py::sink so
    // helper.py is reachable from routes.py.
    let caller_summary = FuncSummary {
        name: "handle".into(),
        file_path: "routes.py".into(),
        lang: "python".into(),
        param_count: 0,
        callees: vec![crate::summary::CalleeSite::bare("sink")],
        ..Default::default()
    };
    let callee_summary = FuncSummary {
        name: "sink".into(),
        file_path: "helper.py".into(),
        lang: "python".into(),
        param_count: 0,
        ..Default::default()
    };
    let gs = merge_summaries(vec![caller_summary, callee_summary], None);
    let cg = build_call_graph(&gs, &[]);
    let file_reach = FileReachMap::build(&cg);

    let with_reach = find_chains_with_reach(&[finding_edge], &surface, cfg, Some(&file_reach));
    assert_eq!(
        with_reach.len(),
        1,
        "reach map should widen scope to include helper.py sink"
    );
    assert_eq!(with_reach[0].implied_impact, ImpactCategory::Rce);
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -439,6 +439,13 @@ pub fn handle(
|
|||
// functions below. Set to true if any C / C++ file is enumerated.
|
||||
let preview_tier_seen = Arc::new(AtomicBool::new(false));
|
||||
|
||||
// Call-graph-derived file reachability map. Populated by the inner
|
||||
// observer once the call graph is built, then consumed by the chain
|
||||
// composer below to widen cross-file Reach beyond the file-local
|
||||
// heuristic in `findings_to_edges`.
|
||||
let chain_reach_slot: std::sync::OnceLock<crate::callgraph::FileReachMap> =
|
||||
std::sync::OnceLock::new();
|
||||
|
||||
let (mut diags, surface_map): (Vec<Diag>, crate::surface::SurfaceMap) = if index_mode
|
||||
== IndexMode::Off
|
||||
{
|
||||
|
|
@ -450,6 +457,7 @@ pub fn handle(
|
|||
None,
|
||||
None,
|
||||
Some(&preview_tier_seen),
|
||||
Some(&chain_reach_slot),
|
||||
)?
|
||||
} else {
|
||||
if index_mode == IndexMode::Rebuild || !db_path.exists() {
|
||||
|
|
@ -484,6 +492,7 @@ pub fn handle(
|
|||
None,
|
||||
None,
|
||||
Some(&preview_tier_seen),
|
||||
Some(&chain_reach_slot),
|
||||
)?;
|
||||
let surface_map = {
|
||||
let idx = Indexer::from_pool(&project_name, &pool)?;
|
||||
|
|
@ -623,12 +632,25 @@ pub fn handle(
|
|||
};
|
||||
|
||||
// ── Phase 25: compose exploit chains from findings + SurfaceMap ────
|
||||
let chain_edges = crate::chain::findings_to_edges(&diags, &surface_map);
|
||||
// When the inner scan populated the call-graph reach map, pass it
|
||||
// to the chain layer so a finding in an internal helper whose
|
||||
// enclosing function is only reached through a route handler still
|
||||
// composes against a sink in the handler's file. When the slot is
|
||||
// empty (legacy / AST-only paths that never built a call graph),
|
||||
// the chain layer falls back to file-local reach.
|
||||
let chain_reach = chain_reach_slot.get();
|
||||
let chain_edges =
|
||||
crate::chain::findings_to_edges_with_reach(&diags, &surface_map, chain_reach);
|
||||
let chain_search_cfg = crate::chain::ChainSearchConfig {
|
||||
max_depth: config.chain.max_depth,
|
||||
min_score: config.chain.min_score,
|
||||
};
|
||||
let chains = crate::chain::find_chains(&chain_edges, &surface_map, chain_search_cfg);
|
||||
let chains = crate::chain::find_chains_with_reach(
|
||||
&chain_edges,
|
||||
&surface_map,
|
||||
chain_search_cfg,
|
||||
chain_reach,
|
||||
);
|
||||
let diags_for_output = crate::output::filter_constituents(
|
||||
diags.clone(),
|
||||
&chains,
|
||||
|
|
@ -1806,7 +1828,7 @@ pub(crate) fn scan_filesystem(
|
|||
cfg: &Config,
|
||||
show_progress: bool,
|
||||
) -> NyxResult<Vec<Diag>> {
|
||||
scan_filesystem_with_observer(root, cfg, show_progress, None, None, None, None)
|
||||
scan_filesystem_with_observer(root, cfg, show_progress, None, None, None, None, None)
|
||||
.map(|(diags, _surface_map)| diags)
|
||||
}
|
||||
|
||||
|
|
@ -1820,7 +1842,7 @@ pub(crate) fn scan_filesystem_with_surface_map(
|
|||
cfg: &Config,
|
||||
show_progress: bool,
|
||||
) -> NyxResult<(Vec<Diag>, crate::surface::SurfaceMap)> {
|
||||
scan_filesystem_with_observer(root, cfg, show_progress, None, None, None, None)
|
||||
scan_filesystem_with_observer(root, cfg, show_progress, None, None, None, None, None)
|
||||
}
|
||||
|
||||
/// Walk the filesystem and perform a two-pass scan, optionally reporting
|
||||
|
|
@ -1838,6 +1860,7 @@ pub(crate) fn scan_filesystem_with_observer(
|
|||
metrics: Option<&Arc<ScanMetrics>>,
|
||||
logs: Option<&Arc<ScanLogCollector>>,
|
||||
preview_tier_seen: Option<&Arc<AtomicBool>>,
|
||||
chain_reach_out: Option<&std::sync::OnceLock<crate::callgraph::FileReachMap>>,
|
||||
) -> NyxResult<(Vec<Diag>, crate::surface::SurfaceMap)> {
|
||||
// Ensure framework context is available (handle sets it, but direct
|
||||
// callers like scan_no_index may not).
|
||||
|
|
@ -2177,6 +2200,10 @@ pub(crate) fn scan_filesystem_with_observer(
|
|||
);
|
||||
}
|
||||
|
||||
if let Some(out) = chain_reach_out {
|
||||
let _ = out.set(crate::callgraph::FileReachMap::build(&call_graph));
|
||||
}
|
||||
|
||||
// ── Pass 2: re-run with cross-file global summaries ──────────────────
|
||||
if let Some(p) = progress {
|
||||
p.set_stage(ScanStage::Analyzing);
|
||||
|
|
@ -2326,6 +2353,7 @@ pub fn scan_with_index_parallel(
|
|||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
)
|
||||
}
|
||||
|
||||
|
|
@ -2341,6 +2369,7 @@ pub fn scan_with_index_parallel_observer(
|
|||
metrics: Option<&Arc<ScanMetrics>>,
|
||||
logs: Option<&Arc<ScanLogCollector>>,
|
||||
preview_tier_seen: Option<&Arc<AtomicBool>>,
|
||||
chain_reach_out: Option<&std::sync::OnceLock<crate::callgraph::FileReachMap>>,
|
||||
) -> NyxResult<Vec<Diag>> {
|
||||
// Match scan_filesystem_with_observer: auto-fill framework detection when
|
||||
// the caller didn't supply one. Without this, directly-invoked indexed
|
||||
|
|
@ -2966,6 +2995,10 @@ pub fn scan_with_index_parallel_observer(
|
|||
);
|
||||
}
|
||||
|
||||
if let Some(out) = chain_reach_out {
|
||||
let _ = out.set(crate::callgraph::FileReachMap::build(&call_graph));
|
||||
}
|
||||
|
||||
let (batches, orphans) = crate::callgraph::scc_file_batches_with_metadata(
|
||||
&call_graph,
|
||||
&cg_analysis,
|
||||
|
|
|
|||
|
|
@ -249,6 +249,7 @@ impl JobManager {
|
|||
Some(&metrics),
|
||||
Some(&log_collector),
|
||||
None,
|
||||
None,
|
||||
)
|
||||
});
|
||||
let elapsed = start.elapsed().as_secs_f64();
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue