[pitboss/grind] deferred session-0005 (20260516T052512Z-20f8)

This commit is contained in:
pitboss 2026-05-16 03:38:45 -05:00
parent 3e08382a3f
commit bf8e61ffdb
6 changed files with 411 additions and 19 deletions

View file

@ -13,6 +13,7 @@
//! search or do call-graph traversal: edges are emitted at finding
//! granularity and carry only the file-local reach hint.
use crate::callgraph::FileReachMap;
use crate::commands::scan::Diag;
use crate::entry_points::HttpMethod;
use crate::labels::Cap;
@ -94,13 +95,39 @@ pub struct ChainEdge {
/// The output order mirrors `findings`; the caller is responsible for
/// any further canonicalisation.
pub fn findings_to_edges(findings: &[Diag], surface: &SurfaceMap) -> Vec<ChainEdge> {
// No reach map is supplied here, so reach is resolved by the
// file-local entry-point match only (the call-graph widening pass
// is skipped when `reach` is `None`).
findings_to_edges_with_reach(findings, surface, None)
}
/// Like [`findings_to_edges`] but optionally consults a [`FileReachMap`]
/// to widen `Reach::Reachable` beyond the file-local match.
///
/// When `reach` is `Some`, a finding's enclosing file is also considered
/// `Reachable` whenever any [`SurfaceNode::EntryPoint`]'s
/// `handler_location.file` transitively reaches the finding's file via
/// the call graph. The first matching entry-point (surface-canonical
/// order) is used to populate the `route` / `method` / `auth_required`
/// fields.
///
/// `reach = None` is byte-identical to the legacy [`findings_to_edges`]
/// behaviour. Path strings on both sides must use the same convention
/// (project-relative POSIX) for the widening to fire; mismatched paths
/// silently fall through to the file-local heuristic.
pub fn findings_to_edges_with_reach(
findings: &[Diag],
surface: &SurfaceMap,
reach: Option<&FileReachMap>,
) -> Vec<ChainEdge> {
findings
.iter()
.filter_map(|d| build_edge(d, surface))
.filter_map(|d| build_edge(d, surface, reach))
.collect()
}
fn build_edge(diag: &Diag, surface: &SurfaceMap) -> Option<ChainEdge> {
fn build_edge(
diag: &Diag,
surface: &SurfaceMap,
reach: Option<&FileReachMap>,
) -> Option<ChainEdge> {
let evidence = diag.evidence.as_ref()?;
if evidence.sink_caps == 0 {
return None;
@ -108,7 +135,7 @@ fn build_edge(diag: &Diag, surface: &SurfaceMap) -> Option<ChainEdge> {
let cap_bits = evidence.sink_caps;
let primary_cap = pick_chain_cap(cap_bits)?;
let location = SourceLocation::new(diag.path.clone(), diag.line as u32, diag.col as u32);
let reach = locate_reach(&location, surface);
let reach_kind = locate_reach(&location, surface, reach);
let feasibility = Feasibility::for_finding(diag);
let finding = FindingRef {
finding_id: diag.finding_id.clone(),
@ -120,7 +147,7 @@ fn build_edge(diag: &Diag, surface: &SurfaceMap) -> Option<ChainEdge> {
Some(ChainEdge {
finding,
primary_cap,
reach,
reach: reach_kind,
feasibility,
})
}
@ -164,7 +191,12 @@ pub fn pick_chain_cap(bits: u32) -> Option<Cap> {
lowest_cap(bits)
}
fn locate_reach(loc: &SourceLocation, surface: &SurfaceMap) -> Reach {
fn locate_reach(
loc: &SourceLocation,
surface: &SurfaceMap,
reach: Option<&FileReachMap>,
) -> Reach {
// Pass 1: file-local match (legacy behaviour, always applies).
for node in &surface.nodes {
if let SurfaceNode::EntryPoint(ep) = node {
if ep.handler_location.file == loc.file {
@ -177,6 +209,23 @@ fn locate_reach(loc: &SourceLocation, surface: &SurfaceMap) -> Reach {
}
}
}
// Pass 2: transitive caller match via the call graph. Only fires
// when `reach` is supplied — keeps the legacy file-local behaviour
// for callers that have not yet wired the call-graph reach map.
if let Some(reach) = reach {
for node in &surface.nodes {
if let SurfaceNode::EntryPoint(ep) = node {
if reach.reaches(&ep.handler_location.file, &loc.file) {
return Reach::Reachable {
location: ep.location.clone(),
method: ep.method,
route: ep.route.clone(),
auth_required: ep.auth_required,
};
}
}
}
}
Reach::Unreachable
}
@ -247,4 +296,61 @@ mod tests {
assert_eq!(edges.len(), 1);
assert!(matches!(edges[0].reach, Reach::Unreachable));
}
/// Cross-file finding becomes Reachable when the call-graph reach
/// map records a transitive caller in the entry-point's file.
///
/// The same finding is resolved twice: once through the legacy
/// [`findings_to_edges`] (expected `Unreachable`) and once through
/// [`findings_to_edges_with_reach`] with a reach map built from a
/// two-function call graph (expected `Reachable` with the entry
/// point's route and method).
#[test]
fn reach_widens_with_file_reach_map() {
    use crate::callgraph::{FileReachMap, build_call_graph};
    use crate::entry_points::HttpMethod;
    use crate::summary::{FuncSummary, merge_summaries};
    use crate::surface::{EntryPoint, Framework, SourceLocation, SurfaceNode};

    // Call graph under test: routes.py::handle -> helper.py::sink
    let handle = FuncSummary {
        name: "handle".into(),
        file_path: "routes.py".into(),
        lang: "python".into(),
        param_count: 0,
        callees: vec![crate::summary::CalleeSite::bare("sink")],
        ..Default::default()
    };
    let sink = FuncSummary {
        name: "sink".into(),
        file_path: "helper.py".into(),
        lang: "python".into(),
        param_count: 0,
        ..Default::default()
    };
    let gs = merge_summaries(vec![handle, sink], None);
    let cg = build_call_graph(&gs, &[]);
    let reach = FileReachMap::build(&cg);

    // Surface with a single entry point whose handler lives in routes.py.
    let mut surface = SurfaceMap::new();
    surface.nodes.push(SurfaceNode::EntryPoint(EntryPoint {
        location: SourceLocation::new("routes.py", 1, 1),
        framework: Framework::Flask,
        method: HttpMethod::GET,
        route: "/".into(),
        handler_name: "handle".into(),
        handler_location: SourceLocation::new("routes.py", 2, 1),
        auth_required: false,
    }));

    // The finding sits in helper.py, i.e. not in the entry point's file.
    let d = diag_with_cap("helper.py", 10, Cap::CODE_EXEC);

    // Without reach: file-local lookup leaves the finding Unreachable.
    // Borrow the diag as a one-element slice instead of cloning it.
    let edges = findings_to_edges(std::slice::from_ref(&d), &surface);
    // Check the length before indexing so a dropped edge fails with a
    // clear assertion rather than an index-out-of-bounds panic.
    assert_eq!(edges.len(), 1);
    assert!(matches!(edges[0].reach, Reach::Unreachable));

    // With reach: transitive caller in `routes.py` lifts to Reachable.
    let edges = findings_to_edges_with_reach(&[d], &surface, Some(&reach));
    assert_eq!(edges.len(), 1);
    match &edges[0].reach {
        Reach::Reachable { route, method, .. } => {
            assert_eq!(route, "/");
            assert_eq!(*method, HttpMethod::GET);
        }
        other => panic!("expected Reachable, got {other:?}"),
    }
}
}

View file

@ -41,7 +41,7 @@ pub mod reverify;
pub mod score;
pub mod search;
pub use edges::{ChainEdge, FindingRef, findings_to_edges};
pub use edges::{ChainEdge, FindingRef, findings_to_edges, findings_to_edges_with_reach};
pub use feasibility::Feasibility;
pub use finding::{ChainFinding, ChainMember, ChainSeverity, ChainSink};
pub use impact::{IMPACT_LATTICE, ImpactCategory, ImpactRule, lookup_impact};
@ -51,7 +51,7 @@ pub use reverify::{
reverify_chain_with, reverify_top_chains, reverify_top_chains_with,
};
pub use score::{ChainScoreConfig, category_weight, min_score_default, score_path};
pub use search::{ChainSearchConfig, find_chains};
pub use search::{ChainSearchConfig, find_chains, find_chains_with_reach};
/// One node in a [`ChainGraph`].
///

View file

@ -43,6 +43,7 @@
//! adjacent when they share a source file, mirroring Phase 24's
//! `findings_to_edges` reach resolver.
use crate::callgraph::FileReachMap;
use crate::chain::edges::{ChainEdge, Reach};
use crate::chain::finding::{ChainFinding, ChainSink};
use crate::chain::impact::{ImpactCategory, lookup_impact};
@ -75,6 +76,24 @@ pub fn find_chains(
edges: &[ChainEdge],
surface: &SurfaceMap,
cfg: ChainSearchConfig,
) -> Vec<ChainFinding> {
find_chains_with_reach(edges, surface, cfg, None)
}
/// Like [`find_chains`] but optionally consults a [`FileReachMap`] to
/// widen the per-entry-per-sink file-scope filter beyond literal
/// file-equality.
///
/// When `reach` is `Some`, a candidate edge is in scope for a given
/// sink whenever the finding's file matches the sink's file (the
/// legacy check) *or* the reach map reports that the finding's file
/// reaches the sink's file. `reach = None` preserves the legacy
/// file-local behaviour for callers that have not yet wired the
/// call-graph reach map.
pub fn find_chains_with_reach(
edges: &[ChainEdge],
surface: &SurfaceMap,
cfg: ChainSearchConfig,
reach: Option<&FileReachMap>,
) -> Vec<ChainFinding> {
if cfg.max_depth == 0 || edges.is_empty() {
return Vec::new();
@ -96,18 +115,18 @@ pub fn find_chains(
.cmp(&(b.finding.stable_hash, &b.finding.rule_id, &b.finding.location))
});
for sink in &sinks {
// Phase 25 limits per-entry-per-sink search to those
// candidates that share a file with the sink. Phase 25's
// deferred call-graph follow-up will widen this.
// Scope candidates to the sink: same-file match (legacy),
// optionally widened by a call-graph-derived reach map so
// a finding in `routes.py` still composes against a sink in
// `helper.py` when the map records that `routes.py` reaches
// `helper.py`.
let scoped: Vec<&ChainEdge> = candidates
.iter()
.filter(|e| {
// Surface DangerousLocal location uses POSIX path;
// the per-finding location is whatever the analyser
// recorded. Match on the trailing path segment so
// a project-relative vs absolute mismatch does not
// gate the chain.
paths_overlap(&e.finding.location.file, &sink.location.file)
|| reach.is_some_and(|r| {
r.reaches(&e.finding.location.file, &sink.location.file)
})
})
.copied()
.collect();
@ -651,4 +670,74 @@ mod tests {
let chains = find_chains(&[e], &surface, cfg);
assert!(chains.is_empty());
}
/// A chain whose sink lives in another file than the finding is only
/// produced once a call-graph reach map links the two files.
#[test]
fn cross_file_chain_requires_reach_map() {
    use crate::callgraph::{FileReachMap, build_call_graph};
    use crate::summary::{FuncSummary, merge_summaries};

    // Surface: the entry point is in routes.py, while the dangerous
    // sink is in helper.py (a file the handler calls into).
    let mut surface = SurfaceMap::new();
    surface.nodes.push(entry("routes.py", "/exec", false));
    surface
        .nodes
        .push(sink("helper.py", 20, "os.system", Cap::CODE_EXEC));

    // The single candidate edge: a code-exec finding in routes.py.
    let finding_edge = edge_with(
        "routes.py",
        10,
        "taint-codeexec",
        Cap::CODE_EXEC,
        "/exec",
        HttpMethod::POST,
        Feasibility::Unverified,
    );
    let cfg = ChainSearchConfig {
        max_depth: 4,
        min_score: 0.0,
    };

    // Legacy search (no reach map): the routes.py finding and the
    // helper.py sink fail the same-file check, so nothing composes.
    let without_reach = find_chains(std::slice::from_ref(&finding_edge), &surface, cfg);
    assert!(
        without_reach.is_empty(),
        "without reach map, cross-file chain must not compose"
    );

    // Build a reach map from the call graph
    // routes.py::handle -> helper.py::sink.
    let caller_summary = FuncSummary {
        name: "handle".into(),
        file_path: "routes.py".into(),
        lang: "python".into(),
        param_count: 0,
        callees: vec![crate::summary::CalleeSite::bare("sink")],
        ..Default::default()
    };
    let callee_summary = FuncSummary {
        name: "sink".into(),
        file_path: "helper.py".into(),
        lang: "python".into(),
        param_count: 0,
        ..Default::default()
    };
    let merged = merge_summaries(vec![caller_summary, callee_summary], None);
    let call_graph = build_call_graph(&merged, &[]);
    let reach = FileReachMap::build(&call_graph);

    // Same edge and surface, now searched with the reach map.
    let with_reach = find_chains_with_reach(
        std::slice::from_ref(&finding_edge),
        &surface,
        cfg,
        Some(&reach),
    );
    assert_eq!(
        with_reach.len(),
        1,
        "reach map should widen scope to include helper.py sink"
    );
    assert_eq!(with_reach[0].implied_impact, ImpactCategory::Rce);
}
}