diff --git a/src/dynamic/spec.rs b/src/dynamic/spec.rs index cca03568..5e0c9a8f 100644 --- a/src/dynamic/spec.rs +++ b/src/dynamic/spec.rs @@ -221,9 +221,15 @@ impl HarnessSpec { // Takes precedence over the four-strategy ladder because a route // handler / CLI entry is always a stronger driving anchor than // the helper function that physically contains the sink. + // + // Strict variant: only the reverse-edge BFS (`find_entry_via_callgraph`) + // counts here. The summary-entry-kind + rule-id substring fallbacks + // that live in `derive_from_callgraph_entry_full` stay at strategy-4 + // priority — calling them here would short-circuit the more precise + // strategies (FromFlowSteps / FromRuleNamespace / FromFuncSummaryAuto) + // whenever the rule id happens to contain `.http.` / `.cli.`. if let (Some(s), Some(cg)) = (summaries, callgraph) { - if let Some(spec) = derive_from_callgraph_entry_full(diag, evidence, Some(s), Some(cg)) - { + if let Some(spec) = derive_from_callgraph_walk_only(diag, evidence, s, cg) { return Ok(spec); } } @@ -516,6 +522,62 @@ pub fn derive_from_callgraph_entry_with( derive_from_callgraph_entry_full(diag, evidence, summaries, None) } +/// Strict reverse-edge-BFS-only variant of +/// [`derive_from_callgraph_entry_full`]. +/// +/// Returns `Some(spec)` only when [`find_entry_via_callgraph`] resolves +/// the sink's enclosing function to a framework-bound ancestor via the +/// whole-program callgraph. Unlike +/// [`derive_from_callgraph_entry_full`], the summary-entry-kind fallback +/// on the enclosing function and the rule-id `.http.` / `.cli.` +/// substring heuristic are *not* consulted here — those remain +/// strategy-4 last-chance behaviour invoked from +/// [`HarnessSpec::from_finding_full`]'s strategy ladder. +/// +/// Used by the Phase 04 pre-step in [`HarnessSpec::from_finding_full`] +/// so a successful callgraph walk takes precedence over strategies 1–3, +/// while the substring / summary fallbacks do not short-circuit +/// [`SpecDerivationStrategy::FromFlowSteps`] / +/// [`SpecDerivationStrategy::FromRuleNamespace`] / +/// [`SpecDerivationStrategy::FromFuncSummaryWalk`]. +pub fn derive_from_callgraph_walk_only( + diag: &Diag, + evidence: &crate::evidence::Evidence, + summaries: &GlobalSummaries, + callgraph: &CallGraph, +) -> Option { + let lang = lang_from_path(&diag.path)?; + let expected_cap = Cap::from_bits_truncate(evidence.sink_caps); + if expected_cap.is_empty() { + return None; + } + let found = find_entry_via_callgraph(diag, evidence, summaries, callgraph, lang)?; + let entry_kind = found + .summary + .entry_kind + .as_ref() + .map(entry_kind_from_summary) + .unwrap_or_else(|| name_to_entry_kind(&found.summary.name)); + let entry_file = if !found.summary.file_path.is_empty() { + found.summary.file_path.clone() + } else { + diag.path.clone() + }; + let mut spec = finalize_spec( + diag, + entry_file, + found.summary.name.clone(), + lang, + expected_cap, + diag.path.clone(), + diag.line as u32, + SpecDerivationStrategy::FromCallgraphEntry, + ); + spec.entry_kind = entry_kind; + spec.spec_hash = compute_spec_hash(&spec); + Some(spec) +} + /// Like [`derive_from_callgraph_entry_with`], but also consults the /// whole-program [`CallGraph`] when `callgraph` is `Some`. /// diff --git a/tests/dynamic_fixtures/callgraph_entry/orphan_helper_sink.py b/tests/dynamic_fixtures/callgraph_entry/orphan_helper_sink.py new file mode 100644 index 00000000..9e3e8841 --- /dev/null +++ b/tests/dynamic_fixtures/callgraph_entry/orphan_helper_sink.py @@ -0,0 +1,13 @@ +# Phase 04 follow-up regression fixture: the sink lives in a class method +# that has no callers in the whole-program callgraph. The reverse-edge BFS +# in `find_entry_via_callgraph` must miss (helper is inside a class, so +# `is_entry_point`'s zero-in-degree heuristic does not apply), and the +# strict `derive_from_callgraph_walk_only` pre-step must defer to the +# strategy ladder so the substring `.http.` rule-id fallback does NOT +# short-circuit the more precise `FromFlowSteps` strategy. + + +class Stuff: + def helper(self, arg): + import os + os.system(arg) # sink: command injection diff --git a/tests/spec_callgraph_resolution.rs b/tests/spec_callgraph_resolution.rs index 1c8de086..03f65705 100644 --- a/tests/spec_callgraph_resolution.rs +++ b/tests/spec_callgraph_resolution.rs @@ -104,6 +104,21 @@ fn sink_step_in(file: &str, function: &str, line: usize) -> FlowStep { } } +fn source_step_in(file: &str, function: &str, line: usize) -> FlowStep { + FlowStep { + step: 0, + kind: FlowStepKind::Source, + file: file.into(), + line: line as u32, + col: 0, + snippet: None, + variable: None, + callee: None, + function: Some(function.into()), + is_cross_file: false, + } +} + /// Helper: assert that strategy 4 with the callgraph rewrites the /// entry to a framework-bound ancestor. fn assert_callgraph_rewrites_entry( @@ -256,3 +271,59 @@ fn from_finding_with_callgraph_thin_wrapper_compiles_and_runs() { assert_eq!(spec.derivation, SpecDerivationStrategy::FromCallgraphEntry); assert!(matches!(spec.entry_kind, EntryKind::HttpRoute)); } + +// ── Strict pre-step regression: BFS-miss must defer to the ladder ──────────── + +#[test] +fn bfs_miss_with_http_rule_defers_to_flow_steps_strategy() { + // Regression for the Phase 04 follow-up: the pre-step in + // `HarnessSpec::from_finding_full` must use the *strict* + // `derive_from_callgraph_walk_only` helper. If it instead falls + // through to the rule-id `.http.` / `.cli.` substring fallback baked + // into `derive_from_callgraph_entry_full`, every `.http.*` finding + // whose enclosing function happens to be orphaned in the callgraph + // gets tagged `FromCallgraphEntry` and loses the more precise + // `FromFlowSteps` resolution. This fixture parks the sink in a + // class method with no callers: the helper is *not* an entry point + // (`container` is non-empty so the zero-in-degree heuristic does + // not apply) and BFS bottoms out without finding an ancestor. + let file = fixtures_dir().join("orphan_helper_sink.py"); + let file_str = file.to_string_lossy().to_string(); + let (summaries, cg, _analysis) = build_context(&file); + + // Sanity: the helper must be summarised and not be an entry point. + let helper_summary = summaries + .iter() + .find(|(_, s)| s.name == "helper") + .map(|(_, s)| s) + .expect("pass 1 must summarise the orphan helper"); + assert!( + !is_entry_point(helper_summary, &cg), + "class method helper with non-empty container must not qualify as entry point" + ); + + // Synth a `py.http.*` rule id with a Source flow_step rooted in the + // helper so strategy 1 (FromFlowSteps) has a concrete entry. + let mut diag = make_diag("py.http.synthetic_route", &file_str, 13); + let mut ev = Evidence::default(); + ev.flow_steps = vec![ + source_step_in(&file_str, "helper", 13), + sink_step_in(&file_str, "helper", 13), + ]; + ev.sink_caps = Cap::SHELL_ESCAPE.bits(); + diag.evidence = Some(ev); + + let spec = HarnessSpec::from_finding_full(&diag, false, Some(&summaries), Some(&cg)) + .expect("strict pre-step must defer; strategy 1 must produce a spec"); + assert_eq!( + spec.derivation, + SpecDerivationStrategy::FromFlowSteps, + "BFS-miss + `.http.` rule must NOT short-circuit on the substring fallback; \ + expected FromFlowSteps but got {:?}", + spec.derivation + ); + assert_eq!( + spec.entry_name, "helper", + "FromFlowSteps must record the helper as entry, not an inferred route handler" + ); +}