[pitboss] sweep after phase 04: 1 deferred items resolved

This commit is contained in:
pitboss 2026-05-14 04:38:53 -05:00
parent 780dc9099c
commit 937eb479e6
3 changed files with 148 additions and 2 deletions

View file

@ -221,9 +221,15 @@ impl HarnessSpec {
// Takes precedence over the four-strategy ladder because a route
// handler / CLI entry is always a stronger driving anchor than
// the helper function that physically contains the sink.
//
// Strict variant: only the reverse-edge BFS (`find_entry_via_callgraph`)
// counts here. The summary-entry-kind + rule-id substring fallbacks
// that live in `derive_from_callgraph_entry_full` stay at strategy-4
// priority — calling them here would short-circuit the more precise
// strategies (FromFlowSteps / FromRuleNamespace / FromFuncSummaryAuto)
// whenever the rule id happens to contain `.http.` / `.cli.`.
if let (Some(s), Some(cg)) = (summaries, callgraph) {
if let Some(spec) = derive_from_callgraph_entry_full(diag, evidence, Some(s), Some(cg))
{
if let Some(spec) = derive_from_callgraph_walk_only(diag, evidence, s, cg) {
return Ok(spec);
}
}
@ -516,6 +522,62 @@ pub fn derive_from_callgraph_entry_with(
derive_from_callgraph_entry_full(diag, evidence, summaries, None)
}
/// Strict reverse-edge-BFS-only variant of
/// [`derive_from_callgraph_entry_full`].
///
/// Returns `Some(spec)` only when [`find_entry_via_callgraph`] resolves
/// the sink's enclosing function to a framework-bound ancestor via the
/// whole-program callgraph. Unlike
/// [`derive_from_callgraph_entry_full`], the summary-entry-kind fallback
/// on the enclosing function and the rule-id `.http.` / `.cli.`
/// substring heuristic are *not* consulted here — those remain
/// strategy-4 last-chance behaviour invoked from
/// [`HarnessSpec::from_finding_full`]'s strategy ladder.
///
/// Used by the Phase 04 pre-step in [`HarnessSpec::from_finding_full`]
/// so a successful callgraph walk takes precedence over strategies 13,
/// while the substring / summary fallbacks do not short-circuit
/// [`SpecDerivationStrategy::FromFlowSteps`] /
/// [`SpecDerivationStrategy::FromRuleNamespace`] /
/// [`SpecDerivationStrategy::FromFuncSummaryWalk`].
pub fn derive_from_callgraph_walk_only(
diag: &Diag,
evidence: &crate::evidence::Evidence,
summaries: &GlobalSummaries,
callgraph: &CallGraph,
) -> Option<HarnessSpec> {
let lang = lang_from_path(&diag.path)?;
let expected_cap = Cap::from_bits_truncate(evidence.sink_caps);
if expected_cap.is_empty() {
return None;
}
let found = find_entry_via_callgraph(diag, evidence, summaries, callgraph, lang)?;
let entry_kind = found
.summary
.entry_kind
.as_ref()
.map(entry_kind_from_summary)
.unwrap_or_else(|| name_to_entry_kind(&found.summary.name));
let entry_file = if !found.summary.file_path.is_empty() {
found.summary.file_path.clone()
} else {
diag.path.clone()
};
let mut spec = finalize_spec(
diag,
entry_file,
found.summary.name.clone(),
lang,
expected_cap,
diag.path.clone(),
diag.line as u32,
SpecDerivationStrategy::FromCallgraphEntry,
);
spec.entry_kind = entry_kind;
spec.spec_hash = compute_spec_hash(&spec);
Some(spec)
}
/// Like [`derive_from_callgraph_entry_with`], but also consults the
/// whole-program [`CallGraph`] when `callgraph` is `Some`.
///

View file

@ -0,0 +1,13 @@
# Phase 04 follow-up regression fixture: the sink lives in a class method
# that has no callers in the whole-program callgraph. The reverse-edge BFS
# in `find_entry_via_callgraph` must miss (helper is inside a class, so
# `is_entry_point`'s zero-in-degree heuristic does not apply), and the
# strict `derive_from_callgraph_walk_only` pre-step must defer to the
# strategy ladder so the substring `.http.` rule-id fallback does NOT
# short-circuit the more precise `FromFlowSteps` strategy.
class Stuff:
def helper(self, arg):
import os
os.system(arg) # sink: command injection

View file

@ -104,6 +104,21 @@ fn sink_step_in(file: &str, function: &str, line: usize) -> FlowStep {
}
}
fn source_step_in(file: &str, function: &str, line: usize) -> FlowStep {
FlowStep {
step: 0,
kind: FlowStepKind::Source,
file: file.into(),
line: line as u32,
col: 0,
snippet: None,
variable: None,
callee: None,
function: Some(function.into()),
is_cross_file: false,
}
}
/// Helper: assert that strategy 4 with the callgraph rewrites the
/// entry to a framework-bound ancestor.
fn assert_callgraph_rewrites_entry(
@ -256,3 +271,59 @@ fn from_finding_with_callgraph_thin_wrapper_compiles_and_runs() {
assert_eq!(spec.derivation, SpecDerivationStrategy::FromCallgraphEntry);
assert!(matches!(spec.entry_kind, EntryKind::HttpRoute));
}
// ── Strict pre-step regression: BFS-miss must defer to the ladder ────────────
#[test]
fn bfs_miss_with_http_rule_defers_to_flow_steps_strategy() {
// Regression for the Phase 04 follow-up: the pre-step in
// `HarnessSpec::from_finding_full` must use the *strict*
// `derive_from_callgraph_walk_only` helper. If it instead falls
// through to the rule-id `.http.` / `.cli.` substring fallback baked
// into `derive_from_callgraph_entry_full`, every `.http.*` finding
// whose enclosing function happens to be orphaned in the callgraph
// gets tagged `FromCallgraphEntry` and loses the more precise
// `FromFlowSteps` resolution. This fixture parks the sink in a
// class method with no callers: the helper is *not* an entry point
// (`container` is non-empty so the zero-in-degree heuristic does
// not apply) and BFS bottoms out without finding an ancestor.
let file = fixtures_dir().join("orphan_helper_sink.py");
let file_str = file.to_string_lossy().to_string();
let (summaries, cg, _analysis) = build_context(&file);
// Sanity: the helper must be summarised and not be an entry point.
let helper_summary = summaries
.iter()
.find(|(_, s)| s.name == "helper")
.map(|(_, s)| s)
.expect("pass 1 must summarise the orphan helper");
assert!(
!is_entry_point(helper_summary, &cg),
"class method helper with non-empty container must not qualify as entry point"
);
// Synth a `py.http.*` rule id with a Source flow_step rooted in the
// helper so strategy 1 (FromFlowSteps) has a concrete entry.
let mut diag = make_diag("py.http.synthetic_route", &file_str, 13);
let mut ev = Evidence::default();
ev.flow_steps = vec![
source_step_in(&file_str, "helper", 13),
sink_step_in(&file_str, "helper", 13),
];
ev.sink_caps = Cap::SHELL_ESCAPE.bits();
diag.evidence = Some(ev);
let spec = HarnessSpec::from_finding_full(&diag, false, Some(&summaries), Some(&cg))
.expect("strict pre-step must defer; strategy 1 must produce a spec");
assert_eq!(
spec.derivation,
SpecDerivationStrategy::FromFlowSteps,
"BFS-miss + `.http.` rule must NOT short-circuit on the substring fallback; \
expected FromFlowSteps but got {:?}",
spec.derivation
);
assert_eq!(
spec.entry_name, "helper",
"FromFlowSteps must record the helper as entry, not an inferred route handler"
);
}