mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-21 20:18:06 +02:00
[pitboss] phase 04: Track A.4 — Callgraph-aware spec entry-point resolution
This commit is contained in:
parent
3b660ba1d3
commit
780dc9099c
9 changed files with 618 additions and 4 deletions
|
|
@ -52,6 +52,7 @@ pub struct AmbiguousCallee {
|
|||
///
|
||||
/// Nodes are [`FuncKey`]s (one per function definition across all files).
|
||||
/// Edges represent call-site relationships resolved after pass 1.
|
||||
#[derive(Debug)]
|
||||
pub struct CallGraph {
|
||||
pub graph: DiGraph<FuncKey, CallEdge>,
|
||||
/// `FuncKey → NodeIndex` for quick lookup.
|
||||
|
|
|
|||
|
|
@ -373,6 +373,22 @@ fn load_verify_summaries(
|
|||
Some(Arc::new(crate::summary::merge_summaries(all, Some(&root_str))))
|
||||
}
|
||||
|
||||
/// Construct the whole-program [`crate::callgraph::CallGraph`] for the
/// verify pipeline from summaries that are already in memory.
///
/// The resulting graph is what the callgraph-aware spec-derivation path
/// (`SpecDerivationStrategy::FromCallgraphEntry`) walks. It is returned
/// inside an [`Arc`] so it can be stored on the verify options next to
/// the shared summaries.
///
/// Unlike [`load_verify_summaries`], this helper has no failure path:
/// the value produced by `build_call_graph` is wrapped directly, so the
/// return type is a plain `Arc` rather than an `Option`. It is kept as
/// a separate function so that future fallible steps (e.g. interop-edge
/// loading) have a single place to live.
#[cfg(feature = "dynamic")]
fn load_verify_callgraph(
    summaries: &crate::summary::GlobalSummaries,
) -> Arc<crate::callgraph::CallGraph> {
    // The second argument is an empty slice — presumably the interop-edge
    // list; confirm against `build_call_graph` before passing real data.
    let graph = crate::callgraph::build_call_graph(summaries, &[]);
    Arc::new(graph)
}
|
||||
|
||||
/// Entry point called by the CLI.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn handle(
|
||||
|
|
@ -529,6 +545,12 @@ pub fn handle(
|
|||
// without re-hitting SQLite per finding. Best-effort: a load
|
||||
// failure logs and falls through to the substring heuristics.
|
||||
opts.summaries = load_verify_summaries(&project_name, &db_path, &scan_path);
|
||||
// Build the whole-program callgraph from the preloaded summaries
|
||||
// so strategy 4 can walk reverse edges to a route handler / CLI
|
||||
// entry when the sink lives in a leaf helper.
|
||||
if let Some(ref s) = opts.summaries {
|
||||
opts.callgraph = Some(load_verify_callgraph(s));
|
||||
}
|
||||
}
|
||||
for diag in &mut diags {
|
||||
let result = crate::dynamic::verify::verify_finding(diag, &opts);
|
||||
|
|
|
|||
|
|
@ -17,13 +17,15 @@
|
|||
//! meaning, the hash inputs change, or the corpus changes in a way that
|
||||
//! would invalidate previously-computed hashes.
|
||||
|
||||
use crate::callgraph::{CallGraph, CallGraphAnalysis};
|
||||
use crate::commands::scan::Diag;
|
||||
use crate::dynamic::corpus::CORPUS_VERSION;
|
||||
use crate::evidence::{Confidence, FlowStepKind, UnsupportedReason};
|
||||
use crate::labels::Cap;
|
||||
use crate::summary::{FuncSummary, GlobalSummaries};
|
||||
use crate::symbol::Lang;
|
||||
use crate::symbol::{FuncKey, Lang};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::{HashSet, VecDeque};
|
||||
use std::path::Path;
|
||||
|
||||
/// Re-export of the always-present [`crate::evidence::SpecDerivationStrategy`].
|
||||
|
|
@ -177,6 +179,33 @@ impl HarnessSpec {
|
|||
diag: &Diag,
|
||||
verify_all_confidence: bool,
|
||||
summaries: Option<&GlobalSummaries>,
|
||||
) -> Result<Self, UnsupportedReason> {
|
||||
Self::from_finding_full(diag, verify_all_confidence, summaries, None)
|
||||
}
|
||||
|
||||
/// Strategy-aware constructor that also consults a whole-program
|
||||
/// [`CallGraph`] when `callgraph` is `Some`.
|
||||
///
|
||||
/// Strategy 4 ([`SpecDerivationStrategy::FromCallgraphEntry`]) walks
|
||||
/// reverse call-graph edges from the sink's enclosing function
|
||||
/// (incoming edges of the [`CallGraph`]) to discover the *nearest* ancestor
|
||||
/// that qualifies as an entry point (see [`is_entry_point`]). When
|
||||
/// found, the spec's `entry_file` / `entry_name` are rewritten to the
|
||||
/// ancestor and `entry_kind` is classified from the ancestor's
|
||||
/// [`FuncSummary::entry_kind`] — capturing every framework-bound sink
|
||||
/// whose only real caller is a route decorator or CLI subcommand.
|
||||
///
|
||||
/// When `callgraph` is `None` the behaviour matches
|
||||
/// [`HarnessSpec::from_finding_with_summaries`] verbatim: strategy 4
|
||||
/// falls back to the rule-id substring / summary-entry-kind path.
|
||||
/// When `summaries` is `None` the callgraph walk is skipped as well:
|
||||
/// every callgraph-aware branch requires both `summaries` and
|
||||
/// `callgraph`, so a callgraph passed on its own has no effect.
|
||||
pub fn from_finding_full(
|
||||
diag: &Diag,
|
||||
verify_all_confidence: bool,
|
||||
summaries: Option<&GlobalSummaries>,
|
||||
callgraph: Option<&CallGraph>,
|
||||
) -> Result<Self, UnsupportedReason> {
|
||||
if !verify_all_confidence {
|
||||
match diag.confidence {
|
||||
|
|
@ -187,6 +216,18 @@ impl HarnessSpec {
|
|||
|
||||
let evidence = diag.evidence.as_ref().ok_or(UnsupportedReason::NoFlowSteps)?;
|
||||
|
||||
// Phase 04 pre-step: when both callgraph *and* summaries are
|
||||
// present, walk reverse edges to a framework-bound ancestor.
|
||||
// Takes precedence over the four-strategy ladder because a route
|
||||
// handler / CLI entry is always a stronger driving anchor than
|
||||
// the helper function that physically contains the sink.
|
||||
if let (Some(s), Some(cg)) = (summaries, callgraph) {
|
||||
if let Some(spec) = derive_from_callgraph_entry_full(diag, evidence, Some(s), Some(cg))
|
||||
{
|
||||
return Ok(spec);
|
||||
}
|
||||
}
|
||||
|
||||
// Try each strategy in priority order; first non-None wins.
|
||||
if let Some(spec) = derive_from_flow_steps(diag, evidence) {
|
||||
return Ok(spec);
|
||||
|
|
@ -197,13 +238,35 @@ impl HarnessSpec {
|
|||
if let Some(spec) = derive_from_func_summary_auto(diag, evidence, summaries) {
|
||||
return Ok(spec);
|
||||
}
|
||||
if let Some(spec) = derive_from_callgraph_entry_with(diag, evidence, summaries) {
|
||||
if let Some(spec) = derive_from_callgraph_entry_full(diag, evidence, summaries, callgraph)
|
||||
{
|
||||
return Ok(spec);
|
||||
}
|
||||
|
||||
Err(UnsupportedReason::SpecDerivationFailed)
|
||||
}
|
||||
|
||||
/// Convenience wrapper around [`HarnessSpec::from_finding_full`] that
|
||||
/// pins `verify_all_confidence = false` and accepts only callgraph
|
||||
/// context. Used by the verifier when the caller has built a fresh
|
||||
/// [`CallGraph`] but not yet plumbed the matching
|
||||
/// [`GlobalSummaries`]; in that mode the callgraph walk degrades to
|
||||
/// the name-based entry recogniser.
|
||||
///
|
||||
/// The `analysis` argument is accepted to pin the API surface against
|
||||
/// future SCC-aware refinements (e.g. bounding the reverse-edge BFS
|
||||
/// against the analysis's pre-computed back edges); the current
|
||||
/// implementation does not consult it because the BFS already
|
||||
/// protects against recursive predecessor chains via its visited
|
||||
/// set.
|
||||
pub fn from_finding_with_callgraph(
|
||||
diag: &Diag,
|
||||
callgraph: &CallGraph,
|
||||
_analysis: &CallGraphAnalysis,
|
||||
) -> Result<Self, UnsupportedReason> {
|
||||
Self::from_finding_full(diag, false, None, Some(callgraph))
|
||||
}
|
||||
|
||||
/// True when [`HarnessSpec::entry_kind`] is in
|
||||
/// [`crate::dynamic::lang::entry_kinds_supported`] for [`HarnessSpec::lang`].
|
||||
///
|
||||
|
|
@ -449,6 +512,26 @@ pub fn derive_from_callgraph_entry_with(
|
|||
diag: &Diag,
|
||||
evidence: &crate::evidence::Evidence,
|
||||
summaries: Option<&GlobalSummaries>,
|
||||
) -> Option<HarnessSpec> {
|
||||
derive_from_callgraph_entry_full(diag, evidence, summaries, None)
|
||||
}
|
||||
|
||||
/// Like [`derive_from_callgraph_entry_with`], but also consults the
|
||||
/// whole-program [`CallGraph`] when `callgraph` is `Some`.
|
||||
///
|
||||
/// When both `summaries` and `callgraph` are present, the sink's
|
||||
/// enclosing function is resolved to a [`FuncKey`] and a reverse-edge
|
||||
/// BFS walks predecessors until an ancestor satisfies
|
||||
/// [`is_entry_point`]. The spec's `entry_file` / `entry_name` are
|
||||
/// rewritten to that ancestor and `entry_kind` is classified from the
|
||||
/// ancestor's [`FuncSummary::entry_kind`] (HTTP variants → HttpRoute).
|
||||
/// The legacy rule-id `.http.` / `.cli.` substring fallback is still
|
||||
/// consulted when the callgraph walk finds nothing.
|
||||
pub fn derive_from_callgraph_entry_full(
|
||||
diag: &Diag,
|
||||
evidence: &crate::evidence::Evidence,
|
||||
summaries: Option<&GlobalSummaries>,
|
||||
callgraph: Option<&CallGraph>,
|
||||
) -> Option<HarnessSpec> {
|
||||
let lang = lang_from_path(&diag.path)?;
|
||||
let expected_cap = Cap::from_bits_truncate(evidence.sink_caps);
|
||||
|
|
@ -456,7 +539,38 @@ pub fn derive_from_callgraph_entry_with(
|
|||
return None;
|
||||
}
|
||||
|
||||
// Step 1: try summary-based classification.
|
||||
// Step 0: callgraph-aware reverse-edge walk to the nearest entry-point
|
||||
// ancestor. Only fires when both summaries *and* callgraph are present.
|
||||
if let (Some(s), Some(cg)) = (summaries, callgraph) {
|
||||
if let Some(found) = find_entry_via_callgraph(diag, evidence, s, cg, lang) {
|
||||
let entry_kind = found
|
||||
.summary
|
||||
.entry_kind
|
||||
.as_ref()
|
||||
.map(entry_kind_from_summary)
|
||||
.unwrap_or_else(|| name_to_entry_kind(&found.summary.name));
|
||||
let entry_file = if !found.summary.file_path.is_empty() {
|
||||
found.summary.file_path.clone()
|
||||
} else {
|
||||
diag.path.clone()
|
||||
};
|
||||
let mut spec = finalize_spec(
|
||||
diag,
|
||||
entry_file,
|
||||
found.summary.name.clone(),
|
||||
lang,
|
||||
expected_cap,
|
||||
diag.path.clone(),
|
||||
diag.line as u32,
|
||||
SpecDerivationStrategy::FromCallgraphEntry,
|
||||
);
|
||||
spec.entry_kind = entry_kind;
|
||||
spec.spec_hash = compute_spec_hash(&spec);
|
||||
return Some(spec);
|
||||
}
|
||||
}
|
||||
|
||||
// Step 1: try summary-based classification of the enclosing function.
|
||||
let summary_kind = enclosing_function_from_flow_steps(evidence)
|
||||
.and_then(|name| find_summary_by_path(summaries?, lang, &name, &diag.path))
|
||||
.and_then(|s| s.entry_kind.as_ref().map(entry_kind_from_summary));
|
||||
|
|
@ -491,6 +605,140 @@ pub fn derive_from_callgraph_entry_with(
|
|||
Some(spec)
|
||||
}
|
||||
|
||||
/// Recognise function-name-only entry points when no static
|
||||
/// [`crate::entry_points::EntryKind`] tag is available.
|
||||
///
|
||||
/// `main` / `fn main` / `__main__` (Python's `if __name__ == "__main__":`
|
||||
/// block-as-function convention) become [`EntryKind::CliSubcommand`];
|
||||
/// every other name defaults to [`EntryKind::Function`]. Used to give
|
||||
/// the verifier a non-`Function` entry kind for callgraph-discovered
|
||||
/// ancestors whose summaries pre-date the static entry-kind detector.
|
||||
fn name_to_entry_kind(name: &str) -> EntryKind {
|
||||
match name {
|
||||
"main" | "__main__" => EntryKind::CliSubcommand,
|
||||
_ => EntryKind::Function,
|
||||
}
|
||||
}
|
||||
|
||||
/// True when `func` qualifies as a static entry point: framework-bound
|
||||
/// route handler (`func.entry_kind.is_some()`), Rust / C-style program
|
||||
/// `main`, or Python `__main__` block-as-function.
|
||||
///
|
||||
/// `callgraph` is accepted as future-extension surface (e.g. checking
|
||||
/// in-degree == 0 to claim externally-driven CLI helpers) but the
|
||||
/// current implementation only uses it for the in-degree heuristic when
|
||||
/// the function name itself does not match a recognised pattern.
|
||||
pub fn is_entry_point(func: &FuncSummary, callgraph: &CallGraph) -> bool {
|
||||
if func.entry_kind.is_some() {
|
||||
return true;
|
||||
}
|
||||
if matches!(func.name.as_str(), "main" | "__main__") {
|
||||
return true;
|
||||
}
|
||||
// Last-resort: if the call graph has zero static callers for this
|
||||
// function and it is *not* a closure / lambda (which legitimately
|
||||
// have zero callers but are inlined at their use site), treat it as
|
||||
// externally driven. We only claim this when the function lives at
|
||||
// file top level (empty container) so we do not promote leaf helper
|
||||
// methods on classes to entry points.
|
||||
if !func.container.is_empty() {
|
||||
return false;
|
||||
}
|
||||
let lang = match Lang::from_slug(&func.lang) {
|
||||
Some(l) => l,
|
||||
None => return false,
|
||||
};
|
||||
let key = FuncKey {
|
||||
lang,
|
||||
namespace: func.file_path.clone(),
|
||||
container: func.container.clone(),
|
||||
name: func.name.clone(),
|
||||
arity: Some(func.param_count),
|
||||
disambig: func.disambig,
|
||||
kind: func.kind,
|
||||
};
|
||||
if let Some(&node) = callgraph.index.get(&key) {
|
||||
callgraph
|
||||
.graph
|
||||
.neighbors_directed(node, petgraph::Direction::Incoming)
|
||||
.next()
|
||||
.is_none()
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
/// Result of a successful callgraph-driven entry-point lookup.
|
||||
struct EntryHit<'a> {
|
||||
#[allow(dead_code)]
|
||||
key: FuncKey,
|
||||
summary: &'a FuncSummary,
|
||||
}
|
||||
|
||||
/// Walk reverse edges from the sink's enclosing function until an entry
|
||||
/// point is found.
|
||||
///
|
||||
/// Returns `None` when:
|
||||
/// * the sink's enclosing function cannot be resolved from
|
||||
/// `evidence.flow_steps`, or
|
||||
/// * the resolved function has no node in the callgraph (e.g. defined
|
||||
/// in a file pass 1 did not summarise), or
|
||||
/// * no ancestor satisfies [`is_entry_point`] within the BFS frontier.
|
||||
fn find_entry_via_callgraph<'a>(
|
||||
diag: &Diag,
|
||||
evidence: &crate::evidence::Evidence,
|
||||
summaries: &'a GlobalSummaries,
|
||||
callgraph: &CallGraph,
|
||||
lang: Lang,
|
||||
) -> Option<EntryHit<'a>> {
|
||||
let enclosing = enclosing_function_from_flow_steps(evidence)
|
||||
.or_else(|| resolve_enclosing_function(diag, evidence, Some(summaries), lang))?;
|
||||
// Locate the FuncKey by matching name + file_path against the summaries.
|
||||
let (sink_key, sink_summary) = summaries
|
||||
.iter()
|
||||
.find(|(k, s)| {
|
||||
k.lang == lang && s.name == enclosing && paths_match(&s.file_path, &diag.path)
|
||||
})
|
||||
.map(|(k, s)| (k.clone(), s))?;
|
||||
// Sink's own enclosing function may itself be an entry (route
|
||||
// handler that contains the sink directly). When that is the case
|
||||
// the existing summary-classification path already returns the
|
||||
// right answer, but seeding the BFS with it keeps the two paths
|
||||
// consistent.
|
||||
let start = *callgraph.index.get(&sink_key)?;
|
||||
if is_entry_point(sink_summary, callgraph) {
|
||||
return Some(EntryHit {
|
||||
key: sink_key,
|
||||
summary: sink_summary,
|
||||
});
|
||||
}
|
||||
let mut visited: HashSet<petgraph::graph::NodeIndex> = HashSet::new();
|
||||
visited.insert(start);
|
||||
let mut queue: VecDeque<petgraph::graph::NodeIndex> = VecDeque::new();
|
||||
queue.push_back(start);
|
||||
while let Some(node) = queue.pop_front() {
|
||||
for caller_node in callgraph
|
||||
.graph
|
||||
.neighbors_directed(node, petgraph::Direction::Incoming)
|
||||
{
|
||||
if !visited.insert(caller_node) {
|
||||
continue;
|
||||
}
|
||||
let caller_key = &callgraph.graph[caller_node];
|
||||
if let Some(caller_summary) = summaries.get(caller_key) {
|
||||
if is_entry_point(caller_summary, callgraph) {
|
||||
return Some(EntryHit {
|
||||
key: caller_key.clone(),
|
||||
summary: caller_summary,
|
||||
});
|
||||
}
|
||||
}
|
||||
queue.push_back(caller_node);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Map a static-analysis [`crate::entry_points::EntryKind`] (route shape) onto
|
||||
/// the dynamic-side [`EntryKind`] taxonomy. Every current variant of the
|
||||
/// static enum describes an HTTP route handler — no CLI / library-API
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@
|
|||
//! The CLI subcommand and any library consumer call [`verify_finding`].
|
||||
//! It is the only function the rest of the crate needs to know about.
|
||||
|
||||
use crate::callgraph::CallGraph;
|
||||
use crate::commands::scan::Diag;
|
||||
use crate::dynamic::corpus::{payloads_for, CORPUS_VERSION};
|
||||
use crate::dynamic::report::{AttemptSummary, VerifyResult, VerifyStatus};
|
||||
|
|
@ -41,6 +42,14 @@ pub struct VerifyOptions {
|
|||
/// `None` disables the summary-driven derivation paths; strategy 3 is a
|
||||
/// no-op and strategy 4 falls back to the rule-id substring heuristic.
|
||||
pub summaries: Option<Arc<GlobalSummaries>>,
|
||||
/// Whole-program [`CallGraph`] threaded into the callgraph-aware
|
||||
/// branch of strategy 4 ([`SpecDerivationStrategy::FromCallgraphEntry`]).
|
||||
///
|
||||
/// When present alongside [`Self::summaries`], the verifier walks
|
||||
/// reverse edges from the sink's enclosing function to the nearest
|
||||
/// entry-point ancestor (route handler, CLI subcommand, `main`).
|
||||
/// `None` keeps strategy 4 on the legacy rule-id substring path.
|
||||
pub callgraph: Option<Arc<CallGraph>>,
|
||||
}
|
||||
|
||||
impl VerifyOptions {
|
||||
|
|
@ -61,6 +70,7 @@ impl VerifyOptions {
|
|||
db_path: None,
|
||||
verify_all_confidence: config.scanner.verify_all_confidence,
|
||||
summaries: None,
|
||||
callgraph: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -322,10 +332,11 @@ fn derivation_failure_hint(diag: &Diag) -> String {
|
|||
pub fn verify_finding(diag: &Diag, opts: &VerifyOptions) -> VerifyResult {
|
||||
let finding_id = format!("{:016x}", diag.stable_hash);
|
||||
|
||||
let spec = match HarnessSpec::from_finding_with_summaries(
|
||||
let spec = match HarnessSpec::from_finding_full(
|
||||
diag,
|
||||
opts.verify_all_confidence,
|
||||
opts.summaries.as_deref(),
|
||||
opts.callgraph.as_deref(),
|
||||
) {
|
||||
Ok(s) => s,
|
||||
Err(reason) => {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue