mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-24 20:28:06 +02:00
refactor(server, scan): introduce target management with active target switching, enhance DB pool handling, and integrate target-aware task routes for improved modularity
This commit is contained in:
parent
acdc71cd88
commit
635b213825
40 changed files with 1810 additions and 240 deletions
|
|
@ -108,14 +108,7 @@ impl BuildPool for GoPool {
|
|||
}
|
||||
|
||||
let output = base_command(&self.go_bin)
|
||||
.args([
|
||||
"build",
|
||||
"-trimpath",
|
||||
"-buildvcs=false",
|
||||
"-o",
|
||||
&dest,
|
||||
".",
|
||||
])
|
||||
.args(["build", "-trimpath", "-buildvcs=false", "-o", &dest, "."])
|
||||
.current_dir(workdir)
|
||||
.env("GOCACHE", &go_cache)
|
||||
.env("GOPATH", &go_path)
|
||||
|
|
|
|||
|
|
@ -84,8 +84,7 @@ impl BuildPool for RustPool {
|
|||
.current_dir(workdir)
|
||||
.env(
|
||||
"CARGO_HOME",
|
||||
std::env::var("CARGO_HOME")
|
||||
.unwrap_or_else(|_| default_cargo_home()),
|
||||
std::env::var("CARGO_HOME").unwrap_or_else(|_| default_cargo_home()),
|
||||
)
|
||||
.env(
|
||||
"RUSTUP_HOME",
|
||||
|
|
|
|||
|
|
@ -640,7 +640,10 @@ mod tests {
|
|||
let dst = tmp.path().join("clone");
|
||||
copy_workdir(&src, &dst).unwrap();
|
||||
assert_eq!(fs::read(dst.join("top.txt")).unwrap(), b"top");
|
||||
assert_eq!(fs::read(dst.join("nested").join("deep.txt")).unwrap(), b"deep");
|
||||
assert_eq!(
|
||||
fs::read(dst.join("nested").join("deep.txt")).unwrap(),
|
||||
b"deep"
|
||||
);
|
||||
}
|
||||
|
||||
#[cfg(unix)]
|
||||
|
|
@ -655,7 +658,10 @@ mod tests {
|
|||
copy_workdir(&src, &dst).unwrap();
|
||||
let link = dst.join("link.txt");
|
||||
assert!(
|
||||
fs::symlink_metadata(&link).unwrap().file_type().is_symlink(),
|
||||
fs::symlink_metadata(&link)
|
||||
.unwrap()
|
||||
.file_type()
|
||||
.is_symlink(),
|
||||
"internal symlink must be preserved, not dereferenced"
|
||||
);
|
||||
assert_eq!(fs::read(&link).unwrap(), b"real");
|
||||
|
|
|
|||
|
|
@ -827,8 +827,7 @@ impl WorkerPool {
|
|||
.collect();
|
||||
}
|
||||
|
||||
let results: Vec<Mutex<Option<O>>> =
|
||||
(0..items.len()).map(|_| Mutex::new(None)).collect();
|
||||
let results: Vec<Mutex<Option<O>>> = (0..items.len()).map(|_| Mutex::new(None)).collect();
|
||||
|
||||
std::thread::scope(|scope| {
|
||||
let results = &results;
|
||||
|
|
|
|||
|
|
@ -147,10 +147,10 @@ fn bind_mount_ro(src: &Path, dst: &Path) -> io::Result<()> {
|
|||
const MS_REC: u64 = 0x4000;
|
||||
|
||||
fs::create_dir_all(dst)?;
|
||||
let csrc =
|
||||
CString::new(src.as_os_str().as_bytes()).map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?;
|
||||
let cdst =
|
||||
CString::new(dst.as_os_str().as_bytes()).map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?;
|
||||
let csrc = CString::new(src.as_os_str().as_bytes())
|
||||
.map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?;
|
||||
let cdst = CString::new(dst.as_os_str().as_bytes())
|
||||
.map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?;
|
||||
|
||||
let bind = unsafe {
|
||||
mount(
|
||||
|
|
@ -240,7 +240,11 @@ mod tests {
|
|||
// Snapshot it into a fresh per-finding workdir.
|
||||
let workdir = tempfile::TempDir::new().unwrap();
|
||||
baseline.snapshot_into(workdir.path()).unwrap();
|
||||
let cloned = workdir.path().join("node_modules").join("left-pad").join("index.js");
|
||||
let cloned = workdir
|
||||
.path()
|
||||
.join("node_modules")
|
||||
.join("left-pad")
|
||||
.join("index.js");
|
||||
assert!(cloned.exists(), "snapshot must materialise node_modules");
|
||||
assert_eq!(fs::read(&cloned).unwrap(), b"module.exports = 1;\n");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -200,6 +200,84 @@ fn default_derivation_strategy() -> SpecDerivationStrategy {
|
|||
SpecDerivationStrategy::FromFlowSteps
|
||||
}
|
||||
|
||||
/// Phase 25 (Track K.0) — the optional cross-file context consulted by the
|
||||
/// multi-strategy scoring derivation.
|
||||
///
|
||||
/// Bundles the three inputs every scored strategy and the cross-file source
|
||||
/// seeding read, so the public [`HarnessSpec::derive_best`] /
|
||||
/// [`HarnessSpec::derive_all_strategies`] surface takes one borrowable
|
||||
/// context rather than three positional `Option`s. Cheap to copy (two
|
||||
/// references + a bool).
|
||||
#[derive(Clone, Copy)]
|
||||
pub struct SpecDerivationCtx<'a> {
|
||||
/// When true, skip the `Confidence >= Medium` gate so low-confidence
|
||||
/// findings are still attempted.
|
||||
pub verify_all_confidence: bool,
|
||||
/// Cross-file function summaries (`FuncSummary` + `SsaFuncSummary`),
|
||||
/// shared by every finding in a scan.
|
||||
pub summaries: Option<&'a GlobalSummaries>,
|
||||
/// Whole-program call graph used for reverse-edge entry resolution and
|
||||
/// cross-file source seeding.
|
||||
pub callgraph: Option<&'a CallGraph>,
|
||||
}
|
||||
|
||||
impl<'a> SpecDerivationCtx<'a> {
|
||||
/// Construct a context from the three positional inputs the legacy
|
||||
/// `from_finding_*` constructors take.
|
||||
pub fn new(
|
||||
verify_all_confidence: bool,
|
||||
summaries: Option<&'a GlobalSummaries>,
|
||||
callgraph: Option<&'a CallGraph>,
|
||||
) -> Self {
|
||||
Self {
|
||||
verify_all_confidence,
|
||||
summaries,
|
||||
callgraph,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Phase 25 (Track K.0) — one scored derivation candidate.
|
||||
///
|
||||
/// Produced by [`HarnessSpec::derive_all_strategies`]; carries both the
|
||||
/// built [`HarnessSpec`] and the [`SpecDerivationStrategy`] that produced
|
||||
/// it. The strategy tag is retained alongside `spec.derivation` (which
|
||||
/// holds the same value) so the loser-ranking telemetry can report the tag
|
||||
/// without unwrapping the spec.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct SpecCandidate {
|
||||
/// The derived harness recipe.
|
||||
pub spec: HarnessSpec,
|
||||
/// Which strategy produced [`Self::spec`].
|
||||
pub strategy: SpecDerivationStrategy,
|
||||
}
|
||||
|
||||
/// Phase 25 (Track K.0) — lexicographic score for a candidate spec.
///
/// Field declaration order *is* the comparison priority: the derived
/// [`Ord`] compares `flow_depth` first, then `framework_bound`, then
/// `cross_file_resolved`, then `payloads_available`. Do not reorder the
/// fields without intending to change the ranking.
///
/// Higher is better, so [`HarnessSpec::derive_best`] picks the candidate
/// whose score is the maximum. `bool` orders `false < true`, so a
/// framework-bound / cross-file-resolved / payload-backed candidate
/// outscores one that is not, all else equal.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct SpecScore {
    /// Flow-step depth the spec covers: `evidence.flow_steps.len()` plus a
    /// hop when the entry was rewritten to an ancestor function (the
    /// callgraph-walk strategies cover more of the call chain than the
    /// helper that physically contains the sink).
    pub flow_depth: u32,
    /// A [`FrameworkBinding`] was attached to the spec.
    pub framework_bound: bool,
    /// The spec's entry resolves to a different file than the sink — the
    /// source was recovered across a file boundary.
    pub cross_file_resolved: bool,
    /// The `(expected_cap, lang)` pair has at least one curated payload, so
    /// the verifier has something to fire.
    pub payloads_available: bool,
}
|
||||
|
||||
impl HarnessSpec {
|
||||
/// Build a spec from a finding. Returns `Err` with a typed reason when
|
||||
/// the finding cannot be driven dynamically.
|
||||
|
|
@ -291,51 +369,15 @@ impl HarnessSpec {
|
|||
summaries: Option<&GlobalSummaries>,
|
||||
callgraph: Option<&CallGraph>,
|
||||
) -> Result<Self, UnsupportedReason> {
|
||||
if !verify_all_confidence {
|
||||
match diag.confidence {
|
||||
Some(c) if c >= Confidence::Medium => {}
|
||||
_ => return Err(UnsupportedReason::ConfidenceTooLow),
|
||||
}
|
||||
}
|
||||
|
||||
let evidence = diag
|
||||
.evidence
|
||||
.as_ref()
|
||||
.ok_or(UnsupportedReason::NoFlowSteps)?;
|
||||
|
||||
// Phase 04 pre-step: when both callgraph *and* summaries are
|
||||
// present, walk reverse edges to a framework-bound ancestor.
|
||||
// Takes precedence over the four-strategy ladder because a route
|
||||
// handler / CLI entry is always a stronger driving anchor than
|
||||
// the helper function that physically contains the sink.
|
||||
//
|
||||
// Strict variant: only the reverse-edge BFS (`find_entry_via_callgraph`)
|
||||
// counts here. The summary-entry-kind + rule-id substring fallbacks
|
||||
// that live in `derive_from_callgraph_entry_full` stay at strategy-4
|
||||
// priority — calling them here would short-circuit the more precise
|
||||
// strategies (FromFlowSteps / FromRuleNamespace / FromFuncSummaryAuto)
|
||||
// whenever the rule id happens to contain `.http.` / `.cli.`.
|
||||
if let (Some(s), Some(cg)) = (summaries, callgraph)
|
||||
&& let Some(spec) = derive_from_callgraph_walk_only(diag, evidence, s, cg)
|
||||
{
|
||||
return Ok(spec);
|
||||
}
|
||||
|
||||
// Try each strategy in priority order; first non-None wins.
|
||||
if let Some(spec) = derive_from_flow_steps(diag, evidence, summaries) {
|
||||
return Ok(spec);
|
||||
}
|
||||
if let Some(spec) = derive_from_rule_namespace_with(diag, evidence, summaries) {
|
||||
return Ok(spec);
|
||||
}
|
||||
if let Some(spec) = derive_from_func_summary_auto(diag, evidence, summaries) {
|
||||
return Ok(spec);
|
||||
}
|
||||
if let Some(spec) = derive_from_callgraph_entry_full(diag, evidence, summaries, callgraph) {
|
||||
return Ok(spec);
|
||||
}
|
||||
|
||||
Err(UnsupportedReason::SpecDerivationFailed)
|
||||
// Phase 25 (Track K.0): the legacy sequential first-match ladder is
|
||||
// now a thin wrapper over the multi-strategy scoring path. Every
|
||||
// strategy this method used to try in priority order is still run by
|
||||
// `derive_all_strategies`; `derive_best` scores them and the
|
||||
// ascending-precedence ordering reproduces the old tie-break
|
||||
// (strict callgraph walk > flow_steps > rule_namespace >
|
||||
// func_summary > callgraph fallback) when scores are equal.
|
||||
let ctx = SpecDerivationCtx::new(verify_all_confidence, summaries, callgraph);
|
||||
Self::derive_best(diag, &ctx)
|
||||
}
|
||||
|
||||
/// Convenience wrapper around [`HarnessSpec::from_finding_full`] that
|
||||
|
|
@ -388,6 +430,133 @@ impl HarnessSpec {
|
|||
SpecDerivationStrategy::FromCallgraphEntry,
|
||||
]
|
||||
}
|
||||
|
||||
/// Phase 25 (Track K.0) — run *every* derivation strategy and score each
|
||||
/// resulting candidate.
|
||||
///
|
||||
/// Unlike the legacy sequential first-match ladder, this evaluates all
|
||||
/// strategies that fire for the finding and returns each as a
|
||||
/// `(SpecCandidate, SpecScore)` pair. The caller
|
||||
/// ([`Self::derive_best_ranked`]) picks the maximum-scoring candidate.
|
||||
///
|
||||
/// Candidates are returned in *ascending precedence* order (lowest-priority
|
||||
/// strategy first). This is load-bearing: [`SpecScore`] is intentionally
|
||||
/// coarse and genuine ties are common (e.g. two strategies that both name
|
||||
/// the sink's own enclosing function as the entry). When scores tie, the
|
||||
/// winner-selection in [`Self::derive_best_ranked`] keeps the *last*
|
||||
/// maximal element, so ascending precedence here reproduces the legacy
|
||||
/// ladder's tie-break (flow-steps beats rule-namespace beats
|
||||
/// func-summary, and the strict callgraph walk beats every other
|
||||
/// strategy) without baking strategy rank into the score itself.
|
||||
///
|
||||
/// Returns an empty `Vec` when the finding carries no evidence or no
|
||||
/// strategy fires.
|
||||
pub fn derive_all_strategies(
|
||||
diag: &Diag,
|
||||
ctx: &SpecDerivationCtx,
|
||||
) -> Vec<(SpecCandidate, SpecScore)> {
|
||||
let Some(evidence) = diag.evidence.as_ref() else {
|
||||
return Vec::new();
|
||||
};
|
||||
let summaries = ctx.summaries;
|
||||
let callgraph = ctx.callgraph;
|
||||
|
||||
// Build raw candidates in ascending precedence (lowest first). The
|
||||
// two callgraph entries mirror the legacy two call sites: the
|
||||
// `*_full` variant carries the low-precedence summary-kind / rule-id
|
||||
// fallback, the `*_walk_only` and cross-file-seed variants are the
|
||||
// high-precedence reverse-edge walks.
|
||||
let mut raw: Vec<(HarnessSpec, SpecDerivationStrategy)> = Vec::new();
|
||||
if let Some(spec) = derive_from_callgraph_entry_full(diag, evidence, summaries, callgraph) {
|
||||
raw.push((spec, SpecDerivationStrategy::FromCallgraphEntry));
|
||||
}
|
||||
if let Some(spec) = derive_from_func_summary_auto(diag, evidence, summaries) {
|
||||
raw.push((spec, SpecDerivationStrategy::FromFuncSummaryWalk));
|
||||
}
|
||||
if let Some(spec) = derive_from_rule_namespace_with(diag, evidence, summaries) {
|
||||
raw.push((spec, SpecDerivationStrategy::FromRuleNamespace));
|
||||
}
|
||||
if let Some(spec) = derive_from_flow_steps(diag, evidence, summaries) {
|
||||
raw.push((spec, SpecDerivationStrategy::FromFlowSteps));
|
||||
}
|
||||
if let (Some(s), Some(cg)) = (summaries, callgraph) {
|
||||
if let Some(spec) = derive_from_callgraph_walk_only(diag, evidence, s, cg) {
|
||||
raw.push((spec, SpecDerivationStrategy::FromCallgraphEntry));
|
||||
}
|
||||
if let Some(spec) = derive_from_cross_file_seed(diag, evidence, s, cg) {
|
||||
raw.push((spec, SpecDerivationStrategy::FromCallgraphEntry));
|
||||
}
|
||||
}
|
||||
|
||||
let sink_file = sink_file_of(diag, evidence);
|
||||
raw.into_iter()
|
||||
.map(|(spec, strategy)| {
|
||||
let score = score_candidate(&spec, evidence, &sink_file);
|
||||
(SpecCandidate { spec, strategy }, score)
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Phase 25 (Track K.0) — derive the single best spec for a finding.
|
||||
///
|
||||
/// Runs [`Self::derive_all_strategies`] and returns the maximum-scoring
|
||||
/// candidate's spec. The error contract matches the legacy
|
||||
/// [`Self::from_finding_full`]:
|
||||
/// - `Err(UnsupportedReason::ConfidenceTooLow)` when the confidence gate
|
||||
/// fails (and `ctx.verify_all_confidence` is false),
|
||||
/// - `Err(UnsupportedReason::NoFlowSteps)` when the finding carries no
|
||||
/// `Evidence` at all,
|
||||
/// - `Err(UnsupportedReason::SpecDerivationFailed)` when evidence is
|
||||
/// present but no strategy fired.
|
||||
pub fn derive_best(diag: &Diag, ctx: &SpecDerivationCtx) -> Result<Self, UnsupportedReason> {
|
||||
Self::derive_best_ranked(diag, ctx).map(|(spec, _runners_up)| spec)
|
||||
}
|
||||
|
||||
/// Phase 25 (Track K.0) — like [`Self::derive_best`] but also returns the
|
||||
/// loser ranking for telemetry.
|
||||
///
|
||||
/// The second tuple element lists every non-winning candidate's
|
||||
/// `(strategy, score)` in descending score order, so the verifier can
|
||||
/// emit a [`crate::dynamic::trace::TraceStage::SpecScoringResult`] event
|
||||
/// that makes engine gaps visible (which strategies fired, how they
|
||||
/// scored, and which one lost the tie-break).
|
||||
pub fn derive_best_ranked(
|
||||
diag: &Diag,
|
||||
ctx: &SpecDerivationCtx,
|
||||
) -> Result<(Self, Vec<(SpecDerivationStrategy, SpecScore)>), UnsupportedReason> {
|
||||
if !ctx.verify_all_confidence {
|
||||
match diag.confidence {
|
||||
Some(c) if c >= Confidence::Medium => {}
|
||||
_ => return Err(UnsupportedReason::ConfidenceTooLow),
|
||||
}
|
||||
}
|
||||
// Distinguish "no evidence at all" (NoFlowSteps) from "evidence
|
||||
// present but no strategy fired" (SpecDerivationFailed) — the
|
||||
// verifier lifts only the latter to `Inconclusive`.
|
||||
if diag.evidence.is_none() {
|
||||
return Err(UnsupportedReason::NoFlowSteps);
|
||||
}
|
||||
|
||||
let mut scored = Self::derive_all_strategies(diag, ctx);
|
||||
if scored.is_empty() {
|
||||
return Err(UnsupportedReason::SpecDerivationFailed);
|
||||
}
|
||||
|
||||
// Stable sort by score ascending. `derive_all_strategies` returns
|
||||
// candidates in ascending precedence, and a stable sort preserves
|
||||
// that order within equal scores — so the final element is the
|
||||
// highest-scoring candidate, and on a score tie it is the
|
||||
// highest-precedence one (legacy ladder tie-break).
|
||||
scored.sort_by(|a, b| a.1.cmp(&b.1));
|
||||
let (winner, _winner_score) = scored.pop().expect("non-empty checked above");
|
||||
let mut runners_up: Vec<(SpecDerivationStrategy, SpecScore)> = scored
|
||||
.into_iter()
|
||||
.map(|(cand, score)| (cand.strategy, score))
|
||||
.collect();
|
||||
// Report losers best-first.
|
||||
runners_up.reverse();
|
||||
Ok((winner.spec, runners_up))
|
||||
}
|
||||
}
|
||||
|
||||
// ── Strategy 1: from flow_steps (original path) ──────────────────────────────
|
||||
|
|
@ -962,6 +1131,201 @@ fn entry_kind_from_summary(_kind: &crate::entry_points::EntryKind) -> EntryKind
|
|||
EntryKind::HttpRoute
|
||||
}
|
||||
|
||||
// ── Phase 25 (Track K.0): multi-strategy scoring + cross-file seeding ────────
|
||||
|
||||
/// Maximum reverse-edge hops the cross-file source seeding walks before
/// giving up. Bounds the BFS so a deep call chain cannot stall derivation;
/// the [`crate::dynamic::spec`] Phase 25 spec fixes this at 5.
const CROSS_FILE_SEED_MAX_DEPTH: usize = 5;
|
||||
|
||||
/// The sink call-site's file: the last `Sink` flow step, falling back to the
|
||||
/// diag's own path. Used by [`score_candidate`] to decide whether a
|
||||
/// candidate's entry was resolved across a file boundary.
|
||||
fn sink_file_of(diag: &Diag, evidence: &crate::evidence::Evidence) -> String {
|
||||
evidence
|
||||
.flow_steps
|
||||
.iter()
|
||||
.rev()
|
||||
.find(|s| matches!(s.kind, FlowStepKind::Sink))
|
||||
.map(|s| s.file.clone())
|
||||
.unwrap_or_else(|| diag.path.clone())
|
||||
}
|
||||
|
||||
/// Flow-step depth a candidate covers.
|
||||
///
|
||||
/// Base is `evidence.flow_steps.len()`. A candidate whose entry was
|
||||
/// rewritten to a *different* function than the sink's enclosing function
|
||||
/// (i.e. one of the callgraph-walk strategies climbed the call chain to a
|
||||
/// route handler / source ancestor) earns a `+1` hop bonus, so it scores
|
||||
/// strictly above the strategies that merely name the sink's own enclosing
|
||||
/// helper as the entry. This is what lets a successful reverse-edge walk
|
||||
/// win the [`SpecScore`] comparison without baking strategy rank into the
|
||||
/// score.
|
||||
fn candidate_flow_depth(spec: &HarnessSpec, evidence: &crate::evidence::Evidence) -> u32 {
|
||||
let base = evidence.flow_steps.len() as u32;
|
||||
let hop = match enclosing_function_from_flow_steps(evidence) {
|
||||
Some(ref f) if !f.is_empty() && *f != spec.entry_name => 1,
|
||||
_ => 0,
|
||||
};
|
||||
base + hop
|
||||
}
|
||||
|
||||
/// True when the `(cap, lang)` pair has at least one curated payload to fire.
|
||||
///
|
||||
/// `expected_cap` may carry several bits; a direct multi-bit lookup misses
|
||||
/// (the corpus is keyed by single caps), so on a miss we test each set bit
|
||||
/// individually.
|
||||
fn candidate_has_payloads(cap: Cap, lang: Lang) -> bool {
|
||||
use crate::dynamic::corpus::registry::payloads_for_lang;
|
||||
if !payloads_for_lang(cap, lang).is_empty() {
|
||||
return true;
|
||||
}
|
||||
cap.iter()
|
||||
.any(|bit| !payloads_for_lang(bit, lang).is_empty())
|
||||
}
|
||||
|
||||
/// Score a single candidate spec on the four Phase 25 axes.
|
||||
fn score_candidate(
|
||||
spec: &HarnessSpec,
|
||||
evidence: &crate::evidence::Evidence,
|
||||
sink_file: &str,
|
||||
) -> SpecScore {
|
||||
SpecScore {
|
||||
flow_depth: candidate_flow_depth(spec, evidence),
|
||||
framework_bound: spec.framework.is_some(),
|
||||
cross_file_resolved: !sink_file.is_empty()
|
||||
&& !spec.entry_file.is_empty()
|
||||
&& spec.entry_file != sink_file,
|
||||
payloads_available: candidate_has_payloads(spec.expected_cap, spec.lang),
|
||||
}
|
||||
}
|
||||
|
||||
/// Phase 25 (Track K.0) deliverable 4 — cross-file source seeding.
|
||||
///
|
||||
/// Walks reverse call-graph edges from the sink's enclosing function,
|
||||
/// consulting [`GlobalSummaries::get_ssa`] (the `ssa_by_key` index) at each
|
||||
/// ancestor, until it finds either:
|
||||
/// * a **Source** — an ancestor whose [`crate::summary::ssa_summary::SsaFuncSummary::source_caps`]
|
||||
/// is non-empty, i.e. it introduces externally-controlled input, or
|
||||
/// * a **framework binding** — an ancestor that satisfies [`is_entry_point`].
|
||||
///
|
||||
/// Bounded at [`CROSS_FILE_SEED_MAX_DEPTH`] reverse hops. Unlike
|
||||
/// [`find_entry_via_callgraph`], which stops only at framework entry points,
|
||||
/// this also stops at SSA-confirmed sources — so it recovers a drivable
|
||||
/// entry for findings whose taint originates in a cross-file helper that
|
||||
/// reads input but is not itself a route handler. That additional reach is
|
||||
/// the lever Phase 25 pulls to cut the `Inconclusive(SpecDerivationFailed)`
|
||||
/// rate.
|
||||
fn seed_cross_file_source<'a>(
|
||||
diag: &Diag,
|
||||
evidence: &crate::evidence::Evidence,
|
||||
summaries: &'a GlobalSummaries,
|
||||
callgraph: &CallGraph,
|
||||
lang: Lang,
|
||||
) -> Option<EntryHit<'a>> {
|
||||
let enclosing = enclosing_function_from_flow_steps(evidence)
|
||||
.or_else(|| resolve_enclosing_function(diag, evidence, Some(summaries), lang))?;
|
||||
let sink_key = summaries
|
||||
.iter()
|
||||
.find(|(k, s)| {
|
||||
k.lang == lang && s.name == enclosing && paths_match(&s.file_path, &diag.path)
|
||||
})
|
||||
.map(|(k, _)| k.clone())?;
|
||||
let start = *callgraph.index.get(&sink_key)?;
|
||||
|
||||
let mut visited: HashSet<petgraph::graph::NodeIndex> = HashSet::new();
|
||||
visited.insert(start);
|
||||
let mut frontier: Vec<petgraph::graph::NodeIndex> = vec![start];
|
||||
for _ in 0..CROSS_FILE_SEED_MAX_DEPTH {
|
||||
let mut next: Vec<petgraph::graph::NodeIndex> = Vec::new();
|
||||
for node in frontier.drain(..) {
|
||||
for caller in callgraph
|
||||
.graph
|
||||
.neighbors_directed(node, petgraph::Direction::Incoming)
|
||||
{
|
||||
if !visited.insert(caller) {
|
||||
continue;
|
||||
}
|
||||
let caller_key = &callgraph.graph[caller];
|
||||
let summary = summaries.get(caller_key);
|
||||
let is_source = summaries
|
||||
.get_ssa(caller_key)
|
||||
.is_some_and(|ssa| !ssa.source_caps.is_empty());
|
||||
let is_framework = summary.is_some_and(|s| is_entry_point(s, callgraph));
|
||||
if (is_source || is_framework)
|
||||
&& let Some(s) = summary
|
||||
{
|
||||
return Some(EntryHit {
|
||||
key: caller_key.clone(),
|
||||
summary: s,
|
||||
});
|
||||
}
|
||||
next.push(caller);
|
||||
}
|
||||
}
|
||||
frontier = next;
|
||||
if frontier.is_empty() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Strategy candidate built from [`seed_cross_file_source`].
|
||||
///
|
||||
/// Rewrites the spec's entry to the cross-file Source / framework ancestor
|
||||
/// the seed walk resolved, classifying its [`EntryKind`] from the ancestor's
|
||||
/// summary (HTTP-shaped static entry kinds → [`EntryKind::HttpRoute`], else
|
||||
/// name-based). Tagged [`SpecDerivationStrategy::FromCallgraphEntry`] — it
|
||||
/// is a reverse-edge call-graph walk, like the other two callgraph
|
||||
/// candidates — and emitted at the highest precedence in
|
||||
/// [`HarnessSpec::derive_all_strategies`].
|
||||
fn derive_from_cross_file_seed(
|
||||
diag: &Diag,
|
||||
evidence: &crate::evidence::Evidence,
|
||||
summaries: &GlobalSummaries,
|
||||
callgraph: &CallGraph,
|
||||
) -> Option<HarnessSpec> {
|
||||
let lang = lang_from_path(&diag.path)?;
|
||||
let expected_cap = Cap::from_bits_truncate(evidence.sink_caps);
|
||||
if expected_cap.is_empty() {
|
||||
return None;
|
||||
}
|
||||
let found = seed_cross_file_source(diag, evidence, summaries, callgraph, lang)?;
|
||||
let entry_kind = found
|
||||
.summary
|
||||
.entry_kind
|
||||
.as_ref()
|
||||
.map(entry_kind_from_summary)
|
||||
.unwrap_or_else(|| name_to_entry_kind(&found.summary.name));
|
||||
let entry_file = if !found.summary.file_path.is_empty() {
|
||||
found.summary.file_path.clone()
|
||||
} else {
|
||||
diag.path.clone()
|
||||
};
|
||||
let (sink_file, sink_line) = evidence
|
||||
.flow_steps
|
||||
.iter()
|
||||
.rev()
|
||||
.find(|s| matches!(s.kind, FlowStepKind::Sink))
|
||||
.map(|s| (s.file.clone(), s.line))
|
||||
.unwrap_or_else(|| (diag.path.clone(), diag.line as u32));
|
||||
let mut spec = finalize_spec(
|
||||
diag,
|
||||
entry_file,
|
||||
found.summary.name.clone(),
|
||||
lang,
|
||||
expected_cap,
|
||||
sink_file,
|
||||
sink_line,
|
||||
SpecDerivationStrategy::FromCallgraphEntry,
|
||||
Some(summaries),
|
||||
);
|
||||
spec.entry_kind = entry_kind;
|
||||
spec.spec_hash = compute_spec_hash(&spec);
|
||||
Some(spec)
|
||||
}
|
||||
|
||||
// ── Helpers ──────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Resolve the language for a finding path using extension first, then a
|
||||
|
|
@ -2573,4 +2937,250 @@ mod tests {
|
|||
assert_eq!(spec.spec_hash, pre_hash);
|
||||
assert!(spec.framework.is_some());
|
||||
}
|
||||
|
||||
// ── Phase 25 (Track K.0): multi-strategy scoring + cross-file seeding ────
|
||||
|
||||
/// Pins the derived `Ord` on `SpecScore`: each field only matters when all
/// fields declared before it are equal.
#[test]
fn spec_score_orders_lexicographically() {
    // `flow_depth` dominates every lower-priority axis.
    let deep = SpecScore {
        flow_depth: 3,
        framework_bound: false,
        cross_file_resolved: false,
        payloads_available: false,
    };
    let shallow_but_rich = SpecScore {
        flow_depth: 2,
        framework_bound: true,
        cross_file_resolved: true,
        payloads_available: true,
    };
    assert!(deep > shallow_but_rich);

    // Equal `flow_depth`: `framework_bound` breaks the tie.
    let fw = SpecScore {
        flow_depth: 2,
        framework_bound: true,
        cross_file_resolved: false,
        payloads_available: false,
    };
    let no_fw = SpecScore {
        flow_depth: 2,
        framework_bound: false,
        cross_file_resolved: true,
        payloads_available: true,
    };
    assert!(fw > no_fw);

    // Equal `flow_depth` + `framework_bound`: `cross_file_resolved` wins.
    let xfile = SpecScore {
        flow_depth: 1,
        framework_bound: false,
        cross_file_resolved: true,
        payloads_available: false,
    };
    let no_xfile = SpecScore {
        flow_depth: 1,
        framework_bound: false,
        cross_file_resolved: false,
        payloads_available: true,
    };
    assert!(xfile > no_xfile);

    // Only `payloads_available` differs: it is the final tie-breaker.
    let with_payloads = SpecScore {
        flow_depth: 1,
        framework_bound: false,
        cross_file_resolved: false,
        payloads_available: true,
    };
    let without = SpecScore {
        flow_depth: 1,
        framework_bound: false,
        cross_file_resolved: false,
        payloads_available: false,
    };
    assert!(with_payloads > without);
}
|
||||
|
||||
/// A finding with no `Evidence` yields no candidates at all.
#[test]
fn derive_all_strategies_empty_without_evidence() {
    // No `Evidence` struct at all → no strategy has anything to derive
    // from, so the candidate set is empty (and `derive_best_ranked`
    // lifts this to `NoFlowSteps`, exercised separately).
    let diag = crate::commands::scan::Diag {
        confidence: Some(Confidence::High),
        evidence: None,
        ..Default::default()
    };
    let ctx = SpecDerivationCtx::new(false, None, None);
    assert!(HarnessSpec::derive_all_strategies(&diag, &ctx).is_empty());
}
|
||||
|
||||
/// When two strategies tie on score, the higher-precedence one wins and the
/// other is reported in the runner-up ranking.
#[test]
fn derive_best_ranked_reports_runner_up_strategies() {
    use crate::labels::Cap;
    // A finding both the flow-steps and rule-namespace strategies can
    // drive: identical entry → identical score → flow_steps wins the
    // precedence tie-break, and rule_namespace is reported as a loser.
    let evidence = Evidence {
        flow_steps: vec![
            source_step("src/handler.py", "handle_request"),
            sink_step("src/handler.py"),
        ],
        sink_caps: Cap::SHELL_ESCAPE.bits(),
        ..Default::default()
    };
    let diag = crate::commands::scan::Diag {
        id: "py.cmdi.os_system".into(),
        confidence: Some(Confidence::High),
        evidence: Some(evidence),
        path: "src/handler.py".into(),
        ..Default::default()
    };
    let ctx = SpecDerivationCtx::new(false, None, None);
    let (spec, runners_up) = HarnessSpec::derive_best_ranked(&diag, &ctx).unwrap();
    assert_eq!(spec.derivation, SpecDerivationStrategy::FromFlowSteps);
    assert!(
        runners_up
            .iter()
            .any(|(s, _)| *s == SpecDerivationStrategy::FromRuleNamespace),
        "rule-namespace strategy must appear in the runner-up ranking, got {runners_up:?}",
    );
}
|
||||
|
||||
/// The reverse walk stops at an ancestor marked as a Source by its SSA
/// summary, even though that ancestor is not a framework entry point.
#[test]
fn seed_cross_file_source_stops_at_cross_file_source() {
    use crate::labels::Cap;
    use crate::summary::CalleeSite;
    use crate::summary::ssa_summary::SsaFuncSummary;
    use crate::symbol::FuncKey;

    let mut gs = GlobalSummaries::new();

    // Sink helper in db.rs — contains the dangerous call, no callees.
    let run_query = build_summary(
        "run_query",
        "src/db.rs",
        "rust",
        Cap::SHELL_ESCAPE.bits(),
        vec![0],
        None,
    );
    let run_query_key = FuncKey::new_function(Lang::Rust, "src/db.rs", "run_query", Some(1));
    gs.insert(run_query_key, run_query);

    // Source ancestor in input.rs — reads external input, calls run_query.
    let mut read_input = build_summary("read_input", "src/input.rs", "rust", 0, vec![], None);
    read_input.callees = vec![CalleeSite::bare("run_query")];
    let read_input_key =
        FuncKey::new_function(Lang::Rust, "src/input.rs", "read_input", Some(1));
    gs.insert(read_input_key.clone(), read_input);
    // SSA summary marks read_input a Source (non-empty source_caps) —
    // the signal `seed_cross_file_source` stops on.
    gs.insert_ssa(
        read_input_key,
        SsaFuncSummary {
            source_caps: Cap::SHELL_ESCAPE,
            ..Default::default()
        },
    );

    // A caller of read_input gives it in-degree 1, so the
    // `is_entry_point` zero-caller heuristic does NOT fire — proving the
    // walk stops because read_input is a SOURCE, not a framework entry.
    let mut dispatch = build_summary("dispatch", "src/main.rs", "rust", 0, vec![], None);
    dispatch.callees = vec![CalleeSite::bare("read_input")];
    let dispatch_key = FuncKey::new_function(Lang::Rust, "src/main.rs", "dispatch", Some(1));
    gs.insert(dispatch_key, dispatch);

    let cg = crate::callgraph::build_call_graph(&gs, &[]);

    let ev = Evidence {
        flow_steps: vec![sink_only_step_with_function("src/db.rs", "run_query")],
        sink_caps: Cap::SHELL_ESCAPE.bits(),
        ..Default::default()
    };
    let diag = crate::commands::scan::Diag {
        id: "rust.cmdi.command".into(),
        path: "src/db.rs".into(),
        line: 6,
        confidence: Some(Confidence::High),
        evidence: Some(ev.clone()),
        ..Default::default()
    };

    let hit = seed_cross_file_source(&diag, &ev, &gs, &cg, Lang::Rust)
        .expect("reverse walk must reach the cross-file source ancestor");
    assert_eq!(hit.summary.name, "read_input");
    assert_eq!(hit.summary.file_path, "src/input.rs");
    // read_input must not itself be a framework entry point — confirming
    // the stop was on the source condition.
    assert!(!is_entry_point(hit.summary, &cg));
}
|
||||
|
||||
/// The cross-file seed strategy must name the caller in `src/input.rs` as
/// the harness entry even though the finding itself is reported against
/// `run_query` in `src/db.rs`, and `derive_best` must select that same
/// candidate when given the summaries and call graph.
#[test]
fn derive_from_cross_file_seed_rewrites_entry_across_file_boundary() {
    use crate::labels::Cap;
    use crate::summary::{CalleeSite, ssa_summary::SsaFuncSummary};
    use crate::symbol::FuncKey;

    let mut summaries = GlobalSummaries::new();

    // `run_query` lives in src/db.rs and is the function the finding's flow
    // step points at. NOTE(review): the cap bits / `vec![0]` arguments are
    // presumably its sink caps and tainted parameter index — confirm against
    // `build_summary`.
    let sink_summary = build_summary(
        "run_query",
        "src/db.rs",
        "rust",
        Cap::SHELL_ESCAPE.bits(),
        vec![0],
        None,
    );
    let sink_key = FuncKey::new_function(Lang::Rust, "src/db.rs", "run_query", Some(1));
    summaries.insert(sink_key, sink_summary);

    // `read_input` lives in a different file and lists `run_query` as a callee.
    let mut caller_summary =
        build_summary("read_input", "src/input.rs", "rust", 0, vec![], None);
    caller_summary.callees = vec![CalleeSite::bare("run_query")];
    let caller_key = FuncKey::new_function(Lang::Rust, "src/input.rs", "read_input", Some(1));
    summaries.insert(caller_key.clone(), caller_summary);

    // Give `read_input` an SSA summary with non-empty `source_caps`.
    let caller_ssa = SsaFuncSummary {
        source_caps: Cap::SHELL_ESCAPE,
        ..Default::default()
    };
    summaries.insert_ssa(caller_key, caller_ssa);

    let call_graph = crate::callgraph::build_call_graph(&summaries, &[]);

    // Evidence carries a single sink-only flow step located in `run_query`.
    let evidence = Evidence {
        flow_steps: vec![sink_only_step_with_function("src/db.rs", "run_query")],
        sink_caps: Cap::SHELL_ESCAPE.bits(),
        ..Default::default()
    };
    let finding = crate::commands::scan::Diag {
        id: "rust.cmdi.command".into(),
        path: "src/db.rs".into(),
        line: 6,
        confidence: Some(Confidence::High),
        evidence: Some(evidence.clone()),
        ..Default::default()
    };

    // Direct call into the cross-file strategy: the entry must be rewritten
    // to the caller in the other file.
    let seeded = derive_from_cross_file_seed(&finding, &evidence, &summaries, &call_graph)
        .expect("cross-file seed must derive a spec");
    assert_eq!(seeded.entry_name, "read_input");
    assert_eq!(seeded.entry_file, "src/input.rs");
    assert_eq!(seeded.derivation, SpecDerivationStrategy::FromCallgraphEntry);

    // End-to-end through the scorer: the winning candidate must be the same
    // cross-file entry rather than one naming `run_query` itself.
    let derivation_ctx = SpecDerivationCtx::new(false, Some(&summaries), Some(&call_graph));
    let winner =
        HarnessSpec::derive_best(&finding, &derivation_ctx).expect("derive_best must succeed");
    assert_eq!(winner.entry_name, "read_input");
    assert_eq!(winner.derivation, SpecDerivationStrategy::FromCallgraphEntry);
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -60,6 +60,12 @@ pub enum TraceStage {
|
|||
/// trace consumer can audit how a mixed-cap batch fanned out across
|
||||
/// lanes without head-of-line blocking.
|
||||
WorkerLaneAssigned,
|
||||
/// Track K.0 (Phase 25) — the multi-strategy spec-derivation scoring
|
||||
/// picked a winning candidate. `detail` carries
|
||||
/// `winner=<strategy> runners_up=<strategy:flow_depth,…>` so a trace consumer can
|
||||
/// audit which strategies fired and which lost the score / tie-break,
|
||||
/// making engine derivation gaps visible without re-running.
|
||||
SpecScoringResult,
|
||||
}
|
||||
|
||||
impl TraceStage {
|
||||
|
|
@ -79,6 +85,7 @@ impl TraceStage {
|
|||
Self::OracleObserved => "oracle_observed",
|
||||
Self::Verdict => "verdict",
|
||||
Self::WorkerLaneAssigned => "worker_lane_assigned",
|
||||
Self::SpecScoringResult => "spec_scoring_result",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -246,5 +253,9 @@ mod tests {
|
|||
TraceStage::WorkerLaneAssigned.as_str(),
|
||||
"worker_lane_assigned"
|
||||
);
|
||||
assert_eq!(
|
||||
TraceStage::SpecScoringResult.as_str(),
|
||||
"spec_scoring_result"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -437,6 +437,31 @@ fn spec_derivation_failed_verdict(
|
|||
}
|
||||
}
|
||||
|
||||
/// Phase 25 (Track K.0): render the [`crate::dynamic::trace::TraceStage::SpecScoringResult`]
|
||||
/// detail string.
|
||||
///
|
||||
/// Deterministic and within the trace-detail budget: the winning strategy
|
||||
/// followed by the loser ranking in descending-score order, each tagged with
|
||||
/// its covered flow depth so a trace consumer sees *why* the winner won.
|
||||
fn format_spec_scoring_detail(
|
||||
winner: SpecDerivationStrategy,
|
||||
runners_up: &[(SpecDerivationStrategy, crate::dynamic::spec::SpecScore)],
|
||||
) -> String {
|
||||
use std::fmt::Write as _;
|
||||
let mut detail = format!("winner={winner} runners_up=");
|
||||
if runners_up.is_empty() {
|
||||
detail.push_str("none");
|
||||
} else {
|
||||
for (i, (strat, score)) in runners_up.iter().enumerate() {
|
||||
if i > 0 {
|
||||
detail.push(',');
|
||||
}
|
||||
let _ = write!(detail, "{strat}:{}", score.flow_depth);
|
||||
}
|
||||
}
|
||||
detail
|
||||
}
|
||||
|
||||
/// True when the finding has *some* derivable signal (rule namespace, sink
|
||||
/// caps, or evidence) so a spec-derivation failure should be surfaced as
|
||||
/// `Inconclusive` rather than `Unsupported`.
|
||||
|
|
@ -550,13 +575,23 @@ pub fn verify_finding(diag: &Diag, opts: &VerifyOptions) -> VerifyResult {
|
|||
};
|
||||
}
|
||||
|
||||
let spec = match HarnessSpec::from_finding_full(
|
||||
diag,
|
||||
// Phase 25 (Track K.0): derive the spec through the multi-strategy
|
||||
// scoring path. `derive_best_ranked` runs every strategy, scores each
|
||||
// candidate, and returns the winner plus the loser ranking for
|
||||
// telemetry.
|
||||
let ctx = crate::dynamic::spec::SpecDerivationCtx::new(
|
||||
opts.verify_all_confidence,
|
||||
opts.summaries.as_deref(),
|
||||
opts.callgraph.as_deref(),
|
||||
) {
|
||||
Ok(s) => s,
|
||||
);
|
||||
let spec = match HarnessSpec::derive_best_ranked(diag, &ctx) {
|
||||
Ok((s, runners_up)) => {
|
||||
trace.record(
|
||||
crate::dynamic::trace::TraceStage::SpecScoringResult,
|
||||
Some(format_spec_scoring_detail(s.derivation, &runners_up)),
|
||||
);
|
||||
s
|
||||
}
|
||||
Err(reason) => {
|
||||
trace.record(
|
||||
crate::dynamic::trace::TraceStage::Verdict,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue