mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-12 19:55:14 +02:00
[pitboss] sweep after phase 01: 3 deferred items resolved
This commit is contained in:
parent
31d9ef725a
commit
946cb6a9bc
5 changed files with 455 additions and 57 deletions
|
|
@ -333,6 +333,46 @@ pub(crate) fn is_preview_tier_path(path: &Path) -> bool {
|
|||
)
|
||||
}
|
||||
|
||||
/// Build the in-memory cross-file summary index consumed by dynamic
/// verification.
///
/// Opens the index database at `db_path`, reads every persisted
/// `FuncSummary` recorded for `project`, and merges them into a
/// [`GlobalSummaries`], passing `scan_root` (lossily converted to a string)
/// as the second argument of `merge_summaries`.
///
/// The load is best-effort. Each of the three fallible steps (indexer init,
/// per-project open, summary load) logs at `debug` level and yields `None`,
/// which leaves dynamic verification on its no-summaries code path.
///
/// Intended to run once before the per-finding verify loop so spec
/// derivation reads an in-memory index instead of going back to SQLite for
/// every finding. The index is returned inside an `Arc` so the options
/// struct that carries it stays cheap to clone.
#[cfg(feature = "dynamic")]
fn load_verify_summaries(
    project: &str,
    db_path: &Path,
    scan_root: &Path,
) -> Option<Arc<crate::summary::GlobalSummaries>> {
    // Every step follows the same shape: log the error, then bail with `None`.
    let pool = Indexer::init(db_path)
        .map_err(|e| {
            tracing::debug!("verify: indexer init failed; summary-driven spec derivation off: {e}");
        })
        .ok()?;

    let indexer = Indexer::from_pool(project, &pool)
        .map_err(|e| {
            tracing::debug!("verify: indexer open failed; summary-driven spec derivation off: {e}");
        })
        .ok()?;

    let persisted = indexer
        .load_all_summaries()
        .map_err(|e| {
            tracing::debug!("verify: load_all_summaries failed; spec derivation off: {e}");
        })
        .ok()?;

    let root_str = scan_root.to_string_lossy().into_owned();
    let merged = crate::summary::merge_summaries(persisted, Some(&root_str));
    Some(Arc::new(merged))
}
|
||||
|
||||
/// Entry point called by the CLI.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn handle(
|
||||
|
|
@ -483,6 +523,12 @@ pub fn handle(
|
|||
// When index_mode is Off, the DB is never created, so no cache.
|
||||
if index_mode != IndexMode::Off && db_path.exists() {
|
||||
opts.db_path = Some(db_path.clone());
|
||||
// Preload cross-file summaries once so the spec-derivation
|
||||
// pipeline can resolve the enclosing function's `FuncSummary`
|
||||
// (strategy 3) and its static `entry_kind` (strategy 4)
|
||||
// without re-hitting SQLite per finding. Best-effort: a load
|
||||
// failure logs and falls through to the substring heuristics.
|
||||
opts.summaries = load_verify_summaries(&project_name, &db_path, &scan_path);
|
||||
}
|
||||
for diag in &mut diags {
|
||||
let result = crate::dynamic::verify::verify_finding(diag, &opts);
|
||||
|
|
|
|||
|
|
@ -19,9 +19,9 @@
|
|||
|
||||
use crate::commands::scan::Diag;
|
||||
use crate::dynamic::corpus::CORPUS_VERSION;
|
||||
use crate::evidence::{Confidence, FlowStep, FlowStepKind, UnsupportedReason};
|
||||
use crate::evidence::{Confidence, FlowStepKind, UnsupportedReason};
|
||||
use crate::labels::Cap;
|
||||
use crate::summary::FuncSummary;
|
||||
use crate::summary::{FuncSummary, GlobalSummaries};
|
||||
use crate::symbol::Lang;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::path::Path;
|
||||
|
|
@ -157,6 +157,32 @@ impl HarnessSpec {
|
|||
pub fn from_finding_opts(
|
||||
diag: &Diag,
|
||||
verify_all_confidence: bool,
|
||||
) -> Result<Self, UnsupportedReason> {
|
||||
Self::from_finding_with_summaries(diag, verify_all_confidence, None)
|
||||
}
|
||||
|
||||
/// Strategy-aware constructor that consults `summaries` when present.
|
||||
///
|
||||
/// When `summaries` is `Some`, strategy 3 ([`SpecDerivationStrategy::FromFuncSummaryWalk`])
|
||||
/// looks up the enclosing function's [`FuncSummary`] by `(lang, name, file)`
|
||||
/// — derived from `evidence.flow_steps[*].function` — and pulls a real
|
||||
/// `tainted_sink_params` slot rather than no-op'ing as it does in the
|
||||
/// `None` path. Strategy 4 additionally upgrades the
|
||||
/// `.http.` / `.cli.` substring heuristic by consulting
|
||||
/// [`FuncSummary::entry_kind`] on the resolved summary; an HTTP-shaped
|
||||
/// entry-kind variant becomes `EntryKind::HttpRoute` regardless of the
|
||||
/// rule id, and the legacy substring fallback runs only when no summary
|
||||
/// is found.
|
||||
///
|
||||
/// The `entry_name` populated by strategies 2 and 4 is also resolved
|
||||
/// from `evidence.flow_steps[*].function` (the authoritative enclosing
|
||||
/// function annotation set by the SSA taint engine) rather than from
|
||||
/// `evidence.sink.snippet` / `evidence.source.snippet`, which carry
|
||||
/// shortened callee text — never the enclosing-function name.
|
||||
pub fn from_finding_with_summaries(
|
||||
diag: &Diag,
|
||||
verify_all_confidence: bool,
|
||||
summaries: Option<&GlobalSummaries>,
|
||||
) -> Result<Self, UnsupportedReason> {
|
||||
if !verify_all_confidence {
|
||||
match diag.confidence {
|
||||
|
|
@ -171,13 +197,13 @@ impl HarnessSpec {
|
|||
if let Some(spec) = derive_from_flow_steps(diag, evidence) {
|
||||
return Ok(spec);
|
||||
}
|
||||
if let Some(spec) = derive_from_rule_namespace(diag, evidence) {
|
||||
if let Some(spec) = derive_from_rule_namespace_with(diag, evidence, summaries) {
|
||||
return Ok(spec);
|
||||
}
|
||||
if let Some(spec) = derive_from_func_summary(diag, evidence, None) {
|
||||
if let Some(spec) = derive_from_func_summary_auto(diag, evidence, summaries) {
|
||||
return Ok(spec);
|
||||
}
|
||||
if let Some(spec) = derive_from_callgraph_entry(diag, evidence) {
|
||||
if let Some(spec) = derive_from_callgraph_entry_with(diag, evidence, summaries) {
|
||||
return Ok(spec);
|
||||
}
|
||||
|
||||
|
|
@ -248,6 +274,20 @@ fn derive_from_flow_steps(diag: &Diag, evidence: &crate::evidence::Evidence) ->
|
|||
pub fn derive_from_rule_namespace(
|
||||
diag: &Diag,
|
||||
evidence: &crate::evidence::Evidence,
|
||||
) -> Option<HarnessSpec> {
|
||||
derive_from_rule_namespace_with(diag, evidence, None)
|
||||
}
|
||||
|
||||
/// Like [`derive_from_rule_namespace`], but consults `summaries` to recover the
|
||||
/// enclosing function name when `evidence.flow_steps` does not carry one.
|
||||
///
|
||||
/// When neither flow_steps nor the summary index resolve a name, the entry
|
||||
/// name falls back to `"<unknown>"` (kept stable across runs so spec hashes
|
||||
/// remain reproducible).
|
||||
pub fn derive_from_rule_namespace_with(
|
||||
diag: &Diag,
|
||||
evidence: &crate::evidence::Evidence,
|
||||
summaries: Option<&GlobalSummaries>,
|
||||
) -> Option<HarnessSpec> {
|
||||
let mut iter = diag.id.split('.');
|
||||
let lang_prefix = iter.next()?;
|
||||
|
|
@ -284,11 +324,7 @@ pub fn derive_from_rule_namespace(
|
|||
}
|
||||
}
|
||||
|
||||
let entry_function = evidence
|
||||
.sink
|
||||
.as_ref()
|
||||
.and_then(|s| s.snippet.clone())
|
||||
.filter(|s| !s.is_empty())
|
||||
let entry_function = resolve_enclosing_function(diag, evidence, summaries, lang)
|
||||
.unwrap_or_else(|| "<unknown>".to_owned());
|
||||
|
||||
Some(finalize_spec(
|
||||
|
|
@ -353,6 +389,26 @@ pub fn derive_from_func_summary(
|
|||
Some(spec)
|
||||
}
|
||||
|
||||
// ── Strategy 3 (auto): locate the enclosing FuncSummary in `summaries` ───────
|
||||
|
||||
/// Resolve the enclosing function's [`FuncSummary`] from `summaries` and
|
||||
/// delegate to [`derive_from_func_summary`].
|
||||
///
|
||||
/// Returns `None` when `summaries` is `None`, when the enclosing function
|
||||
/// name cannot be recovered from `evidence.flow_steps`, or when no summary
|
||||
/// matches `(lang, name, file)`.
|
||||
fn derive_from_func_summary_auto(
|
||||
diag: &Diag,
|
||||
evidence: &crate::evidence::Evidence,
|
||||
summaries: Option<&GlobalSummaries>,
|
||||
) -> Option<HarnessSpec> {
|
||||
let summaries = summaries?;
|
||||
let lang = lang_from_path(&diag.path)?;
|
||||
let name = enclosing_function_from_flow_steps(evidence)?;
|
||||
let summary = find_summary_by_path(summaries, lang, &name, &diag.path)?;
|
||||
derive_from_func_summary(diag, evidence, Some(summary))
|
||||
}
|
||||
|
||||
// ── Strategy 4: callgraph entry-kind ─────────────────────────────────────────
|
||||
|
||||
/// Build a spec by treating the sink's enclosing function as an entry point
|
||||
|
|
@ -367,26 +423,46 @@ pub fn derive_from_callgraph_entry(
|
|||
diag: &Diag,
|
||||
evidence: &crate::evidence::Evidence,
|
||||
) -> Option<HarnessSpec> {
|
||||
let id = &diag.id;
|
||||
let entry_kind = if id.contains(".http.") {
|
||||
EntryKind::HttpRoute
|
||||
} else if id.contains(".cli.") {
|
||||
EntryKind::CliSubcommand
|
||||
} else {
|
||||
return None;
|
||||
};
|
||||
derive_from_callgraph_entry_with(diag, evidence, None)
|
||||
}
|
||||
|
||||
/// Like [`derive_from_callgraph_entry`], but prefers
|
||||
/// [`FuncSummary::entry_kind`] over the `.http.` / `.cli.` rule-id substring
|
||||
/// heuristic when a matching summary is available in `summaries`.
|
||||
///
|
||||
/// An HTTP-shaped [`crate::entry_points::EntryKind`] variant on the enclosing
|
||||
/// function's summary becomes [`EntryKind::HttpRoute`] regardless of the rule
|
||||
/// id. The substring fallback runs only when no summary entry-kind is found
|
||||
/// — e.g. for AST-only findings with no taint-engine flow_steps.
|
||||
pub fn derive_from_callgraph_entry_with(
|
||||
diag: &Diag,
|
||||
evidence: &crate::evidence::Evidence,
|
||||
summaries: Option<&GlobalSummaries>,
|
||||
) -> Option<HarnessSpec> {
|
||||
let lang = lang_from_path(&diag.path)?;
|
||||
let expected_cap = Cap::from_bits_truncate(evidence.sink_caps);
|
||||
if expected_cap.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let entry_function = evidence
|
||||
.source
|
||||
.as_ref()
|
||||
.and_then(|s| s.snippet.clone())
|
||||
.filter(|s| !s.is_empty())
|
||||
// Step 1: try summary-based classification.
|
||||
let summary_kind = enclosing_function_from_flow_steps(evidence)
|
||||
.and_then(|name| find_summary_by_path(summaries?, lang, &name, &diag.path))
|
||||
.and_then(|s| s.entry_kind.as_ref().map(entry_kind_from_summary));
|
||||
|
||||
// Step 2: fall back to rule-id substring heuristic (legacy).
|
||||
let id = &diag.id;
|
||||
let id_kind = if id.contains(".http.") {
|
||||
Some(EntryKind::HttpRoute)
|
||||
} else if id.contains(".cli.") {
|
||||
Some(EntryKind::CliSubcommand)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let entry_kind = summary_kind.or(id_kind)?;
|
||||
|
||||
let entry_function = resolve_enclosing_function(diag, evidence, summaries, lang)
|
||||
.unwrap_or_else(|| "<unknown>".to_owned());
|
||||
|
||||
let mut spec = finalize_spec(
|
||||
|
|
@ -404,6 +480,16 @@ pub fn derive_from_callgraph_entry(
|
|||
Some(spec)
|
||||
}
|
||||
|
||||
/// Map a static-analysis [`crate::entry_points::EntryKind`] (route shape) onto
|
||||
/// the dynamic-side [`EntryKind`] taxonomy. Every current variant of the
|
||||
/// static enum describes an HTTP route handler — no CLI / library-API
|
||||
/// variants exist statically — so they all collapse to
|
||||
/// [`EntryKind::HttpRoute`]. When the static taxonomy grows non-HTTP variants
|
||||
/// (e.g. clap subcommand detection), extend this match to preserve them.
|
||||
fn entry_kind_from_summary(_kind: &crate::entry_points::EntryKind) -> EntryKind {
|
||||
EntryKind::HttpRoute
|
||||
}
|
||||
|
||||
// ── Helpers ──────────────────────────────────────────────────────────────────
|
||||
|
||||
fn lang_from_path(path: &str) -> Option<Lang> {
|
||||
|
|
@ -411,6 +497,76 @@ fn lang_from_path(path: &str) -> Option<Lang> {
|
|||
Lang::from_extension(ext)
|
||||
}
|
||||
|
||||
/// Return the first non-empty `function` annotation found on any flow step.
|
||||
///
|
||||
/// Strategy 1 ([`derive_from_flow_steps`]) consumes the `Source`-step
|
||||
/// annotation directly; strategies 2 and 4 fall back to *any* step with a
|
||||
/// `function` set because the SSA engine annotates sink and assignment steps
|
||||
/// as well. The annotation is authoritative — it carries the enclosing
|
||||
/// function as resolved against the CFG — so it is preferred over the call
|
||||
/// snippet, which carries shortened callee text.
|
||||
fn enclosing_function_from_flow_steps(evidence: &crate::evidence::Evidence) -> Option<String> {
|
||||
evidence
|
||||
.flow_steps
|
||||
.iter()
|
||||
.find_map(|s| s.function.clone().filter(|f| !f.is_empty()))
|
||||
}
|
||||
|
||||
/// Resolve the enclosing function name for the diag using, in order:
|
||||
/// 1. any `flow_steps[*].function` annotation (always authoritative),
|
||||
/// 2. a [`GlobalSummaries`] lookup when `summaries` is `Some` and exactly one
|
||||
/// function in the diag's file shares the rule-language tag (last-resort
|
||||
/// disambiguation when flow_steps is empty),
|
||||
/// 3. `None` (callers default to `"<unknown>"`).
|
||||
fn resolve_enclosing_function(
|
||||
diag: &Diag,
|
||||
evidence: &crate::evidence::Evidence,
|
||||
summaries: Option<&GlobalSummaries>,
|
||||
lang: Lang,
|
||||
) -> Option<String> {
|
||||
if let Some(name) = enclosing_function_from_flow_steps(evidence) {
|
||||
return Some(name);
|
||||
}
|
||||
let summaries = summaries?;
|
||||
let mut hits = summaries
|
||||
.iter()
|
||||
.filter(|(k, _)| k.lang == lang)
|
||||
.filter(|(_, s)| paths_match(&s.file_path, &diag.path));
|
||||
let first = hits.next()?;
|
||||
if hits.next().is_some() {
|
||||
// Ambiguous: multiple functions in this file; refuse to guess.
|
||||
return None;
|
||||
}
|
||||
Some(first.1.name.clone())
|
||||
}
|
||||
|
||||
/// Lookup a `FuncSummary` by `(lang, name)` and filter to one whose
|
||||
/// `file_path` matches `diag_path`. Returns `None` on no match.
|
||||
fn find_summary_by_path<'a>(
|
||||
summaries: &'a GlobalSummaries,
|
||||
lang: Lang,
|
||||
name: &str,
|
||||
diag_path: &str,
|
||||
) -> Option<&'a FuncSummary> {
|
||||
summaries
|
||||
.lookup_same_lang(lang, name)
|
||||
.into_iter()
|
||||
.find(|(_, s)| paths_match(&s.file_path, diag_path))
|
||||
.map(|(_, s)| s)
|
||||
}
|
||||
|
||||
/// Loose path comparison that tolerates absolute / project-relative drift.
///
/// `FuncSummary::file_path` may be stored relative to the project root while
/// `Diag::path` may be canonicalised, so the two are considered equal when
/// they are identical, or when one is a trailing portion of the other.
/// No canonicalisation or filesystem access is performed.
///
/// The trailing portion must be non-empty and must begin on a path-component
/// boundary (`/` or `\`). A plain string suffix test is not enough:
/// `app/other_handler.py` must not match `handler.py`, and an empty path
/// must not match every file (every string "ends with" the empty string).
///
/// Returns `true` when the two paths are judged to name the same file.
fn paths_match(summary_path: &str, diag_path: &str) -> bool {
    /// Path separators recognised on either platform.
    fn is_sep(c: char) -> bool {
        c == '/' || c == '\\'
    }

    /// `true` when `shorter` is a non-empty tail of `longer` that starts at
    /// a component boundary.
    fn is_component_suffix(longer: &str, shorter: &str) -> bool {
        if shorter.is_empty() {
            return false;
        }
        match longer.strip_suffix(shorter) {
            // The boundary may sit at the end of the remaining prefix
            // ("/repo/" + "app/x.py") or at the start of the suffix
            // ("/repo" + "/app/x.py"). An empty prefix means equality.
            Some(prefix) => {
                prefix.is_empty() || prefix.ends_with(is_sep) || shorter.starts_with(is_sep)
            }
            None => false,
        }
    }

    summary_path == diag_path
        || is_component_suffix(summary_path, diag_path)
        || is_component_suffix(diag_path, summary_path)
}
|
||||
|
||||
/// Map the first segment of a Nyx rule id (`py`, `js`, `ts`, `java`, …) to a
|
||||
/// [`Lang`]. Returns `None` for non-language prefixes (`taint-`, `cfg-`,
|
||||
/// `state-`).
|
||||
|
|
@ -485,39 +641,6 @@ fn finalize_spec(
|
|||
spec
|
||||
}
|
||||
|
||||
/// Walk a synthetic single-step flow to satisfy callers that expect a `FlowStep`
|
||||
/// vector. Used by strategies 2–4 when they need to materialise a flow for
|
||||
/// downstream consumers.
|
||||
#[allow(dead_code)]
|
||||
pub(crate) fn synthetic_flow(diag: &Diag, function: &str) -> Vec<FlowStep> {
|
||||
vec![
|
||||
FlowStep {
|
||||
step: 1,
|
||||
kind: FlowStepKind::Source,
|
||||
file: diag.path.clone(),
|
||||
line: diag.line as u32,
|
||||
col: diag.col as u32,
|
||||
snippet: None,
|
||||
variable: None,
|
||||
callee: None,
|
||||
function: Some(function.to_owned()),
|
||||
is_cross_file: false,
|
||||
},
|
||||
FlowStep {
|
||||
step: 2,
|
||||
kind: FlowStepKind::Sink,
|
||||
file: diag.path.clone(),
|
||||
line: diag.line as u32,
|
||||
col: diag.col as u32,
|
||||
snippet: None,
|
||||
variable: None,
|
||||
callee: None,
|
||||
function: Some(function.to_owned()),
|
||||
is_cross_file: false,
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
/// Walk `flow_steps` and return the entry point: the enclosing function of
|
||||
/// the first `Source` step that has a function annotation. This is the
|
||||
/// outermost callable that receives the tainted input.
|
||||
|
|
@ -919,6 +1042,22 @@ mod tests {
|
|||
assert_eq!(spec.expected_cap, Cap::DESERIALIZE);
|
||||
}
|
||||
|
||||
#[test]
fn rule_namespace_strategy_pins_rs_auth_mapping() {
    // Regression guard for the `rs.auth.*` exemplar: the rule-namespace
    // strategy must resolve it to `Lang::Rust` with `Cap::UNAUTHORIZED_ID`
    // on the stable Rust toolchain.
    let finding = diag_with_rule_id("rs.auth.missing_ownership_check.taint", "src/handler.rs", 0);

    let derived = HarnessSpec::from_finding(&finding).unwrap();

    assert_eq!(derived.derivation, SpecDerivationStrategy::FromRuleNamespace);
    assert_eq!(derived.lang, Lang::Rust);
    assert_eq!(derived.expected_cap, Cap::UNAUTHORIZED_ID);
    assert_eq!(derived.toolchain_id, "rust-stable");
}
|
||||
|
||||
#[test]
|
||||
fn rule_namespace_strategy_rejects_path_lang_mismatch() {
|
||||
use crate::labels::Cap;
|
||||
|
|
@ -1039,4 +1178,162 @@ mod tests {
|
|||
let spec = HarnessSpec::from_finding(&diag).unwrap();
|
||||
assert_eq!(spec.derivation, SpecDerivationStrategy::FromFlowSteps);
|
||||
}
|
||||
|
||||
// ── Phase 01 follow-ups: GlobalSummaries threading ───────────────────────
|
||||
|
||||
fn sink_only_step_with_function(file: &str, function: &str) -> crate::evidence::FlowStep {
|
||||
crate::evidence::FlowStep {
|
||||
step: 1,
|
||||
kind: FlowStepKind::Sink,
|
||||
file: file.into(),
|
||||
line: 6,
|
||||
col: 0,
|
||||
snippet: Some("os.system".into()),
|
||||
variable: None,
|
||||
callee: Some("os.system".into()),
|
||||
function: Some(function.into()),
|
||||
is_cross_file: false,
|
||||
}
|
||||
}
|
||||
|
||||
fn build_summary(name: &str, file: &str, lang: &str, sink_caps: u32, tainted_params: Vec<usize>, entry_kind: Option<crate::entry_points::EntryKind>) -> FuncSummary {
|
||||
FuncSummary {
|
||||
name: name.into(),
|
||||
file_path: file.into(),
|
||||
lang: lang.into(),
|
||||
param_count: 1,
|
||||
param_names: vec!["req".into()],
|
||||
source_caps: 0,
|
||||
sanitizer_caps: 0,
|
||||
sink_caps,
|
||||
propagating_params: vec![],
|
||||
propagates_taint: false,
|
||||
tainted_sink_params: tainted_params,
|
||||
param_to_sink: vec![],
|
||||
callees: vec![],
|
||||
container: String::new(),
|
||||
disambig: None,
|
||||
kind: Default::default(),
|
||||
module_path: None,
|
||||
rust_use_map: None,
|
||||
rust_wildcards: None,
|
||||
hierarchy_edges: vec![],
|
||||
entry_kind,
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
fn entry_name_uses_flow_steps_function_not_snippet() {
    use crate::labels::Cap;

    // Strategy 2 must take `entry_name` from the `function` annotation on a
    // flow step (the enclosing function), never from callee text such as
    // `"os.system"` found in the snippet.
    let step = sink_only_step_with_function("app/handler.py", "do_request");
    let evidence = Evidence {
        flow_steps: vec![step],
        sink_caps: Cap::SHELL_ESCAPE.bits(),
        ..Default::default()
    };
    let finding = crate::commands::scan::Diag {
        id: "py.cmdi.os_system".into(),
        path: "app/handler.py".into(),
        line: 6,
        confidence: Some(Confidence::High),
        evidence: Some(evidence.clone()),
        ..Default::default()
    };

    let spec = derive_from_rule_namespace(&finding, &evidence).expect("must derive");

    assert_eq!(spec.entry_name, "do_request");
    // Callee text must not leak into the entry name.
    assert!(!spec.entry_name.contains("os.system"));
}
|
||||
|
||||
#[test]
fn func_summary_auto_resolves_via_global_summaries() {
    use crate::labels::Cap;
    use crate::symbol::FuncKey;

    // Strategy 3 with an index supplied: the flow-step annotation names the
    // enclosing function, the index resolves its summary by `(lang, name)`
    // narrowed by file path, and `tainted_sink_params[0]` becomes the
    // payload slot.
    let mut index = GlobalSummaries::new();
    index.insert(
        FuncKey::new_function(Lang::Python, "app/handler.py", "do_request", Some(1)),
        build_summary(
            "do_request",
            "app/handler.py",
            "python",
            Cap::SHELL_ESCAPE.bits(),
            vec![0],
            None,
        ),
    );

    let step = sink_only_step_with_function("app/handler.py", "do_request");
    let evidence = Evidence {
        flow_steps: vec![step],
        sink_caps: Cap::SHELL_ESCAPE.bits(),
        ..Default::default()
    };
    let finding = crate::commands::scan::Diag {
        id: "taint-unsanitised-flow".into(),
        path: "app/handler.py".into(),
        line: 6,
        confidence: Some(Confidence::High),
        evidence: Some(evidence),
        ..Default::default()
    };

    let spec = HarnessSpec::from_finding_with_summaries(&finding, false, Some(&index))
        .expect("summary-driven derivation must succeed");

    assert_eq!(spec.derivation, SpecDerivationStrategy::FromFuncSummaryWalk);
    assert!(matches!(spec.payload_slot, PayloadSlot::Param(0)));
    assert_eq!(spec.entry_name, "do_request");
}
|
||||
|
||||
#[test]
fn callgraph_entry_uses_summary_entry_kind_over_rule_id() {
    use crate::entry_points::{EntryKind as StaticEntryKind, HttpMethod};
    use crate::labels::Cap;
    use crate::symbol::FuncKey;

    // Strategy 4 with an index supplied: a static `entry_kind` on the
    // enclosing function's summary is enough to classify the finding as an
    // HTTP route, even though the rule id has no `.http.` / `.cli.` segment.
    let mut index = GlobalSummaries::new();
    index.insert(
        FuncKey::new_function(Lang::Python, "app/views.py", "index", Some(1)),
        build_summary(
            "index",
            "app/views.py",
            "python",
            Cap::SSRF.bits(),
            vec![],
            Some(StaticEntryKind::FlaskRoute { method: HttpMethod::GET }),
        ),
    );

    let evidence = Evidence {
        flow_steps: vec![sink_only_step_with_function("app/views.py", "index")],
        sink_caps: Cap::SSRF.bits(),
        ..Default::default()
    };
    let finding = crate::commands::scan::Diag {
        // With this id the legacy substring heuristic alone would bail, so
        // only the summary's entry_kind can produce the classification.
        id: "taint-unsanitised-flow".into(),
        path: "app/views.py".into(),
        line: 6,
        confidence: Some(Confidence::High),
        evidence: Some(evidence.clone()),
        ..Default::default()
    };

    let spec = derive_from_callgraph_entry_with(&finding, &evidence, Some(&index))
        .expect("entry-kind-driven derivation must succeed");

    assert_eq!(spec.derivation, SpecDerivationStrategy::FromCallgraphEntry);
    assert!(matches!(spec.entry_kind, EntryKind::HttpRoute));
    assert_eq!(spec.entry_name, "index");
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -12,8 +12,10 @@ use crate::dynamic::spec::{HarnessSpec, SPEC_FORMAT_VERSION};
|
|||
use crate::dynamic::telemetry::{self, TelemetryEvent};
|
||||
use crate::dynamic::toolchain;
|
||||
use crate::evidence::{InconclusiveReason, SpecDerivationStrategy, UnsupportedReason};
|
||||
use crate::summary::GlobalSummaries;
|
||||
use crate::utils::config::Config;
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
use std::time::Instant;
|
||||
|
||||
#[derive(Debug, Clone, Default)]
|
||||
|
|
@ -27,6 +29,18 @@ pub struct VerifyOptions {
|
|||
/// When `true`, skip the `Confidence >= Medium` gate and attempt
|
||||
/// verification on all findings. Corresponds to `--verify-all-confidence`.
|
||||
pub verify_all_confidence: bool,
|
||||
/// Cross-file function summaries shared by every finding in a scan.
|
||||
///
|
||||
/// Threaded into [`HarnessSpec::from_finding_with_summaries`] so the
|
||||
/// summary-walk strategy and the entry-kind-aware callgraph strategy
|
||||
/// can resolve the diag's enclosing function against the same
|
||||
/// [`GlobalSummaries`] index the taint engine used. Held by `Arc` so the
|
||||
/// caller (e.g. the scan command) can build the index once and reuse it
|
||||
/// across the per-finding loop without cloning.
|
||||
///
|
||||
/// `None` disables the summary-driven derivation paths; strategy 3 is a
|
||||
/// no-op and strategy 4 falls back to the rule-id substring heuristic.
|
||||
pub summaries: Option<Arc<GlobalSummaries>>,
|
||||
}
|
||||
|
||||
impl VerifyOptions {
|
||||
|
|
@ -46,6 +60,7 @@ impl VerifyOptions {
|
|||
project_root: None,
|
||||
db_path: None,
|
||||
verify_all_confidence: config.scanner.verify_all_confidence,
|
||||
summaries: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -243,7 +258,11 @@ fn derivation_failure_hint(diag: &Diag) -> String {
|
|||
pub fn verify_finding(diag: &Diag, opts: &VerifyOptions) -> VerifyResult {
|
||||
let finding_id = format!("{:016x}", diag.stable_hash);
|
||||
|
||||
let spec = match HarnessSpec::from_finding_opts(diag, opts.verify_all_confidence) {
|
||||
let spec = match HarnessSpec::from_finding_with_summaries(
|
||||
diag,
|
||||
opts.verify_all_confidence,
|
||||
opts.summaries.as_deref(),
|
||||
) {
|
||||
Ok(s) => s,
|
||||
Err(reason) => {
|
||||
return spec_derivation_failed_verdict(finding_id, diag, reason);
|
||||
|
|
|
|||
|
|
@ -105,6 +105,7 @@ mod parity_tests {
|
|||
project_root: None,
|
||||
db_path: None,
|
||||
verify_all_confidence: false,
|
||||
summaries: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -118,6 +119,7 @@ mod parity_tests {
|
|||
project_root: None,
|
||||
db_path: None,
|
||||
verify_all_confidence: false,
|
||||
summaries: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -144,6 +144,40 @@ mod spec_strategies {
|
|||
assert_eq!(spec.expected_cap, Cap::DESERIALIZE);
|
||||
}
|
||||
|
||||
#[test]
fn from_rule_namespace_pins_rs_auth_to_unauthorized_id() {
    // Regression guard for the `auth → UNAUTHORIZED_ID` mapping:
    // `rs.auth.missing_ownership_check.taint` must yield a Rust spec with
    // `Cap::UNAUTHORIZED_ID` through the rule-namespace strategy, both when
    // the strategy is called directly and through `HarnessSpec::from_finding`.
    let mut evidence = Evidence::default();
    evidence.sink_caps = Cap::UNAUTHORIZED_ID.bits();

    let mut finding = make_diag("rs.auth.missing_ownership_check.taint", "src/handler.rs", 14);
    finding.evidence = Some(evidence.clone());

    // Direct call into strategy 2.
    let direct = derive_from_rule_namespace(&finding, &evidence)
        .expect("rs.auth rule namespace must derive a spec");
    assert_eq!(direct.derivation, SpecDerivationStrategy::FromRuleNamespace);
    assert_eq!(direct.lang, nyx_scanner::symbol::Lang::Rust);
    assert_eq!(direct.expected_cap, Cap::UNAUTHORIZED_ID);
    assert_eq!(direct.sink_line, 14);
    assert_eq!(direct.toolchain_id, "rust-stable");

    // Full pipeline; the finding carries no flow_steps.
    let via_pipeline =
        HarnessSpec::from_finding(&finding).expect("end-to-end derivation must succeed");
    assert_eq!(
        via_pipeline.derivation,
        SpecDerivationStrategy::FromRuleNamespace
    );
    assert_eq!(via_pipeline.expected_cap, Cap::UNAUTHORIZED_ID);
}
|
||||
|
||||
// ── Strategy 3: FromFuncSummaryWalk ──────────────────────────────────────
|
||||
|
||||
#[test]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue