feat: Add SSA summaries support for validated parameter propagation and enhance loop body error handling

This commit is contained in:
elipeter 2026-05-02 21:02:47 -04:00
parent 92aaa36ed6
commit 48bc43e1a6
11 changed files with 438 additions and 69 deletions

View file

@ -136,6 +136,7 @@ jobs:
-max_total_time=${{ steps.budget.outputs.seconds }} \
-max_len=65536 \
-timeout=60 \
-rss_limit_mb=8192 \
-dict=fuzz/dict/all.dict
- name: Upload crash artifacts

2
fuzz/Cargo.lock generated
View file

@ -1023,7 +1023,7 @@ dependencies = [
[[package]]
name = "nyx-scanner"
version = "0.5.0"
version = "0.6.0"
dependencies = [
"axum",
"bitflags",

View file

@ -1477,6 +1477,7 @@ impl<'a> ParsedFile<'a> {
source_bytes: self.source.bytes,
func_summaries: self.local_summaries(),
global_summaries,
ssa_summaries: Some(ssa_summaries),
taint_findings: &body_taint,
analysis_rules: self.rules_ref(),
taint_active,

View file

@ -663,7 +663,6 @@ pub(crate) fn collect_idents_with_paths(
if let Some(path) = member_expr_text(n, code) {
paths.push(path);
}
// Also collect individual idents as fallback
collect_idents(n, code, idents);
}
"identifier"

View file

@ -3481,15 +3481,27 @@ pub(super) fn build_sub<'a>(
let mut loop_breaks = Vec::new();
let mut loop_continues = Vec::new();
// Body = first (and usually only) block child.
let body = ast
.child_by_field_name("body")
.or_else(|| {
let mut c = ast.walk();
ast.children(&mut c)
.find(|n| lookup(lang, n.kind()) == Kind::Block)
})
.expect("loop without body");
// Body = first (and usually only) block child. Tree-sitter error
// recovery (or a fuzz mutation that truncates a `for`/`while`
// header before the block) can leave a loop node with no body
// child at all. Match the InfiniteLoop arm above and degrade
// gracefully instead of panicking — header alone is a valid CFG
// skeleton for the malformed input.
let body = match ast.child_by_field_name("body").or_else(|| {
let mut c = ast.walk();
ast.children(&mut c)
.find(|n| lookup(lang, n.kind()) == Kind::Block)
}) {
Some(b) => b,
None => {
warn!(
"loop without body (error recovery?): kind={} byte={}",
ast.kind(),
ast.start_byte()
);
return vec![header];
}
};
if has_short_circuit {
let cond_ast = cond_subtree.unwrap();
@ -3625,9 +3637,14 @@ pub(super) fn build_sub<'a>(
// swallowed and the gated sinks they contain become invisible
// to classification. Mirrors the same recursion done by the
// CallWrapper / CallFn arms. Motivated by CVE-2025-64430.
//
// Disconnect the placeholder Seq edge from the call after
// build_sub returns; the inner body is independently
// registered, so the outer call should flow straight to its
// real successor (the Return below) without a phantom branch.
let nested = collect_nested_function_nodes(ast, lang);
for func_node in nested {
build_sub(
let placeholders = build_sub(
func_node,
&[call_idx],
g,
@ -3645,6 +3662,13 @@ pub(super) fn build_sub<'a>(
next_body_id,
current_body_id,
);
for ph in placeholders {
let to_remove: Vec<_> =
g.edges_connecting(call_idx, ph).map(|e| e.id()).collect();
for eid in to_remove {
g.remove_edge(eid);
}
}
}
Vec::new()
@ -3703,10 +3727,11 @@ pub(super) fn build_sub<'a>(
// Same nested-function recursion as the Return arm: a
// `throw new Promise(() => { ... })` would otherwise lose
// any inner gated sinks.
// any inner gated sinks. Disconnect the placeholder edge
// (see Return arm comment).
let nested = collect_nested_function_nodes(ast, lang);
for func_node in nested {
build_sub(
let placeholders = build_sub(
func_node,
&[call_idx],
g,
@ -3724,6 +3749,13 @@ pub(super) fn build_sub<'a>(
next_body_id,
current_body_id,
);
for ph in placeholders {
let to_remove: Vec<_> =
g.edges_connecting(call_idx, ph).map(|e| e.id()).collect();
for eid in to_remove {
g.remove_edge(eid);
}
}
}
Vec::new()
@ -4370,10 +4402,18 @@ pub(super) fn build_sub<'a>(
// Recurse into any function expressions nested in arguments
// (e.g. `app.get('/path', function(req, res) { ... })`)
// so that they get proper function summaries.
// so that they get proper function summaries. The build_sub
// invocation registers the inner body but also adds a
// Seq-edge `node → placeholder` from the inner Kind::Function
// arm. That phantom successor turns the outer call into a
// 2-successor branch with an empty Return(None) leg, which
// breaks `validated_params_to_return` summary extraction
// (CVE-2026-25544). Disconnect the spurious edge after
// build_sub returns; the inner body is still reachable to
// closure-capture passes via `parent_body_id` metadata.
let nested = collect_nested_function_nodes(ast, lang);
for func_node in nested {
build_sub(
let placeholders = build_sub(
func_node,
&[node],
g,
@ -4391,6 +4431,12 @@ pub(super) fn build_sub<'a>(
next_body_id,
current_body_id,
);
for ph in placeholders {
let to_remove: Vec<_> = g.edges_connecting(node, ph).map(|e| e.id()).collect();
for eid in to_remove {
g.remove_edge(eid);
}
}
}
// Rust match-guard synthesis: `let <name> = match <scrutinee> { <arm> if <guard> => .., ... }`
@ -4462,9 +4508,12 @@ pub(super) fn build_sub<'a>(
// Recurse into any function expressions nested in arguments.
// Each nested function hits Kind::Function and becomes a separate body.
// See sibling comment in CallWrapper arm: disconnect the
// declaration-marker placeholder Seq edge after build_sub
// returns, so the outer body's CFG isn't artificially branched.
let nested = collect_nested_function_nodes(ast, lang);
for func_node in nested {
build_sub(
let placeholders = build_sub(
func_node,
&[n],
g,
@ -4482,6 +4531,12 @@ pub(super) fn build_sub<'a>(
next_body_id,
current_body_id,
);
for ph in placeholders {
let to_remove: Vec<_> = g.edges_connecting(n, ph).map(|e| e.id()).collect();
for eid in to_remove {
g.remove_edge(eid);
}
}
}
vec![n]

View file

@ -463,6 +463,171 @@ fn sink_args_typed_safe(ctx: &AnalysisContext, sink: NodeIndex, sink_caps: Cap)
type_facts_suppress(&values, sink_caps, type_facts)
}
/// Decide whether every argument name used by the call at `sink` is, on
/// every CFG path that reaches the sink, most recently defined by a call to
/// a helper whose SSA summary proves validation.
///
/// A helper "proves validation" when exactly one summary matches its callee
/// text (by leaf name or full text), that summary has a non-empty
/// `validated_params_to_return`, and every parameter index listed in
/// `param_to_return` also appears in `validated_params_to_return`.
///
/// Argument names come from `call.arg_uses`; when that yields nothing the
/// function falls back to `taint.uses` with callee-fragment names (as judged
/// by `is_callee_fragment`) removed.
///
/// Conservative by construction — returns `false` when:
/// * neither the per-file nor the global summary map is available;
/// * no argument names could be collected;
/// * the sink has no CFG predecessors;
/// * any path backwards from the sink reaches a node without predecessors
///   before a definition of the name is found (the name is not defined by
///   an in-body call on that path);
/// * the nearest definition of a name on any path is not a `StmtKind::Call`
///   node, or its callee does not prove validation;
/// * the per-name traversal budget is exhausted.
///
/// Returns `true` only when all reaching definitions of all collected
/// argument names are validated helper calls.
fn sink_args_summary_validated_safe(ctx: &AnalysisContext, sink: NodeIndex) -> bool {
    use crate::cfg::StmtKind;

    /// Upper bound on nodes popped while resolving a single argument name;
    /// keeps the backward walk bounded on large or cyclic CFGs.
    const WALK_BUDGET: usize = 256;

    // The per-file SSA summary map is consulted first; the snapshot from
    // GlobalSummaries is only a fallback for callers that do not thread the
    // per-file map through.
    let local_map = ctx.ssa_summaries;
    let global_map = ctx.global_summaries.map(|g| g.snapshot_ssa());
    if local_map.is_none() && global_map.is_none() {
        return false;
    }

    let sink_info = &ctx.cfg[sink];

    // Split the callee (and outer callee) text into its path segments so
    // that the fallback below can recognise names that belong to the callee
    // expression rather than to an argument.
    let callee_desc = sink_info.call.callee.as_deref().unwrap_or("");
    let callee_parts: Vec<&str> = callee_desc
        .split(['.', ':'])
        .map(|p| p.split('(').next().unwrap_or(p))
        .collect();
    let outer_parts: Vec<&str> = sink_info
        .call
        .outer_callee
        .as_deref()
        .map(|oc| {
            oc.split(['.', ':'])
                .map(|p| p.split('(').next().unwrap_or(p))
                .collect()
        })
        .unwrap_or_default();

    // Collect the distinct argument names, preserving first-seen order.
    // Prefer the positional `call.arg_uses`; fall back to `taint.uses`
    // minus callee fragments when `arg_uses` produced nothing.
    let mut arg_use_names: Vec<String> = Vec::new();
    for group in &sink_info.call.arg_uses {
        for u in group {
            if !arg_use_names.iter().any(|n| n == u) {
                arg_use_names.push(u.clone());
            }
        }
    }
    if arg_use_names.is_empty() {
        for u in &sink_info.taint.uses {
            if is_callee_fragment(u, callee_desc, &callee_parts, &outer_parts) {
                continue;
            }
            if !arg_use_names.iter().any(|n| n == u) {
                arg_use_names.push(u.clone());
            }
        }
    }
    if arg_use_names.is_empty() {
        return false;
    }

    // Resolve a callee's text to a summary and report whether it proves
    // validation. `None` means zero or several summaries matched (ambiguous
    // names are never trusted); `Some(false)` means a unique summary exists
    // but does not validate every propagating parameter.
    let lookup_validated = |callee_text: &str| -> Option<bool> {
        let leaf = callee_leaf_name(callee_text);
        let mut matches: Vec<&crate::summary::ssa_summary::SsaFuncSummary> = Vec::new();
        if let Some(map) = local_map {
            for (key, sum) in map {
                if key.name == leaf || key.name == callee_text {
                    matches.push(sum);
                }
            }
        }
        // The global snapshot is consulted only when the per-file map had
        // no candidate at all.
        if matches.is_empty() {
            if let Some(map) = global_map {
                for (key, sum) in map {
                    if key.name == leaf || key.name == callee_text {
                        matches.push(sum);
                    }
                }
            }
        }
        if matches.len() != 1 {
            return None;
        }
        let sum = matches[0];
        if sum.validated_params_to_return.is_empty() {
            return Some(false);
        }
        // Every propagating parameter must be validated. When the summary
        // lists no propagating parameter, `all` is vacuously true, so the
        // non-empty validation set checked above is sufficient.
        Some(
            sum.param_to_return
                .iter()
                .all(|(idx, _)| sum.validated_params_to_return.contains(idx)),
        )
    };

    // Reaching-definition check, one argument name at a time. Walking
    // backwards from the sink, each path stops at the first node that
    // defines the name: that node must be a validated helper call. A later
    // redefinition therefore shadows an earlier validated one, and a name
    // validated on only one arm of a branch is rejected.
    for name in &arg_use_names {
        let mut visited: HashSet<NodeIndex> = HashSet::new();
        let mut frontier: Vec<NodeIndex> = ctx
            .cfg
            .neighbors_directed(sink, petgraph::Direction::Incoming)
            .collect();
        if frontier.is_empty() {
            // Nothing precedes the sink, so no in-body call defines `name`.
            return false;
        }
        let mut iter_budget = WALK_BUDGET;
        while let Some(n) = frontier.pop() {
            if iter_budget == 0 {
                return false;
            }
            iter_budget -= 1;
            if !visited.insert(n) {
                // Already examined via another path (join or loop back-edge).
                continue;
            }
            let info = &ctx.cfg[n];
            if info.taint.defines.as_deref() == Some(name.as_str()) {
                if info.kind != StmtKind::Call {
                    // Nearest definition on this path is not a call result.
                    return false;
                }
                let callee = info.call.callee.as_deref().unwrap_or("");
                if !matches!(lookup_validated(callee), Some(true)) {
                    return false;
                }
                // This path is satisfied; do not look past the definition.
                continue;
            }
            let before = frontier.len();
            frontier.extend(
                ctx.cfg
                    .neighbors_directed(n, petgraph::Direction::Incoming),
            );
            if frontier.len() == before {
                // Reached a node with no predecessors without seeing a
                // definition: on this path the name is not produced by an
                // in-body helper call.
                return false;
            }
        }
    }
    true
}
/// Thin wrapper around [`crate::ssa::type_facts::is_type_safe_for_sink`] kept
/// local so the unit tests here can exercise the exact predicate used at the
/// `cfg-unguarded-sink` emission site.
@ -1053,6 +1218,20 @@ impl CfgAnalysis for UnguardedSink {
continue;
}
// Summary-validated suppression: when the SSA value flowing into
// the sink is the return of a callee whose summary records a
// `validated_params_to_return` covering every propagating
// parameter, the helper validates its inputs on every taint-
// carrying return path (regex allowlist, type check, validation
// call, …). The SSA taint engine already cleared this flow via
// `propagate_validated_params_to_return`, so the structural
// finding is noise. Closes the patched-counterpart noise for
// CVE-2026-25544 (Payload `sanitizeValue` → `createJSONQuery`
// → `db.execute`).
if !has_taint && sink_args_summary_validated_safe(ctx, *sink) {
continue;
}
// Parameterized SQL queries: arg 0 is a string literal with
// placeholders ($1, ?, %s, :name) and a params argument exists.
// These are safe by construction, the driver handles escaping.

View file

@ -147,6 +147,22 @@ pub struct AnalysisContext<'a> {
pub func_summaries: &'a FuncSummaries,
#[allow(dead_code)]
pub global_summaries: Option<&'a GlobalSummaries>,
/// Per-file SSA summaries map produced by
/// `lower_all_functions_from_bodies` (after both the augment pass
/// and the rerun-with-augmented-summaries pass). Carries the
/// final validated_params_to_return / param_to_sink merges that
/// the snapshot in `global_summaries` may not yet reflect on
/// single-file scans. Used by the unguarded-sink analysis to
/// suppress structural findings whose taint flow has been proven
/// validated through helper summaries (CVE-2026-25544 patched
/// counterpart).
#[allow(dead_code)]
pub ssa_summaries: Option<
&'a std::collections::HashMap<
crate::symbol::FuncKey,
crate::summary::ssa_summary::SsaFuncSummary,
>,
>,
pub taint_findings: &'a [taint::Finding],
pub analysis_rules: Option<&'a LangAnalysisRules>,
/// Whether full taint analysis was active for this file (global summaries

View file

@ -27,6 +27,7 @@ fn parse_and_analyse<A: CfgAnalysis>(
source_bytes: src,
func_summaries: summaries,
global_summaries: None,
ssa_summaries: None,
taint_findings: &[],
analysis_rules: None,
taint_active: true,
@ -56,6 +57,7 @@ fn parse_and_run_all(src: &[u8], lang_str: &str, ts_lang: Language) -> Vec<CfgFi
source_bytes: src,
func_summaries: summaries,
global_summaries: None,
ssa_summaries: None,
taint_findings: &[],
analysis_rules: None,
taint_active: true,
@ -90,6 +92,7 @@ fn parse_and_run_all_with_taint(
source_bytes: src,
func_summaries: summaries,
global_summaries: None,
ssa_summaries: None,
taint_findings,
analysis_rules: None,
taint_active: true,
@ -208,6 +211,7 @@ fn parse_and_analyse_with_ssa<A: CfgAnalysis>(
source_bytes: src,
func_summaries: &file_cfg.summaries,
global_summaries: None,
ssa_summaries: None,
taint_findings: &[],
analysis_rules: None,
taint_active: true,
@ -1223,6 +1227,7 @@ fn config_sanitizer_suppresses_unguarded_sink() {
source_bytes: src,
func_summaries: summaries,
global_summaries: None,
ssa_summaries: None,
taint_findings: &[],
analysis_rules: Some(&rules),
taint_active: true,
@ -1702,6 +1707,7 @@ fn cfg_only_no_taint_produces_low_severity() {
source_bytes: src,
func_summaries: summaries,
global_summaries: None,
ssa_summaries: None,
taint_findings: &[],
analysis_rules: None,
taint_active: false, // cfg-only mode

View file

@ -3976,6 +3976,73 @@ pub(super) fn transfer_inst(
return_bits = use_caps;
return_origins = use_origins;
}
// Validated-flow propagation through unresolved external
// calls. When every tainted argument's symbol is already
// in `validated_must` at the call site, the call result
// is derived solely from validated values, so its symbol
// inherits the same `validated_must` / `validated_may`
// status. Without this, helper-validated taint that
// crosses an external boundary (`db.execute(sanitisedSql)`,
// `fetch(safeUrl)`, …) re-emerges as unvalidated taint at
// the next sink (`res.json(result)`), reproducing the
// residual finding in the patched fixture for
// CVE-2026-25544 even though the SQL injection itself is
// suppressed.
if !return_bits.is_empty() {
let mut all_args_validated = true;
let mut any_tainted_arg = false;
let check_value = |v: SsaValue, state: &SsaTaintState| -> Option<bool> {
// Returns Some(true) if validated_must, Some(false)
// if tainted-but-not-validated, None if not tainted.
let taint = state.get(v)?;
if taint.caps.is_empty() {
return None;
}
let name = ssa
.value_defs
.get(v.0 as usize)
.and_then(|vd| vd.var_name.as_deref())?;
let sym = transfer.interner.get(name)?;
Some(state.validated_must.contains(sym))
};
for arg_group in args {
for &v in arg_group {
if let Some(is_validated) = check_value(v, state) {
any_tainted_arg = true;
if !is_validated {
all_args_validated = false;
break;
}
}
}
if !all_args_validated {
break;
}
}
if all_args_validated {
if let Some(rv) = receiver {
if let Some(is_validated) = check_value(*rv, state) {
any_tainted_arg = true;
if !is_validated {
all_args_validated = false;
}
}
}
}
if any_tainted_arg && all_args_validated {
if let Some(name) = ssa
.value_defs
.get(inst.value.0 as usize)
.and_then(|vd| vd.var_name.as_deref())
{
if let Some(sym) = transfer.interner.get(name) {
state.validated_must.insert(sym);
state.validated_may.insert(sym);
}
}
}
}
}
}

View file

@ -593,6 +593,18 @@ pub fn extract_ssa_func_summary_full(
if any_carrying_path && all_carrying_validated {
validated_params_to_return.push(idx);
}
if std::env::var("NYX_DBG_VPR2").is_ok() {
eprintln!(
"VPR2 fp={:?} idx={} name={} any_carry={} all_validated={}",
formal_param_names, idx, var_name, any_carrying_path, all_carrying_validated
);
for (i, obs) in per_return_obs.iter().enumerate() {
eprintln!(
" ret[{}] derived={:?} param={:?} validated_must={}",
i, obs.derived_caps, obs.param_caps, obs.param_validated_must
);
}
}
}
// Derive per-return-path decomposition. For each

View file

@ -1,6 +1,6 @@
{
"benchmark_version": "1.0",
"timestamp": "2026-05-02T19:35:12Z",
"timestamp": "2026-05-03T00:57:12Z",
"scanner_version": "0.6.0",
"scanner_config": {
"analysis_mode": "Full",
@ -9,10 +9,10 @@
"state_analysis_enabled": true,
"worker_threads": 1
},
"ground_truth_hash": "sha256:de2df25545527c2c90c665a5d4db257fb8f0d7aefe16eb742ee8e70f7de55e99",
"ground_truth_hash": "sha256:4a510fd65a169290c8d44c11f764387f2c3f39d18a92d393839f975a492cd64b",
"corpus_size": 507,
"cases_run": 504,
"cases_skipped": 3,
"cases_run": 506,
"cases_skipped": 1,
"outcomes": [
{
"case_id": "c-buf-001",
@ -2048,6 +2048,42 @@
"security_finding_count": 1,
"non_security_finding_count": 0
},
{
"case_id": "cve-ts-2026-25544-patched",
"file": "cve_corpus/typescript/CVE-2026-25544/patched.ts",
"language": "typescript",
"vuln_class": "safe",
"is_vulnerable": false,
"outcome_file_level": "TN",
"outcome_rule_level": "TN",
"outcome_location_level": null,
"matched_rule_ids": [],
"unexpected_rule_ids": [],
"all_finding_ids": [],
"security_finding_count": 0,
"non_security_finding_count": 0
},
{
"case_id": "cve-ts-2026-25544-vulnerable",
"file": "cve_corpus/typescript/CVE-2026-25544/vulnerable.ts",
"language": "typescript",
"vuln_class": "sqli",
"is_vulnerable": true,
"outcome_file_level": "TP",
"outcome_rule_level": "TP",
"outcome_location_level": "TP",
"matched_rule_ids": [
"taint-unsanitised-flow (source 73:5)",
"taint-unsanitised-flow (source 72:20)"
],
"unexpected_rule_ids": [],
"all_finding_ids": [
"taint-unsanitised-flow (source 73:5)",
"taint-unsanitised-flow (source 72:20)"
],
"security_finding_count": 2,
"non_security_finding_count": 0
},
{
"case_id": "cve-ts-ghsa-4x48-cgf9-q33f-patched",
"file": "cve_corpus/typescript/GHSA-4x48-cgf9-q33f/patched.ts",
@ -7364,20 +7400,17 @@
"language": "rust",
"vuln_class": "sqli",
"is_vulnerable": true,
"outcome_file_level": "TP",
"outcome_rule_level": "TP",
"outcome_location_level": "TP",
"matched_rule_ids": [
"taint-unsanitised-flow (source 5:19)"
],
"outcome_file_level": "FN",
"outcome_rule_level": "FN",
"outcome_location_level": "FN",
"matched_rule_ids": [],
"unexpected_rule_ids": [],
"all_finding_ids": [
"rs.quality.unwrap",
"rs.quality.unwrap",
"rs.quality.unwrap",
"taint-unsanitised-flow (source 5:19)"
"rs.quality.unwrap"
],
"security_finding_count": 1,
"security_finding_count": 0,
"non_security_finding_count": 3
},
{
@ -9015,20 +9048,20 @@
"aggregate_file_level": {
"tp": 249,
"fp": 1,
"fn_": 0,
"tn": 254,
"fn_": 1,
"tn": 255,
"precision": 0.996,
"recall": 1.0,
"f1": 0.9979959919839679
"recall": 0.996,
"f1": 0.996
},
"aggregate_rule_level": {
"tp": 249,
"fp": 1,
"fn_": 0,
"tn": 254,
"fn_": 1,
"tn": 255,
"precision": 0.996,
"recall": 1.0,
"f1": 0.9979959919839679
"recall": 0.996,
"f1": 0.996
},
"by_language": {
"c": {
@ -9104,19 +9137,19 @@
"f1": 1.0
},
"rust": {
"tp": 37,
"tp": 36,
"fp": 0,
"fn_": 0,
"fn_": 1,
"tn": 41,
"precision": 1.0,
"recall": 1.0,
"f1": 1.0
"recall": 0.972972972972973,
"f1": 0.9863013698630138
},
"typescript": {
"tp": 34,
"tp": 35,
"fp": 0,
"fn_": 0,
"tn": 25,
"tn": 26,
"precision": 1.0,
"recall": 1.0,
"f1": 1.0
@ -9262,7 +9295,7 @@
"tp": 0,
"fp": 1,
"fn_": 0,
"tn": 254,
"tn": 255,
"precision": 0.0,
"recall": 1.0,
"f1": 0.0
@ -9288,11 +9321,11 @@
"sqli": {
"tp": 30,
"fp": 0,
"fn_": 0,
"fn_": 1,
"tn": 0,
"precision": 1.0,
"recall": 1.0,
"f1": 1.0
"recall": 0.967741935483871,
"f1": 0.9836065573770492
},
"ssrf": {
"tp": 30,
@ -9315,31 +9348,31 @@
},
"by_confidence": {
">=High": {
"tp": 78,
"fp": 107,
"fn_": 171,
"tn": 148,
"precision": 0.42162162162162165,
"recall": 0.3132530120481928,
"f1": 0.359447004608295
"tp": 81,
"fp": 105,
"fn_": 169,
"tn": 151,
"precision": 0.43548387096774194,
"recall": 0.324,
"f1": 0.37155963302752293
},
">=Low": {
"tp": 82,
"fp": 126,
"fn_": 167,
"tn": 129,
"precision": 0.3942307692307692,
"recall": 0.3293172690763052,
"f1": 0.35886214442013126
"tp": 87,
"fp": 124,
"fn_": 163,
"tn": 132,
"precision": 0.41232227488151657,
"recall": 0.348,
"f1": 0.3774403470715834
},
">=Medium": {
"tp": 82,
"fp": 121,
"fn_": 167,
"tn": 134,
"precision": 0.4039408866995074,
"recall": 0.3293172690763052,
"f1": 0.3628318584070796
"tp": 87,
"fp": 118,
"fn_": 163,
"tn": 138,
"precision": 0.424390243902439,
"recall": 0.348,
"f1": 0.3824175824175824
}
}
}