Authorization analysis logic improvements (#61)

This commit is contained in:
Eli Peter 2026-05-02 16:44:49 -04:00 committed by GitHub
parent 3c89bddbf2
commit 40995e45e7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
55 changed files with 4193 additions and 134 deletions

View file

@ -1406,6 +1406,7 @@ pub(crate) fn extract_intra_file_ssa_summaries(
mod_aliases_ref,
None,
Some(&formal_params),
None,
);
// Only store if the summary has observable effects. With
@ -1531,6 +1532,11 @@ pub(crate) fn lower_all_functions_from_bodies(
} else {
Some(&mod_aliases)
};
let formal_destructured = if !body.meta.param_destructured_fields.is_empty() {
Some(body.meta.param_destructured_fields.as_slice())
} else {
None
};
let summary = ssa_transfer::extract_ssa_func_summary(
&func_ssa,
&body.graph,
@ -1543,6 +1549,7 @@ pub(crate) fn lower_all_functions_from_bodies(
mod_aliases_ref,
locator,
Some(formal_params),
formal_destructured,
);
// Always insert the summary, even when all fields are empty/default.
@ -1775,6 +1782,11 @@ fn rerun_extraction_with_augmented_summaries(
Some(&mod_aliases)
};
let formal_destructured = if !body.meta.param_destructured_fields.is_empty() {
Some(body.meta.param_destructured_fields.as_slice())
} else {
None
};
let new_summary = ssa_transfer::extract_ssa_func_summary_full(
&callee.ssa,
parent_cfg,
@ -1788,6 +1800,7 @@ fn rerun_extraction_with_augmented_summaries(
locator,
Some(&body.meta.params),
Some(&augmented_snapshot),
formal_destructured,
);
// OR-merge sink-only fields into the existing summary.
@ -1796,8 +1809,16 @@ fn rerun_extraction_with_augmented_summaries(
}
}
/// OR-merge `param_to_sink` and `param_to_sink_param` from `src` into
/// `dst`. Existing entries are preserved; only NEW entries are added.
/// OR-merge `param_to_sink`, `param_to_sink_param`, and
/// `validated_params_to_return` from `src` into `dst`. Existing entries
/// are preserved; only NEW entries are added.
///
/// The validated-param list grows monotonically across extraction
/// rounds: a parameter that proves validated under any extraction
/// pass (the augmented second pass typically resolves more
/// cross-function summaries than the first) stays validated. Drops
/// here would silently lose CVE-2026-25544-class precision the
/// re-extraction pass was specifically designed to recover.
fn merge_sink_fields(
dst: &mut crate::summary::ssa_summary::SsaFuncSummary,
src: &crate::summary::ssa_summary::SsaFuncSummary,
@ -1823,6 +1844,11 @@ fn merge_sink_fields(
dst.param_to_sink_param.push((idx, pos, caps));
}
}
for &idx in &src.validated_params_to_return {
if !dst.validated_params_to_return.contains(&idx) {
dst.validated_params_to_return.push(idx);
}
}
}
/// Walk lexical-containment children of every parent body and lift

View file

@ -377,6 +377,24 @@ pub fn classify_condition(text: &str) -> PredicateKind {
return PredicateKind::ValidationCall;
}
// Regex / pattern allowlist calls of the form `<X>.test(value)`,
// `<X>.match(value)` or `<X>.matches(value)`, where the receiver name
// carries a regex or pattern marker. The standard JS / TS / Python /
// Java / Ruby / Go regex APIs expose a method of this shape whose truthy
// result means `value` matches the pattern, so the success arm (true) is
// the validated one. Conservative on receiver names so non-regex methods
// like `obj.test(x)` (test runner), `db.test(...)` (test column) etc.
// don't get pulled in. Motivated by Payload CVE-2026-25544
// (`if (!SAFE_STRING_REGEX.test(value)) throw …;`).
if (bare == "test" || bare == "match" || bare == "matches")
&& let Some(dot_pos) = callee_part.rfind('.')
{
let receiver = &callee_part[..dot_pos];
let receiver_lower = receiver.to_ascii_lowercase();
if receiver_lower.contains("regex") || receiver_lower.contains("pattern") {
return PredicateKind::ValidationCall;
}
}
// Sanitizer
if bare.contains("sanitiz") || bare.contains("escape") || bare.contains("encode") {
return PredicateKind::SanitizerCall;
@ -638,6 +656,19 @@ fn extract_validation_target(text: &str) -> Option<String> {
// Check for method call pattern: `x.method(...)` or `x.method_name(...)`
if let Some(dot_pos) = callee_part.rfind('.') {
let receiver = callee_part[..dot_pos].trim();
let method = callee_part[dot_pos + 1..].trim().to_ascii_lowercase();
// Regex-allowlist `<re>.test(value)` / `<re>.match(value)` / `<re>.matches(value)`:
// the validated target is the call's first argument, not the regex
// receiver. Without this special case, branch narrowing would mark
// the regex itself as validated and leave the user input alone.
if matches!(method.as_str(), "test" | "match" | "matches")
&& let Some(first_arg) = first_call_arg(args_part)
{
let first_arg = first_arg.strip_prefix('&').unwrap_or(first_arg).trim();
if !first_arg.is_empty() && is_identifier(first_arg) {
return Some(first_arg.to_string());
}
}
if !receiver.is_empty() && is_identifier(receiver) {
return Some(receiver.to_string());
}
@ -977,6 +1008,33 @@ mod tests {
assert_eq!(target.as_deref(), Some("x"));
}
/// A negated regex-allowlist check `!<X>.test(value)` must classify as
/// [`PredicateKind::ValidationCall`], and the validated target must be
/// the call's argument (`value`) rather than the regex receiver.
///
/// Pinned because method calls otherwise default to the
/// receiver-as-target heuristic. Motivated by Payload CVE-2026-25544
/// (`if (!SAFE_STRING_REGEX.test(value)) throw …;`).
#[test]
fn target_regex_test_first_arg() {
    let condition = "!SAFE_STRING_REGEX.test(value)";
    let (predicate, validated) = classify_condition_with_target(condition);
    assert_eq!(predicate, PredicateKind::ValidationCall);
    assert_eq!(validated.as_deref(), Some("value"));
}
/// A receiver whose name carries the `pattern` marker (instead of
/// `regex`) is recognised the same way: the call classifies as
/// [`PredicateKind::ValidationCall`] and the target is the first argument.
#[test]
fn target_regex_test_pattern_receiver() {
    let condition = "ALLOWED_PATTERN.test(s)";
    let (predicate, validated) = classify_condition_with_target(condition);
    assert_eq!(predicate, PredicateKind::ValidationCall);
    assert_eq!(validated.as_deref(), Some("s"));
}
/// A `.test(...)` call on a receiver whose name has no regex/pattern
/// marker must NOT be treated as a validator: `obj.test(x)` is a test
/// runner, not a regex, so the condition stays [`PredicateKind::Unknown`].
#[test]
fn target_test_non_regex_receiver_is_not_validation() {
    assert_eq!(
        classify_condition("obj.test(value)"),
        PredicateKind::Unknown
    );
}
#[test]
fn target_comparison_extracts_identifier_side() {
let (kind, target) = classify_condition_with_target("x == 5");

View file

@ -3499,7 +3499,21 @@ pub(super) fn transfer_inst(
// `ssa/lower.rs`), which inflates `args.len()` beyond the real
// positional arity. The CFG's `arg_uses` is the authoritative
// positional-arg list.
let arity_hint = info.call.arg_uses.len();
//
// Fallback: certain TypeScript call shapes — notably calls
// inside template-string substitutions (`${fn(arg)}`) — get
// their `arg_uses` dropped by CFG lowering even though the
// call's positional `args` are intact. When that happens
// the strict `Some(0)` arity hint silently fails to match
// any callee that takes ≥1 arg, swallowing summary
// resolution. Detect the asymmetry and pass `None` so
// `resolve_local_func_key_query`'s unique-name fallback
// can still pick up the lone candidate.
let arity_hint = if info.call.arg_uses.is_empty() && !args.is_empty() {
None
} else {
Some(info.call.arg_uses.len())
};
// Type-aware resolution: when the SSA receiver value has a
// known abstract type (HttpClient, URL, …), feed that into
// the resolver as an authoritative `receiver_type`. This
@ -3511,7 +3525,7 @@ pub(super) fn transfer_inst(
callee,
caller_func,
info.call.call_ordinal,
Some(arity_hint),
arity_hint,
*receiver,
);
@ -3627,6 +3641,43 @@ pub(super) fn transfer_inst(
env.refine(inst.value, &fact);
}
}
// Validated-flow propagation through callee summaries.
//
// Runs regardless of whether inline analysis already
// resolved the call: inline analysis re-runs the
// callee's taint with caller-side seeds but does not
// surface the callee's symbol-keyed
// `validated_must` / `validated_may` state into the
// caller, so the summary-level signal is the only
// channel for propagating helper-validation across
// a function boundary.
//
// When the callee's body validates a parameter on
// every return path that carries the param's caps
// (regex allowlist, type check, validation call, …),
// a normal-returning call site is the validating arm
// by construction: control could not reach the
// post-call instruction unless the helper's
// predicate(s) accepted the argument. Mark each
// tainted argument's `var_name` and the call's
// result `var_name` in the caller's
// `validated_must` / `validated_may` sets so
// subsequent sinks observe `all_validated = true`,
// the same way an inline `if (!regex.test(x)) throw`
// validates the surviving branch. Closes the
// helper-validator propagation gap surfaced by
// CVE-2026-25544 (Payload `sanitizeValue` SQLi).
if !resolved.validated_params_to_return.is_empty() {
propagate_validated_params_to_return(
inst,
args,
ssa,
transfer.interner,
state,
&resolved.validated_params_to_return,
);
}
}
// When find_classifiable_inner_call overrides the callee (e.g.
@ -3640,7 +3691,7 @@ pub(super) fn transfer_inst(
oc,
caller_func,
info.call.call_ordinal,
Some(arity_hint),
arity_hint,
) {
if resolved_container_to_return.is_empty() {
resolved_container_to_return =
@ -3735,6 +3786,24 @@ pub(super) fn transfer_inst(
if !aggregate_sanitizer_applied {
return_bits &= !resolved.sanitizer_caps;
}
// Validated-flow propagation through callee summaries.
//
// When the callee's body validates a parameter on every
// return path (regex allowlist, type check, validation
// call, etc. — see
// [`crate::summary::ssa_summary::SsaFuncSummary::validated_params_to_return`]),
// a normal-returning call site is the validating arm by
// construction: control could not reach the post-call
// instruction unless the helper's predicate(s) accepted
// the argument. Mark each tainted argument's `var_name`
// and the call's result `var_name` in the caller's
// `validated_must` / `validated_may` sets so subsequent
// sinks observe `all_validated = true`, the same way an
// inline `if (!regex.test(x)) throw` validates the
// surviving branch. Closes the helper-validator
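// propagation gap surfaced by CVE-2026-25544 (Payload
// `sanitizeValue` SQLi).
//
// NOTE(review): no statement follows this comment in this block; the
// marking it describes is performed by
// `propagate_validated_params_to_return`. Confirm this comment is
// still wanted here or fold it into that call site.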
}
// Type-qualified receiver resolution: when normal callee resolution
@ -4236,7 +4305,7 @@ pub(super) fn transfer_inst(
oc,
caller_func,
info.call.call_ordinal,
Some(arity_hint),
arity_hint,
) {
if !oc_sum.propagates_taint && oc_sum.source_caps.is_empty() {
// Outer callee blocks taint: no param→return flow,
@ -6301,6 +6370,60 @@ fn collect_args_taint(
/// [`Cap::UNAUTHORIZED_ID`], ownership/membership guards prove on
/// inputs rather than the return value. Other caps and origins are
/// untouched.
// NOTE(review): the `///` lines directly above this item appear to be the
// tail of the doc comment that preceded `strip_cap_from_call_args` before
// this function was inserted, so they now attach to this function instead.
// Consider relocating this function so that comment sits directly above
// `strip_cap_from_call_args` again.
/// Applies a callee's [`SsaFuncSummary::validated_params_to_return`] at a
/// call site.
///
/// For every parameter index `p` listed in `validated_params`, each SSA
/// value in `args[p]` that currently carries taint caps has its
/// `var_name` recorded in the caller's `validated_must` and
/// `validated_may` sets. If at least one such tainted argument was
/// found, the `var_name` of the call's own result (`inst.value`) is
/// recorded as well. This mirrors the symbol-keyed validation that an
/// inline `if (!regex.test(x)) throw` establishes on the surviving
/// branch.
///
/// The summary side only lists a parameter when it is in
/// `validated_must` on every return path that carries the parameter's
/// caps, so a call that returns normally has passed the callee's
/// validating predicate(s).
///
/// Nothing is recorded when:
/// * a listed index has no entry in `args`;
/// * an argument value has no taint caps in `state`;
/// * a value has no `var_name`, or the interner has no symbol for it.
fn propagate_validated_params_to_return(
    inst: &SsaInst,
    args: &[SmallVec<[SsaValue; 2]>],
    ssa: &SsaBody,
    interner: &crate::state::symbol::SymbolInterner,
    state: &mut SsaTaintState,
    validated_params: &[usize],
) {
    // Resolve an SSA value to the interned symbol of its source-level
    // name and record that symbol as validated in both sets.
    let record_validated = |value: SsaValue, taint_state: &mut SsaTaintState| {
        let symbol = ssa
            .value_defs
            .get(value.0 as usize)
            .and_then(|def| def.var_name.as_deref())
            .and_then(|name| interner.get(name));
        if let Some(symbol) = symbol {
            taint_state.validated_must.insert(symbol);
            taint_state.validated_may.insert(symbol);
        }
    };

    // Every SSA value passed in a validated parameter position, in
    // `validated_params` order; out-of-range positions are skipped.
    let candidate_values = validated_params
        .iter()
        .filter_map(|&param_idx| args.get(param_idx))
        .flat_map(|values| values.iter().copied());

    let mut saw_tainted_arg = false;
    for value in candidate_values {
        let is_tainted = state
            .get(value)
            .is_some_and(|taint| !taint.caps.is_empty());
        if is_tainted {
            saw_tainted_arg = true;
            record_validated(value, state);
        }
    }

    // The call result is only marked when some argument was actually
    // tainted, so untouched names are not validated spuriously.
    if saw_tainted_arg {
        record_validated(inst.value, state);
    }
}
fn strip_cap_from_call_args(
args: &[SmallVec<[SsaValue; 2]>],
receiver: &Option<SsaValue>,
@ -8676,6 +8799,14 @@ struct ResolvedSummary {
/// `field_points_to` records. Applied at the caller call site by
/// `apply_field_points_to_writes`.
field_points_to: crate::summary::points_to::FieldPointsToSummary,
/// Parameter indices whose taint flow to the return is fully
/// validated by a dominating predicate inside the callee on every
/// return path. Mirrors
/// [`crate::summary::ssa_summary::SsaFuncSummary::validated_params_to_return`].
/// Populated only via `convert_ssa_to_resolved`; other resolution
/// paths leave it empty (label / coarse-FuncSummary forms cannot
/// express per-path predicate validation).
validated_params_to_return: Vec<usize>,
}
fn resolve_callee(
@ -8825,6 +8956,7 @@ fn resolve_callee_full(
points_to: Default::default(),
field_points_to: Default::default(),
param_to_gate_filters: vec![],
validated_params_to_return: vec![],
});
}
// Try label classification for the bound function (by leaf name).
@ -8896,6 +9028,7 @@ fn resolve_callee_full(
points_to: Default::default(),
field_points_to: Default::default(),
param_to_gate_filters: vec![],
validated_params_to_return: vec![],
});
}
}
@ -9041,6 +9174,7 @@ fn resolve_callee_full(
points_to: Default::default(),
field_points_to: Default::default(),
param_to_gate_filters: vec![],
validated_params_to_return: vec![],
});
}
} else {
@ -9091,6 +9225,7 @@ fn resolve_callee_full(
points_to: Default::default(),
field_points_to: Default::default(),
param_to_gate_filters: vec![],
validated_params_to_return: vec![],
};
match widened.len() {
0 => {}
@ -9162,6 +9297,7 @@ fn resolve_callee_full(
points_to: Default::default(),
field_points_to: Default::default(),
param_to_gate_filters: vec![],
validated_params_to_return: vec![],
});
}
}
@ -9344,6 +9480,7 @@ fn convert_ssa_to_resolved_for_caller(
points_to: ssa_sum.points_to.clone(),
field_points_to: ssa_sum.field_points_to.clone(),
param_to_gate_filters: ssa_sum.param_to_gate_filters.clone(),
validated_params_to_return: ssa_sum.validated_params_to_return.to_vec(),
}
}

View file

@ -50,6 +50,7 @@ pub fn extract_ssa_func_summary(
module_aliases: Option<&HashMap<SsaValue, SmallVec<[String; 2]>>>,
locator: Option<&crate::summary::SinkSiteLocator<'_>>,
formal_param_names: Option<&[String]>,
formal_destructured_fields: Option<&[Vec<String>]>,
) -> crate::summary::ssa_summary::SsaFuncSummary {
extract_ssa_func_summary_full(
ssa,
@ -64,6 +65,7 @@ pub fn extract_ssa_func_summary(
locator,
formal_param_names,
None,
formal_destructured_fields,
)
}
@ -93,6 +95,15 @@ pub fn extract_ssa_func_summary_full(
ssa_summaries: Option<
&HashMap<crate::symbol::FuncKey, crate::summary::ssa_summary::SsaFuncSummary>,
>,
// Per-parameter destructured-binding sibling names. Entry `i` is
// the list of field names destructured by the same call-site arg
// slot as the primary `formal_param_names[i]`, excluding the
// primary name. Empty vec for non-destructured params; `None` for
// callers that don't carry destructure info (legacy / test paths).
// Drives the destructured-arg expansion in the per-param probe so
// taint flow through sibling bindings is visible to summary
// extraction (CVE-2026-25544 / @payloadcms/drizzle SQLi).
formal_destructured_fields: Option<&[Vec<String>]>,
) -> crate::summary::ssa_summary::SsaFuncSummary {
use crate::summary::SinkSite;
use crate::summary::ssa_summary::{SsaFuncSummary, TaintTransform};
@ -159,13 +170,32 @@ pub fn extract_ssa_func_summary_full(
/// Inner [`PathFact`] when the rv on this path is a one-arg
/// variant constructor; [`None`] otherwise.
variant_inner_fact: Option<crate::abstract_interp::PathFact>,
/// `true` when the per-param probe's seeded parameter var_name
/// is in this return block's exit `validated_must`. `false`
/// for the baseline (no-seed) probe and for params not
/// validated on this path. Drives
/// `validated_params_to_return` summary extraction.
param_validated_must: bool,
}
// Helper: run a taint probe with a given global_seed and return
// the aggregate return caps, sink events, joined return abstract,
// and the per-return-block observation list used to derive
// per-return-path transforms.
let run_probe = |seed: HashMap<BindingKey, VarTaint>| -> (
//
// `probe_param_names` lists the seeded parameter's `var_name`
// plus any destructured-binding siblings sharing the slot
// (`None` for the baseline source-caps probe). When non-empty,
// each return-block observation records whether ANY of those
// names is in the exit state's `validated_must`, which feeds
// `validated_params_to_return` summary extraction below. The
// any-name semantics matches the slot-wide model: a destructured
// formal `({ a, b, c })` represents one call-site slot, and any
// sibling reaching `validated_must` proves the slot's caps were
// narrowed before reaching the return.
let run_probe = |seed: HashMap<BindingKey, VarTaint>,
probe_param_names: Option<&[&str]>|
-> (
Cap,
Vec<SsaTaintEvent>,
Option<crate::abstract_interp::AbstractValue>,
@ -313,6 +343,13 @@ pub fn extract_ssa_func_summary_full(
// The hash is stable across runs for a given predicate
// shape so call sites can compare paths deterministically.
let (predicate_hash, known_true, known_false) = summarise_return_predicates(&exit);
let param_validated_must = match probe_param_names {
Some(names) => names.iter().any(|name| match interner.get(name) {
Some(sym) => exit.validated_must.contains(sym),
None => false,
}),
None => false,
};
per_return.push(ReturnBlockObs {
derived_caps: block_derived_caps,
param_caps: block_param_caps,
@ -322,6 +359,7 @@ pub fn extract_ssa_func_summary_full(
abstract_value: block_abs,
path_fact: block_path_fact,
variant_inner_fact: block_variant_inner,
param_validated_must,
});
}
}
@ -343,7 +381,7 @@ pub fn extract_ssa_func_summary_full(
// Abstract values don't depend on taint seeding, so the baseline probe
// captures the function's intrinsic abstract return value.
let (baseline_return_caps, _baseline_events, return_abstract, baseline_obs) =
run_probe(HashMap::new());
run_probe(HashMap::new(), None);
let source_caps = baseline_return_caps;
// Per-return-path PathFact decomposition derived from the baseline
@ -403,6 +441,12 @@ pub fn extract_ssa_func_summary_full(
usize,
SmallVec<[crate::summary::ssa_summary::ReturnPathTransform; 2]>,
)> = Vec::new();
// Parameter indices whose taint flow to the return is fully
// validated by a dominating predicate on every return path.
// Populated below by checking each per-param probe's return-block
// exit states for `validated_must` containing the param's
// var_name. Empty when no parameter is validated.
let mut validated_params_to_return: SmallVec<[usize; 2]> = SmallVec::new();
for &(idx, ref var_name, _ssa_val) in &param_info {
let mut seed = HashMap::new();
@ -421,6 +465,37 @@ pub fn extract_ssa_func_summary_full(
probe_taint.clone(),
);
// Destructured-arg sibling expansion. When the formal at slot
// `idx` destructures an object pattern (`({ column, operator,
// value })`), the SSA body emits a separate [`SsaOp::Param`]
// for every destructured binding (sequential indices > slot
// count, since the closure-capture pass treats them as
// free-identifier reads). The call-site only passes ONE arg
// for the slot, so the engine never seeds the sibling Param
// ops at runtime — but the per-parameter SUMMARY probe must
// model "if this slot is tainted then every binding it
// produced is tainted too". Seed each sibling's `var_name`
// with the same caps the primary received. The probe-level
// `validated_must` check below treats the slot as validated
// when ANY sibling lands in `validated_must` on a return path.
//
// Closes the residual gap behind CVE-2026-25544 (PayloadCMS
// `@payloadcms/drizzle` SQLi via `createJSONQuery({ value })`):
// the validator helper `sanitizeValue(value, operator)` lives
// inside the body and the probe needs to see `value` flow
// through the `validated_params_to_return` channel before
// suppressing the caller's sink.
let slot_siblings: &[String] = formal_destructured_fields
.and_then(|d| d.get(idx))
.map(|v| v.as_slice())
.unwrap_or(&[]);
for sib in slot_siblings {
seed.insert(
BindingKey::new(sib.as_str(), BodyId(0)),
probe_taint.clone(),
);
}
// Phantom-Param prefix seeding. SSA lowering of arrow / nested
// function bodies often exposes free-identifier member-access
// expressions (e.g. `file._source.uri`) as their own
@ -437,13 +512,18 @@ pub fn extract_ssa_func_summary_full(
// `formal_var_name + "."` with the same caps the formal param
// received: semantically "if `file` is tainted, then every
// observable field path on `file` is tainted too". Bounded
// by SSA size; cap-equivalent to direct seeding.
let prefix = format!("{}.", var_name);
// by SSA size; cap-equivalent to direct seeding. Mirror this
// for each destructured sibling (`value.foo` / `column.name`
// member-projections inside the body).
let prefixes: Vec<String> = std::iter::once(var_name.clone())
.chain(slot_siblings.iter().cloned())
.map(|n| format!("{}.", n))
.collect();
for block in &ssa.blocks {
for inst in block.phis.iter().chain(block.body.iter()) {
if let SsaOp::Param { .. } = &inst.op {
if let Some(name) = inst.var_name.as_ref() {
if name.starts_with(&prefix) {
if prefixes.iter().any(|p| name.starts_with(p)) {
seed.insert(
BindingKey::new(name.as_str(), BodyId(0)),
probe_taint.clone(),
@ -454,7 +534,15 @@ pub fn extract_ssa_func_summary_full(
}
}
let (return_caps, events, _, per_return_obs) = run_probe(seed);
// Build the slot-wide name list for the validated_must check:
// primary first, then siblings. The entries are borrowed `&str`
// views of the existing names (no copies are made) — `run_probe`
// only reads them in its inner loop.
let mut slot_names: Vec<&str> = Vec::with_capacity(1 + slot_siblings.len());
slot_names.push(var_name.as_str());
for sib in slot_siblings {
slot_names.push(sib.as_str());
}
let (return_caps, events, _, per_return_obs) = run_probe(seed, Some(slot_names.as_slice()));
// Subtract baseline source_caps, we only want param-contributed caps
let param_return_caps = return_caps & !source_caps;
@ -469,6 +557,44 @@ pub fn extract_ssa_func_summary_full(
param_to_return.push((idx, transform));
}
// Validated-param-to-return detection.
//
// When the per-param probe shows that the parameter's
// `var_name` is in `validated_must` on every return path that
// *carries the parameter's contributed caps*, record the
// parameter as validated. The caller will mark each tainted
// argument passed to this position — and the call's own
// return value — as `validated_must` / `validated_may`, the
// same way an inline `if (!regex.test(x)) throw` would
// validate the surviving branch.
//
// Conservative gating:
// * Skip when the param contributes no caps to the return,
// a degenerate "validated but irrelevant" record.
// * Skip when no return block was observed (probes that
// diverged or hit `MAX_PROBE_PARAMS`).
// * Require validation on every return path that *carries
// param caps to the return*. Branches that return
// constants (e.g. `if (x === null) return 'NULL'`) carry
// no param taint and don't need a validation predicate.
// * Require ≥1 path that actually validates the param.
if !param_return_caps.is_empty() && !per_return_obs.is_empty() {
let mut any_carrying_path = false;
let all_carrying_validated = per_return_obs.iter().all(|obs| {
let carries = !(obs.derived_caps & !source_caps).is_empty()
|| !(obs.param_caps & !source_caps).is_empty();
if carries {
any_carrying_path = true;
obs.param_validated_must
} else {
true
}
});
if any_carrying_path && all_carrying_validated {
validated_params_to_return.push(idx);
}
}
// Derive per-return-path decomposition. For each
// observed return block, derive a `ReturnPathTransform` mirroring
// the aggregate logic (prefer derived caps, fall back to param
@ -694,6 +820,7 @@ pub fn extract_ssa_func_summary_full(
// extractor itself doesn't carry receiver-type info, the
// caller patches it in.
typed_call_receivers: Vec::new(),
validated_params_to_return,
}
}

View file

@ -1641,6 +1641,7 @@ mod fanout_merge_tests {
points_to: Default::default(),
field_points_to: Default::default(),
param_to_gate_filters: vec![],
validated_params_to_return: vec![],
}
}

View file

@ -4331,6 +4331,7 @@ fn ssa_summary_identity_propagation() {
None,
None,
None,
None,
);
assert!(
!summary.param_to_return.is_empty(),
@ -4394,6 +4395,7 @@ fn ssa_summary_sanitizer_strips_bits() {
None,
None,
None,
None,
);
// Sanitizer should strip some bits
for (_, transform) in &summary.param_to_return {
@ -4450,6 +4452,7 @@ fn ssa_summary_source_adds_bits() {
None,
None,
None,
None,
);
assert!(
!summary.source_caps.is_empty(),
@ -4506,6 +4509,7 @@ fn ssa_summary_param_to_sink() {
None,
None,
None,
None,
);
assert!(
!summary.param_to_sink.is_empty(),
@ -6122,6 +6126,61 @@ async function handler(req) {
);
}
/// Direct-flow regression for the regex allowlist rule: a
/// `<X>.test(value)` guard is a ValidationCall whose target is the
/// call's first argument, not the regex receiver.
///
/// Shape under test:
///
/// ```js
/// const v = req.body.x;
/// if (!SAFE_REGEX.test(v)) { throw }
/// db.execute(v); // direct flow: should be silent
/// ```
///
/// Two pieces cooperate here. `classify_condition` yields
/// ValidationCall for the `*regex*.test()` receiver shape (see
/// `target_regex_test_first_arg` in path_state), and
/// `extract_validation_target` replaces the default receiver-as-target
/// rule with the call's first argument. Combined with the existing
/// CFG-level negation handling in `compute_succ_states`, the false
/// branch (the one that continues) marks `v` as validated.
///
/// Motivated by Payload CVE-2026-25544
/// (`if (!SAFE_STRING_REGEX.test(value)) throw`). Only the direct-flow
/// case is pinned; transitive validation through SSA-derived values
/// (e.g. template-literal concat of `v` into `sql`) is a deeper gap
/// tracked separately and not closed here.
#[test]
fn regex_test_allowlist_narrowing_clears_direct_flow() {
    let src = br#"
const SAFE_REGEX = /^[\w]+$/;
async function handler(req) {
const userValue = req.body.filter;
if (!SAFE_REGEX.test(userValue)) {
throw new Error('bad');
}
return await db.execute(userValue);
}
"#;
    let file_cfg = parse_lang(
        src,
        "javascript",
        tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE),
    );
    let findings = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        findings.is_empty(),
        "regex.test allowlist narrowing should suppress direct-flow finding; got {} finding(s): {findings:?}",
        findings.len()
    );
}
/// Regression: `extract_ssa_func_summary` must skip `all_validated`
/// events when populating `param_to_sink` / `param_to_sink_param`.
///
@ -6205,6 +6264,282 @@ async function handler(req) {
);
}
/// Regression for CVE-2026-25544 deep fix
/// (`validated_params_to_return` summary field): a helper that
/// validates its parameter via a regex `.test(...)` allowlist and
/// returns a string derived from the validated parameter must
/// suppress the caller's downstream sink even when:
///   * the caller binds the call result to a fresh variable
///     (`const sql = sanitize(userValue)`), and
///   * the helper's return is a *derived* template literal, not a
///     pass-through of the parameter itself.
///
/// Sound because the helper only returns normally on the validating
/// arm — control could not reach the post-call instruction unless
/// the regex accepted the argument. Pinned by
/// `propagate_validated_params_to_return` marking both the arg and
/// the call result `validated_must` / `validated_may` so the sink's
/// `all_validated` check fires.
///
/// On failure the offending findings are printed in full (not just
/// their count) so a regression can be diagnosed from the test output.
#[test]
fn validated_params_to_return_suppresses_one_hop_helper_validator() {
    let src = br#"
const SAFE_REGEX = /^[\w]+$/;
const sanitize = (value) => {
if (!SAFE_REGEX.test(value)) throw new Error('bad');
return `safe:${value}`;
};
async function handler(req) {
const userValue = req.body.filter;
const sql = sanitize(userValue);
db.execute(sql);
}
"#;
    let lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let file_cfg = parse_lang(src, "javascript", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        findings.is_empty(),
        "regex.test allowlist inside helper must suppress caller sink; got {} finding(s): {findings:?}",
        findings.len()
    );
}
/// Two-hop variant of
/// `validated_params_to_return_suppresses_one_hop_helper_validator`:
/// when the validator helper is itself wrapped by another helper
/// that derives a new string from the validator's return (here by
/// concatenation, `s + '!'`), summary extraction must still surface
/// `validated_params_to_return` on the *outer* helper. This pins
/// the second-pass re-extraction (via
/// `re_extract_summaries_with_augment_view`) plus the OR-merge of
/// `validated_params_to_return` in `merge_sink_fields`.
///
/// On failure the offending findings are printed in full (not just
/// their count) so a regression can be diagnosed from the test output.
#[test]
fn validated_params_to_return_suppresses_two_hop_helper_validator() {
    let src = br#"
const SAFE_REGEX = /^[\w]+$/;
const sanitize = (value) => {
if (!SAFE_REGEX.test(value)) throw new Error('bad');
return value;
};
const buildQuery = (value) => {
const s = sanitize(value);
return s + '!';
};
async function handler(req) {
const userValue = req.body.filter;
const sql = buildQuery(userValue);
db.execute(sql);
}
"#;
    let lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let file_cfg = parse_lang(src, "javascript", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        findings.is_empty(),
        "two-hop helper-validator must propagate validated_params_to_return through both helpers; got {} finding(s): {findings:?}",
        findings.len()
    );
}
/// Negative counterpart of
/// `validated_params_to_return_suppresses_one_hop_helper_validator`.
/// The helper has the same shape but contains NO regex.test guard, so
/// the caller's sink must still be reported. Guards against the
/// validated-flow propagation over-suppressing when the helper does
/// not actually validate its parameter.
#[test]
fn validated_params_to_return_does_not_suppress_unvalidated_helper() {
    let src = br#"
const sanitize = (value) => {
return `safe:${value}`;
};
async function handler(req) {
const userValue = req.body.filter;
const sql = sanitize(userValue);
db.execute(sql);
}
"#;
    let file_cfg = parse_lang(
        src,
        "javascript",
        tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE),
    );
    let findings = analyse_file(
        &file_cfg,
        &file_cfg.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        !findings.is_empty(),
        "helper without regex guard must still flag the caller sink",
    );
}
/// Regression: the per-parameter summary probe must seed every
/// destructured object-pattern binding that shares a call-site slot,
/// not only the primary name picked by `extract_param_meta`.
///
/// Here `buildQuery` destructures its single argument as
/// `({ value }) => …` and validates the `value` binding through
/// `sanitize`; the caller passes `{ value: userValue }`. For the
/// caller's sink to be suppressed, `validated_params_to_return = [0]`
/// has to be provable for `buildQuery`, which requires the probe to
/// treat the destructured binding as tainted when slot 0 is seeded.
/// Closes the residual blocker for CVE-2026-25544 (PayloadCMS Drizzle
/// SQLi).
///
/// On failure the offending findings are printed in full (not just
/// their count) so a regression can be diagnosed from the test output.
#[test]
fn validated_params_to_return_suppresses_destructured_object_arg_helper() {
    let src = br#"
const SAFE_REGEX = /^[\w]+$/;
const sanitize = (value) => {
if (!SAFE_REGEX.test(value)) throw new Error('bad');
return value;
};
const buildQuery = ({ value }) => {
const s = sanitize(value);
return s + '!';
};
async function handler(req) {
const userValue = req.body.filter;
const sql = buildQuery({ value: userValue });
db.execute(sql);
}
"#;
    let lang = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let file_cfg = parse_lang(src, "javascript", lang);
    let summaries = &file_cfg.summaries;
    let findings = analyse_file(
        &file_cfg,
        summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        findings.is_empty(),
        "destructured object-pattern arg with regex.test allowlist inside the helper must suppress caller sink; got {} finding(s): {findings:?}",
        findings.len()
    );
}
/// Regression test: TypeScript counterpart of the destructured
/// object-pattern helper case.
///
/// TypeScript exposes the destructure under a wrapper
/// `required_parameter` node (`required_parameter > pattern:
/// object_pattern`), whereas JavaScript exposes it as a direct child
/// of `formal_parameters`. Both shapes must surface the destructured
/// siblings to the per-parameter probe.
///
/// Expectation: with `sanitize` guarding `value` through
/// `SAFE_REGEX.test`, the `db.execute(sql)` call in `handler` yields
/// no findings.
#[test]
fn validated_params_to_return_suppresses_destructured_object_arg_helper_ts() {
    let ts_source = br#"
const SAFE_REGEX = /^[\w]+$/;
const sanitize = (value: string): string => {
if (!SAFE_REGEX.test(value)) throw new Error('bad');
return value;
};
const buildQuery = ({ value }: { value: string }): string => {
const s = sanitize(value);
return s + '!';
};
async function handler(req: any) {
const userValue = req.body.filter;
const sql = buildQuery({ value: userValue });
db.execute(sql);
}
"#;
    let parsed = parse_lang(
        ts_source,
        "typescript",
        tree_sitter::Language::from(tree_sitter_typescript::LANGUAGE_TYPESCRIPT),
    );
    // The file's own summaries double as the summary set for the analysis.
    let findings = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::TypeScript,
        "test.ts",
        &[],
        None,
    );
    assert!(
        findings.is_empty(),
        "TS destructured object-pattern arg with regex.test allowlist must suppress caller sink; got {} finding(s)",
        findings.len()
    );
}
/// Regression test: a multi-field destructured formal must still
/// propagate `validated_params_to_return` when the validated field is
/// NOT the primary name returned by `extract_param_meta`.
///
/// In CVE-2026-25544 the primary is `column` (the first identifier in
/// `{ column, operator, pathSegments, value }`), yet the validator
/// gates `value`. Without sibling seeding the probe never sees that
/// validation. The fixture mirrors this with
/// `({ column, operator, value })`, where only `value` goes through
/// `sanitize`.
///
/// Expectation: the `db.execute(sql)` call in `handler` yields no
/// findings.
#[test]
fn destructured_sibling_validation_propagates_through_summary() {
    let js_source = br#"
const SAFE_REGEX = /^[\w]+$/;
const sanitize = (value) => {
if (!SAFE_REGEX.test(value)) throw new Error('bad');
return value;
};
const buildQuery = ({ column, operator, value }) => {
return `${column} ${operator} ${sanitize(value)}`;
};
async function handler(req) {
const userValue = req.body.filter;
const sql = buildQuery({ column: 'col', operator: '=', value: userValue });
db.execute(sql);
}
"#;
    let parsed = parse_lang(
        js_source,
        "javascript",
        tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE),
    );
    // The file's own summaries double as the summary set for the analysis.
    let findings = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        findings.is_empty(),
        "destructured-sibling validation (validator binds non-primary slot binding) must propagate through summary; got {} finding(s)",
        findings.len()
    );
}
/// Regression: `validate*`-named callees match
/// `InputValidatorPolarity::ErrorReturning`, bare `if (err) throw`
/// guards the success branch (false branch). `is_valid*`/`is_safe*`
@ -6290,3 +6625,153 @@ const handler = (req) => {
None,
);
}
/// Default-valued JS arrow-function parameters must be extracted.
///
/// Tree-sitter wraps each default parameter of
/// `(a = {}, b = {}) => …` in an `assignment_pattern` node whose
/// `left` field carries the actual identifier. When
/// `assignment_pattern` was missing from
/// `PARAM_CONFIG.param_node_kinds`, the param walker skipped those
/// nodes and produced a parameter-less summary for any function whose
/// params have defaults, which broke cross-function `param_to_sink`
/// propagation for shapes like Strapi's `sendTemplatedEmail`.
/// Motivated by CVE-2023-22621.
///
/// Expectation: all three parameter names come back, in declaration
/// order.
#[test]
fn cve_2023_22621_js_default_params_extracted() {
    use crate::cfg::extract_param_meta_for_test;

    /// Pre-order, depth-first search for the first `arrow_function`
    /// node at or below `node`.
    fn first_arrow_function<'a>(node: tree_sitter::Node<'a>) -> Option<tree_sitter::Node<'a>> {
        if node.kind() == "arrow_function" {
            return Some(node);
        }
        let mut cursor = node.walk();
        // Bind the result to a local so the child iterator, which
        // borrows `cursor`, is dropped before `cursor` itself.
        let hit = node
            .named_children(&mut cursor)
            .find_map(first_arrow_function);
        hit
    }

    let js_source = br#"
const sendTemplatedEmail = (emailOptions = {}, emailTemplate = {}, data = {}) => {
return emailTemplate;
};
"#;
    let grammar = tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE);
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&grammar).unwrap();
    let tree = parser.parse(&js_source[..], None).unwrap();

    let arrow = first_arrow_function(tree.root_node()).expect("arrow function not found");
    let param_meta = extract_param_meta_for_test(arrow, "javascript", js_source);
    let names: Vec<String> = param_meta
        .iter()
        .map(|(param_name, _)| param_name.clone())
        .collect();
    assert_eq!(
        names,
        ["emailOptions", "emailTemplate", "data"],
        "expected all 3 default-valued arrow params extracted; got {:?}",
        names
    );
}
/// `_.template(tainted)` must be reported as a server-side template
/// injection sink.
///
/// lodash compiles `<% ... %>` evaluate blocks into a JS Function, so
/// attacker-controlled template text becomes RCE at render time. The
/// gate activates conservatively when arg 1 is missing, because the
/// default lodash behavior is the dangerous one. Motivated by
/// CVE-2023-22621 (Strapi).
///
/// Expectation: `_.template(req.body.tpl)` produces at least one
/// finding.
#[test]
fn cve_2023_22621_lodash_template_fires_on_tainted_input() {
    let js_source = br#"
const _ = require('lodash');
const handler = (req, res) => {
_.template(req.body.tpl);
};
"#;
    let parsed = parse_lang(
        js_source,
        "javascript",
        tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE),
    );
    // The file's own summaries double as the summary set for the analysis.
    let findings = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        !findings.is_empty(),
        "expected taint flow on _.template(req.body.tpl); got 0 findings",
    );
}
/// `_.template(tainted, { evaluate: false })` must NOT be reported.
///
/// Passing `evaluate: false` disables lodash's `<% ... %>` evaluate
/// block compilation, so the call is no longer a code-execution sink.
/// The gate's `keyword_name = "evaluate"` activation reads the
/// literal value via the JS-side closure that walks the call's arg-1
/// object literal, since JS has no language-level keyword args.
/// Motivated by Strapi's CVE-2023-22621 patch.
///
/// Expectation: the fixture yields no findings.
#[test]
fn cve_2023_22621_lodash_template_suppressed_by_evaluate_false() {
    let js_source = br#"
const _ = require('lodash');
const handler = (req, res) => {
_.template(req.body.tpl, { evaluate: false });
};
"#;
    let parsed = parse_lang(
        js_source,
        "javascript",
        tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE),
    );
    // The file's own summaries double as the summary set for the analysis.
    let findings = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        findings.is_empty(),
        "expected no taint flow when evaluate:false is set; got {} findings",
        findings.len(),
    );
}
/// The double-call form `_.template(tainted)(data)` must still fire.
///
/// Here the outer call's `function` field is itself a
/// `call_expression` rather than the member-chain shape
/// `find_chained_inner_call` was originally written for. The
/// extension recognises the `f()()` pattern and rebinds gate
/// classification to the inner call, so the gated `_.template` fires
/// even when the compiled function is invoked immediately. Motivated
/// by CVE-2023-22621.
///
/// Expectation: `_.template(tpl)({})` with `tpl` taken from
/// `req.body.tpl` produces at least one finding.
#[test]
fn cve_2023_22621_lodash_template_double_call_inner_rebinding() {
    let js_source = br#"
const _ = require('lodash');
const handler = (req, res) => {
const tpl = req.body.tpl;
_.template(tpl)({});
};
"#;
    let parsed = parse_lang(
        js_source,
        "javascript",
        tree_sitter::Language::from(tree_sitter_javascript::LANGUAGE),
    );
    // The file's own summaries double as the summary set for the analysis.
    let findings = analyse_file(
        &parsed,
        &parsed.summaries,
        None,
        Lang::JavaScript,
        "test.js",
        &[],
        None,
    );
    assert!(
        !findings.is_empty(),
        "expected taint flow via double-call chain rebinding; got 0 findings",
    );
}