Authorization analysis logic improvements (#61)

This commit is contained in:
Eli Peter 2026-05-02 16:44:49 -04:00 committed by GitHub
parent 3c89bddbf2
commit 40995e45e7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
55 changed files with 4193 additions and 134 deletions

View file

@ -3499,7 +3499,21 @@ pub(super) fn transfer_inst(
// `ssa/lower.rs`), which inflates `args.len()` beyond the real
// positional arity. The CFG's `arg_uses` is the authoritative
// positional-arg list.
let arity_hint = info.call.arg_uses.len();
//
// Fallback: certain TypeScript call shapes — notably calls
// inside template-string substitutions (`${fn(arg)}`) — get
// their `arg_uses` dropped by CFG lowering even though the
// call's positional `args` are intact. When that happens
// the strict `Some(0)` arity hint silently fails to match
// any callee that takes ≥1 arg, swallowing summary
// resolution. Detect the asymmetry and pass `None` so
// `resolve_local_func_key_query`'s unique-name fallback
// can still pick up the lone candidate.
let arity_hint = if info.call.arg_uses.is_empty() && !args.is_empty() {
None
} else {
Some(info.call.arg_uses.len())
};
// Type-aware resolution: when the SSA receiver value has a
// known abstract type (HttpClient, URL, …), feed that into
// the resolver as an authoritative `receiver_type`. This
@ -3511,7 +3525,7 @@ pub(super) fn transfer_inst(
callee,
caller_func,
info.call.call_ordinal,
Some(arity_hint),
arity_hint,
*receiver,
);
@ -3627,6 +3641,43 @@ pub(super) fn transfer_inst(
env.refine(inst.value, &fact);
}
}
// Validated-flow propagation through callee summaries.
//
// Runs regardless of whether inline analysis already
// resolved the call: inline analysis re-runs the
// callee's taint with caller-side seeds but does not
// surface the callee's symbol-keyed
// `validated_must` / `validated_may` state into the
// caller, so the summary-level signal is the only
// channel for propagating helper-validation across
// a function boundary.
//
// When the callee's body validates a parameter on
// every return path that carries the param's caps
// (regex allowlist, type check, validation call, …),
// a normal-returning call site is the validating arm
// by construction: control could not reach the
// post-call instruction unless the helper's
// predicate(s) accepted the argument. Mark each
// tainted argument's `var_name` and the call's
// result `var_name` in the caller's
// `validated_must` / `validated_may` sets so
// subsequent sinks observe `all_validated = true`,
// the same way an inline `if (!regex.test(x)) throw`
// validates the surviving branch. Closes the
// helper-validator propagation gap surfaced by
// CVE-2026-25544 (Payload `sanitizeValue` SQLi).
if !resolved.validated_params_to_return.is_empty() {
propagate_validated_params_to_return(
inst,
args,
ssa,
transfer.interner,
state,
&resolved.validated_params_to_return,
);
}
}
// When find_classifiable_inner_call overrides the callee (e.g.
@ -3640,7 +3691,7 @@ pub(super) fn transfer_inst(
oc,
caller_func,
info.call.call_ordinal,
Some(arity_hint),
arity_hint,
) {
if resolved_container_to_return.is_empty() {
resolved_container_to_return =
@ -3735,6 +3786,24 @@ pub(super) fn transfer_inst(
if !aggregate_sanitizer_applied {
return_bits &= !resolved.sanitizer_caps;
}
// Validated-flow propagation through callee summaries.
//
// When the callee's body validates a parameter on every
// return path (regex allowlist, type check, validation
// call, etc. — see
// [`crate::summary::ssa_summary::SsaFuncSummary::validated_params_to_return`]),
// a normal-returning call site is the validating arm by
// construction: control could not reach the post-call
// instruction unless the helper's predicate(s) accepted
// the argument. Mark each tainted argument's `var_name`
// and the call's result `var_name` in the caller's
// `validated_must` / `validated_may` sets so subsequent
// sinks observe `all_validated = true`, the same way an
// inline `if (!regex.test(x)) throw` validates the
// surviving branch. Closes the helper-validator
// propagation gap surfaced by CVE-2026-25544 (Payload
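// `sanitizeValue` SQLi).
// NOTE(review): no propagation call follows this comment in this
// branch — confirm whether `propagate_validated_params_to_return`
// should be invoked here or whether this comment should be removed.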
}
// Type-qualified receiver resolution: when normal callee resolution
@ -4236,7 +4305,7 @@ pub(super) fn transfer_inst(
oc,
caller_func,
info.call.call_ordinal,
Some(arity_hint),
arity_hint,
) {
if !oc_sum.propagates_taint && oc_sum.source_caps.is_empty() {
// Outer callee blocks taint: no param→return flow,
@ -6301,6 +6370,60 @@ fn collect_args_taint(
/// [`Cap::UNAUTHORIZED_ID`], ownership/membership guards prove on
/// inputs rather than the return value. Other caps and origins are
/// untouched.
/// Propagates a callee's `validated_params_to_return` summary into the
/// caller's symbol-keyed validation state at a single call site.
///
/// Each index in `validated_params` selects one positional slot of `args`;
/// indices with no matching slot are ignored. Within a selected slot, every
/// SSA value whose current taint has a non-empty cap set is treated as
/// validated: its `var_name`, resolved through `interner`, is inserted into
/// both `state.validated_must` and `state.validated_may`. When at least one
/// such tainted value was seen, the call result `inst.value` gets the same
/// treatment.
///
/// Values that have no `var_name`, or whose name the interner does not know,
/// are skipped silently; they still count as "a tainted argument was seen"
/// for the purpose of marking the call result. When no selected argument is
/// tainted the function changes nothing.
///
/// This mirrors what an inline guard such as `if (!regex.test(x)) throw`
/// records on the surviving branch. The callee summary is expected to list a
/// parameter only when it is validated on every return path, so a call that
/// returns normally stands in for the validating arm.
fn propagate_validated_params_to_return(
    inst: &SsaInst,
    args: &[SmallVec<[SsaValue; 2]>],
    ssa: &SsaBody,
    interner: &crate::state::symbol::SymbolInterner,
    state: &mut SsaTaintState,
    validated_params: &[usize],
) {
    // Pure lookup: SSA value -> interned symbol of its source-level name.
    // Kept free of `state` so it can be used between reads and writes of it.
    let symbol_of = |value: SsaValue| {
        ssa.value_defs
            .get(value.0 as usize)
            .and_then(|def| def.var_name.as_deref())
            .and_then(|name| interner.get(name))
    };

    // Every SSA value sitting in a validated parameter slot, in slot order.
    let candidates = validated_params
        .iter()
        .filter_map(|&slot| args.get(slot))
        .flatten()
        .copied();

    let mut saw_tainted_arg = false;
    for value in candidates {
        let is_tainted = state.get(value).is_some_and(|t| !t.caps.is_empty());
        if !is_tainted {
            continue;
        }
        // Record the hit before resolving the name: an unnamed tainted
        // argument must still cause the call result to be marked.
        saw_tainted_arg = true;
        if let Some(sym) = symbol_of(value) {
            state.validated_must.insert(sym);
            state.validated_may.insert(sym);
        }
    }

    if !saw_tainted_arg {
        return;
    }
    // The call's own result inherits the validation of its arguments.
    if let Some(sym) = symbol_of(inst.value) {
        state.validated_must.insert(sym);
        state.validated_may.insert(sym);
    }
}
fn strip_cap_from_call_args(
args: &[SmallVec<[SsaValue; 2]>],
receiver: &Option<SsaValue>,
@ -8676,6 +8799,14 @@ struct ResolvedSummary {
/// `field_points_to` records. Applied at the caller call site by
/// `apply_field_points_to_writes`.
field_points_to: crate::summary::points_to::FieldPointsToSummary,
/// Parameter indices whose taint flow to the return is fully
/// validated by a dominating predicate inside the callee on every
/// return path. Mirrors
/// [`crate::summary::ssa_summary::SsaFuncSummary::validated_params_to_return`].
/// Populated only via `convert_ssa_to_resolved`; other resolution
/// paths leave it empty (label / coarse-FuncSummary forms cannot
/// express per-path predicate validation).
validated_params_to_return: Vec<usize>,
}
fn resolve_callee(
@ -8825,6 +8956,7 @@ fn resolve_callee_full(
points_to: Default::default(),
field_points_to: Default::default(),
param_to_gate_filters: vec![],
validated_params_to_return: vec![],
});
}
// Try label classification for the bound function (by leaf name).
@ -8896,6 +9028,7 @@ fn resolve_callee_full(
points_to: Default::default(),
field_points_to: Default::default(),
param_to_gate_filters: vec![],
validated_params_to_return: vec![],
});
}
}
@ -9041,6 +9174,7 @@ fn resolve_callee_full(
points_to: Default::default(),
field_points_to: Default::default(),
param_to_gate_filters: vec![],
validated_params_to_return: vec![],
});
}
} else {
@ -9091,6 +9225,7 @@ fn resolve_callee_full(
points_to: Default::default(),
field_points_to: Default::default(),
param_to_gate_filters: vec![],
validated_params_to_return: vec![],
};
match widened.len() {
0 => {}
@ -9162,6 +9297,7 @@ fn resolve_callee_full(
points_to: Default::default(),
field_points_to: Default::default(),
param_to_gate_filters: vec![],
validated_params_to_return: vec![],
});
}
}
@ -9344,6 +9480,7 @@ fn convert_ssa_to_resolved_for_caller(
points_to: ssa_sum.points_to.clone(),
field_points_to: ssa_sum.field_points_to.clone(),
param_to_gate_filters: ssa_sum.param_to_gate_filters.clone(),
validated_params_to_return: ssa_sum.validated_params_to_return.to_vec(),
}
}

View file

@ -50,6 +50,7 @@ pub fn extract_ssa_func_summary(
module_aliases: Option<&HashMap<SsaValue, SmallVec<[String; 2]>>>,
locator: Option<&crate::summary::SinkSiteLocator<'_>>,
formal_param_names: Option<&[String]>,
formal_destructured_fields: Option<&[Vec<String>]>,
) -> crate::summary::ssa_summary::SsaFuncSummary {
extract_ssa_func_summary_full(
ssa,
@ -64,6 +65,7 @@ pub fn extract_ssa_func_summary(
locator,
formal_param_names,
None,
formal_destructured_fields,
)
}
@ -93,6 +95,15 @@ pub fn extract_ssa_func_summary_full(
ssa_summaries: Option<
&HashMap<crate::symbol::FuncKey, crate::summary::ssa_summary::SsaFuncSummary>,
>,
// Per-parameter destructured-binding sibling names. Entry `i` is
// the list of field names destructured by the same call-site arg
// slot as the primary `formal_param_names[i]`, excluding the
// primary name. Empty vec for non-destructured params; `None` for
// callers that don't carry destructure info (legacy / test paths).
// Drives the destructured-arg expansion in the per-param probe so
// taint flow through sibling bindings is visible to summary
// extraction (CVE-2026-25544 / @payloadcms/drizzle SQLi).
formal_destructured_fields: Option<&[Vec<String>]>,
) -> crate::summary::ssa_summary::SsaFuncSummary {
use crate::summary::SinkSite;
use crate::summary::ssa_summary::{SsaFuncSummary, TaintTransform};
@ -159,13 +170,32 @@ pub fn extract_ssa_func_summary_full(
/// Inner [`PathFact`] when the rv on this path is a one-arg
/// variant constructor; [`None`] otherwise.
variant_inner_fact: Option<crate::abstract_interp::PathFact>,
/// `true` when the per-param probe's seeded parameter var_name
/// is in this return block's exit `validated_must`. `false`
/// for the baseline (no-seed) probe and for params not
/// validated on this path. Drives
/// `validated_params_to_return` summary extraction.
param_validated_must: bool,
}
// Helper: run a taint probe with a given global_seed and return
// the aggregate return caps, sink events, joined return abstract,
// and the per-return-block observation list used to derive
// per-return-path transforms.
let run_probe = |seed: HashMap<BindingKey, VarTaint>| -> (
//
// `probe_param_names` lists the seeded parameter's `var_name`
// plus any destructured-binding siblings sharing the slot
// (`None` for the baseline source-caps probe). When non-empty,
// each return-block observation records whether ANY of those
// names is in the exit state's `validated_must`, which feeds
// `validated_params_to_return` summary extraction below. The
// any-name semantics matches the slot-wide model: a destructured
// formal `({ a, b, c })` represents one call-site slot, and any
// sibling reaching `validated_must` proves the slot's caps were
// narrowed before reaching the return.
let run_probe = |seed: HashMap<BindingKey, VarTaint>,
probe_param_names: Option<&[&str]>|
-> (
Cap,
Vec<SsaTaintEvent>,
Option<crate::abstract_interp::AbstractValue>,
@ -313,6 +343,13 @@ pub fn extract_ssa_func_summary_full(
// The hash is stable across runs for a given predicate
// shape so call sites can compare paths deterministically.
let (predicate_hash, known_true, known_false) = summarise_return_predicates(&exit);
let param_validated_must = match probe_param_names {
Some(names) => names.iter().any(|name| match interner.get(name) {
Some(sym) => exit.validated_must.contains(sym),
None => false,
}),
None => false,
};
per_return.push(ReturnBlockObs {
derived_caps: block_derived_caps,
param_caps: block_param_caps,
@ -322,6 +359,7 @@ pub fn extract_ssa_func_summary_full(
abstract_value: block_abs,
path_fact: block_path_fact,
variant_inner_fact: block_variant_inner,
param_validated_must,
});
}
}
@ -343,7 +381,7 @@ pub fn extract_ssa_func_summary_full(
// Abstract values don't depend on taint seeding, so the baseline probe
// captures the function's intrinsic abstract return value.
let (baseline_return_caps, _baseline_events, return_abstract, baseline_obs) =
run_probe(HashMap::new());
run_probe(HashMap::new(), None);
let source_caps = baseline_return_caps;
// Per-return-path PathFact decomposition derived from the baseline
@ -403,6 +441,12 @@ pub fn extract_ssa_func_summary_full(
usize,
SmallVec<[crate::summary::ssa_summary::ReturnPathTransform; 2]>,
)> = Vec::new();
// Parameter indices whose taint flow to the return is fully
// validated by a dominating predicate on every return path.
// Populated below by checking each per-param probe's return-block
// exit states for `validated_must` containing the param's
// var_name. Empty when no parameter is validated.
let mut validated_params_to_return: SmallVec<[usize; 2]> = SmallVec::new();
for &(idx, ref var_name, _ssa_val) in &param_info {
let mut seed = HashMap::new();
@ -421,6 +465,37 @@ pub fn extract_ssa_func_summary_full(
probe_taint.clone(),
);
// Destructured-arg sibling expansion. When the formal at slot
// `idx` destructures an object pattern (`({ column, operator,
// value })`), the SSA body emits a separate [`SsaOp::Param`]
// for every destructured binding (sequential indices > slot
// count, since the closure-capture pass treats them as
// free-identifier reads). The call-site only passes ONE arg
// for the slot, so the engine never seeds the sibling Param
// ops at runtime — but the per-parameter SUMMARY probe must
// model "if this slot is tainted then every binding it
// produced is tainted too". Seed each sibling's `var_name`
// with the same caps the primary received. The probe-level
// `validated_must` check below treats the slot as validated
// when ANY sibling lands in `validated_must` on a return path.
//
// Closes the residual gap behind CVE-2026-25544 (PayloadCMS
// `@payloadcms/drizzle` SQLi via `createJSONQuery({ value })`):
// the validator helper `sanitizeValue(value, operator)` lives
// inside the body and the probe needs to see `value` flow
// through the `validated_params_to_return` channel before
// suppressing the caller's sink.
let slot_siblings: &[String] = formal_destructured_fields
.and_then(|d| d.get(idx))
.map(|v| v.as_slice())
.unwrap_or(&[]);
for sib in slot_siblings {
seed.insert(
BindingKey::new(sib.as_str(), BodyId(0)),
probe_taint.clone(),
);
}
// Phantom-Param prefix seeding. SSA lowering of arrow / nested
// function bodies often exposes free-identifier member-access
// expressions (e.g. `file._source.uri`) as their own
@ -437,13 +512,18 @@ pub fn extract_ssa_func_summary_full(
// `formal_var_name + "."` with the same caps the formal param
// received: semantically "if `file` is tainted, then every
// observable field path on `file` is tainted too". Bounded
// by SSA size; cap-equivalent to direct seeding.
let prefix = format!("{}.", var_name);
// by SSA size; cap-equivalent to direct seeding. Mirror this
// for each destructured sibling (`value.foo` / `column.name`
// member-projections inside the body).
let prefixes: Vec<String> = std::iter::once(var_name.clone())
.chain(slot_siblings.iter().cloned())
.map(|n| format!("{}.", n))
.collect();
for block in &ssa.blocks {
for inst in block.phis.iter().chain(block.body.iter()) {
if let SsaOp::Param { .. } = &inst.op {
if let Some(name) = inst.var_name.as_ref() {
if name.starts_with(&prefix) {
if prefixes.iter().any(|p| name.starts_with(p)) {
seed.insert(
BindingKey::new(name.as_str(), BodyId(0)),
probe_taint.clone(),
@ -454,7 +534,15 @@ pub fn extract_ssa_func_summary_full(
}
}
let (return_caps, events, _, per_return_obs) = run_probe(seed);
// Build the slot-wide name list for the `validated_must` check:
// primary name first, then its destructured siblings. The vector
// holds borrowed `&str`s rather than owned copies — `run_probe`
// only borrows the slice for its inner loop.
let mut slot_names: Vec<&str> = Vec::with_capacity(1 + slot_siblings.len());
slot_names.push(var_name.as_str());
for sib in slot_siblings {
slot_names.push(sib.as_str());
}
let (return_caps, events, _, per_return_obs) = run_probe(seed, Some(slot_names.as_slice()));
// Subtract baseline source_caps, we only want param-contributed caps
let param_return_caps = return_caps & !source_caps;
@ -469,6 +557,44 @@ pub fn extract_ssa_func_summary_full(
param_to_return.push((idx, transform));
}
// Validated-param-to-return detection.
//
// When the per-param probe shows that the parameter's
// `var_name` is in `validated_must` on every return path that
// *carries the parameter's contributed caps*, record the
// parameter as validated. The caller will mark each tainted
// argument passed to this position — and the call's own
// return value — as `validated_must` / `validated_may`, the
// same way an inline `if (!regex.test(x)) throw` would
// validate the surviving branch.
//
// Conservative gating:
// * Skip when the param contributes no caps to the return,
// a degenerate "validated but irrelevant" record.
// * Skip when no return block was observed (probes that
// diverged or hit `MAX_PROBE_PARAMS`).
// * Require validation on every return path that *carries
// param caps to the return*. Branches that return
// constants (e.g. `if (x === null) return 'NULL'`) carry
// no param taint and don't need a validation predicate.
// * Require ≥1 path that actually validates the param.
if !param_return_caps.is_empty() && !per_return_obs.is_empty() {
let mut any_carrying_path = false;
let all_carrying_validated = per_return_obs.iter().all(|obs| {
let carries = !(obs.derived_caps & !source_caps).is_empty()
|| !(obs.param_caps & !source_caps).is_empty();
if carries {
any_carrying_path = true;
obs.param_validated_must
} else {
true
}
});
if any_carrying_path && all_carrying_validated {
validated_params_to_return.push(idx);
}
}
// Derive per-return-path decomposition. For each
// observed return block, derive a `ReturnPathTransform` mirroring
// the aggregate logic (prefer derived caps, fall back to param
@ -694,6 +820,7 @@ pub fn extract_ssa_func_summary_full(
// extractor itself doesn't carry receiver-type info, the
// caller patches it in.
typed_call_receivers: Vec::new(),
validated_params_to_return,
}
}

View file

@ -1641,6 +1641,7 @@ mod fanout_merge_tests {
points_to: Default::default(),
field_points_to: Default::default(),
param_to_gate_filters: vec![],
validated_params_to_return: vec![],
}
}