Precision pass on auth and resource analysis (#63)

This commit is contained in:
Eli Peter 2026-05-03 13:51:46 -04:00 committed by GitHub
parent 064801a3a4
commit c7c5e0f3a1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
62 changed files with 4248 additions and 138 deletions

View file

@ -8077,13 +8077,17 @@ fn is_abstract_safe_for_sink(
return true;
}
// HTML_ESCAPE type-only gate: an integer's decimal representation is
// always digits (with optional leading `-`), which never contain HTML
// metacharacters (`<`, `>`, `"`, `'`, `&`, `/`, `:`) in either text or
// attribute context. The interval bound is irrelevant here, a large
// magnitude doesn't introduce metachars, so HTML_ESCAPE uses a
// type-only leaf check rather than the SQL/FILE/SHELL dual gate below.
if sink_caps.intersects(Cap::HTML_ESCAPE) {
// HTML_ESCAPE / FILE_IO type-only gate: an integer's decimal
// representation consists only of digits (with an optional leading
// `-`), so it contains neither HTML metacharacters (`<`, `>`, `"`,
// `'`, `&`, `/`, `:`) nor path metacharacters (`/`, `\`, `.`).
// Magnitude is irrelevant — a large value doesn't introduce
// metachars, so both sink classes use a type-only leaf check rather
// than the SQL/SHELL dual gate below. Closes the false positive on
// the patched sudo-rs code (RUSTSEC-2023-0069), where
// `let uid: u32 = user.parse()?; path.push(uid.to_string())` was
// flagged as a path-traversal FILE_IO sink despite the SSA value
// being unambiguously typed as a numeric uid.
if sink_caps.intersects(Cap::HTML_ESCAPE | Cap::FILE_IO) {
if let Some(tf) = type_facts {
let leaves = trace_tainted_leaf_values(inst, state, ssa, cfg);
if !leaves.is_empty() && leaves.iter().all(|v| tf.is_int(*v)) {
@ -8092,14 +8096,15 @@ fn is_abstract_safe_for_sink(
}
}
// Dual gate: SQL_QUERY / FILE_IO / SHELL_ESCAPE with proven Int type AND
// bounded interval. Both conditions required: type proves the value IS
// an integer (not a string that happened to parse), interval proves it's
// Dual gate: SQL_QUERY / SHELL_ESCAPE with proven Int type AND bounded
// interval. Both conditions required: type proves the value IS an
// integer (not a string that happened to parse), interval proves it's
// bounded (not arbitrary). Traces through Assign chains so
// "const_string + tainted_int" is caught. SHELL_ESCAPE is included
// because a bounded integer's decimal representation can't contain shell
// metacharacters.
if sink_caps.intersects(Cap::SQL_QUERY | Cap::FILE_IO | Cap::SHELL_ESCAPE) {
// "const_string + tainted_int" is caught. SQL_QUERY keeps the bound
// requirement because RUSTSEC-2024-0363-style binary-protocol overflow
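// requires a 4 GiB+ payload; SHELL_ESCAPE keeps it because a
// multi-line decimal can still trip newline-sensitive shell parsing.
// NOTE(review): an integer's decimal form cannot contain a newline, so
// the SHELL_ESCAPE rationale needs confirming — presumably the residual
// risk is a leading `-` being parsed as an option flag.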
if sink_caps.intersects(Cap::SQL_QUERY | Cap::SHELL_ESCAPE) {
if let Some(tf) = type_facts {
let leaves = trace_tainted_leaf_values(inst, state, ssa, cfg);
if !leaves.is_empty()
@ -8212,10 +8217,13 @@ fn is_call_abstract_safe(
}
}
// HTML_ESCAPE type-only gate (same as non-Call path): digits never
// contain HTML metacharacters regardless of magnitude, so an integer
// payload is safe for an HTML sink without requiring a bounded interval.
if sink_caps.intersects(Cap::HTML_ESCAPE) {
// HTML_ESCAPE / FILE_IO type-only gate (same as non-Call path): digits
// never contain HTML metacharacters or path-traversal metacharacters
// regardless of magnitude, so an integer payload is safe for these
// sink classes without requiring a bounded interval. Closes the
// false positive on the patched RUSTSEC-2023-0069 code for
// cross-function summary-resolved path sinks like `open_for_user(uid)`.
if sink_caps.intersects(Cap::HTML_ESCAPE | Cap::FILE_IO) {
if let Some(tf) = type_facts {
let leaves = trace_tainted_leaf_values(inst, state, ssa, cfg);
if !leaves.is_empty() && leaves.iter().all(|v| tf.is_int(*v)) {
@ -8224,8 +8232,10 @@ fn is_call_abstract_safe(
}
}
// Dual gate for Call sinks (same as non-Call path)
if sink_caps.intersects(Cap::SQL_QUERY | Cap::FILE_IO | Cap::SHELL_ESCAPE) {
// Dual gate for Call sinks: SQL_QUERY / SHELL_ESCAPE keep the bounded-
// interval requirement (see is_abstract_safe_for_sink for the
// rationale).
if sink_caps.intersects(Cap::SQL_QUERY | Cap::SHELL_ESCAPE) {
if let Some(tf) = type_facts {
let leaves = trace_tainted_leaf_values(inst, state, ssa, cfg);
if !leaves.is_empty()
@ -8368,6 +8378,15 @@ fn trace_single_leaf(
leaves.push(v);
}
}
SsaOp::Call { callee, .. } if crate::ssa::type_facts::is_int_producing_callee(callee) => {
// Int-producing conversion (`str.parse::<u32>()`, `Atoi`,
// `parseInt`, ...). Tracing past the Call would land on the
// String-typed source and defeat the type-only HTML/FILE_IO
// suppression in `is_abstract_safe_for_sink` / `is_call_abstract_safe`
// — but the Call's *result* is unambiguously
// numeric, so the value itself is the right leaf. Mirrors the
// is_numeric_length_access stop-leaf at the top of this fn.
leaves.push(v);
}
SsaOp::Call { args, .. } => {
// For a Call whose node is not itself a Source (so the Call
// introduces no fresh attacker-controlled taint), trace through

View file

@ -20,6 +20,7 @@ use super::{
use crate::cfg::{BodyId, Cfg, FuncSummaries};
use crate::labels::{Cap, SourceKind};
use crate::ssa::ir::{SsaBody, SsaOp, SsaValue, Terminator};
use crate::ssa::type_facts::{TypeFactResult, TypeKind, analyze_types_with_param_types};
use crate::summary::GlobalSummaries;
use crate::symbol::Lang;
use crate::taint::domain::{TaintOrigin, VarTaint};
@ -51,6 +52,7 @@ pub fn extract_ssa_func_summary(
locator: Option<&crate::summary::SinkSiteLocator<'_>>,
formal_param_names: Option<&[String]>,
formal_destructured_fields: Option<&[Vec<String>]>,
param_types: Option<&[Option<TypeKind>]>,
) -> crate::summary::ssa_summary::SsaFuncSummary {
extract_ssa_func_summary_full(
ssa,
@ -66,6 +68,7 @@ pub fn extract_ssa_func_summary(
formal_param_names,
None,
formal_destructured_fields,
param_types,
)
}
@ -104,7 +107,34 @@ pub fn extract_ssa_func_summary_full(
// taint flow through sibling bindings is visible to summary
// extraction (CVE-2026-25544 / @payloadcms/drizzle SQLi).
formal_destructured_fields: Option<&[Vec<String>]>,
// BodyMeta.param_types parallel-vec. When supplied, drives a local
// `analyze_types_with_param_types` pass so the per-parameter probe's
// `SsaTaintTransfer.type_facts` is populated. Without this, helper
// bodies whose sinks are recognised only via type-qualified callee
// resolution (`receiver_type.label_prefix() + "." + method`, e.g.
// `DatabaseConnection.execute` for JDBC `Statement.execute`) silently
// drop the sink during summary extraction even though the same
// callee is correctly classified by the post-optimise transfer in
// `transfer_inst`. Surfaced by GHSA-h8cj-hpmg-636v (Appsmith
// FilterDataServiceCE.dropTable: helper `executeDbQuery(query)`
// routes the SQL string through `statement.execute(query)` whose
// SQL_QUERY caps were invisible to the param-1 probe). `None` for
// legacy / test paths preserves prior behaviour.
param_types: Option<&[Option<TypeKind>]>,
) -> crate::summary::ssa_summary::SsaFuncSummary {
// Pre-compute type facts on the un-optimised SSA body so the per-param
// probe can resolve sinks that depend on receiver-type inference.
// Empty const_values: this runs *before* the optimiser, so const-prop
// refinements aren't available yet, but the pass-1 instruction-shape
// typing (Source/Param/Call→constructor_type) and the second-pass
// Assign/Phi propagation are sufficient for the JDBC chain
// `Statement s = conn.createStatement(); s.execute(q);` to type `s`
// as `DatabaseConnection`.
let local_type_facts: Option<TypeFactResult> = param_types.map(|pt| {
let empty_consts: HashMap<SsaValue, crate::ssa::const_prop::ConstLattice> = HashMap::new();
analyze_types_with_param_types(ssa, cfg, &empty_consts, Some(lang), pt)
});
let local_type_facts_ref: Option<&TypeFactResult> = local_type_facts.as_ref();
use crate::summary::SinkSite;
use crate::summary::ssa_summary::{SsaFuncSummary, TaintTransform};
@ -215,7 +245,7 @@ pub fn extract_ssa_func_summary_full(
param_seed: None,
receiver_seed: None,
const_values: None,
type_facts: None,
type_facts: local_type_facts_ref,
ssa_summaries,
extra_labels: None,
base_aliases: None,
@ -761,7 +791,7 @@ pub fn extract_ssa_func_summary_full(
param_seed: None,
receiver_seed: None,
const_values: None,
type_facts: None,
type_facts: local_type_facts_ref,
ssa_summaries,
extra_labels: None,
base_aliases: None,