Precision pass on auth and resource analysis (#63)

This commit is contained in:
Eli Peter 2026-05-03 13:51:46 -04:00 committed by GitHub
parent 064801a3a4
commit c7c5e0f3a1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
62 changed files with 4248 additions and 138 deletions

View file

@ -1480,6 +1480,7 @@ pub(crate) fn extract_intra_file_ssa_summaries(
None,
Some(&formal_params),
None,
None,
);
// Only store if the summary has observable effects. With
@ -1610,6 +1611,11 @@ pub(crate) fn lower_all_functions_from_bodies(
} else {
None
};
let param_types_ref = if !body.meta.param_types.is_empty() {
Some(body.meta.param_types.as_slice())
} else {
None
};
let summary = ssa_transfer::extract_ssa_func_summary(
&func_ssa,
&body.graph,
@ -1623,6 +1629,7 @@ pub(crate) fn lower_all_functions_from_bodies(
locator,
Some(formal_params),
formal_destructured,
param_types_ref,
);
// Always insert the summary, even when all fields are empty/default.
@ -1860,6 +1867,11 @@ fn rerun_extraction_with_augmented_summaries(
} else {
None
};
let param_types_ref = if !body.meta.param_types.is_empty() {
Some(body.meta.param_types.as_slice())
} else {
None
};
let new_summary = ssa_transfer::extract_ssa_func_summary_full(
&callee.ssa,
parent_cfg,
@ -1874,6 +1886,7 @@ fn rerun_extraction_with_augmented_summaries(
Some(&body.meta.params),
Some(&augmented_snapshot),
formal_destructured,
param_types_ref,
);
// OR-merge sink-only fields into the existing summary.

View file

@ -308,6 +308,24 @@ pub fn classify_condition(text: &str) -> PredicateKind {
return PredicateKind::AllowlistCheck;
}
// ── Java/Kotlin Pattern.matcher().matches() chain (before TypeCheck) ─
//
// Recognise `<re>.matcher(value).matches()` as a regex allowlist
// validator, not a TypeCheck. The receiver of `.matcher(` must
// contain `regex` or `pattern` so we don't widen to arbitrary
// `obj.matcher(x).matches()` calls. Surfaced by GHSA-h8cj-hpmg-636v
// (Appsmith FILTER_TEMP_TABLE_NAME_PATTERN.matcher(tableName).matches()).
// Matched here (before the generic `.matches(` TypeCheck branch
// below) so the chain doesn't silently fall into TypeCheck.
if let Some(matcher_pos) = lower.find(".matcher(")
&& lower[matcher_pos..].contains(".matches(")
{
let receiver = &lower[..matcher_pos];
if receiver.contains("regex") || receiver.contains("pattern") {
return PredicateKind::ValidationCall;
}
}
// ── Type-check guards ──────────────────────────────────────────────
if lower.contains("typeof ")
|| lower.contains("isinstance(")
@ -395,6 +413,24 @@ pub fn classify_condition(text: &str) -> PredicateKind {
}
}
// Java idiom `<PATTERN>.matcher(value).matches()` — the regex
// allowlist on Java/Kotlin is a two-step chain (`Pattern.matcher`
// returns a `Matcher`, `.matches()` is the boolean predicate).
// The bare callee here is `matches` (no args), so the
// single-call recogniser above doesn't fire. Lock on the
// chain shape and require the receiver of `.matcher(` to carry
// a regex / pattern marker so we don't widen to `.matcher(` on
// arbitrary types. Surfaced by GHSA-h8cj-hpmg-636v
// (Appsmith FILTER_TEMP_TABLE_NAME_PATTERN.matcher(tableName).matches()).
if bare == "matches"
&& let Some(matcher_pos) = lower.find(".matcher(")
{
let receiver = &lower[..matcher_pos];
if receiver.contains("regex") || receiver.contains("pattern") {
return PredicateKind::ValidationCall;
}
}
// Sanitizer
if bare.contains("sanitiz") || bare.contains("escape") || bare.contains("encode") {
return PredicateKind::SanitizerCall;
@ -648,6 +684,25 @@ fn extract_validation_target(text: &str) -> Option<String> {
let trimmed = trimmed.trim_start_matches(['(', '!', ' ', '\t']);
let trimmed = trimmed.strip_prefix("not ").unwrap_or(trimmed).trim();
// Java/Kotlin chain `<re>.matcher(value).matches()`: the validated
// target is the inner `.matcher()` argument, not the bare `.matches()`
// receiver. Locked on the same regex/pattern receiver gate as the
// classifier (GHSA-h8cj-hpmg-636v).
if trimmed.to_ascii_lowercase().contains(".matches(")
&& let Some(matcher_pos) = trimmed.find(".matcher(")
{
let receiver_lower = trimmed[..matcher_pos].to_ascii_lowercase();
if receiver_lower.contains("regex") || receiver_lower.contains("pattern") {
let args_start = matcher_pos + ".matcher(".len();
if let Some(first_arg) = first_call_arg(&trimmed[args_start..]) {
let first_arg = first_arg.strip_prefix('&').unwrap_or(first_arg).trim();
if !first_arg.is_empty() && is_identifier(first_arg) {
return Some(first_arg.to_string());
}
}
}
}
// Find the first `(` which separates callee from args
let paren_pos = trimmed.find('(')?;
let callee_part = &trimmed[..paren_pos];
@ -1559,3 +1614,43 @@ mod tests {
assert!(is_bounded_length_check("x.len() <= 256"));
}
}
#[cfg(test)]
mod ghsa_h8cj_hpmg_636v_tests {
    //! Regression tests for the Java/Kotlin `<re>.matcher(value).matches()`
    //! chain (GHSA-h8cj-hpmg-636v): the chain must classify as a validation
    //! call, and the validated target must be the `.matcher()` argument.

    use super::*;

    /// Guard expression exercised by the positive cases: a receiver whose
    /// name carries the `PATTERN` marker, followed by the two-step chain.
    const PATTERN_CHAIN: &str = "FILTER_TEMP_TABLE_NAME_PATTERN.matcher(tableName).matches()";

    #[test]
    fn java_pattern_matcher_chain_classifies_as_validation() {
        assert_eq!(
            classify_condition(PATTERN_CHAIN),
            PredicateKind::ValidationCall,
            "matcher().matches() chain on PATTERN-named receiver should be ValidationCall"
        );
    }

    #[test]
    fn java_pattern_matcher_chain_target_is_matcher_arg() {
        let (kind, target) = classify_condition_with_target(PATTERN_CHAIN);

        assert_eq!(kind, PredicateKind::ValidationCall);
        assert_eq!(target.as_deref(), Some("tableName"));
    }

    #[test]
    fn java_negated_pattern_matcher_chain_target_is_matcher_arg() {
        // Same chain with a leading `!`; the classification and the extracted
        // target are expected to match the un-negated form.
        let negated = format!("!{PATTERN_CHAIN}");
        let (kind, target) = classify_condition_with_target(negated.as_str());

        assert_eq!(kind, PredicateKind::ValidationCall);
        assert_eq!(target.as_deref(), Some("tableName"));
    }

    #[test]
    fn java_pattern_matcher_chain_non_pattern_receiver_is_not_validation() {
        // Precision guard: a receiver without a regex/pattern marker must not
        // be promoted to a validation call.
        let unmarked_kind = classify_condition("obj.matcher(x).matches()");
        assert!(
            unmarked_kind != PredicateKind::ValidationCall,
            "no regex marker should not trigger validation"
        );
    }
}

View file

@ -8077,13 +8077,17 @@ fn is_abstract_safe_for_sink(
return true;
}
// HTML_ESCAPE type-only gate: an integer's decimal representation is
// always digits (with optional leading `-`), which never contain HTML
// metacharacters (`<`, `>`, `"`, `'`, `&`, `/`, `:`) in either text or
// attribute context. The interval bound is irrelevant here, a large
// magnitude doesn't introduce metachars, so HTML_ESCAPE uses a
// type-only leaf check rather than the SQL/FILE/SHELL dual gate below.
if sink_caps.intersects(Cap::HTML_ESCAPE) {
// HTML_ESCAPE / FILE_IO type-only gate: an integer's decimal
// representation is always digits (with optional leading `-`), which
// never contain HTML metacharacters (`<`, `>`, `"`, `'`, `&`, `/`,
// `:`) nor path metacharacters (`/`, `\`, `.`). Magnitude is
// irrelevant — a large value doesn't introduce metachars, so both
// sink classes use a type-only leaf check rather than the SQL/SHELL
// dual gate below. Closes the sudo-rs RUSTSEC-2023-0069 patched FP
// where `let uid: u32 = user.parse()?; path.push(uid.to_string())`
// was flagged as a path-traversal FILE_IO sink despite the SSA
// value being unambiguously typed as a numeric uid.
if sink_caps.intersects(Cap::HTML_ESCAPE | Cap::FILE_IO) {
if let Some(tf) = type_facts {
let leaves = trace_tainted_leaf_values(inst, state, ssa, cfg);
if !leaves.is_empty() && leaves.iter().all(|v| tf.is_int(*v)) {
@ -8092,14 +8096,15 @@ fn is_abstract_safe_for_sink(
}
}
// Dual gate: SQL_QUERY / FILE_IO / SHELL_ESCAPE with proven Int type AND
// bounded interval. Both conditions required: type proves the value IS
// an integer (not a string that happened to parse), interval proves it's
// Dual gate: SQL_QUERY / SHELL_ESCAPE with proven Int type AND bounded
// interval. Both conditions required: type proves the value IS an
// integer (not a string that happened to parse), interval proves it's
// bounded (not arbitrary). Traces through Assign chains so
// "const_string + tainted_int" is caught. SHELL_ESCAPE is included
// because a bounded integer's decimal representation can't contain shell
// metacharacters.
if sink_caps.intersects(Cap::SQL_QUERY | Cap::FILE_IO | Cap::SHELL_ESCAPE) {
// "const_string + tainted_int" is caught. SQL_QUERY keeps the bound
// requirement because RUSTSEC-2024-0363-style binary-protocol overflow
// requires a 4 GiB+ payload; SHELL_ESCAPE keeps it because a
// multi-line decimal can still trip newline-sensitive shell parsing.
if sink_caps.intersects(Cap::SQL_QUERY | Cap::SHELL_ESCAPE) {
if let Some(tf) = type_facts {
let leaves = trace_tainted_leaf_values(inst, state, ssa, cfg);
if !leaves.is_empty()
@ -8212,10 +8217,13 @@ fn is_call_abstract_safe(
}
}
// HTML_ESCAPE type-only gate (same as non-Call path): digits never
// contain HTML metacharacters regardless of magnitude, so an integer
// payload is safe for an HTML sink without requiring a bounded interval.
if sink_caps.intersects(Cap::HTML_ESCAPE) {
// HTML_ESCAPE / FILE_IO type-only gate (same as non-Call path): digits
// never contain HTML metacharacters or path-traversal metacharacters
// regardless of magnitude, so an integer payload is safe for these
// sink classes without requiring a bounded interval. Closes the
// RUSTSEC-2023-0069 patched FP for cross-function summary-resolved
// path sinks like `open_for_user(uid)`.
if sink_caps.intersects(Cap::HTML_ESCAPE | Cap::FILE_IO) {
if let Some(tf) = type_facts {
let leaves = trace_tainted_leaf_values(inst, state, ssa, cfg);
if !leaves.is_empty() && leaves.iter().all(|v| tf.is_int(*v)) {
@ -8224,8 +8232,10 @@ fn is_call_abstract_safe(
}
}
// Dual gate for Call sinks (same as non-Call path)
if sink_caps.intersects(Cap::SQL_QUERY | Cap::FILE_IO | Cap::SHELL_ESCAPE) {
// Dual gate for Call sinks: SQL_QUERY / SHELL_ESCAPE keep the bounded-
// interval requirement (see is_abstract_safe_for_sink for the
// rationale).
if sink_caps.intersects(Cap::SQL_QUERY | Cap::SHELL_ESCAPE) {
if let Some(tf) = type_facts {
let leaves = trace_tainted_leaf_values(inst, state, ssa, cfg);
if !leaves.is_empty()
@ -8368,6 +8378,15 @@ fn trace_single_leaf(
leaves.push(v);
}
}
SsaOp::Call { callee, .. } if crate::ssa::type_facts::is_int_producing_callee(callee) => {
// Int-producing conversion (`str.parse::<u32>()`, `Atoi`,
// `parseInt`, ...). Tracing past the Call would land on the
// String-typed source and defeat the type-only HTML/FILE_IO
// suppression below — but the Call's *result* is unambiguously
// numeric, so the value itself is the right leaf. Mirrors the
// is_numeric_length_access stop-leaf at the top of this fn.
leaves.push(v);
}
SsaOp::Call { args, .. } => {
// For a Call whose node is not itself a Source (so the Call
// introduces no fresh attacker-controlled taint), trace through

View file

@ -20,6 +20,7 @@ use super::{
use crate::cfg::{BodyId, Cfg, FuncSummaries};
use crate::labels::{Cap, SourceKind};
use crate::ssa::ir::{SsaBody, SsaOp, SsaValue, Terminator};
use crate::ssa::type_facts::{TypeFactResult, TypeKind, analyze_types_with_param_types};
use crate::summary::GlobalSummaries;
use crate::symbol::Lang;
use crate::taint::domain::{TaintOrigin, VarTaint};
@ -51,6 +52,7 @@ pub fn extract_ssa_func_summary(
locator: Option<&crate::summary::SinkSiteLocator<'_>>,
formal_param_names: Option<&[String]>,
formal_destructured_fields: Option<&[Vec<String>]>,
param_types: Option<&[Option<TypeKind>]>,
) -> crate::summary::ssa_summary::SsaFuncSummary {
extract_ssa_func_summary_full(
ssa,
@ -66,6 +68,7 @@ pub fn extract_ssa_func_summary(
formal_param_names,
None,
formal_destructured_fields,
param_types,
)
}
@ -104,7 +107,34 @@ pub fn extract_ssa_func_summary_full(
// taint flow through sibling bindings is visible to summary
// extraction (CVE-2026-25544 / @payloadcms/drizzle SQLi).
formal_destructured_fields: Option<&[Vec<String>]>,
// BodyMeta.param_types parallel-vec. When supplied, drives a local
// `analyze_types_with_param_types` pass so the per-parameter probe's
// `SsaTaintTransfer.type_facts` is populated. Without this, helper
// bodies whose sinks are recognised only via type-qualified callee
// resolution (`receiver_type.label_prefix() + "." + method`, e.g.
// `DatabaseConnection.execute` for JDBC `Statement.execute`) silently
// drop the sink during summary extraction even though the same
// callee is correctly classified by the post-optimise transfer in
// `transfer_inst`. Surfaced by GHSA-h8cj-hpmg-636v (Appsmith
// FilterDataServiceCE.dropTable: helper `executeDbQuery(query)`
// routes the SQL string through `statement.execute(query)` whose
// SQL_QUERY caps were invisible to the param-1 probe). `None` for
// legacy / test paths preserves prior behaviour.
param_types: Option<&[Option<TypeKind>]>,
) -> crate::summary::ssa_summary::SsaFuncSummary {
// Pre-compute type facts on the un-optimised SSA body so the per-param
// probe can resolve sinks that depend on receiver-type inference.
// Empty const_values: this runs *before* the optimiser, so const-prop
// refinements aren't available yet, but the pass-1 instruction-shape
// typing (Source/Param/Call→constructor_type) and the second-pass
// Assign/Phi propagation are sufficient for the JDBC chain
// `Statement s = conn.createStatement(); s.execute(q);` to type `s`
// as `DatabaseConnection`.
let local_type_facts: Option<TypeFactResult> = param_types.map(|pt| {
let empty_consts: HashMap<SsaValue, crate::ssa::const_prop::ConstLattice> = HashMap::new();
analyze_types_with_param_types(ssa, cfg, &empty_consts, Some(lang), pt)
});
let local_type_facts_ref: Option<&TypeFactResult> = local_type_facts.as_ref();
use crate::summary::SinkSite;
use crate::summary::ssa_summary::{SsaFuncSummary, TaintTransform};
@ -215,7 +245,7 @@ pub fn extract_ssa_func_summary_full(
param_seed: None,
receiver_seed: None,
const_values: None,
type_facts: None,
type_facts: local_type_facts_ref,
ssa_summaries,
extra_labels: None,
base_aliases: None,
@ -761,7 +791,7 @@ pub fn extract_ssa_func_summary_full(
param_seed: None,
receiver_seed: None,
const_values: None,
type_facts: None,
type_facts: local_type_facts_ref,
ssa_summaries,
extra_labels: None,
base_aliases: None,

View file

@ -4332,6 +4332,7 @@ fn ssa_summary_identity_propagation() {
None,
None,
None,
None,
);
assert!(
!summary.param_to_return.is_empty(),
@ -4396,6 +4397,7 @@ fn ssa_summary_sanitizer_strips_bits() {
None,
None,
None,
None,
);
// Sanitizer should strip some bits
for (_, transform) in &summary.param_to_return {
@ -4453,6 +4455,7 @@ fn ssa_summary_source_adds_bits() {
None,
None,
None,
None,
);
assert!(
!summary.source_caps.is_empty(),
@ -4510,6 +4513,7 @@ fn ssa_summary_param_to_sink() {
None,
None,
None,
None,
);
assert!(
!summary.param_to_sink.is_empty(),