mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-30 20:39:39 +02:00
Precision pass on auth and resource analysis (#63)
This commit is contained in:
parent
064801a3a4
commit
c7c5e0f3a1
62 changed files with 4248 additions and 138 deletions
|
|
@ -1480,6 +1480,7 @@ pub(crate) fn extract_intra_file_ssa_summaries(
|
|||
None,
|
||||
Some(&formal_params),
|
||||
None,
|
||||
None,
|
||||
);
|
||||
|
||||
// Only store if the summary has observable effects. With
|
||||
|
|
@ -1610,6 +1611,11 @@ pub(crate) fn lower_all_functions_from_bodies(
|
|||
} else {
|
||||
None
|
||||
};
|
||||
let param_types_ref = if !body.meta.param_types.is_empty() {
|
||||
Some(body.meta.param_types.as_slice())
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let summary = ssa_transfer::extract_ssa_func_summary(
|
||||
&func_ssa,
|
||||
&body.graph,
|
||||
|
|
@ -1623,6 +1629,7 @@ pub(crate) fn lower_all_functions_from_bodies(
|
|||
locator,
|
||||
Some(formal_params),
|
||||
formal_destructured,
|
||||
param_types_ref,
|
||||
);
|
||||
|
||||
// Always insert the summary, even when all fields are empty/default.
|
||||
|
|
@ -1860,6 +1867,11 @@ fn rerun_extraction_with_augmented_summaries(
|
|||
} else {
|
||||
None
|
||||
};
|
||||
let param_types_ref = if !body.meta.param_types.is_empty() {
|
||||
Some(body.meta.param_types.as_slice())
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let new_summary = ssa_transfer::extract_ssa_func_summary_full(
|
||||
&callee.ssa,
|
||||
parent_cfg,
|
||||
|
|
@ -1874,6 +1886,7 @@ fn rerun_extraction_with_augmented_summaries(
|
|||
Some(&body.meta.params),
|
||||
Some(&augmented_snapshot),
|
||||
formal_destructured,
|
||||
param_types_ref,
|
||||
);
|
||||
|
||||
// OR-merge sink-only fields into the existing summary.
|
||||
|
|
|
|||
|
|
@ -308,6 +308,24 @@ pub fn classify_condition(text: &str) -> PredicateKind {
|
|||
return PredicateKind::AllowlistCheck;
|
||||
}
|
||||
|
||||
// ── Java/Kotlin Pattern.matcher().matches() chain (before TypeCheck) ─
|
||||
//
|
||||
// Recognise `<re>.matcher(value).matches()` as a regex allowlist
|
||||
// validator, not a TypeCheck. The receiver of `.matcher(` must
|
||||
// contain `regex` or `pattern` so we don't widen to arbitrary
|
||||
// `obj.matcher(x).matches()` calls. Surfaced by GHSA-h8cj-hpmg-636v
|
||||
// (Appsmith FILTER_TEMP_TABLE_NAME_PATTERN.matcher(tableName).matches()).
|
||||
// Matched here (before the generic `.matches(` TypeCheck branch
|
||||
// below) so the chain doesn't silently fall into TypeCheck.
|
||||
if let Some(matcher_pos) = lower.find(".matcher(")
|
||||
&& lower[matcher_pos..].contains(".matches(")
|
||||
{
|
||||
let receiver = &lower[..matcher_pos];
|
||||
if receiver.contains("regex") || receiver.contains("pattern") {
|
||||
return PredicateKind::ValidationCall;
|
||||
}
|
||||
}
|
||||
|
||||
// ── Type-check guards ──────────────────────────────────────────────
|
||||
if lower.contains("typeof ")
|
||||
|| lower.contains("isinstance(")
|
||||
|
|
@ -395,6 +413,24 @@ pub fn classify_condition(text: &str) -> PredicateKind {
|
|||
}
|
||||
}
|
||||
|
||||
// Java idiom `<PATTERN>.matcher(value).matches()` — the regex
|
||||
// allowlist on Java/Kotlin is a two-step chain (`Pattern.matcher`
|
||||
// returns a `Matcher`, `.matches()` is the boolean predicate).
|
||||
// The bare callee here is `matches` (no args), so the
|
||||
// single-call recogniser above doesn't fire. Lock on the
|
||||
// chain shape and require the receiver of `.matcher(` to carry
|
||||
// a regex / pattern marker so we don't widen to `.matcher(` on
|
||||
// arbitrary types. Surfaced by GHSA-h8cj-hpmg-636v
|
||||
// (Appsmith FILTER_TEMP_TABLE_NAME_PATTERN.matcher(tableName).matches()).
|
||||
if bare == "matches"
|
||||
&& let Some(matcher_pos) = lower.find(".matcher(")
|
||||
{
|
||||
let receiver = &lower[..matcher_pos];
|
||||
if receiver.contains("regex") || receiver.contains("pattern") {
|
||||
return PredicateKind::ValidationCall;
|
||||
}
|
||||
}
|
||||
|
||||
// Sanitizer
|
||||
if bare.contains("sanitiz") || bare.contains("escape") || bare.contains("encode") {
|
||||
return PredicateKind::SanitizerCall;
|
||||
|
|
@ -648,6 +684,25 @@ fn extract_validation_target(text: &str) -> Option<String> {
|
|||
let trimmed = trimmed.trim_start_matches(['(', '!', ' ', '\t']);
|
||||
let trimmed = trimmed.strip_prefix("not ").unwrap_or(trimmed).trim();
|
||||
|
||||
// Java/Kotlin chain `<re>.matcher(value).matches()`: the validated
|
||||
// target is the inner `.matcher()` argument, not the bare `.matches()`
|
||||
// receiver. Locked on the same regex/pattern receiver gate as the
|
||||
// classifier (GHSA-h8cj-hpmg-636v).
|
||||
if trimmed.to_ascii_lowercase().contains(".matches(")
|
||||
&& let Some(matcher_pos) = trimmed.find(".matcher(")
|
||||
{
|
||||
let receiver_lower = trimmed[..matcher_pos].to_ascii_lowercase();
|
||||
if receiver_lower.contains("regex") || receiver_lower.contains("pattern") {
|
||||
let args_start = matcher_pos + ".matcher(".len();
|
||||
if let Some(first_arg) = first_call_arg(&trimmed[args_start..]) {
|
||||
let first_arg = first_arg.strip_prefix('&').unwrap_or(first_arg).trim();
|
||||
if !first_arg.is_empty() && is_identifier(first_arg) {
|
||||
return Some(first_arg.to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Find the first `(` which separates callee from args
|
||||
let paren_pos = trimmed.find('(')?;
|
||||
let callee_part = &trimmed[..paren_pos];
|
||||
|
|
@ -1559,3 +1614,43 @@ mod tests {
|
|||
assert!(is_bounded_length_check("x.len() <= 256"));
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod ghsa_h8cj_hpmg_636v_tests {
    //! Regression tests for the Java/Kotlin `<re>.matcher(value).matches()`
    //! chain surfaced by GHSA-h8cj-hpmg-636v.
    //!
    //! The tests pin two things: the chain is classified as
    //! `PredicateKind::ValidationCall` when the `.matcher(` receiver carries a
    //! pattern marker, and the validated target is the `.matcher(...)`
    //! argument rather than anything derived from the bare `.matches()` call.

    use super::*;

    /// Condition text with a `PATTERN`-named receiver, shared by the
    /// positive tests below.
    const PATTERN_CHAIN: &str = "FILTER_TEMP_TABLE_NAME_PATTERN.matcher(tableName).matches()";

    /// A `PATTERN`-named receiver makes the chain a validation call.
    #[test]
    fn java_pattern_matcher_chain_classifies_as_validation() {
        assert_eq!(
            classify_condition(PATTERN_CHAIN),
            PredicateKind::ValidationCall,
            "matcher().matches() chain on PATTERN-named receiver should be ValidationCall"
        );
    }

    /// The reported target is the argument passed to `.matcher(...)`.
    #[test]
    fn java_pattern_matcher_chain_target_is_matcher_arg() {
        let (kind, target) = classify_condition_with_target(PATTERN_CHAIN);
        assert_eq!(kind, PredicateKind::ValidationCall);
        assert_eq!(target.as_deref(), Some("tableName"));
    }

    /// A leading `!` changes neither the classification nor the target.
    #[test]
    fn java_negated_pattern_matcher_chain_target_is_matcher_arg() {
        let (kind, target) = classify_condition_with_target(
            "!FILTER_TEMP_TABLE_NAME_PATTERN.matcher(tableName).matches()",
        );
        assert_eq!(kind, PredicateKind::ValidationCall);
        assert_eq!(target.as_deref(), Some("tableName"));
    }

    /// Precision guard: only fires when receiver name has regex/pattern marker.
    #[test]
    fn java_pattern_matcher_chain_non_pattern_receiver_is_not_validation() {
        let kind = classify_condition("obj.matcher(x).matches()");
        // `assert_ne!` (rather than `assert!(a != b)`) prints the offending
        // value on failure, matching the `assert_eq!` style used above.
        assert_ne!(
            kind,
            PredicateKind::ValidationCall,
            "no regex marker should not trigger validation"
        );
    }
}
|
||||
|
|
|
|||
|
|
@ -8077,13 +8077,17 @@ fn is_abstract_safe_for_sink(
|
|||
return true;
|
||||
}
|
||||
|
||||
// HTML_ESCAPE type-only gate: an integer's decimal representation is
|
||||
// always digits (with optional leading `-`), which never contain HTML
|
||||
// metacharacters (`<`, `>`, `"`, `'`, `&`, `/`, `:`) in either text or
|
||||
// attribute context. The interval bound is irrelevant here, a large
|
||||
// magnitude doesn't introduce metachars, so HTML_ESCAPE uses a
|
||||
// type-only leaf check rather than the SQL/FILE/SHELL dual gate below.
|
||||
if sink_caps.intersects(Cap::HTML_ESCAPE) {
|
||||
// HTML_ESCAPE / FILE_IO type-only gate: an integer's decimal
|
||||
// representation is always digits (with optional leading `-`), which
|
||||
// never contain HTML metacharacters (`<`, `>`, `"`, `'`, `&`, `/`,
|
||||
// `:`) nor path metacharacters (`/`, `\`, `.`). Magnitude is
|
||||
// irrelevant — a large value doesn't introduce metachars, so both
|
||||
// sink classes use a type-only leaf check rather than the SQL/SHELL
|
||||
// dual gate below. Closes the sudo-rs RUSTSEC-2023-0069 patched FP
|
||||
// where `let uid: u32 = user.parse()?; path.push(uid.to_string())`
|
||||
// was flagged as a path-traversal FILE_IO sink despite the SSA
|
||||
// value being unambiguously typed as a numeric uid.
|
||||
if sink_caps.intersects(Cap::HTML_ESCAPE | Cap::FILE_IO) {
|
||||
if let Some(tf) = type_facts {
|
||||
let leaves = trace_tainted_leaf_values(inst, state, ssa, cfg);
|
||||
if !leaves.is_empty() && leaves.iter().all(|v| tf.is_int(*v)) {
|
||||
|
|
@ -8092,14 +8096,15 @@ fn is_abstract_safe_for_sink(
|
|||
}
|
||||
}
|
||||
|
||||
// Dual gate: SQL_QUERY / FILE_IO / SHELL_ESCAPE with proven Int type AND
|
||||
// bounded interval. Both conditions required: type proves the value IS
|
||||
// an integer (not a string that happened to parse), interval proves it's
|
||||
// Dual gate: SQL_QUERY / SHELL_ESCAPE with proven Int type AND bounded
|
||||
// interval. Both conditions required: type proves the value IS an
|
||||
// integer (not a string that happened to parse), interval proves it's
|
||||
// bounded (not arbitrary). Traces through Assign chains so
|
||||
// "const_string + tainted_int" is caught. SHELL_ESCAPE is included
|
||||
// because a bounded integer's decimal representation can't contain shell
|
||||
// metacharacters.
|
||||
if sink_caps.intersects(Cap::SQL_QUERY | Cap::FILE_IO | Cap::SHELL_ESCAPE) {
|
||||
// "const_string + tainted_int" is caught. SQL_QUERY keeps the bound
|
||||
// requirement because RUSTSEC-2024-0363-style binary-protocol overflow
|
||||
// requires a 4 GiB+ payload; SHELL_ESCAPE keeps it because a
|
||||
// multi-line decimal can still trip newline-sensitive shell parsing.
|
||||
if sink_caps.intersects(Cap::SQL_QUERY | Cap::SHELL_ESCAPE) {
|
||||
if let Some(tf) = type_facts {
|
||||
let leaves = trace_tainted_leaf_values(inst, state, ssa, cfg);
|
||||
if !leaves.is_empty()
|
||||
|
|
@ -8212,10 +8217,13 @@ fn is_call_abstract_safe(
|
|||
}
|
||||
}
|
||||
|
||||
// HTML_ESCAPE type-only gate (same as non-Call path): digits never
|
||||
// contain HTML metacharacters regardless of magnitude, so an integer
|
||||
// payload is safe for an HTML sink without requiring a bounded interval.
|
||||
if sink_caps.intersects(Cap::HTML_ESCAPE) {
|
||||
// HTML_ESCAPE / FILE_IO type-only gate (same as non-Call path): digits
|
||||
// never contain HTML metacharacters or path-traversal metacharacters
|
||||
// regardless of magnitude, so an integer payload is safe for these
|
||||
// sink classes without requiring a bounded interval. Closes the
|
||||
// RUSTSEC-2023-0069 patched FP for cross-function summary-resolved
|
||||
// path sinks like `open_for_user(uid)`.
|
||||
if sink_caps.intersects(Cap::HTML_ESCAPE | Cap::FILE_IO) {
|
||||
if let Some(tf) = type_facts {
|
||||
let leaves = trace_tainted_leaf_values(inst, state, ssa, cfg);
|
||||
if !leaves.is_empty() && leaves.iter().all(|v| tf.is_int(*v)) {
|
||||
|
|
@ -8224,8 +8232,10 @@ fn is_call_abstract_safe(
|
|||
}
|
||||
}
|
||||
|
||||
// Dual gate for Call sinks (same as non-Call path)
|
||||
if sink_caps.intersects(Cap::SQL_QUERY | Cap::FILE_IO | Cap::SHELL_ESCAPE) {
|
||||
// Dual gate for Call sinks: SQL_QUERY / SHELL_ESCAPE keep the bounded-
|
||||
// interval requirement (see is_abstract_safe_for_sink for the
|
||||
// rationale).
|
||||
if sink_caps.intersects(Cap::SQL_QUERY | Cap::SHELL_ESCAPE) {
|
||||
if let Some(tf) = type_facts {
|
||||
let leaves = trace_tainted_leaf_values(inst, state, ssa, cfg);
|
||||
if !leaves.is_empty()
|
||||
|
|
@ -8368,6 +8378,15 @@ fn trace_single_leaf(
|
|||
leaves.push(v);
|
||||
}
|
||||
}
|
||||
SsaOp::Call { callee, .. } if crate::ssa::type_facts::is_int_producing_callee(callee) => {
|
||||
// Int-producing conversion (`str.parse::<u32>()`, `Atoi`,
|
||||
// `parseInt`, ...). Tracing past the Call would land on the
|
||||
// String-typed source and defeat the type-only HTML/FILE_IO
|
||||
// suppression below — but the Call's *result* is unambiguously
|
||||
// numeric, so the value itself is the right leaf. Mirrors the
|
||||
// is_numeric_length_access stop-leaf at the top of this fn.
|
||||
leaves.push(v);
|
||||
}
|
||||
SsaOp::Call { args, .. } => {
|
||||
// For a Call whose node is not itself a Source (so the Call
|
||||
// introduces no fresh attacker-controlled taint), trace through
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ use super::{
|
|||
use crate::cfg::{BodyId, Cfg, FuncSummaries};
|
||||
use crate::labels::{Cap, SourceKind};
|
||||
use crate::ssa::ir::{SsaBody, SsaOp, SsaValue, Terminator};
|
||||
use crate::ssa::type_facts::{TypeFactResult, TypeKind, analyze_types_with_param_types};
|
||||
use crate::summary::GlobalSummaries;
|
||||
use crate::symbol::Lang;
|
||||
use crate::taint::domain::{TaintOrigin, VarTaint};
|
||||
|
|
@ -51,6 +52,7 @@ pub fn extract_ssa_func_summary(
|
|||
locator: Option<&crate::summary::SinkSiteLocator<'_>>,
|
||||
formal_param_names: Option<&[String]>,
|
||||
formal_destructured_fields: Option<&[Vec<String>]>,
|
||||
param_types: Option<&[Option<TypeKind>]>,
|
||||
) -> crate::summary::ssa_summary::SsaFuncSummary {
|
||||
extract_ssa_func_summary_full(
|
||||
ssa,
|
||||
|
|
@ -66,6 +68,7 @@ pub fn extract_ssa_func_summary(
|
|||
formal_param_names,
|
||||
None,
|
||||
formal_destructured_fields,
|
||||
param_types,
|
||||
)
|
||||
}
|
||||
|
||||
|
|
@ -104,7 +107,34 @@ pub fn extract_ssa_func_summary_full(
|
|||
// taint flow through sibling bindings is visible to summary
|
||||
// extraction (CVE-2026-25544 / @payloadcms/drizzle SQLi).
|
||||
formal_destructured_fields: Option<&[Vec<String>]>,
|
||||
// BodyMeta.param_types parallel-vec. When supplied, drives a local
|
||||
// `analyze_types_with_param_types` pass so the per-parameter probe's
|
||||
// `SsaTaintTransfer.type_facts` is populated. Without this, helper
|
||||
// bodies whose sinks are recognised only via type-qualified callee
|
||||
// resolution (`receiver_type.label_prefix() + "." + method`, e.g.
|
||||
// `DatabaseConnection.execute` for JDBC `Statement.execute`) silently
|
||||
// drop the sink during summary extraction even though the same
|
||||
// callee is correctly classified by the post-optimise transfer in
|
||||
// `transfer_inst`. Surfaced by GHSA-h8cj-hpmg-636v (Appsmith
|
||||
// FilterDataServiceCE.dropTable: helper `executeDbQuery(query)`
|
||||
// routes the SQL string through `statement.execute(query)` whose
|
||||
// SQL_QUERY caps were invisible to the param-1 probe). `None` for
|
||||
// legacy / test paths preserves prior behaviour.
|
||||
param_types: Option<&[Option<TypeKind>]>,
|
||||
) -> crate::summary::ssa_summary::SsaFuncSummary {
|
||||
// Pre-compute type facts on the un-optimised SSA body so the per-param
|
||||
// probe can resolve sinks that depend on receiver-type inference.
|
||||
// Empty const_values: this runs *before* the optimiser, so const-prop
|
||||
// refinements aren't available yet, but the pass-1 instruction-shape
|
||||
// typing (Source/Param/Call→constructor_type) and the second-pass
|
||||
// Assign/Phi propagation are sufficient for the JDBC chain
|
||||
// `Statement s = conn.createStatement(); s.execute(q);` to type `s`
|
||||
// as `DatabaseConnection`.
|
||||
let local_type_facts: Option<TypeFactResult> = param_types.map(|pt| {
|
||||
let empty_consts: HashMap<SsaValue, crate::ssa::const_prop::ConstLattice> = HashMap::new();
|
||||
analyze_types_with_param_types(ssa, cfg, &empty_consts, Some(lang), pt)
|
||||
});
|
||||
let local_type_facts_ref: Option<&TypeFactResult> = local_type_facts.as_ref();
|
||||
use crate::summary::SinkSite;
|
||||
use crate::summary::ssa_summary::{SsaFuncSummary, TaintTransform};
|
||||
|
||||
|
|
@ -215,7 +245,7 @@ pub fn extract_ssa_func_summary_full(
|
|||
param_seed: None,
|
||||
receiver_seed: None,
|
||||
const_values: None,
|
||||
type_facts: None,
|
||||
type_facts: local_type_facts_ref,
|
||||
ssa_summaries,
|
||||
extra_labels: None,
|
||||
base_aliases: None,
|
||||
|
|
@ -761,7 +791,7 @@ pub fn extract_ssa_func_summary_full(
|
|||
param_seed: None,
|
||||
receiver_seed: None,
|
||||
const_values: None,
|
||||
type_facts: None,
|
||||
type_facts: local_type_facts_ref,
|
||||
ssa_summaries,
|
||||
extra_labels: None,
|
||||
base_aliases: None,
|
||||
|
|
|
|||
|
|
@ -4332,6 +4332,7 @@ fn ssa_summary_identity_propagation() {
|
|||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
);
|
||||
assert!(
|
||||
!summary.param_to_return.is_empty(),
|
||||
|
|
@ -4396,6 +4397,7 @@ fn ssa_summary_sanitizer_strips_bits() {
|
|||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
);
|
||||
// Sanitizer should strip some bits
|
||||
for (_, transform) in &summary.param_to_return {
|
||||
|
|
@ -4453,6 +4455,7 @@ fn ssa_summary_source_adds_bits() {
|
|||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
);
|
||||
assert!(
|
||||
!summary.source_caps.is_empty(),
|
||||
|
|
@ -4510,6 +4513,7 @@ fn ssa_summary_param_to_sink() {
|
|||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
);
|
||||
assert!(
|
||||
!summary.param_to_sink.is_empty(),
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue