mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-27 20:29:39 +02:00
Authorization analysis logic improvements (#61)
This commit is contained in:
parent
3c89bddbf2
commit
40995e45e7
55 changed files with 4193 additions and 134 deletions
|
|
@ -345,6 +345,126 @@ pub(super) fn has_keyword_arg(call_node: Node, keyword_name: &str, code: &[u8])
|
|||
false
|
||||
}
|
||||
|
||||
/// Extract the literal value of a property `prop_name` from the object
|
||||
/// literal at positional argument `arg_index`. Returns `None` if the
|
||||
/// arg is absent, is not an object literal, the prop key isn't found,
|
||||
/// or the prop value isn't a literal (so callers can distinguish
|
||||
/// "present but dynamic" from "absent" only via [`has_object_arg_property`]).
|
||||
///
|
||||
/// Used by JS/TS-style "options object as kwargs" gates — e.g.
|
||||
/// `_.template(tpl, { evaluate: false })` — where the safe-flag lives
|
||||
/// in an inline object literal rather than as a dedicated kwarg node
|
||||
/// (which JS does not have). Strict-additive: returns `None` for any
|
||||
/// non-JS-object shape, including bare identifiers passed as the
|
||||
/// options arg, so the gate falls back to the conservative dynamic
|
||||
/// branch.
|
||||
pub(super) fn extract_object_arg_property(
|
||||
call_node: Node,
|
||||
arg_index: usize,
|
||||
prop_name: &str,
|
||||
code: &[u8],
|
||||
) -> Option<String> {
|
||||
let args = call_node.child_by_field_name("arguments")?;
|
||||
let mut cursor = args.walk();
|
||||
let arg = args.named_children(&mut cursor).nth(arg_index)?;
|
||||
let arg = unwrap_parens(arg);
|
||||
if !matches!(arg.kind(), "object" | "dictionary") {
|
||||
return None;
|
||||
}
|
||||
let mut c = arg.walk();
|
||||
for child in arg.named_children(&mut c) {
|
||||
if child.kind() != "pair" {
|
||||
continue;
|
||||
}
|
||||
let Some(key_node) = child.child_by_field_name("key") else {
|
||||
continue;
|
||||
};
|
||||
let key_text = match key_node.kind() {
|
||||
"string" | "string_literal" => text_of(key_node, code).map(|raw| {
|
||||
if raw.len() >= 2 {
|
||||
raw[1..raw.len() - 1].to_string()
|
||||
} else {
|
||||
raw
|
||||
}
|
||||
}),
|
||||
"computed_property_name" => continue,
|
||||
_ => text_of(key_node, code),
|
||||
};
|
||||
if key_text.as_deref() != Some(prop_name) {
|
||||
continue;
|
||||
}
|
||||
let val_node = child.child_by_field_name("value")?;
|
||||
let val_node = unwrap_parens(val_node);
|
||||
return match val_node.kind() {
|
||||
"true" | "false" | "null" | "undefined" | "number" | "string" | "string_literal" => {
|
||||
text_of(val_node, code).map(|s| s.to_string())
|
||||
}
|
||||
// JS booleans true/false are their own node kinds (above), but
|
||||
// some grammar versions wrap them as identifier literals; surface
|
||||
// `undefined` similarly.
|
||||
"identifier" => text_of(val_node, code)
|
||||
.filter(|s| matches!(s.as_str(), "true" | "false" | "null" | "undefined")),
|
||||
_ => None,
|
||||
};
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Return `true` if the call node's positional arg at `arg_index` is an
|
||||
/// object literal containing a property named `prop_name` (whether the
|
||||
/// value is a literal or a dynamic expression). Used alongside
|
||||
/// [`extract_object_arg_property`] so gated-sink classification can
|
||||
/// distinguish "options key absent" (language default) from "options
|
||||
/// key present with dynamic value" (conservative dangerous).
|
||||
pub(super) fn has_object_arg_property(
|
||||
call_node: Node,
|
||||
arg_index: usize,
|
||||
prop_name: &str,
|
||||
code: &[u8],
|
||||
) -> bool {
|
||||
let Some(args) = call_node.child_by_field_name("arguments") else {
|
||||
return false;
|
||||
};
|
||||
let mut cursor = args.walk();
|
||||
let Some(arg) = args.named_children(&mut cursor).nth(arg_index) else {
|
||||
return false;
|
||||
};
|
||||
let arg = unwrap_parens(arg);
|
||||
if !matches!(arg.kind(), "object" | "dictionary") {
|
||||
return false;
|
||||
}
|
||||
let mut c = arg.walk();
|
||||
for child in arg.named_children(&mut c) {
|
||||
match child.kind() {
|
||||
"shorthand_property_identifier" | "shorthand_property_identifier_pattern"
|
||||
if text_of(child, code).as_deref() == Some(prop_name) =>
|
||||
{
|
||||
return true;
|
||||
}
|
||||
"pair" => {
|
||||
if let Some(key_node) = child.child_by_field_name("key") {
|
||||
let key_text = match key_node.kind() {
|
||||
"string" | "string_literal" => text_of(key_node, code).map(|raw| {
|
||||
if raw.len() >= 2 {
|
||||
raw[1..raw.len() - 1].to_string()
|
||||
} else {
|
||||
raw
|
||||
}
|
||||
}),
|
||||
"computed_property_name" => continue,
|
||||
_ => text_of(key_node, code),
|
||||
};
|
||||
if key_text.as_deref() == Some(prop_name) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
/// Inspect the first positional argument of a call node and return its
|
||||
/// tree-sitter `kind()` plus a flag indicating whether any descendant is an
|
||||
/// `interpolation` node. Skips parenthesisation (`(arg0)` is treated as
|
||||
|
|
@ -584,6 +704,29 @@ pub(super) fn find_chained_inner_call<'a>(
|
|||
let function = outer
|
||||
.child_by_field_name("function")
|
||||
.or_else(|| outer.child_by_field_name("method"))?;
|
||||
// Direct double-call form (`f()(x)`): the outer call's `function`
|
||||
// field IS itself a call_expression, with no intermediate
|
||||
// member-chain. Treat the inner call as the chain's innermost.
|
||||
// Without this, lodash-style template-render chains like
|
||||
// `_.template(t)(data)` evade the chained-inner rebinding because
|
||||
// the outer's function field is a `call_expression`, not the
|
||||
// `member_expression` shape the original branch below expects.
|
||||
if matches!(
|
||||
lookup(lang, function.kind()),
|
||||
Kind::CallFn | Kind::CallMethod
|
||||
) {
|
||||
// Recurse: the inner call may itself be chained.
|
||||
if let Some(inner) = find_chained_inner_call(function, lang, code) {
|
||||
return Some(inner);
|
||||
}
|
||||
let inner_func = function
|
||||
.child_by_field_name("function")
|
||||
.or_else(|| function.child_by_field_name("method"))
|
||||
.or_else(|| function.child_by_field_name("name"))?;
|
||||
let raw = text_of(inner_func, code)?;
|
||||
let inner_text: String = raw.chars().filter(|c| !c.is_whitespace()).collect();
|
||||
return Some((function, inner_text));
|
||||
}
|
||||
// The function/method field for a chained call is a member_expression
|
||||
// (JS/TS) or attribute (Python) etc.; its `object` field is the
|
||||
// receiver expression. Only proceed when that receiver is itself a
|
||||
|
|
|
|||
|
|
@ -54,8 +54,9 @@ use literals::{
|
|||
detect_rust_replace_chain_sanitizer, extract_arg_callees, extract_arg_string_literals,
|
||||
extract_arg_uses, extract_const_keyword_arg, extract_const_macro_arg, extract_const_string_arg,
|
||||
extract_destination_field_pairs, extract_destination_kwarg_pairs, extract_kwargs,
|
||||
extract_literal_rhs, extract_shell_array_payload_idents, find_call_node, find_call_node_deep,
|
||||
find_chained_inner_call, has_keyword_arg, has_only_literal_args, is_parameterized_query_call,
|
||||
extract_literal_rhs, extract_object_arg_property, extract_shell_array_payload_idents,
|
||||
find_call_node, find_call_node_deep, find_chained_inner_call, has_keyword_arg,
|
||||
has_object_arg_property, has_only_literal_args, is_parameterized_query_call,
|
||||
java_chain_arg0_kind_for_method, js_chain_arg0_kind_for_method,
|
||||
js_chain_outer_method_for_inner, ruby_chain_arg0_for_method, walk_chain_inner_call_args,
|
||||
};
|
||||
|
|
@ -67,11 +68,33 @@ use params::{
|
|||
/// Test-only re-export of [`extract_param_meta`] so the external
|
||||
/// `tests/typed_extractors_audit.rs` harness can drive the per-param
|
||||
/// classifier directly without spinning up the full scan pipeline.
|
||||
/// Projects away the destructured-siblings third tuple slot so the
|
||||
/// existing tuple-shape assertions in the audit harness keep working;
|
||||
/// the sibling info is plumbed separately through `BodyMeta`.
|
||||
pub fn extract_param_meta_for_test<'a>(
|
||||
func_node: tree_sitter::Node<'a>,
|
||||
lang: &str,
|
||||
code: &'a [u8],
|
||||
) -> Vec<(String, Option<crate::ssa::type_facts::TypeKind>)> {
|
||||
extract_param_meta(func_node, lang, code)
|
||||
.into_iter()
|
||||
.map(|(name, ty, _siblings)| (name, ty))
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Test-only re-export that returns the full per-slot tuple including
|
||||
/// destructured sibling names. Used by the destructured-arg-probe
|
||||
/// regression tests in `src/taint/tests.rs` and the params unit tests
|
||||
/// in `src/cfg/cfg_tests.rs`.
|
||||
pub fn extract_param_meta_with_destructured_for_test<'a>(
|
||||
func_node: tree_sitter::Node<'a>,
|
||||
lang: &str,
|
||||
code: &'a [u8],
|
||||
) -> Vec<(
|
||||
String,
|
||||
Option<crate::ssa::type_facts::TypeKind>,
|
||||
Vec<String>,
|
||||
)> {
|
||||
extract_param_meta(func_node, lang, code)
|
||||
}
|
||||
|
||||
|
|
@ -567,6 +590,17 @@ pub struct BodyMeta {
|
|||
/// `None`, downstream behaviour is identical to the pre-Phase-1
|
||||
/// engine.
|
||||
pub param_types: Vec<Option<crate::ssa::type_facts::TypeKind>>,
|
||||
/// Per-parameter destructured-binding sibling names. Same length
|
||||
/// as `params`; entry `i` lists field names bound by the same
|
||||
/// argument slot as `params[i]`, excluding the primary name itself.
|
||||
/// Empty for non-destructured params. Today populated only for
|
||||
/// JS/TS object-pattern formals (`({ a, b, c })` → params=["a"],
|
||||
/// destructured=[["b","c"]]). Used by per-parameter taint-summary
|
||||
/// probing in `extract_ssa_func_summary` so destructured bindings
|
||||
/// inside the body share the slot's seeded caps and any of them
|
||||
/// being in `validated_must` at a return path counts as the slot
|
||||
/// being validated. Closes the residual gap behind CVE-2026-25544.
|
||||
pub param_destructured_fields: Vec<Vec<String>>,
|
||||
pub param_count: usize,
|
||||
pub span: (usize, usize),
|
||||
pub parent_body_id: Option<BodyId>,
|
||||
|
|
@ -1909,8 +1943,27 @@ pub(super) fn push_node<'a>(
|
|||
}
|
||||
})
|
||||
},
|
||||
|kw| extract_const_keyword_arg(cn, kw, code),
|
||||
|kw| has_keyword_arg(cn, kw, code),
|
||||
|kw| {
|
||||
// For JS/TS, options-bearing args are passed as inline
|
||||
// object literals (`fn(x, { evaluate: false })`) rather
|
||||
// than language-level keyword arguments. When the
|
||||
// standard `keyword_argument`-walking extractor returns
|
||||
// None, fall back to inspecting arg 1's object literal
|
||||
// for a property named `kw`. This lets gates like
|
||||
// `_.template` consult `{ evaluate: false }` literally.
|
||||
extract_const_keyword_arg(cn, kw, code).or_else(|| {
|
||||
if matches!(lang, "javascript" | "typescript") {
|
||||
extract_object_arg_property(cn, 1, kw, code)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
},
|
||||
|kw| {
|
||||
has_keyword_arg(cn, kw, code)
|
||||
|| (matches!(lang, "javascript" | "typescript")
|
||||
&& has_object_arg_property(cn, 1, kw, code))
|
||||
},
|
||||
);
|
||||
|
||||
if !matches.is_empty() {
|
||||
|
|
@ -3871,9 +3924,13 @@ pub(super) fn build_sub<'a>(
|
|||
let is_anon = is_anon_fn_name(&fn_name);
|
||||
let param_meta = extract_param_meta(ast, lang, code);
|
||||
let param_count = param_meta.len();
|
||||
let param_names: Vec<String> = param_meta.iter().map(|(n, _)| n.clone()).collect();
|
||||
let param_names: Vec<String> = param_meta.iter().map(|(n, _, _)| n.clone()).collect();
|
||||
let param_types: Vec<Option<crate::ssa::type_facts::TypeKind>> =
|
||||
param_meta.iter().map(|(_, t)| t.clone()).collect();
|
||||
param_meta.iter().map(|(_, t, _)| t.clone()).collect();
|
||||
let param_destructured_fields: Vec<Vec<String>> = param_meta
|
||||
.iter()
|
||||
.map(|(_, _, siblings)| siblings.clone())
|
||||
.collect();
|
||||
|
||||
// ── 1b) Compute identity discriminators ───────────────────────────
|
||||
let (fn_container, fn_kind) =
|
||||
|
|
@ -4130,6 +4187,7 @@ pub(super) fn build_sub<'a>(
|
|||
name: if is_anon { None } else { Some(fn_name.clone()) },
|
||||
params: param_names,
|
||||
param_types,
|
||||
param_destructured_fields,
|
||||
param_count,
|
||||
span: (ast.start_byte(), ast.end_byte()),
|
||||
parent_body_id: Some(current_body_id),
|
||||
|
|
@ -4628,6 +4686,7 @@ pub(crate) fn build_cfg<'a>(
|
|||
name: None,
|
||||
params: Vec::new(),
|
||||
param_types: Vec::new(),
|
||||
param_destructured_fields: Vec::new(),
|
||||
param_count: 0,
|
||||
span: (0, code.len()),
|
||||
parent_body_id: None,
|
||||
|
|
|
|||
|
|
@ -21,16 +21,27 @@ fn lookup_dto_class(class_name: &str) -> Option<TypeKind> {
|
|||
/// Extract parameter names + per-position [`TypeKind`] from a function
|
||||
/// AST node. Each entry's second slot is `Some(TypeKind)` when the
|
||||
/// parameter's decorator, attribute, or static type annotation maps to
|
||||
/// a known kind, and `None` otherwise. Strictly additive, when no
|
||||
/// type info is recoverable, behaviour is identical to the names-only
|
||||
/// path.
|
||||
/// a known kind, and `None` otherwise. The third slot lists
|
||||
/// destructured field names bound by the same parameter slot — empty
|
||||
/// for non-destructured params and for the primary name itself. E.g.
|
||||
/// for the JS/TS object-pattern formal `({ a, b, c })`, the entry is
|
||||
/// `("a", None, ["b", "c"])`. Strictly additive: when the param is
|
||||
/// not a destructured pattern (or the language has no destructure
|
||||
/// concept), behaviour is identical to the pre-Phase-5 names-only path.
|
||||
///
|
||||
/// Closes the residual gap behind CVE-2026-25544 (PayloadCMS Drizzle
|
||||
/// SQL injection): a per-parameter taint probe that seeds only the
|
||||
/// primary name `column` cannot see flow through sibling destructured
|
||||
/// bindings (`value` etc.) inside the body, so summary extraction
|
||||
/// misses `validated_params_to_return` when a validator helper is
|
||||
/// applied to one of the siblings.
|
||||
pub(super) fn extract_param_meta<'a>(
|
||||
func_node: Node<'a>,
|
||||
lang: &str,
|
||||
code: &'a [u8],
|
||||
) -> Vec<(String, Option<TypeKind>)> {
|
||||
) -> Vec<(String, Option<TypeKind>, Vec<String>)> {
|
||||
let cfg = param_config(lang);
|
||||
let mut out: Vec<(String, Option<TypeKind>)> = Vec::new();
|
||||
let mut out: Vec<(String, Option<TypeKind>, Vec<String>)> = Vec::new();
|
||||
// Try the params_field directly on the function node first.
|
||||
// For C/C++, the parameter list is nested inside the declarator
|
||||
// (function_definition > declarator:function_declarator > parameters:parameter_list),
|
||||
|
|
@ -51,7 +62,7 @@ pub(super) fn extract_param_meta<'a>(
|
|||
if let Some(p) = func_node.child_by_field_name("parameter") {
|
||||
if p.kind() == "identifier" {
|
||||
if let Some(name) = text_of(p, code) {
|
||||
out.push((name, None));
|
||||
out.push((name, None, Vec::new()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -62,7 +73,7 @@ pub(super) fn extract_param_meta<'a>(
|
|||
for child in params.children(&mut cursor) {
|
||||
// Self/this parameter (e.g. Rust's `self_parameter`)
|
||||
if cfg.self_param_kinds.contains(&child.kind()) {
|
||||
out.push(("self".into(), None));
|
||||
out.push(("self".into(), None, Vec::new()));
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
@ -74,14 +85,26 @@ pub(super) fn extract_param_meta<'a>(
|
|||
if let Some(node) = child.child_by_field_name(field) {
|
||||
let mut tmp = Vec::new();
|
||||
collect_idents(node, code, &mut tmp);
|
||||
let candidate = if lang == "rust" {
|
||||
tmp.into_iter().last()
|
||||
let primary = if lang == "rust" {
|
||||
// Rust: last ident is the binding name (e.g.
|
||||
// `Path(project_id): Path<i64>` → `project_id`).
|
||||
tmp.pop()
|
||||
} else if tmp.is_empty() {
|
||||
None
|
||||
} else {
|
||||
tmp.into_iter().next()
|
||||
Some(tmp.remove(0))
|
||||
};
|
||||
if let Some(name) = candidate {
|
||||
if let Some(name) = primary {
|
||||
let ty = classify_param_type(child, lang, code);
|
||||
out.push((name, ty));
|
||||
// Surface destructured siblings only when the
|
||||
// pattern node is a destructure container. For
|
||||
// ordinary (non-destructured) params, `tmp` is
|
||||
// already empty after `pop()` / `remove(0)`.
|
||||
// Object-pattern children of the same slot
|
||||
// (`{ a, b, c }`) leave the remaining names in
|
||||
// `tmp`, which become the slot's siblings.
|
||||
let siblings = sibling_names_for_destructure(node, &tmp, lang);
|
||||
out.push((name, ty, siblings));
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
|
|
@ -92,7 +115,7 @@ pub(super) fn extract_param_meta<'a>(
|
|||
&& child.kind() == "identifier"
|
||||
&& let Some(txt) = text_of(child, code)
|
||||
{
|
||||
out.push((txt, None));
|
||||
out.push((txt, None, Vec::new()));
|
||||
found = true;
|
||||
}
|
||||
// Fallback for C/C++: look for nested declarator → identifier
|
||||
|
|
@ -101,7 +124,7 @@ pub(super) fn extract_param_meta<'a>(
|
|||
collect_idents(child, code, &mut tmp);
|
||||
if let Some(last) = tmp.pop() {
|
||||
let ty = classify_param_type(child, lang, code);
|
||||
out.push((last, ty));
|
||||
out.push((last, ty, Vec::new()));
|
||||
found = true;
|
||||
}
|
||||
}
|
||||
|
|
@ -112,12 +135,22 @@ pub(super) fn extract_param_meta<'a>(
|
|||
// *first* identifier, that is the parameter name; subsequent
|
||||
// identifiers are part of the type annotation or default
|
||||
// expression.
|
||||
//
|
||||
// Destructure-container case (JS arrow `({ a, b }) => …`):
|
||||
// when the child node IS a destructure pattern itself (no
|
||||
// `required_parameter` / `assignment_pattern` wrapper), the
|
||||
// remaining idents after the primary are destructured
|
||||
// bindings sharing this slot — surface them as siblings so
|
||||
// per-parameter summary probing seeds every binding the
|
||||
// slot produces.
|
||||
if !found {
|
||||
let mut tmp = Vec::new();
|
||||
collect_idents(child, code, &mut tmp);
|
||||
if let Some(first) = tmp.into_iter().next() {
|
||||
if !tmp.is_empty() {
|
||||
let first = tmp.remove(0);
|
||||
let ty = classify_param_type(child, lang, code);
|
||||
out.push((first, ty));
|
||||
let siblings = sibling_names_for_destructure(child, &tmp, lang);
|
||||
out.push((first, ty, siblings));
|
||||
}
|
||||
}
|
||||
continue;
|
||||
|
|
@ -127,13 +160,52 @@ pub(super) fn extract_param_meta<'a>(
|
|||
// where the child is an `identifier` node, not a `parameter` wrapper.
|
||||
if child.kind() == "identifier" {
|
||||
if let Some(txt) = text_of(child, code) {
|
||||
out.push((txt, None));
|
||||
out.push((txt, None, Vec::new()));
|
||||
}
|
||||
}
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// Return destructured field-name siblings for a parameter's pattern
|
||||
/// node, but only when the pattern is a recognised destructure
|
||||
/// container (object / record pattern). For ordinary patterns the
|
||||
/// `remaining` slice is already empty so this is a noop. Restricting
|
||||
/// the return to destructure containers prevents typed-parameter
|
||||
/// idioms (`Path<i64>`, `@PathVariable Long userId`, Rust extractor
|
||||
/// wrappers) from accidentally surfacing the type identifier as a
|
||||
/// destructured sibling.
|
||||
fn sibling_names_for_destructure(
|
||||
pattern: Node<'_>,
|
||||
remaining: &[String],
|
||||
lang: &str,
|
||||
) -> Vec<String> {
|
||||
if remaining.is_empty() {
|
||||
return Vec::new();
|
||||
}
|
||||
if !is_destructure_container_kind(pattern.kind(), lang) {
|
||||
return Vec::new();
|
||||
}
|
||||
remaining.to_vec()
|
||||
}
|
||||
|
||||
/// Tell whether a tree-sitter pattern node kind destructures a single
/// argument into several named bindings.
///
/// Deliberately conservative: the only accepted combination is the
/// JavaScript / TypeScript `object_pattern`. Every other `(lang, kind)`
/// pair yields `false`, which leaves the single-name fallback path
/// untouched. Array patterns (`[a, b]`) are intentionally excluded —
/// their unpacking is positional, so the names do not map cleanly onto
/// "all share slot 0".
fn is_destructure_container_kind(kind: &str, lang: &str) -> bool {
    let js_family = matches!(lang, "javascript" | "typescript");
    js_family && kind == "object_pattern"
}
|
||||
|
||||
/// Walk up from a function definition node and build a container path.
|
||||
///
|
||||
/// Records the names of enclosing classes / impls / modules / namespaces /
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue