mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-18 20:15:14 +02:00
Performance and precision pass (#64)
This commit is contained in:
parent
c7c5e0f3a1
commit
fb698d2c27
97 changed files with 9932 additions and 517 deletions
437
src/ast.rs
437
src/ast.rs
|
|
@ -40,7 +40,7 @@ use crate::utils::ext::lowercase_ext;
|
|||
use crate::utils::{Config, query_cache};
|
||||
use petgraph::graph::NodeIndex;
|
||||
use std::borrow::Cow;
|
||||
use std::cell::RefCell;
|
||||
use std::cell::{OnceCell, RefCell};
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::ops::ControlFlow;
|
||||
use std::path::Path;
|
||||
|
|
@ -972,6 +972,27 @@ impl<'a> ParsedSource<'a> {
|
|||
{
|
||||
continue;
|
||||
}
|
||||
// Layer C2: PHP `Serializable::unserialize($input)` magic
|
||||
// method body — `public function unserialize($x) { ...
|
||||
// unserialize($x) ... }`. This is the legacy
|
||||
// `Serializable` interface contract (deprecated since PHP
|
||||
// 8.1). PHP itself invokes the method when restoring an
|
||||
// instance, so the body's `\unserialize($x)` call cannot
|
||||
// be removed without breaking the interface. The
|
||||
// actionable signal is at the class level (the class
|
||||
// implements Serializable — fix is to migrate to
|
||||
// `__serialize` / `__unserialize`), not at this call
|
||||
// site. Genuine deserialization sinks (free-function
|
||||
// `unserialize($_GET[..])`, helpers reading from session
|
||||
// / cache, etc.) keep firing because they are not inside
|
||||
// a method declaration named `unserialize` with a single
|
||||
// formal parameter passed straight to the call.
|
||||
if cq.meta.id == "php.deser.unserialize"
|
||||
&& self.lang_slug == "php"
|
||||
&& is_php_unserialize_magic_method_passthrough(cap.node, self.bytes)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
// Layer D: C/C++ buffer-overflow pattern rules
|
||||
// (`{c,cpp}.memory.strcpy`, `strcat`, `sprintf`) fire
|
||||
// syntactically on every call regardless of argument
|
||||
|
|
@ -1102,6 +1123,13 @@ struct ParsedFile<'a> {
|
|||
file_cfg: FileCfg,
|
||||
lang_rules: LangAnalysisRules,
|
||||
has_lang_rules: bool,
|
||||
/// Per-body SSA + const-prop + type-fact cache, lazily populated on first
|
||||
/// request and indexed by `BodyId.0`. Was being recomputed 2-3× per body
|
||||
/// across `run_cfg_analyses_with_lowered` (cfg analyses + state analyses)
|
||||
/// and `run_auth_analyses` (`collect_file_var_types`); on the gin profile
|
||||
/// `build_body_const_facts` accounted for 13.6% of wall-clock and a
|
||||
/// single-pass cache collapses that to ~4.5%.
|
||||
body_const_facts_cache: OnceCell<Vec<Option<cfg_analysis::BodyConstFacts>>>,
|
||||
}
|
||||
|
||||
impl<'a> ParsedFile<'a> {
|
||||
|
|
@ -1153,9 +1181,33 @@ impl<'a> ParsedFile<'a> {
|
|||
file_cfg,
|
||||
lang_rules,
|
||||
has_lang_rules,
|
||||
body_const_facts_cache: OnceCell::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Per-body const-fact cache, computed once on first request and shared
|
||||
/// across every per-body iteration in this file's analysis. Indexed by
|
||||
/// `BodyId.0` so callers can look up by body identity.
|
||||
fn body_const_facts_all(&self) -> &[Option<cfg_analysis::BodyConstFacts>] {
|
||||
self.body_const_facts_cache.get_or_init(|| {
|
||||
let lang = Lang::from_slug(self.source.lang_slug).unwrap_or(Lang::Rust);
|
||||
self.file_cfg
|
||||
.bodies
|
||||
.iter()
|
||||
.map(|b| cfg_analysis::build_body_const_facts(b, lang))
|
||||
.collect()
|
||||
})
|
||||
}
|
||||
|
||||
/// Look up the cached const facts for a specific body.
|
||||
fn body_const_facts(
|
||||
&self,
|
||||
body: &crate::cfg::BodyCfg,
|
||||
) -> Option<&cfg_analysis::BodyConstFacts> {
|
||||
let all = self.body_const_facts_all();
|
||||
all.get(body.meta.id.0 as usize).and_then(|f| f.as_ref())
|
||||
}
|
||||
|
||||
/// The top-level body's CFG graph (for backward-compatible access).
|
||||
fn cfg_graph(&self) -> &Cfg {
|
||||
&self.file_cfg.toplevel().graph
|
||||
|
|
@ -1468,7 +1520,7 @@ impl<'a> ParsedFile<'a> {
|
|||
.filter(|f| f.body_id == body.meta.id)
|
||||
.cloned()
|
||||
.collect();
|
||||
let body_const_facts = cfg_analysis::build_body_const_facts(body, caller_lang);
|
||||
let body_const_facts = self.body_const_facts(body);
|
||||
let cfg_ctx = cfg_analysis::AnalysisContext {
|
||||
cfg: &body.graph,
|
||||
entry: body.entry,
|
||||
|
|
@ -1481,8 +1533,8 @@ impl<'a> ParsedFile<'a> {
|
|||
taint_findings: &body_taint,
|
||||
analysis_rules: self.rules_ref(),
|
||||
taint_active,
|
||||
body_const_facts: body_const_facts.as_ref(),
|
||||
type_facts: body_const_facts.as_ref().map(|f| &f.type_facts),
|
||||
body_const_facts,
|
||||
type_facts: body_const_facts.map(|f| &f.type_facts),
|
||||
auth_decorators: &body.meta.auth_decorators,
|
||||
closure_released_var_names: Some(
|
||||
closure_released_per_body
|
||||
|
|
@ -1546,13 +1598,11 @@ impl<'a> ParsedFile<'a> {
|
|||
// points-to facts so the proxy-acquire transfer can
|
||||
// suppress SymbolId attribution on field-aliased
|
||||
// receivers (e.g. `m := c.mu; m.Lock()`).
|
||||
let body_pointer_hints = cfg_analysis::build_body_const_facts(body, caller_lang)
|
||||
.as_ref()
|
||||
.and_then(|f| {
|
||||
f.pointer_facts
|
||||
.as_ref()
|
||||
.map(|pf| pf.name_proxy_hints(&f.ssa))
|
||||
});
|
||||
let body_pointer_hints = self.body_const_facts(body).and_then(|f| {
|
||||
f.pointer_facts
|
||||
.as_ref()
|
||||
.map(|pf| pf.name_proxy_hints(&f.ssa))
|
||||
});
|
||||
let state_findings = state::run_state_analysis(
|
||||
&body.graph,
|
||||
body.entry,
|
||||
|
|
@ -1666,12 +1716,11 @@ impl<'a> ParsedFile<'a> {
|
|||
/// syntactic heuristics. Returns `None` when no body produces a
|
||||
/// typed variable.
|
||||
fn collect_file_var_types(&self) -> Option<auth_analysis::VarTypes> {
|
||||
let caller_lang = Lang::from_slug(self.source.lang_slug).unwrap_or(Lang::Rust);
|
||||
let mut merged: std::collections::HashMap<String, crate::ssa::type_facts::TypeKind> =
|
||||
std::collections::HashMap::new();
|
||||
let mut dropped: std::collections::HashSet<String> = std::collections::HashSet::new();
|
||||
for body in &self.file_cfg.bodies {
|
||||
let Some(facts) = cfg_analysis::build_body_const_facts(body, caller_lang) else {
|
||||
let Some(facts) = self.body_const_facts(body) else {
|
||||
continue;
|
||||
};
|
||||
for (idx, def) in facts.ssa.value_defs.iter().enumerate() {
|
||||
|
|
@ -1792,6 +1841,7 @@ pub fn extract_auth_model_for_debug(
|
|||
source.bytes,
|
||||
source.path,
|
||||
&rules,
|
||||
None,
|
||||
);
|
||||
Ok(Some(model))
|
||||
}
|
||||
|
|
@ -2401,6 +2451,165 @@ fn is_php_unserialize_allowed_classes_restricted(
|
|||
false
|
||||
}
|
||||
|
||||
/// PHP-only: returns `true` when the captured `function_call_expression`
|
||||
/// is the canonical `Serializable::unserialize($input)` magic-method
|
||||
/// pass-through — i.e. the call is inside a `method_declaration` named
|
||||
/// exactly `unserialize` (PHP method names are case-insensitive) with
|
||||
/// one formal parameter, and the call's single argument is the bare
|
||||
/// parameter variable.
|
||||
///
|
||||
/// **Why this is a non-actionable site for `php.deser.unserialize`:**
|
||||
/// `Serializable::unserialize($input)` is an interface contract method
|
||||
/// that PHP itself invokes when restoring an instance via the runtime
|
||||
/// `\unserialize($bytes)` machinery. The implementation MUST decode
|
||||
/// `$input` (the body's `\unserialize(...)` call) — there is no
|
||||
/// "safer" rewrite that preserves the contract. The actionable signal
|
||||
/// is at the class level (the class implements the deprecated
|
||||
/// `Serializable` interface — fix is to migrate to `__serialize` /
|
||||
/// `__unserialize`), not at this call site.
|
||||
///
|
||||
/// Conservative recognition:
|
||||
/// - method must be a `method_declaration` (NOT a free `function_definition` —
|
||||
/// the magic semantics only apply to instance methods)
|
||||
/// - method name == `unserialize` (case-insensitive)
|
||||
/// - exactly 1 formal parameter
|
||||
/// - call has exactly 1 argument
|
||||
/// - argument's inner expression is a `variable_name` whose name equals the
|
||||
/// formal parameter's name
|
||||
///
|
||||
/// Genuine deserialization sinks (free `unserialize($_GET[...])`, helpers
|
||||
/// reading from session/cache and passing through, etc.) keep firing
|
||||
/// because they are not inside a method declaration named `unserialize`.
|
||||
fn is_php_unserialize_magic_method_passthrough(cap_node: tree_sitter::Node, bytes: &[u8]) -> bool {
|
||||
// The pattern captures `@n` (the function name); locate the enclosing
|
||||
// function_call_expression.
|
||||
let call_node = if cap_node.kind() == "function_call_expression" {
|
||||
cap_node
|
||||
} else {
|
||||
let mut cur = cap_node;
|
||||
let mut found = None;
|
||||
for _ in 0..4 {
|
||||
if cur.kind() == "function_call_expression" {
|
||||
found = Some(cur);
|
||||
break;
|
||||
}
|
||||
match cur.parent() {
|
||||
Some(p) => cur = p,
|
||||
None => break,
|
||||
}
|
||||
}
|
||||
match found {
|
||||
Some(c) => c,
|
||||
None => return false,
|
||||
}
|
||||
};
|
||||
|
||||
// Walk up to the nearest method_declaration. Stop at any other
|
||||
// function-introducing scope (free function, closure, arrow) — those
|
||||
// are not the Serializable contract.
|
||||
let mut cur = call_node;
|
||||
let method = loop {
|
||||
let Some(parent) = cur.parent() else {
|
||||
return false;
|
||||
};
|
||||
match parent.kind() {
|
||||
"method_declaration" => break parent,
|
||||
"function_definition"
|
||||
| "anonymous_function"
|
||||
| "anonymous_function_creation_expression"
|
||||
| "arrow_function"
|
||||
| "program" => return false,
|
||||
_ => {}
|
||||
}
|
||||
cur = parent;
|
||||
};
|
||||
|
||||
// Method name must be exactly `unserialize` (case-insensitive).
|
||||
let Some(name_node) = method
|
||||
.child_by_field_name("name")
|
||||
.or_else(|| find_named_child_of_kind(method, "name"))
|
||||
else {
|
||||
return false;
|
||||
};
|
||||
let Ok(method_name) = std::str::from_utf8(&bytes[name_node.byte_range()]) else {
|
||||
return false;
|
||||
};
|
||||
if !method_name.eq_ignore_ascii_case("unserialize") {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Method must have exactly 1 formal parameter; capture its bare name.
|
||||
let Some(params) = method
|
||||
.child_by_field_name("parameters")
|
||||
.or_else(|| find_named_child_of_kind(method, "formal_parameters"))
|
||||
else {
|
||||
return false;
|
||||
};
|
||||
let mut formal_params: Vec<tree_sitter::Node> = Vec::new();
|
||||
for i in 0..params.named_child_count() as u32 {
|
||||
if let Some(p) = params.named_child(i)
|
||||
&& matches!(
|
||||
p.kind(),
|
||||
"simple_parameter"
|
||||
| "variadic_parameter"
|
||||
| "property_promotion_parameter"
|
||||
| "promoted_constructor_parameter"
|
||||
)
|
||||
{
|
||||
formal_params.push(p);
|
||||
}
|
||||
}
|
||||
if formal_params.len() != 1 {
|
||||
return false;
|
||||
}
|
||||
let param = formal_params[0];
|
||||
let var_node = param
|
||||
.child_by_field_name("name")
|
||||
.or_else(|| find_named_child_of_kind(param, "variable_name"));
|
||||
let Some(var_node) = var_node else {
|
||||
return false;
|
||||
};
|
||||
let inner_name_node = if var_node.kind() == "variable_name" {
|
||||
var_node.named_child(0)
|
||||
} else {
|
||||
Some(var_node)
|
||||
};
|
||||
let Some(inner_name_node) = inner_name_node else {
|
||||
return false;
|
||||
};
|
||||
let Ok(param_name) = std::str::from_utf8(&bytes[inner_name_node.byte_range()]) else {
|
||||
return false;
|
||||
};
|
||||
|
||||
// Call must have exactly 1 argument that is the bare parameter variable.
|
||||
let Some(arg_list) = find_named_child_of_kind(call_node, "arguments") else {
|
||||
return false;
|
||||
};
|
||||
let mut args: Vec<tree_sitter::Node> = Vec::new();
|
||||
for i in 0..arg_list.named_child_count() as u32 {
|
||||
if let Some(c) = arg_list.named_child(i)
|
||||
&& c.kind() == "argument"
|
||||
{
|
||||
args.push(c);
|
||||
}
|
||||
}
|
||||
if args.len() != 1 {
|
||||
return false;
|
||||
}
|
||||
let inner = args[0].named_child(0);
|
||||
let Some(inner) = inner else { return false };
|
||||
if inner.kind() != "variable_name" {
|
||||
return false;
|
||||
}
|
||||
let Some(arg_name_node) = inner.named_child(0) else {
|
||||
return false;
|
||||
};
|
||||
let Ok(arg_name) = std::str::from_utf8(&bytes[arg_name_node.byte_range()]) else {
|
||||
return false;
|
||||
};
|
||||
arg_name == param_name
|
||||
}
|
||||
|
||||
/// C/C++-only Layer D: structural suppression of buffer-overflow pattern
|
||||
/// rules when the source / format-string argument is a literal whose
|
||||
/// contributed length is statically bounded.
|
||||
|
|
@ -3999,6 +4208,15 @@ pub struct FusedResult {
|
|||
crate::symbol::FuncKey,
|
||||
auth_analysis::model::AuthCheckSummary,
|
||||
)>,
|
||||
/// Per-Python-file router-level dep declarations + `include_router`
|
||||
/// edges for cross-file FastAPI router-dep propagation. `None` for
|
||||
/// non-Python files; `Some((module_id, facts))` for Python files
|
||||
/// where `module_id` is the file's
|
||||
/// [`auth_analysis::router_facts::module_id_for_storage`] key.
|
||||
/// Pass 1 collects these into
|
||||
/// `GlobalSummaries.router_facts_by_module`; pass 2 resolves them
|
||||
/// per-file via `GlobalSummaries::resolve_cross_file_router_deps`.
|
||||
pub router_facts: Option<(String, auth_analysis::router_facts::PerFileRouterFacts)>,
|
||||
}
|
||||
|
||||
/// Parse the file once, build the CFG once, and produce both function
|
||||
|
|
@ -4034,6 +4252,7 @@ pub fn analyse_file_fused(
|
|||
cfg_nodes: 0,
|
||||
ssa_bodies: vec![],
|
||||
auth_summaries: vec![],
|
||||
router_facts: None,
|
||||
});
|
||||
};
|
||||
|
||||
|
|
@ -4081,6 +4300,28 @@ pub fn analyse_file_fused(
|
|||
(vec![], vec![])
|
||||
};
|
||||
|
||||
let mut auth_summaries: Vec<(
|
||||
crate::symbol::FuncKey,
|
||||
auth_analysis::model::AuthCheckSummary,
|
||||
)> = Vec::new();
|
||||
|
||||
// Per-file router-dep facts for cross-file FastAPI propagation.
|
||||
// Extracted unconditionally for Python files so pass 1 can persist
|
||||
// them into `GlobalSummaries.router_facts_by_module` even on Cfg /
|
||||
// Taint modes (the auth analysis itself runs only under Full, but
|
||||
// the index has to be populated by the time pass 2 launches).
|
||||
let router_facts_for_this_file = if parsed.source.lang_slug == "python" {
|
||||
auth_analysis::router_facts::module_id_for_storage(parsed.source.path).map(|module_id| {
|
||||
let facts = auth_analysis::router_facts::extract_router_facts_for_python(
|
||||
&parsed.source.tree,
|
||||
parsed.source.bytes,
|
||||
);
|
||||
(module_id, facts)
|
||||
})
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
if cfg.scanner.mode == AnalysisMode::Full || cfg.scanner.mode == AnalysisMode::Ast {
|
||||
let ast_findings = parsed.source.run_ast_queries(cfg);
|
||||
// Layer B only applies when taint had the opportunity to evaluate
|
||||
|
|
@ -4095,23 +4336,70 @@ pub fn analyse_file_fused(
|
|||
} else {
|
||||
out.extend(ast_findings);
|
||||
}
|
||||
out.extend(parsed.run_auth_analyses(cfg, global_summaries, scan_root));
|
||||
// Build the AuthorizationModel exactly once per file when Full
|
||||
// mode needs both diagnostics AND per-file summaries; pre-fix
|
||||
// the diag path and the summary path each ran their own
|
||||
// `extract::extract_authorization_model`, duplicating
|
||||
// `collect_top_level_units` + every framework extractor's AST
|
||||
// walk. See `auth_analysis::run_auth_analysis_with_model` for
|
||||
// measured savings.
|
||||
let auth_rules = auth_analysis::config::build_auth_rules(cfg, parsed.source.lang_slug);
|
||||
if auth_rules.enabled {
|
||||
// Resolve cross-file router-deps for the current file (Python only).
|
||||
// The resolved map lives on `AuthorizationModel.cross_file_router_deps`
|
||||
// BEFORE `FlaskExtractor::extract` runs, so the in-extractor merge
|
||||
// sees both inline router-deps and the cross-file lift in one pass.
|
||||
let cross_file_router_deps = if parsed.source.lang_slug == "python"
|
||||
&& let Some(gs) = global_summaries
|
||||
&& let Some(child_module_id) =
|
||||
auth_analysis::router_facts::module_id_for_path(parsed.source.path)
|
||||
{
|
||||
let resolved = gs.resolve_cross_file_router_deps(&child_module_id);
|
||||
if resolved.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(resolved)
|
||||
}
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let auth_model = auth_analysis::extract::extract_authorization_model(
|
||||
parsed.source.lang_slug,
|
||||
cfg.framework_ctx.as_ref(),
|
||||
&parsed.source.tree,
|
||||
parsed.source.bytes,
|
||||
parsed.source.path,
|
||||
&auth_rules,
|
||||
cross_file_router_deps.as_ref(),
|
||||
);
|
||||
// Extract summaries from the **base** model (pre var-types,
|
||||
// pre-helper-lifting) so the persisted per-file summary
|
||||
// carries only the helper's own intrinsic auth checks,
|
||||
// matching the legacy `extract_auth_summaries_by_key` path
|
||||
// bit-for-bit.
|
||||
if cfg.scanner.mode == AnalysisMode::Full {
|
||||
auth_summaries = auth_analysis::extract_auth_summaries_from_model(
|
||||
&auth_model,
|
||||
parsed.source.lang_slug,
|
||||
parsed.source.path,
|
||||
scan_root,
|
||||
);
|
||||
}
|
||||
let var_types = parsed.collect_file_var_types();
|
||||
out.extend(auth_analysis::run_auth_analysis_with_model(
|
||||
auth_model,
|
||||
&parsed.source.tree,
|
||||
parsed.source.lang_slug,
|
||||
parsed.source.path,
|
||||
&auth_rules,
|
||||
var_types.as_ref(),
|
||||
global_summaries,
|
||||
scan_root,
|
||||
));
|
||||
}
|
||||
}
|
||||
parsed.source.finalize_diags(&mut out, cfg);
|
||||
|
||||
let auth_summaries = if cfg.scanner.mode == AnalysisMode::Full {
|
||||
auth_analysis::extract_auth_summaries_by_key(
|
||||
&parsed.source.tree,
|
||||
parsed.source.bytes,
|
||||
parsed.source.lang_slug,
|
||||
parsed.source.path,
|
||||
cfg,
|
||||
scan_root,
|
||||
)
|
||||
} else {
|
||||
Vec::new()
|
||||
};
|
||||
|
||||
Ok(FusedResult {
|
||||
summaries,
|
||||
diags: out,
|
||||
|
|
@ -4119,6 +4407,7 @@ pub fn analyse_file_fused(
|
|||
cfg_nodes,
|
||||
ssa_bodies,
|
||||
auth_summaries,
|
||||
router_facts: router_facts_for_this_file,
|
||||
})
|
||||
}
|
||||
|
||||
|
|
@ -4441,6 +4730,100 @@ fn php_unserialize_allowed_classes_recognises_safe_forms() {
|
|||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn php_unserialize_magic_method_passthrough_recognises_serializable_contract() {
|
||||
let mut parser = tree_sitter::Parser::new();
|
||||
let lang = tree_sitter::Language::from(tree_sitter_php::LANGUAGE_PHP);
|
||||
parser.set_language(&lang).unwrap();
|
||||
let q = r#"(function_call_expression function: (name) @n (#eq? @n "unserialize")) @vuln"#;
|
||||
|
||||
// Canonical Serializable::unserialize delegating to __unserialize.
|
||||
let code = b"<?php\nclass R {\n public function unserialize($serialized): void {\n $this->__unserialize(unserialize($serialized));\n }\n}\n";
|
||||
let tree = parser.parse(code, None).unwrap();
|
||||
let cap = first_php_capture(&tree, code, q);
|
||||
assert!(
|
||||
is_php_unserialize_magic_method_passthrough(cap, code),
|
||||
"Serializable::unserialize($x) → unserialize($x) should be suppressed"
|
||||
);
|
||||
|
||||
// Multi-target list-destructuring assignment shape (Joomla Cli/Input).
|
||||
let code = b"<?php\nclass C {\n public function unserialize($input) {\n [$this->a, $this->b] = unserialize($input);\n }\n}\n";
|
||||
let tree = parser.parse(code, None).unwrap();
|
||||
let cap = first_php_capture(&tree, code, q);
|
||||
assert!(
|
||||
is_php_unserialize_magic_method_passthrough(cap, code),
|
||||
"list-destructuring inside Serializable::unserialize should be suppressed"
|
||||
);
|
||||
|
||||
// Case-insensitive method name (PHP semantics).
|
||||
let code = b"<?php\nclass C { public function UnSerialize($d) { return unserialize($d); } }\n";
|
||||
let tree = parser.parse(code, None).unwrap();
|
||||
let cap = first_php_capture(&tree, code, q);
|
||||
assert!(
|
||||
is_php_unserialize_magic_method_passthrough(cap, code),
|
||||
"method name should match case-insensitively (PHP)"
|
||||
);
|
||||
|
||||
// Free function `unserialize` is NOT a magic method, must NOT be suppressed.
|
||||
let code = b"<?php\nfunction load($d) { return unserialize($d); }\n";
|
||||
let tree = parser.parse(code, None).unwrap();
|
||||
let cap = first_php_capture(&tree, code, q);
|
||||
assert!(
|
||||
!is_php_unserialize_magic_method_passthrough(cap, code),
|
||||
"free function should NOT be suppressed"
|
||||
);
|
||||
|
||||
// Different method name, NOT a Serializable contract, must NOT be suppressed.
|
||||
let code = b"<?php\nclass C { public function decode($d) { return unserialize($d); } }\n";
|
||||
let tree = parser.parse(code, None).unwrap();
|
||||
let cap = first_php_capture(&tree, code, q);
|
||||
assert!(
|
||||
!is_php_unserialize_magic_method_passthrough(cap, code),
|
||||
"method named `decode` should NOT be suppressed"
|
||||
);
|
||||
|
||||
// Method named `unserialize` but with TWO params, NOT the magic signature,
|
||||
// must NOT be suppressed.
|
||||
let code = b"<?php\nclass C { public function unserialize($d, $opts) { return unserialize($d, $opts); } }\n";
|
||||
let tree = parser.parse(code, None).unwrap();
|
||||
let cap = first_php_capture(&tree, code, q);
|
||||
assert!(
|
||||
!is_php_unserialize_magic_method_passthrough(cap, code),
|
||||
"two-param method named unserialize should NOT be suppressed"
|
||||
);
|
||||
|
||||
// Magic-method signature but the call argument is NOT the formal param —
|
||||
// user is unserializing some other source. Must NOT be suppressed.
|
||||
let code = b"<?php\nclass C { public function unserialize($input) { return unserialize($_GET['x']); } }\n";
|
||||
let tree = parser.parse(code, None).unwrap();
|
||||
let cap = first_php_capture(&tree, code, q);
|
||||
assert!(
|
||||
!is_php_unserialize_magic_method_passthrough(cap, code),
|
||||
"non-pass-through arg inside magic method should NOT be suppressed"
|
||||
);
|
||||
|
||||
// Wrapped argument (`unserialize(trim($input))`) is NOT a bare-param
|
||||
// pass-through — keep firing. This shape covers cache/session
|
||||
// pass-throughs that the rule should still surface.
|
||||
let code = b"<?php\nclass C { public function unserialize($input) { return unserialize(trim($input)); } }\n";
|
||||
let tree = parser.parse(code, None).unwrap();
|
||||
let cap = first_php_capture(&tree, code, q);
|
||||
assert!(
|
||||
!is_php_unserialize_magic_method_passthrough(cap, code),
|
||||
"wrapped argument inside magic method should NOT be suppressed (conservative)"
|
||||
);
|
||||
|
||||
// Anonymous function named-like context (defensive — anonymous_function
|
||||
// is not a method_declaration).
|
||||
let code = b"<?php\n$f = function($input) { return unserialize($input); };\n";
|
||||
let tree = parser.parse(code, None).unwrap();
|
||||
let cap = first_php_capture(&tree, code, q);
|
||||
assert!(
|
||||
!is_php_unserialize_magic_method_passthrough(cap, code),
|
||||
"closure should NOT be suppressed"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn php_weak_hash_non_crypto_use_recognises_canonical_shapes() {
|
||||
let mut parser = tree_sitter::Parser::new();
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue