diff --git a/src/dynamic/framework/adapters/java_thymeleaf.rs b/src/dynamic/framework/adapters/java_thymeleaf.rs index 8c18b3a8..8494a673 100644 --- a/src/dynamic/framework/adapters/java_thymeleaf.rs +++ b/src/dynamic/framework/adapters/java_thymeleaf.rs @@ -4,19 +4,58 @@ //! Phase 04 (Track J.2). Fires when the function body invokes //! `TemplateEngine::process()` (matched by the last segment //! of the callee — the call graph normaliser drops the receiver). +//! +//! Strengthened to walk the AST for a real `method_invocation` whose +//! first positional argument names a parameter listed in +//! `summary.tainted_sink_params` or `summary.propagating_params`, +//! removing the comment-substring FP. use crate::dynamic::framework::{FrameworkAdapter, FrameworkBinding}; use crate::evidence::EntryKind; use crate::summary::FuncSummary; use crate::symbol::Lang; +use tree_sitter::Node; pub struct JavaThymeleafAdapter; const ADAPTER_NAME: &str = "java-thymeleaf"; -fn callee_is_thymeleaf(name: &str) -> bool { - let last = name.rsplit_once('.').map(|(_, s)| s).unwrap_or(name); - matches!(last, "process" | "processSpring") +fn is_thymeleaf_entry(name: &str) -> bool { + matches!(name, "process" | "processSpring") +} + +fn ast_confirms_tainted_call(root: Node<'_>, bytes: &[u8], summary: &FuncSummary) -> bool { + let mut found = false; + walk(root, bytes, summary, &mut found); + found +} + +fn walk(node: Node<'_>, bytes: &[u8], summary: &FuncSummary, found: &mut bool) { + if *found { + return; + } + if node.kind() == "method_invocation" + && let Some(name) = node + .child_by_field_name("name") + .and_then(|n| n.utf8_text(bytes).ok()) + && is_thymeleaf_entry(name) + && let Some(args) = node.child_by_field_name("arguments") + && let Some(first) = first_positional_arg(args) + && let Ok(text) = first.utf8_text(bytes) + && super::arg_is_tainted_param(summary, text) + { + *found = true; + return; + } + let mut cur = node.walk(); + for child in node.children(&mut cur) { + walk(child, bytes, summary, found); + } +} + +fn first_positional_arg<'a>(args: Node<'a>) -> Option> { + let mut cur = args.walk(); + args.named_children(&mut cur).next() } impl FrameworkAdapter for JavaThymeleafAdapter { @@ -31,41 +70,29 @@ impl FrameworkAdapter for JavaThymeleafAdapter { fn detect( &self, summary: &FuncSummary, - _ast: tree_sitter::Node<'_>, + ast: tree_sitter::Node<'_>, file_bytes: &[u8], ) -> Option { - let matches_call = super::any_callee_matches(summary, callee_is_thymeleaf); - let matches_source = file_bytes + let cheap_filter = file_bytes .windows(b"org.thymeleaf".len()) .any(|w| w == b"org.thymeleaf") || file_bytes .windows(b"TemplateEngine".len()) .any(|w| w == b"TemplateEngine"); - if matches_call && matches_source { - return Some(FrameworkBinding { - adapter: ADAPTER_NAME.to_owned(), - kind: EntryKind::Function, - route: None, - request_params: Vec::new(), - response_writer: None, - middleware: Vec::new(), - }); + if !cheap_filter { + return None; } - if matches_source - && file_bytes - .windows(b".process(".len()) - .any(|w| w == b".process(") - { - return Some(FrameworkBinding { - adapter: ADAPTER_NAME.to_owned(), - kind: EntryKind::Function, - route: None, - request_params: Vec::new(), - response_writer: None, - middleware: Vec::new(), - }); + if !ast_confirms_tainted_call(ast, file_bytes, summary) { + return None; } - None + Some(FrameworkBinding { + adapter: ADAPTER_NAME.to_owned(), + kind: EntryKind::Function, + route: None, + request_params: Vec::new(), + response_writer: None, + middleware: Vec::new(), + }) } } @@ -80,15 +107,22 @@ mod tests { parser.parse(src, None).unwrap() } + fn summary_for(name: &str, params: &[&str], tainted: &[usize]) -> FuncSummary { + FuncSummary { + name: name.into(), + param_count: params.len(), + param_names: params.iter().map(|s| (*s).to_owned()).collect(), + tainted_sink_params: tainted.to_vec(), + callees: vec![crate::summary::CalleeSite::bare("process")], + ..Default::default() + } + } + #[test] fn fires_on_template_engine_process() { let src: &[u8] = b"import org.thymeleaf.TemplateEngine;\npublic class V { public static String run(String body) { TemplateEngine e = new TemplateEngine(); return e.process(body, null); } }\n"; let tree = parse_java(src); - let summary = FuncSummary { - name: "run".into(), - callees: vec![crate::summary::CalleeSite::bare("process")], - ..Default::default() - }; + let summary = summary_for("run", &["body"], &[0]); assert!(JavaThymeleafAdapter .detect(&summary, tree.root_node(), src) .is_some()); @@ -107,4 +141,26 @@ mod tests { .detect(&summary, tree.root_node(), src) .is_none()); } + + #[test] + fn skips_comment_substring_with_constant_arg() { + // The comment mentions `org.thymeleaf`; the call passes a + // literal — no tainted parameter reaches the engine. + let src: &[u8] = b"// org.thymeleaf.TemplateEngine is great\npublic class V { public static String run(String body) { TemplateEngine e = new TemplateEngine(); return e.process(\"static\", null); } }\n"; + let tree = parse_java(src); + let summary = summary_for("run", &["body"], &[0]); + assert!(JavaThymeleafAdapter + .detect(&summary, tree.root_node(), src) + .is_none()); + } + + #[test] + fn skips_when_param_not_in_tainted_set() { + let src: &[u8] = b"import org.thymeleaf.TemplateEngine;\npublic class V { public static String run(String body) { TemplateEngine e = new TemplateEngine(); return e.process(body, null); } }\n"; + let tree = parse_java(src); + let summary = summary_for("run", &["body"], &[]); + assert!(JavaThymeleafAdapter + .detect(&summary, tree.root_node(), src) + .is_none()); + } } diff --git a/src/dynamic/framework/adapters/js_handlebars.rs b/src/dynamic/framework/adapters/js_handlebars.rs index fee5e9d9..84faa6f0 100644 --- a/src/dynamic/framework/adapters/js_handlebars.rs +++ b/src/dynamic/framework/adapters/js_handlebars.rs @@ -4,19 +4,71 @@ //! Phase 04 (Track J.2). Fires when the function body invokes //! `Handlebars.compile()` (matched by the last segment of the //! callee — the call graph normaliser drops the receiver). +//! +//! Strengthened to walk the AST for a real `call_expression` whose +//! first positional argument names a parameter listed in +//! `summary.tainted_sink_params` or `summary.propagating_params`, +//! removing the comment-substring FP. use crate::dynamic::framework::{FrameworkAdapter, FrameworkBinding}; use crate::evidence::EntryKind; use crate::summary::FuncSummary; use crate::symbol::Lang; +use tree_sitter::Node; pub struct JsHandlebarsAdapter; const ADAPTER_NAME: &str = "js-handlebars"; -fn callee_is_handlebars(name: &str) -> bool { - let last = name.rsplit_once('.').map(|(_, s)| s).unwrap_or(name); - matches!(last, "compile" | "precompile" | "SafeString") +fn callee_last_segment(name: &str) -> &str { + name.rsplit_once('.').map(|(_, s)| s).unwrap_or(name) +} + +fn is_handlebars_entry(name: &str) -> bool { + matches!( + callee_last_segment(name), + "compile" | "precompile" | "SafeString" + ) +} + +fn ast_confirms_tainted_call(root: Node<'_>, bytes: &[u8], summary: &FuncSummary) -> bool { + let mut found = false; + walk(root, bytes, summary, &mut found); + found +} + +fn walk(node: Node<'_>, bytes: &[u8], summary: &FuncSummary, found: &mut bool) { + if *found { + return; + } + if node.kind() == "call_expression" + && let Some(func) = node + .child_by_field_name("function") + .and_then(|n| n.utf8_text(bytes).ok()) + && is_handlebars_entry(func) + && let Some(args) = node.child_by_field_name("arguments") + && let Some(first) = first_positional_arg(args) + && let Ok(text) = first.utf8_text(bytes) + && super::arg_is_tainted_param(summary, text) + { + *found = true; + return; + } + let mut cur = node.walk(); + for child in node.children(&mut cur) { + walk(child, bytes, summary, found); + } +} + +fn first_positional_arg<'a>(args: Node<'a>) -> Option> { + let mut cur = args.walk(); + for arg in args.named_children(&mut cur) { + if arg.kind() == "spread_element" { + continue; + } + return Some(arg); + } + None } impl FrameworkAdapter for JsHandlebarsAdapter { @@ -31,27 +83,32 @@ impl FrameworkAdapter for JsHandlebarsAdapter { fn detect( &self, summary: &FuncSummary, - _ast: tree_sitter::Node<'_>, + ast: tree_sitter::Node<'_>, file_bytes: &[u8], ) -> Option { - let matches_call = super::any_callee_matches(summary, callee_is_handlebars); - let matches_source = file_bytes + let cheap_filter = file_bytes .windows(b"handlebars".len()) .any(|w| w.eq_ignore_ascii_case(b"handlebars")) || file_bytes .windows(b"Handlebars".len()) .any(|w| w == b"Handlebars"); - if matches_call && matches_source { - return Some(FrameworkBinding { - adapter: ADAPTER_NAME.to_owned(), - kind: EntryKind::Function, - route: None, - request_params: Vec::new(), - response_writer: None, - middleware: Vec::new(), - }); + if !cheap_filter { + return None; } - None + if !super::any_callee_matches(summary, is_handlebars_entry) { + return None; + } + if !ast_confirms_tainted_call(ast, file_bytes, summary) { + return None; + } + Some(FrameworkBinding { + adapter: ADAPTER_NAME.to_owned(), + kind: EntryKind::Function, + route: None, + request_params: Vec::new(), + response_writer: None, + middleware: Vec::new(), + }) } } @@ -66,15 +123,22 @@ mod tests { parser.parse(src, None).unwrap() } + fn summary_for(name: &str, params: &[&str], tainted: &[usize]) -> FuncSummary { + FuncSummary { + name: name.into(), + param_count: params.len(), + param_names: params.iter().map(|s| (*s).to_owned()).collect(), + tainted_sink_params: tainted.to_vec(), + callees: vec![crate::summary::CalleeSite::bare("compile")], + ..Default::default() + } + } + #[test] fn fires_on_handlebars_compile() { let src: &[u8] = b"const Handlebars = require('handlebars');\nfunction render(body) {\n return Handlebars.compile(body)({});\n}\n"; let tree = parse_js(src); - let summary = FuncSummary { - name: "render".into(), - callees: vec![crate::summary::CalleeSite::bare("compile")], - ..Default::default() - }; + let summary = summary_for("render", &["body"], &[0]); assert!(JsHandlebarsAdapter .detect(&summary, tree.root_node(), src) .is_some()); @@ -92,4 +156,24 @@ mod tests { .detect(&summary, tree.root_node(), src) .is_none()); } + + #[test] + fn skips_comment_substring_with_constant_arg() { + let src: &[u8] = b"// uses Handlebars\nfunction render(body) {\n return Handlebars.compile(\"static\")({});\n}\n"; + let tree = parse_js(src); + let summary = summary_for("render", &["body"], &[0]); + assert!(JsHandlebarsAdapter + .detect(&summary, tree.root_node(), src) + .is_none()); + } + + #[test] + fn skips_when_param_not_in_tainted_set() { + let src: &[u8] = b"const Handlebars = require('handlebars');\nfunction render(body) {\n return Handlebars.compile(body)({});\n}\n"; + let tree = parse_js(src); + let summary = summary_for("render", &["body"], &[]); + assert!(JsHandlebarsAdapter + .detect(&summary, tree.root_node(), src) + .is_none()); + } } diff --git a/src/dynamic/framework/adapters/ldap_php.rs b/src/dynamic/framework/adapters/ldap_php.rs index 5d97ac50..b732ccbc 100644 --- a/src/dynamic/framework/adapters/ldap_php.rs +++ b/src/dynamic/framework/adapters/ldap_php.rs @@ -5,24 +5,38 @@ //! the canonical PHP directory-client entry points (`ldap_search`, //! `ldap_list`, `ldap_read`) and the surrounding source mentions the //! matching `ldap_*` API surface. +//! +//! Strengthened to walk the AST and reject the binding when any of +//! the search call's argument subtrees flows through PHP's +//! `ldap_escape` filter encoder. use crate::dynamic::framework::{FrameworkAdapter, FrameworkBinding}; use crate::evidence::EntryKind; use crate::summary::FuncSummary; use crate::symbol::Lang; +use tree_sitter::Node; pub struct LdapPhpAdapter; const ADAPTER_NAME: &str = "ldap-php"; -fn callee_is_ldap_search(name: &str) -> bool { - let last = name - .rsplit_once("::") +fn callee_last_segment(name: &str) -> &str { + name.rsplit_once("::") .map(|(_, s)| s) .or_else(|| name.rsplit_once('.').map(|(_, s)| s)) .or_else(|| name.rsplit_once("->").map(|(_, s)| s)) - .unwrap_or(name); - matches!(last, "ldap_search" | "ldap_list" | "ldap_read") + .unwrap_or(name) +} + +fn callee_is_ldap_search(name: &str) -> bool { + matches!( + callee_last_segment(name), + "ldap_search" | "ldap_list" | "ldap_read" + ) +} + +fn callee_is_ldap_sanitiser(name: &str) -> bool { + matches!(callee_last_segment(name), "ldap_escape") } fn source_imports_ldap(file_bytes: &[u8]) -> bool { @@ -39,6 +53,68 @@ fn source_imports_ldap(file_bytes: &[u8]) -> bool { .any(|n| file_bytes.windows(n.len()).any(|w| w == *n)) } +fn ast_confirms_unsanitised_search(root: Node<'_>, bytes: &[u8]) -> bool { + let mut found_unsanitised = false; + let mut saw_any_search = false; + walk(root, bytes, &mut found_unsanitised, &mut saw_any_search); + found_unsanitised || !saw_any_search +} + +fn walk(node: Node<'_>, bytes: &[u8], unsanitised: &mut bool, saw_any: &mut bool) { + if *unsanitised { + return; + } + if matches!( + node.kind(), + "function_call_expression" | "member_call_expression" | "scoped_call_expression" + ) && let Some(name) = node + .child_by_field_name("function") + .or_else(|| node.child_by_field_name("name")) + .and_then(|n| n.utf8_text(bytes).ok()) + && callee_is_ldap_search(name) + { + *saw_any = true; + if let Some(args) = node.child_by_field_name("arguments") + && !args_contain_sanitiser(args, bytes) + { + *unsanitised = true; + return; + } + } + let mut cur = node.walk(); + for child in node.children(&mut cur) { + walk(child, bytes, unsanitised, saw_any); + } +} + +fn args_contain_sanitiser(args: Node<'_>, bytes: &[u8]) -> bool { + let mut hit = false; + scan_for_sanitiser(args, bytes, &mut hit); + hit +} + +fn scan_for_sanitiser(node: Node<'_>, bytes: &[u8], hit: &mut bool) { + if *hit { + return; + } + if matches!( + node.kind(), + "function_call_expression" | "member_call_expression" | "scoped_call_expression" + ) && let Some(name) = node + .child_by_field_name("function") + .or_else(|| node.child_by_field_name("name")) + .and_then(|n| n.utf8_text(bytes).ok()) + && callee_is_ldap_sanitiser(name) + { + *hit = true; + return; + } + let mut cur = node.walk(); + for child in node.children(&mut cur) { + scan_for_sanitiser(child, bytes, hit); + } +} + impl FrameworkAdapter for LdapPhpAdapter { fn name(&self) -> &'static str { ADAPTER_NAME @@ -51,23 +127,26 @@ impl FrameworkAdapter for LdapPhpAdapter { fn detect( &self, summary: &FuncSummary, - _ast: tree_sitter::Node<'_>, + ast: tree_sitter::Node<'_>, file_bytes: &[u8], ) -> Option { - let matches_call = super::any_callee_matches(summary, callee_is_ldap_search); - let matches_source = source_imports_ldap(file_bytes); - if matches_call && matches_source { - Some(FrameworkBinding { - adapter: ADAPTER_NAME.to_owned(), - kind: EntryKind::Function, - route: None, - request_params: Vec::new(), - response_writer: None, - middleware: Vec::new(), - }) - } else { - None + if !source_imports_ldap(file_bytes) { + return None; } + if !super::any_callee_matches(summary, callee_is_ldap_search) { + return None; + } + if !ast_confirms_unsanitised_search(ast, file_bytes) { + return None; + } + Some(FrameworkBinding { + adapter: ADAPTER_NAME.to_owned(), + kind: EntryKind::Function, + route: None, + request_params: Vec::new(), + response_writer: None, + middleware: Vec::new(), + }) } } @@ -111,4 +190,21 @@ mod tests { .detect(&summary, tree.root_node(), src) .is_none()); } + + #[test] + fn skips_when_filter_arg_is_sanitised() { + let src: &[u8] = b" &str { + name.rsplit_once('.').map(|(_, s)| s).unwrap_or(name) +} + fn callee_is_ldap_search(name: &str) -> bool { - let last = name.rsplit_once('.').map(|(_, s)| s).unwrap_or(name); matches!( - last, + callee_last_segment(name), "search_s" | "search_ext_s" | "search" | "search_st" | "search_subtree_s" ) } +fn callee_is_ldap_sanitiser(name: &str) -> bool { + matches!( + callee_last_segment(name), + "escape_filter_chars" | "escape_dn_chars" + ) +} + fn source_imports_ldap(file_bytes: &[u8]) -> bool { const NEEDLES: &[&[u8]] = &[ b"import ldap", @@ -38,6 +54,62 @@ fn source_imports_ldap(file_bytes: &[u8]) -> bool { .any(|n| file_bytes.windows(n.len()).any(|w| w == *n)) } +fn ast_confirms_unsanitised_search(root: Node<'_>, bytes: &[u8]) -> bool { + let mut found_unsanitised = false; + let mut saw_any_search = false; + walk(root, bytes, &mut found_unsanitised, &mut saw_any_search); + found_unsanitised || !saw_any_search +} + +fn walk(node: Node<'_>, bytes: &[u8], unsanitised: &mut bool, saw_any: &mut bool) { + if *unsanitised { + return; + } + if node.kind() == "call" + && let Some(func) = node + .child_by_field_name("function") + .and_then(|n| n.utf8_text(bytes).ok()) + && callee_is_ldap_search(func) + { + *saw_any = true; + if let Some(args) = node.child_by_field_name("arguments") + && !args_contain_sanitiser(args, bytes) + { + *unsanitised = true; + return; + } + } + let mut cur = node.walk(); + for child in node.children(&mut cur) { + walk(child, bytes, unsanitised, saw_any); + } +} + +fn args_contain_sanitiser(args: Node<'_>, bytes: &[u8]) -> bool { + let mut hit = false; + scan_for_sanitiser(args, bytes, &mut hit); + hit +} + +fn scan_for_sanitiser(node: Node<'_>, bytes: &[u8], hit: &mut bool) { + if *hit { + return; + } + if node.kind() == "call" + && let Some(func) = node + .child_by_field_name("function") + .and_then(|n| n.utf8_text(bytes).ok()) + && callee_is_ldap_sanitiser(func) + { + *hit = true; + return; + } + let mut cur = node.walk(); + for child in node.children(&mut cur) { + scan_for_sanitiser(child, bytes, hit); + } +} + impl FrameworkAdapter for LdapPythonAdapter { fn name(&self) -> &'static str { ADAPTER_NAME @@ -50,23 +122,26 @@ impl FrameworkAdapter for LdapPythonAdapter { fn detect( &self, summary: &FuncSummary, - _ast: tree_sitter::Node<'_>, + ast: tree_sitter::Node<'_>, file_bytes: &[u8], ) -> Option { - let matches_call = super::any_callee_matches(summary, callee_is_ldap_search); - let matches_source = source_imports_ldap(file_bytes); - if matches_call && matches_source { - Some(FrameworkBinding { - adapter: ADAPTER_NAME.to_owned(), - kind: EntryKind::Function, - route: None, - request_params: Vec::new(), - response_writer: None, - middleware: Vec::new(), - }) - } else { - None + if !source_imports_ldap(file_bytes) { + return None; } + if !super::any_callee_matches(summary, callee_is_ldap_search) { + return None; + } + if !ast_confirms_unsanitised_search(ast, file_bytes) { + return None; + } + Some(FrameworkBinding { + adapter: ADAPTER_NAME.to_owned(), + kind: EntryKind::Function, + route: None, + request_params: Vec::new(), + response_writer: None, + middleware: Vec::new(), + }) } } @@ -110,4 +185,21 @@ mod tests { .detect(&summary, tree.root_node(), src) .is_none()); } + + #[test] + fn skips_when_filter_arg_is_sanitised() { + let src: &[u8] = b"import ldap\nfrom ldap.filter import escape_filter_chars\n\ + def run(uid):\n\ + con = ldap.initialize('ldap://127.0.0.1')\n\ + return con.search_s('ou=people', ldap.SCOPE_SUBTREE, '(uid=' + escape_filter_chars(uid) + ')')\n"; + let tree = parse_python(src); + let summary = FuncSummary { + name: "run".into(), + callees: vec![crate::summary::CalleeSite::bare("search_s")], + ..Default::default() + }; + assert!(LdapPythonAdapter + .detect(&summary, tree.root_node(), src) + .is_none()); + } } diff --git a/src/dynamic/framework/adapters/ldap_spring.rs b/src/dynamic/framework/adapters/ldap_spring.rs index 10f27b10..5d48ac8b 100644 --- a/src/dynamic/framework/adapters/ldap_spring.rs +++ b/src/dynamic/framework/adapters/ldap_spring.rs @@ -8,11 +8,19 @@ //! surrounding source pulls in one of the matching package symbols — //! `org.springframework.ldap.*`, `javax.naming.directory.*`, //! `com.unboundid.ldap.*`. +//! +//! Strengthened to walk the AST and reject the binding when any of +//! the search call's argument subtrees flows through a known LDAP +//! filter encoder (`LdapEncoder.filterEncode`, `Filter.encodeValue`, +//! `LdapUtils.encodeForLDAP`, `encodeForLdapFilter`). That removes +//! the FP where the developer already wrapped the user input in a +//! sanitiser but the adapter still stamped a binding. use crate::dynamic::framework::{FrameworkAdapter, FrameworkBinding}; use crate::evidence::EntryKind; use crate::summary::FuncSummary; use crate::symbol::Lang; +use tree_sitter::Node; pub struct LdapSpringAdapter; @@ -26,6 +34,19 @@ fn callee_is_ldap_search(name: &str) -> bool { ) } +fn callee_is_ldap_sanitiser(name: &str) -> bool { + let last = name.rsplit_once('.').map(|(_, s)| s).unwrap_or(name); + matches!( + last, + "filterEncode" + | "encodeValue" + | "encodeForLDAP" + | "encodeForLdapFilter" + | "forLDAPFilter" + | "forLDAP" + ) +} + fn source_imports_ldap(file_bytes: &[u8]) -> bool { const NEEDLES: &[&[u8]] = &[ b"org.springframework.ldap", @@ -42,6 +63,70 @@ fn source_imports_ldap(file_bytes: &[u8]) -> bool { .any(|n| file_bytes.windows(n.len()).any(|w| w == *n)) } +/// True when any `method_invocation` in the file is a recognised LDAP +/// search whose argument list does NOT pass through a known LDAP +/// filter encoder. Bare-search calls (no encoder anywhere) keep +/// firing; pre-sanitised calls bail out. +fn ast_confirms_unsanitised_search(root: Node<'_>, bytes: &[u8]) -> bool { + let mut found_unsanitised = false; + let mut saw_any_search = false; + walk(root, bytes, &mut found_unsanitised, &mut saw_any_search); + // Conservative: when no AST search call was found at all, fall + // through and let the cheap-filter / callee branch decide. When + // AST search calls were seen, require at least one without a + // sanitiser wrap. + found_unsanitised || !saw_any_search +} + +fn walk(node: Node<'_>, bytes: &[u8], unsanitised: &mut bool, saw_any: &mut bool) { + if *unsanitised { + return; + } + if node.kind() == "method_invocation" + && let Some(name) = node + .child_by_field_name("name") + .and_then(|n| n.utf8_text(bytes).ok()) + && callee_is_ldap_search(name) + { + *saw_any = true; + if let Some(args) = node.child_by_field_name("arguments") + && !args_contain_sanitiser(args, bytes) + { + *unsanitised = true; + return; + } + } + let mut cur = node.walk(); + for child in node.children(&mut cur) { + walk(child, bytes, unsanitised, saw_any); + } +} + +fn args_contain_sanitiser(args: Node<'_>, bytes: &[u8]) -> bool { + let mut hit = false; + scan_for_sanitiser(args, bytes, &mut hit); + hit +} + +fn scan_for_sanitiser(node: Node<'_>, bytes: &[u8], hit: &mut bool) { + if *hit { + return; + } + if node.kind() == "method_invocation" + && let Some(name) = node + .child_by_field_name("name") + .and_then(|n| n.utf8_text(bytes).ok()) + && callee_is_ldap_sanitiser(name) + { + *hit = true; + return; + } + let mut cur = node.walk(); + for child in node.children(&mut cur) { + scan_for_sanitiser(child, bytes, hit); + } +} + impl FrameworkAdapter for LdapSpringAdapter { fn name(&self) -> &'static str { ADAPTER_NAME @@ -54,36 +139,30 @@ impl FrameworkAdapter for LdapSpringAdapter { fn detect( &self, summary: &FuncSummary, - _ast: tree_sitter::Node<'_>, + ast: tree_sitter::Node<'_>, file_bytes: &[u8], ) -> Option { - let matches_call = super::any_callee_matches(summary, callee_is_ldap_search); - let matches_source = source_imports_ldap(file_bytes); - if matches_call && matches_source { - return Some(FrameworkBinding { - adapter: ADAPTER_NAME.to_owned(), - kind: EntryKind::Function, - route: None, - request_params: Vec::new(), - response_writer: None, - middleware: Vec::new(), - }); + if !source_imports_ldap(file_bytes) { + return None; } - if matches_source - && file_bytes + let matches_call = super::any_callee_matches(summary, callee_is_ldap_search) + || file_bytes .windows(b".search(".len()) - .any(|w| w == b".search(") - { - return Some(FrameworkBinding { - adapter: ADAPTER_NAME.to_owned(), - kind: EntryKind::Function, - route: None, - request_params: Vec::new(), - response_writer: None, - middleware: Vec::new(), - }); + .any(|w| w == b".search("); + if !matches_call { + return None; } - None + if !ast_confirms_unsanitised_search(ast, file_bytes) { + return None; + } + Some(FrameworkBinding { + adapter: ADAPTER_NAME.to_owned(), + kind: EntryKind::Function, + route: None, + request_params: Vec::new(), + response_writer: None, + middleware: Vec::new(), + }) } } @@ -130,4 +209,24 @@ mod tests { .detect(&summary, tree.root_node(), src) .is_none()); } + + #[test] + fn skips_when_filter_arg_is_sanitised() { + // The user input is wrapped in LdapEncoder.filterEncode before + // it reaches LdapTemplate.search; the binding must not fire. + let src: &[u8] = b"import org.springframework.ldap.core.LdapTemplate;\n\ + import org.springframework.ldap.support.LdapEncoder;\n\ + public class V {\n public Object run(String uid, LdapTemplate t) {\n\ + return t.search(\"ou=people\", \"(uid=\" + LdapEncoder.filterEncode(uid) + \")\", null);\n\ + }\n}\n"; + let tree = parse_java(src); + let summary = FuncSummary { + name: "run".into(), + callees: vec![crate::summary::CalleeSite::bare("search")], + ..Default::default() + }; + assert!(LdapSpringAdapter + .detect(&summary, tree.root_node(), src) + .is_none()); + } } diff --git a/src/dynamic/framework/adapters/mod.rs b/src/dynamic/framework/adapters/mod.rs index 0a2fe08d..a77d6381 100644 --- a/src/dynamic/framework/adapters/mod.rs +++ b/src/dynamic/framework/adapters/mod.rs @@ -226,3 +226,240 @@ fn any_callee_matches( .iter() .any(|c| predicate(c.name.as_str())) } + +/// True when `arg_text` resolves to a function parameter whose 0-based +/// index participates in taint flow — either listed in +/// `summary.tainted_sink_params` (param reaches an internal sink) or +/// `summary.propagating_params` (param flows to the return value). +/// +/// Used by the Phase 04 SSTI / Phase 05 XXE / Phase 06 LDAP adapters to +/// reject substring matches in comments by confirming the call's first +/// argument is a real tainted variable rather than a string literal or +/// an unrelated local. +/// +/// Per-language sigil stripping covers PHP (`$x`), Ruby (`@x`), and +/// Java/Python/JS (no sigil). Leading whitespace is also trimmed so +/// adapters can pass the raw `utf8_text` of the argument node. +pub(super) fn arg_is_tainted_param( + summary: &crate::summary::FuncSummary, + arg_text: &str, +) -> bool { + fn strip(s: &str) -> &str { + s.trim() + .trim_start_matches('$') + .trim_start_matches('@') + .trim_start_matches('&') + } + let needle = strip(arg_text); + let Some(idx) = summary + .param_names + .iter() + .position(|p| strip(p) == needle) + else { + return false; + }; + summary.tainted_sink_params.iter().any(|&i| i == idx) + || summary.propagating_params.iter().any(|&i| i == idx) +} + +/// True when any descendant identifier in `node`'s subtree resolves to +/// a function parameter whose 0-based index participates in taint flow +/// (same membership rule as [`arg_is_tainted_param`]). +/// +/// Used by Phase 07 XPath adapters where the sink call's expression +/// argument is typically a concat (`"//user[@name='" + name + "'"`) +/// rather than a bare identifier — the walker collects every +/// identifier-shaped leaf and checks each against the summary's +/// tainted-param set. Pure-literal expressions and concats over +/// unrelated locals fall through. +/// +/// `function_scope` is the enclosing function-body subtree. When a +/// direct identifier in `node` is not itself a tainted param, the +/// walker chases its local assignment within `function_scope` and +/// inspects the RHS for tainted-param references (one hop, enough to +/// cover the common `expr = "..." + name + "..."; eval(expr)` shape +/// without dragging full intra-procedural data flow into the +/// adapter). +pub(super) fn subtree_contains_tainted_param( + node: tree_sitter::Node<'_>, + bytes: &[u8], + summary: &crate::summary::FuncSummary, + function_scope: Option>, +) -> bool { + if summary.tainted_sink_params.is_empty() && summary.propagating_params.is_empty() { + return false; + } + let mut hit = false; + walk_for_param(node, bytes, summary, function_scope, &mut hit); + hit +} + +fn walk_for_param( + node: tree_sitter::Node<'_>, + bytes: &[u8], + summary: &crate::summary::FuncSummary, + function_scope: Option>, + hit: &mut bool, +) { + if *hit { + return; + } + if matches!( + node.kind(), + "identifier" + | "variable_name" + | "simple_identifier" + | "name" + | "type_identifier" + | "scoped_identifier" + | "field_identifier" + | "property_identifier" + ) && let Ok(text) = node.utf8_text(bytes) + { + if arg_is_tainted_param(summary, text) { + *hit = true; + return; + } + if let Some(scope) = function_scope + && let Some(rhs) = find_local_assignment_rhs(scope, bytes, text) + { + let mut inner = false; + walk_for_param_no_chase(rhs, bytes, summary, &mut inner); + if inner { + *hit = true; + return; + } + } + } + let mut cur = node.walk(); + for child in node.children(&mut cur) { + walk_for_param(child, bytes, summary, function_scope, hit); + } +} + +fn walk_for_param_no_chase( + node: tree_sitter::Node<'_>, + bytes: &[u8], + summary: &crate::summary::FuncSummary, + hit: &mut bool, +) { + if *hit { + return; + } + if matches!( + node.kind(), + "identifier" + | "variable_name" + | "simple_identifier" + | "name" + | "type_identifier" + | "scoped_identifier" + | "field_identifier" + | "property_identifier" + ) && let Ok(text) = node.utf8_text(bytes) + && arg_is_tainted_param(summary, text) + { + *hit = true; + return; + } + let mut cur = node.walk(); + for child in node.children(&mut cur) { + walk_for_param_no_chase(child, bytes, summary, hit); + } +} + +fn find_local_assignment_rhs<'a>( + scope: tree_sitter::Node<'a>, + bytes: &[u8], + name: &str, +) -> Option> { + fn strip(s: &str) -> &str { + s.trim() + .trim_start_matches('$') + .trim_start_matches('@') + .trim_start_matches('&') + } + let needle = strip(name); + let mut hit: Option> = None; + visit(scope, bytes, needle, &mut hit); + return hit; + + fn visit<'a>( + node: tree_sitter::Node<'a>, + bytes: &[u8], + needle: &str, + hit: &mut Option>, + ) { + if hit.is_some() { + return; + } + match node.kind() { + // Python `expr = rhs` / Ruby `expr = rhs` / + // JS `expr = rhs` (no `let`). + "assignment" | "assignment_expression" => { + let lhs = node + .child_by_field_name("left") + .or_else(|| node.named_child(0)); + let rhs = node + .child_by_field_name("right") + .or_else(|| node.named_child(1)); + if let (Some(lhs), Some(rhs)) = (lhs, rhs) + && let Ok(text) = lhs.utf8_text(bytes) + && strip_sigils(text) == needle + { + *hit = Some(rhs); + return; + } + } + // JS `let/const expr = rhs` / TS variant. + "variable_declarator" => { + let name_node = node + .child_by_field_name("name") + .or_else(|| node.named_child(0)); + let value = node + .child_by_field_name("value") + .or_else(|| node.named_child(1)); + if let (Some(n), Some(v)) = (name_node, value) + && let Ok(text) = n.utf8_text(bytes) + && strip_sigils(text) == needle + { + *hit = Some(v); + return; + } + } + // Java `Type expr = rhs;`. + "local_variable_declaration" => { + let mut cur = node.walk(); + for child in node.named_children(&mut cur) { + if child.kind() == "variable_declarator" { + let n = child + .child_by_field_name("name") + .or_else(|| child.named_child(0)); + let v = child + .child_by_field_name("value") + .or_else(|| child.named_child(1)); + if let (Some(n), Some(v)) = (n, v) + && let Ok(text) = n.utf8_text(bytes) + && strip_sigils(text) == needle + { + *hit = Some(v); + return; + } + } + } + } + _ => {} + } + let mut cur = node.walk(); + for child in node.children(&mut cur) { + visit(child, bytes, needle, hit); + } + } +} + +pub(super) fn strip_sigils(s: &str) -> &str { + s.trim() + .trim_start_matches('$') + .trim_start_matches('@') + .trim_start_matches('&') +} diff --git a/src/dynamic/framework/adapters/php_twig.rs b/src/dynamic/framework/adapters/php_twig.rs index c33dc7ba..01a29ec0 100644 --- a/src/dynamic/framework/adapters/php_twig.rs +++ b/src/dynamic/framework/adapters/php_twig.rs @@ -6,25 +6,75 @@ //! `$twig->render($tainted)`. Callee matching is last-segment so //! receiver-prefixed calls (`$env->render`, //! `Twig\Environment::createTemplate`) hit the same predicate. +//! +//! Strengthened to walk the AST for a real `member_call_expression` +//! or `scoped_call_expression` whose first positional argument names +//! a parameter listed in `summary.tainted_sink_params` or +//! `summary.propagating_params`, removing the comment-substring FP. use crate::dynamic::framework::{FrameworkAdapter, FrameworkBinding}; use crate::evidence::EntryKind; use crate::summary::FuncSummary; use crate::symbol::Lang; +use tree_sitter::Node; pub struct PhpTwigAdapter; const ADAPTER_NAME: &str = "php-twig"; fn callee_is_twig(name: &str) -> bool { - let last = name.rsplit_once('.').map(|(_, s)| s).unwrap_or(name); - let last = last.rsplit_once("::").map(|(_, s)| s).unwrap_or(last); matches!( - last, + name, "createTemplate" | "render" | "renderBlock" | "display" ) } +fn ast_confirms_tainted_call(root: Node<'_>, bytes: &[u8], summary: &FuncSummary) -> bool { + let mut found = false; + walk(root, bytes, summary, &mut found); + found +} + +fn walk(node: Node<'_>, bytes: &[u8], summary: &FuncSummary, found: &mut bool) { + if *found { + return; + } + if matches!( + node.kind(), + "member_call_expression" | "scoped_call_expression" | "function_call_expression" + ) && let Some(name) = node + .child_by_field_name("name") + .or_else(|| node.child_by_field_name("function")) + .and_then(|n| n.utf8_text(bytes).ok()) + && callee_is_twig(name) + && let Some(args) = node.child_by_field_name("arguments") + && let Some(text) = first_positional_arg_text(args, bytes) + && super::arg_is_tainted_param(summary, &text) + { + *found = true; + return; + } + let mut cur = node.walk(); + for child in node.children(&mut cur) { + walk(child, bytes, summary, found); + } +} + +fn first_positional_arg_text(args: Node<'_>, bytes: &[u8]) -> Option { + let mut cur = args.walk(); + for arg in args.named_children(&mut cur) { + if arg.kind() != "argument" { + continue; + } + if arg.child_by_field_name("name").is_some() { + continue; + } + let value = arg.named_child(0)?; + return value.utf8_text(bytes).ok().map(|s| s.to_owned()); + } + None +} + impl FrameworkAdapter for PhpTwigAdapter { fn name(&self) -> &'static str { ADAPTER_NAME @@ -37,11 +87,10 @@ impl FrameworkAdapter for PhpTwigAdapter { fn detect( &self, summary: &FuncSummary, - _ast: tree_sitter::Node<'_>, + ast: tree_sitter::Node<'_>, file_bytes: &[u8], ) -> Option { - let matches_call = super::any_callee_matches(summary, callee_is_twig); - let matches_source = file_bytes + let cheap_filter = file_bytes .windows(b"Twig\\Environment".len()) .any(|w| w == b"Twig\\Environment") || file_bytes @@ -53,17 +102,20 @@ impl FrameworkAdapter for PhpTwigAdapter { || file_bytes .windows(b"createTemplate".len()) .any(|w| w == b"createTemplate"); - if matches_call && matches_source { - return Some(FrameworkBinding { - adapter: ADAPTER_NAME.to_owned(), - kind: EntryKind::Function, - route: None, - request_params: Vec::new(), - response_writer: None, - middleware: Vec::new(), - }); + if !cheap_filter { + return None; } - None + if !ast_confirms_tainted_call(ast, file_bytes, summary) { + return None; + } + Some(FrameworkBinding { + adapter: ADAPTER_NAME.to_owned(), + kind: EntryKind::Function, + route: None, + request_params: Vec::new(), + response_writer: None, + middleware: Vec::new(), + }) } } @@ -78,15 +130,21 @@ mod tests { parser.parse(src, None).unwrap() } + fn summary_for(name: &str, params: &[&str], tainted: &[usize]) -> FuncSummary { + FuncSummary { + name: name.into(), + param_count: params.len(), + param_names: params.iter().map(|s| (*s).to_owned()).collect(), + tainted_sink_params: tainted.to_vec(), + ..Default::default() + } + } + #[test] fn fires_on_create_template() { let src: &[u8] = b"createTemplate($body);\n return $tpl->render([]);\n}\n"; let tree = parse_php(src); - let summary = FuncSummary { - name: "render".into(), - callees: vec![crate::summary::CalleeSite::bare("createTemplate")], - ..Default::default() - }; + let summary = summary_for("render", &["body", "twig"], &[0]); assert!(PhpTwigAdapter .detect(&summary, tree.root_node(), src) .is_some()); @@ -104,4 +162,26 @@ mod tests { .detect(&summary, tree.root_node(), src) .is_none()); } + + #[test] + fn skips_comment_substring_with_constant_arg() { + // The comment mentions `Twig\Environment` and the call uses a + // literal — no tainted parameter reaches the engine. + let src: &[u8] = b"createTemplate('static');\n return $tpl->render([]);\n}\n"; + let tree = parse_php(src); + let summary = summary_for("render", &["body", "twig"], &[0]); + assert!(PhpTwigAdapter + .detect(&summary, tree.root_node(), src) + .is_none()); + } + + #[test] + fn skips_when_param_not_in_tainted_set() { + let src: &[u8] = b"createTemplate($body);\n return $tpl->render([]);\n}\n"; + let tree = parse_php(src); + let summary = summary_for("render", &["body", "twig"], &[]); + assert!(PhpTwigAdapter + .detect(&summary, tree.root_node(), src) + .is_none()); + } } diff --git a/src/dynamic/framework/adapters/python_jinja2.rs b/src/dynamic/framework/adapters/python_jinja2.rs index 49f7aa02..895bdd4a 100644 --- a/src/dynamic/framework/adapters/python_jinja2.rs +++ b/src/dynamic/framework/adapters/python_jinja2.rs @@ -6,24 +6,77 @@ //! `render_template_string()`. Callee matching is //! last-segment so receiver-prefixed calls (`env.from_string`, //! `flask.render_template_string`) hit the same predicate. +//! +//! The cheap byte-grep on `jinja2` / `from_string` / +//! `render_template_string` is kept as an early filter, but the +//! binding only fires after a tree-sitter walk confirms a real call +//! node whose first argument names a function parameter listed in +//! `summary.tainted_sink_params` or `summary.propagating_params`. +//! That removes the comment-substring FP (a docstring mentioning +//! `jinja2.Template` plus an unrelated `Template(constant)` call no +//! longer trips the adapter). use crate::dynamic::framework::{FrameworkAdapter, FrameworkBinding}; use crate::evidence::EntryKind; use crate::summary::FuncSummary; use crate::symbol::Lang; +use tree_sitter::Node; pub struct PythonJinja2Adapter; const ADAPTER_NAME: &str = "python-jinja2"; -fn callee_is_jinja2(name: &str) -> bool { - let last = name.rsplit_once('.').map(|(_, s)| s).unwrap_or(name); +fn callee_last_segment(name: &str) -> &str { + name.rsplit_once('.').map(|(_, s)| s).unwrap_or(name) +} + +fn is_jinja2_entry(name: &str) -> bool { matches!( - last, + callee_last_segment(name), "Template" | "from_string" | "render_template_string" ) } +fn ast_confirms_tainted_call(root: Node<'_>, bytes: &[u8], summary: &FuncSummary) -> bool { + let mut found = false; + walk(root, bytes, summary, &mut found); + found +} + +fn walk(node: Node<'_>, bytes: &[u8], summary: &FuncSummary, found: &mut bool) { + if *found { + return; + } + if node.kind() == "call" + && let Some(func) = node + .child_by_field_name("function") + .and_then(|n| n.utf8_text(bytes).ok()) + && is_jinja2_entry(func) + && let Some(args) = node.child_by_field_name("arguments") + && let Some(first) = first_positional_arg(args) + && let Ok(text) = first.utf8_text(bytes) + && super::arg_is_tainted_param(summary, text) + { + *found = true; + return; + } + let mut cur = node.walk(); + for child in node.children(&mut cur) { + walk(child, bytes, summary, found); + } +} + +fn first_positional_arg<'a>(args: Node<'a>) -> Option> { + let mut cur = args.walk(); + for arg in args.named_children(&mut cur) { + if arg.kind() == "keyword_argument" { + continue; + } + return Some(arg); + } + None +} + impl FrameworkAdapter for PythonJinja2Adapter { fn name(&self) -> &'static str { ADAPTER_NAME @@ -36,11 +89,10 @@ impl FrameworkAdapter for PythonJinja2Adapter { fn detect( &self, summary: &FuncSummary, - _ast: tree_sitter::Node<'_>, + ast: tree_sitter::Node<'_>, file_bytes: &[u8], ) -> Option { - let matches_call = super::any_callee_matches(summary, callee_is_jinja2); - let matches_source = file_bytes + let cheap_filter = file_bytes .windows(b"jinja2".len()) .any(|w| w == b"jinja2") || file_bytes @@ -49,18 +101,23 @@ impl FrameworkAdapter for PythonJinja2Adapter { || file_bytes .windows(b"render_template_string".len()) .any(|w| w == b"render_template_string"); - if matches_call && matches_source { - Some(FrameworkBinding { - adapter: ADAPTER_NAME.to_owned(), - kind: EntryKind::Function, - route: None, - request_params: Vec::new(), - response_writer: None, - middleware: Vec::new(), - }) - } else { - None + if !cheap_filter { + return None; } + if !super::any_callee_matches(summary, is_jinja2_entry) { + return None; + } + if !ast_confirms_tainted_call(ast, file_bytes, summary) { + return None; + } + Some(FrameworkBinding { + adapter: ADAPTER_NAME.to_owned(), + kind: EntryKind::Function, + route: None, + request_params: Vec::new(), + response_writer: None, + middleware: Vec::new(), + }) } } @@ -75,16 +132,23 @@ mod tests { parser.parse(src, None).unwrap() } + fn summary_for(name: &str, params: &[&str], tainted: &[usize]) -> FuncSummary { + FuncSummary { + name: name.into(), + param_count: params.len(), + param_names: params.iter().map(|s| (*s).to_owned()).collect(), + tainted_sink_params: tainted.to_vec(), + callees: vec![crate::summary::CalleeSite::bare("Template")], + ..Default::default() + } + } + #[test] fn fires_when_source_imports_jinja2() { let src: &[u8] = b"from jinja2 import Template\ndef render(body):\n return Template(body).render()\n"; let tree = parse_python(src); - let summary = FuncSummary { - name: "render".into(), - callees: vec![crate::summary::CalleeSite::bare("Template")], - ..Default::default() - }; + let summary = summary_for("render", &["body"], &[0]); assert!(PythonJinja2Adapter .detect(&summary, tree.root_node(), src) .is_some()); @@ -95,11 +159,8 @@ mod tests { let src: &[u8] = b"from flask import render_template_string\ndef view(body):\n return render_template_string(body)\n"; let tree = parse_python(src); - let summary = FuncSummary { - name: "view".into(), - callees: vec![crate::summary::CalleeSite::bare("render_template_string")], - ..Default::default() - }; + let mut summary = summary_for("view", &["body"], &[0]); + summary.callees = vec![crate::summary::CalleeSite::bare("render_template_string")]; assert!(PythonJinja2Adapter .detect(&summary, tree.root_node(), src) .is_some()); @@ -117,4 +178,29 @@ mod tests { .detect(&summary, tree.root_node(), src) .is_none()); } + + #[test] + fn skips_comment_substring_with_constant_arg() { + // Docstring mentions jinja2; the actual call passes a string + // literal — no parameter taint reaches the engine. + let src: &[u8] = b"\"\"\"renders via jinja2.Template\"\"\"\ndef render(body):\n return Template(\"hello\").render()\n"; + let tree = parse_python(src); + let summary = summary_for("render", &["body"], &[0]); + assert!(PythonJinja2Adapter + .detect(&summary, tree.root_node(), src) + .is_none()); + } + + #[test] + fn skips_when_param_not_in_tainted_set() { + // Engine never flagged `body` as tainted (no taint reached an + // internal sink in pass 1); the adapter must not stamp. + let src: &[u8] = + b"from jinja2 import Template\ndef render(body):\n return Template(body).render()\n"; + let tree = parse_python(src); + let summary = summary_for("render", &["body"], &[]); + assert!(PythonJinja2Adapter + .detect(&summary, tree.root_node(), src) + .is_none()); + } } diff --git a/src/dynamic/framework/adapters/ruby_erb.rs b/src/dynamic/framework/adapters/ruby_erb.rs index 3506702b..95ad27c1 100644 --- a/src/dynamic/framework/adapters/ruby_erb.rs +++ b/src/dynamic/framework/adapters/ruby_erb.rs @@ -5,19 +5,68 @@ //! variant). Callee matching is last-segment-aware so namespaced //! receivers (`Erubi::Engine.new`) reduce to `new` + a string-level //! check for the surrounding `ERB` / `Erubi` token in the source. +//! +//! Strengthened to require a real `call` node whose first positional +//! argument names a parameter listed in `summary.tainted_sink_params` +//! or `summary.propagating_params`, removing the comment-substring FP. use crate::dynamic::framework::{FrameworkAdapter, FrameworkBinding}; use crate::evidence::EntryKind; use crate::summary::FuncSummary; use crate::symbol::Lang; +use tree_sitter::Node; pub struct RubyErbAdapter; const ADAPTER_NAME: &str = "ruby-erb"; -fn callee_is_erb(name: &str) -> bool { +fn callee_last_segment(name: &str) -> &str { let last = name.rsplit_once('.').map(|(_, s)| s).unwrap_or(name); - matches!(last, "result" | "result_with_hash" | "new") + last.rsplit_once("::").map(|(_, s)| s).unwrap_or(last) +} + +fn is_erb_entry(name: &str) -> bool { + matches!(callee_last_segment(name), "result" | "result_with_hash" | "new") +} + +fn ast_confirms_tainted_call(root: Node<'_>, bytes: &[u8], summary: &FuncSummary) -> bool { + let mut found = false; + walk(root, bytes, summary, &mut found); + found +} + +fn walk(node: Node<'_>, bytes: &[u8], summary: &FuncSummary, found: &mut bool) { + if *found { + return; + } + if matches!(node.kind(), "call" | "method_call") + && let Some(method) = node + .child_by_field_name("method") + .and_then(|n| n.utf8_text(bytes).ok()) + && is_erb_entry(method) + && let Some(args) = node.child_by_field_name("arguments") + && let Some(first) = first_positional_arg(args) + && let Ok(text) = first.utf8_text(bytes) + && super::arg_is_tainted_param(summary, text) + { + *found = true; + return; + } + let mut cur = node.walk(); + for child in node.children(&mut cur) { + walk(child, bytes, summary, found); + } +} + +fn first_positional_arg<'a>(args: Node<'a>) -> Option> { + let mut cur = args.walk(); + for arg in args.named_children(&mut cur) { + if matches!(arg.kind(), "pair" | "hash_splat_argument" | "block_argument") { + continue; + } + return Some(arg); + } + None } impl FrameworkAdapter for RubyErbAdapter { @@ -32,11 +81,10 @@ impl FrameworkAdapter for RubyErbAdapter { fn detect( &self, summary: &FuncSummary, - _ast: tree_sitter::Node<'_>, + ast: tree_sitter::Node<'_>, file_bytes: &[u8], ) -> Option { - let matches_call = super::any_callee_matches(summary, callee_is_erb); - let matches_source = file_bytes + let cheap_filter = file_bytes .windows(b"ERB.new".len()) .any(|w| w == b"ERB.new") || file_bytes @@ -48,31 +96,20 @@ impl FrameworkAdapter for RubyErbAdapter { || file_bytes .windows(b"Erubi".len()) .any(|w| w == b"Erubi"); - if matches_call && matches_source { - return Some(FrameworkBinding { - adapter: ADAPTER_NAME.to_owned(), - kind: EntryKind::Function, - route: None, - request_params: Vec::new(), - response_writer: None, - middleware: Vec::new(), - }); + if !cheap_filter { + return None; } - if matches_source - && file_bytes - .windows(b".result".len()) - .any(|w| w == b".result") - { - return Some(FrameworkBinding { - adapter: ADAPTER_NAME.to_owned(), - kind: EntryKind::Function, - route: None, - request_params: Vec::new(), - response_writer: None, - middleware: Vec::new(), - }); + if !ast_confirms_tainted_call(ast, file_bytes, summary) { + return None; } - None + Some(FrameworkBinding { + adapter: ADAPTER_NAME.to_owned(), + kind: EntryKind::Function, + route: None, + request_params: Vec::new(), + response_writer: None, + middleware: Vec::new(), + }) } } @@ -87,14 +124,21 @@ mod tests { parser.parse(src, None).unwrap() } + fn summary_for(name: &str, params: &[&str], tainted: &[usize]) -> FuncSummary { + FuncSummary { + name: name.into(), + param_count: params.len(), + param_names: params.iter().map(|s| (*s).to_owned()).collect(), + tainted_sink_params: tainted.to_vec(), + ..Default::default() + } + } + #[test] fn fires_on_erb_new_result() { let src: &[u8] = b"require 'erb'\ndef render(body)\n ERB.new(body).result\nend\n"; let tree = parse_ruby(src); - let summary = FuncSummary { - name: "render".into(), - ..Default::default() - }; + let summary = summary_for("render", &["body"], &[0]); assert!(RubyErbAdapter .detect(&summary, tree.root_node(), src) .is_some()); @@ -112,4 +156,25 @@ mod tests { .detect(&summary, tree.root_node(), src) .is_none()); } + + #[test] + fn skips_comment_substring_with_constant_arg() { + let src: &[u8] = + b"# require 'erb' is mentioned\ndef render(body)\n ERB.new(\"static\").result\nend\n"; + let tree = parse_ruby(src); + let summary = summary_for("render", &["body"], &[0]); + assert!(RubyErbAdapter + .detect(&summary, tree.root_node(), src) + .is_none()); + } + + #[test] + fn skips_when_param_not_in_tainted_set() { + let src: &[u8] = b"require 'erb'\ndef render(body)\n ERB.new(body).result\nend\n"; + let tree = parse_ruby(src); + let summary = summary_for("render", &["body"], &[]); + assert!(RubyErbAdapter + .detect(&summary, tree.root_node(), src) + .is_none()); + } } diff --git a/src/dynamic/framework/adapters/xpath_java.rs b/src/dynamic/framework/adapters/xpath_java.rs index 27e5aebd..eb23eefa 100644 --- a/src/dynamic/framework/adapters/xpath_java.rs +++ b/src/dynamic/framework/adapters/xpath_java.rs @@ -7,11 +7,19 @@ //! and the surrounding source pulls in one of the matching package //! symbols — `javax.xml.xpath.*`, `XPathFactory`, //! `XPathConstants.NODESET`. +//! +//! Strengthened to walk the AST and only fire when the evaluator's +//! expression argument carries a tainted-param identifier in its +//! subtree. Pre-bound parameterised queries (`xp.setVariable("name", +//! input)` + `xp.evaluate("//user[@name=$name]")`) leave the +//! expression as a string literal, so the walker sees no tainted +//! identifier and the binding is skipped. use crate::dynamic::framework::{FrameworkAdapter, FrameworkBinding}; use crate::evidence::EntryKind; use crate::summary::FuncSummary; use crate::symbol::Lang; +use tree_sitter::Node; pub struct XpathJavaAdapter; @@ -35,6 +43,39 @@ fn source_imports_xpath(file_bytes: &[u8]) -> bool { .any(|n| file_bytes.windows(n.len()).any(|w| w == *n)) } +fn ast_confirms_tainted_xpath(root: Node<'_>, bytes: &[u8], summary: &FuncSummary) -> bool { + let mut found = false; + walk(root, bytes, summary, root, &mut found); + found +} + +fn walk<'a>( + node: Node<'a>, + bytes: &[u8], + summary: &FuncSummary, + scope: Node<'a>, + found: &mut bool, +) { + if *found { + return; + } + if node.kind() == "method_invocation" + && let Some(name) = node + .child_by_field_name("name") + .and_then(|n| n.utf8_text(bytes).ok()) + && callee_is_xpath_eval(name) + && let Some(args) = node.child_by_field_name("arguments") + && super::subtree_contains_tainted_param(args, bytes, summary, Some(scope)) + { + *found = true; + return; + } + let mut cur = node.walk(); + for child in node.children(&mut cur) { + walk(child, bytes, summary, scope, found); + } +} + impl FrameworkAdapter for XpathJavaAdapter { fn name(&self) -> &'static str { ADAPTER_NAME @@ -47,23 +88,26 @@ impl FrameworkAdapter for XpathJavaAdapter { fn detect( &self, summary: &FuncSummary, - _ast: tree_sitter::Node<'_>, + ast: tree_sitter::Node<'_>, file_bytes: &[u8], ) -> Option { - let matches_call = super::any_callee_matches(summary, callee_is_xpath_eval); - let matches_source = source_imports_xpath(file_bytes); - if matches_call && matches_source { - Some(FrameworkBinding { - adapter: ADAPTER_NAME.to_owned(), - kind: EntryKind::Function, - route: None, - request_params: Vec::new(), - response_writer: None, - middleware: Vec::new(), - }) - } else { - None + if !source_imports_xpath(file_bytes) { + return None; } + if !super::any_callee_matches(summary, callee_is_xpath_eval) { + return None; + } + if !ast_confirms_tainted_xpath(ast, file_bytes, summary) { + return None; + } + Some(FrameworkBinding { + adapter: ADAPTER_NAME.to_owned(), + kind: EntryKind::Function, + route: None, + request_params: Vec::new(), + response_writer: None, + middleware: Vec::new(), + }) } } @@ -78,6 +122,17 @@ mod tests { parser.parse(src, None).unwrap() } + fn summary_for(name: &str, params: &[&str], tainted: &[usize]) -> FuncSummary { + FuncSummary { + name: name.into(), + param_count: params.len(), + param_names: params.iter().map(|s| (*s).to_owned()).collect(), + tainted_sink_params: tainted.to_vec(), + callees: vec![crate::summary::CalleeSite::bare("evaluate")], + ..Default::default() + } + } + #[test] fn fires_on_xpath_evaluate() { let src: &[u8] = b"import javax.xml.xpath.XPathFactory;\n\ @@ -86,11 +141,7 @@ mod tests { return xp.evaluate(\"//user[@name='\" + name + \"']\", null);\n\ }\n}\n"; let tree = parse_java(src); - let summary = FuncSummary { - name: "run".into(), - callees: vec![crate::summary::CalleeSite::bare("evaluate")], - ..Default::default() - }; + let summary = summary_for("run", &["name"], &[0]); let binding = XpathJavaAdapter .detect(&summary, tree.root_node(), src) .expect("must fire on XPath.evaluate"); @@ -111,4 +162,22 @@ mod tests { .detect(&summary, tree.root_node(), src) .is_none()); } + + #[test] + fn skips_when_expression_uses_bound_variable() { + // The expression is a literal containing `$name`; the actual + // input is bound via `xp.setVariable`. No tainted identifier + // appears inside `evaluate`'s argument subtree. + let src: &[u8] = b"import javax.xml.xpath.XPathFactory;\n\ + public class V {\n public Object run(String name) throws Exception {\n\ + javax.xml.xpath.XPath xp = XPathFactory.newInstance().newXPath();\n\ + xp.setXPathVariableResolver(new Resolver(name));\n\ + return xp.evaluate(\"//user[@name=$name]\", null);\n\ + }\n}\n"; + let tree = parse_java(src); + let summary = summary_for("run", &["name"], &[0]); + assert!(XpathJavaAdapter + .detect(&summary, tree.root_node(), src) + .is_none()); + } } diff --git a/src/dynamic/framework/adapters/xpath_js.rs b/src/dynamic/framework/adapters/xpath_js.rs index f83088f1..0b868363 100644 --- a/src/dynamic/framework/adapters/xpath_js.rs +++ b/src/dynamic/framework/adapters/xpath_js.rs @@ -6,11 +6,18 @@ //! browser DOM's `document.evaluate`) and the surrounding source //! imports / requires the `xpath` module or references //! `XPathResult` / `document.evaluate`. +//! +//! Strengthened to walk the AST and only fire when the selector's +//! expression argument carries a tainted-param identifier in its +//! subtree. Bound queries that build the expression as a literal +//! and pass variables separately (`xpath.parse(expr).select({ vars +//! })`) leave the first arg literal-only and skip the binding. use crate::dynamic::framework::{FrameworkAdapter, FrameworkBinding}; use crate::evidence::EntryKind; use crate::summary::FuncSummary; use crate::symbol::Lang; +use tree_sitter::Node; pub struct XpathJsAdapter; @@ -37,6 +44,39 @@ fn source_imports_xpath(file_bytes: &[u8]) -> bool { .any(|n| file_bytes.windows(n.len()).any(|w| w == *n)) } +fn ast_confirms_tainted_xpath(root: Node<'_>, bytes: &[u8], summary: &FuncSummary) -> bool { + let mut found = false; + walk(root, bytes, summary, root, &mut found); + found +} + +fn walk<'a>( + node: Node<'a>, + bytes: &[u8], + summary: &FuncSummary, + scope: Node<'a>, + found: &mut bool, +) { + if *found { + return; + } + if node.kind() == "call_expression" + && let Some(func) = node + .child_by_field_name("function") + .and_then(|n| n.utf8_text(bytes).ok()) + && callee_is_xpath_eval(func) + && let Some(args) = node.child_by_field_name("arguments") + && super::subtree_contains_tainted_param(args, bytes, summary, Some(scope)) + { + *found = true; + return; + } + let mut cur = node.walk(); + for child in node.children(&mut cur) { + walk(child, bytes, summary, scope, found); + } +} + impl FrameworkAdapter for XpathJsAdapter { fn name(&self) -> &'static str { ADAPTER_NAME @@ -49,23 +89,26 @@ impl FrameworkAdapter for XpathJsAdapter { fn detect( &self, summary: &FuncSummary, - _ast: tree_sitter::Node<'_>, + ast: tree_sitter::Node<'_>, file_bytes: &[u8], ) -> Option { - let matches_call = super::any_callee_matches(summary, callee_is_xpath_eval); - let matches_source = source_imports_xpath(file_bytes); - if matches_call && matches_source { - Some(FrameworkBinding { - adapter: ADAPTER_NAME.to_owned(), - kind: EntryKind::Function, - route: None, - request_params: Vec::new(), - response_writer: None, - middleware: Vec::new(), - }) - } else { - None + if !source_imports_xpath(file_bytes) { + return None; } + if !super::any_callee_matches(summary, callee_is_xpath_eval) { + return None; + } + if !ast_confirms_tainted_xpath(ast, file_bytes, summary) { + return None; + } + Some(FrameworkBinding { + adapter: ADAPTER_NAME.to_owned(), + kind: EntryKind::Function, + route: None, + request_params: Vec::new(), + response_writer: None, + middleware: Vec::new(), + }) } } @@ -80,6 +123,17 @@ mod tests { parser.parse(src, None).unwrap() } + fn summary_for(name: &str, params: &[&str], tainted: &[usize]) -> FuncSummary { + FuncSummary { + name: name.into(), + param_count: params.len(), + param_names: params.iter().map(|s| (*s).to_owned()).collect(), + tainted_sink_params: tainted.to_vec(), + callees: vec![crate::summary::CalleeSite::bare("select")], + ..Default::default() + } + } + #[test] fn fires_on_xpath_select() { let src: &[u8] = b"const xpath = require('xpath');\n\ @@ -87,11 +141,7 @@ mod tests { return xpath.select(\"//user[@name='\" + name + \"']\", doc);\n\ }\nmodule.exports = { run };\n"; let tree = parse_js(src); - let summary = FuncSummary { - name: "run".into(), - callees: vec![crate::summary::CalleeSite::bare("select")], - ..Default::default() - }; + let summary = summary_for("run", &["name"], &[0]); assert!(XpathJsAdapter .detect(&summary, tree.root_node(), src) .is_some()); @@ -109,4 +159,17 @@ mod tests { .detect(&summary, tree.root_node(), src) .is_none()); } + + #[test] + fn skips_when_expression_is_literal_only() { + let src: &[u8] = b"const xpath = require('xpath');\n\ + function run(name) {\n\ + return xpath.select(\"//user[@id=1]\", doc);\n\ + }\nmodule.exports = { run };\n"; + let tree = parse_js(src); + let summary = summary_for("run", &["name"], &[0]); + assert!(XpathJsAdapter + .detect(&summary, tree.root_node(), src) + .is_none()); + } } diff --git a/src/dynamic/framework/adapters/xpath_php.rs b/src/dynamic/framework/adapters/xpath_php.rs index 0a99ae3e..fd22c3d4 100644 --- a/src/dynamic/framework/adapters/xpath_php.rs +++ b/src/dynamic/framework/adapters/xpath_php.rs @@ -4,11 +4,17 @@ //! Phase 07 (Track J.5). Fires when the function body invokes //! `DOMXPath::query` / `DOMXPath::evaluate` and the surrounding //! source pulls in the `DOMXPath` / `DOMDocument` family. +//! +//! Strengthened to walk the AST and only fire when the query call's +//! expression argument carries a tainted-param identifier in its +//! subtree. Pure-literal expressions (`$xp->query("//user[@id=1]")`) +//! produce no tainted-identifier hit and the binding is skipped. use crate::dynamic::framework::{FrameworkAdapter, FrameworkBinding}; use crate::evidence::EntryKind; use crate::summary::FuncSummary; use crate::symbol::Lang; +use tree_sitter::Node; pub struct XpathPhpAdapter; @@ -33,6 +39,42 @@ fn source_uses_domxpath(file_bytes: &[u8]) -> bool { .any(|n| file_bytes.windows(n.len()).any(|w| w == *n)) } +fn ast_confirms_tainted_xpath(root: Node<'_>, bytes: &[u8], summary: &FuncSummary) -> bool { + let mut found = false; + walk(root, bytes, summary, root, &mut found); + found +} + +fn walk<'a>( + node: Node<'a>, + bytes: &[u8], + summary: &FuncSummary, + scope: Node<'a>, + found: &mut bool, +) { + if *found { + return; + } + if matches!( + node.kind(), + "member_call_expression" | "scoped_call_expression" | "function_call_expression" + ) && let Some(name) = node + .child_by_field_name("name") + .or_else(|| node.child_by_field_name("function")) + .and_then(|n| n.utf8_text(bytes).ok()) + && callee_is_xpath_eval(name) + && let Some(args) = node.child_by_field_name("arguments") + && super::subtree_contains_tainted_param(args, bytes, summary, Some(scope)) + { + *found = true; + return; + } + let mut cur = node.walk(); + for child in node.children(&mut cur) { + walk(child, bytes, summary, scope, found); + } +} + impl FrameworkAdapter for XpathPhpAdapter { fn name(&self) -> &'static str { ADAPTER_NAME @@ -45,23 +87,26 @@ impl FrameworkAdapter for XpathPhpAdapter { fn detect( &self, summary: &FuncSummary, - _ast: tree_sitter::Node<'_>, + ast: tree_sitter::Node<'_>, file_bytes: &[u8], ) -> Option { - let matches_call = super::any_callee_matches(summary, callee_is_xpath_eval); - let matches_source = source_uses_domxpath(file_bytes); - if matches_call && matches_source { - Some(FrameworkBinding { - adapter: ADAPTER_NAME.to_owned(), - kind: EntryKind::Function, - route: None, - request_params: Vec::new(), - response_writer: None, - middleware: Vec::new(), - }) - } else { - None + if !source_uses_domxpath(file_bytes) { + return None; } + if !super::any_callee_matches(summary, callee_is_xpath_eval) { + return None; + } + if !ast_confirms_tainted_xpath(ast, file_bytes, summary) { + return None; + } + Some(FrameworkBinding { + adapter: ADAPTER_NAME.to_owned(), + kind: EntryKind::Function, + route: None, + request_params: Vec::new(), + response_writer: None, + middleware: Vec::new(), + }) } } @@ -76,6 +121,17 @@ mod tests { parser.parse(src, None).unwrap() } + fn summary_for(name: &str, params: &[&str], tainted: &[usize]) -> FuncSummary { + FuncSummary { + name: name.into(), + param_count: params.len(), + param_names: params.iter().map(|s| (*s).to_owned()).collect(), + tainted_sink_params: tainted.to_vec(), + callees: vec![crate::summary::CalleeSite::bare("query")], + ..Default::default() + } + } + #[test] fn fires_on_domxpath_query() { let src: &[u8] = b"query(\"//user[@name='\" . $name . \"']\");\n\ }\n"; let tree = parse_php(src); - let summary = FuncSummary { - name: "run".into(), - callees: vec![crate::summary::CalleeSite::bare("query")], - ..Default::default() - }; + let summary = summary_for("run", &["name"], &[0]); assert!(XpathPhpAdapter .detect(&summary, tree.root_node(), src) .is_some()); @@ -108,4 +160,20 @@ mod tests { .detect(&summary, tree.root_node(), src) .is_none()); } + + #[test] + fn skips_when_expression_is_literal_only() { + let src: &[u8] = b"load('xpath_corpus.xml');\n\ + $xp = new DOMXPath($doc);\n\ + return $xp->query(\"//user[@id=1]\");\n\ + }\n"; + let tree = parse_php(src); + let summary = summary_for("run", &["name"], &[0]); + assert!(XpathPhpAdapter + .detect(&summary, tree.root_node(), src) + .is_none()); + } } diff --git a/src/dynamic/framework/adapters/xpath_python.rs b/src/dynamic/framework/adapters/xpath_python.rs index 8a1e1f4e..59cba13f 100644 --- a/src/dynamic/framework/adapters/xpath_python.rs +++ b/src/dynamic/framework/adapters/xpath_python.rs @@ -4,11 +4,20 @@ //! Phase 07 (Track J.5). Fires when the function body invokes //! `lxml.etree`'s XPath entry points (`Element.xpath`, `xpath`, //! `XPath` evaluator) and the surrounding source imports `lxml`. +//! +//! Strengthened to walk the AST and only fire when the evaluator's +//! expression argument carries a tainted-param identifier in its +//! subtree. Pre-bound parameterised queries +//! (`etree.XPath("//user[@name=$name]")(tree, name=name)`) keep the +//! template string literal-only, so the walker sees no tainted +//! identifier inside the call to `XPath` / `xpath` and the binding +//! is skipped. use crate::dynamic::framework::{FrameworkAdapter, FrameworkBinding}; use crate::evidence::EntryKind; use crate::summary::FuncSummary; use crate::symbol::Lang; +use tree_sitter::Node; pub struct XpathPythonAdapter; @@ -16,7 +25,7 @@ const ADAPTER_NAME: &str = "xpath-python"; fn callee_is_xpath_eval(name: &str) -> bool { let last = name.rsplit_once('.').map(|(_, s)| s).unwrap_or(name); - matches!(last, "xpath" | "evaluate" | "find" | "findall" | "iterfind") + matches!(last, "xpath" | "evaluate" | "find" | "findall" | "iterfind" | "XPath") } fn source_imports_lxml(file_bytes: &[u8]) -> bool { @@ -34,6 +43,39 @@ fn source_imports_lxml(file_bytes: &[u8]) -> bool { .any(|n| file_bytes.windows(n.len()).any(|w| w == *n)) } +fn ast_confirms_tainted_xpath(root: Node<'_>, bytes: &[u8], summary: &FuncSummary) -> bool { + let mut found = false; + walk(root, bytes, summary, root, &mut found); + found +} + +fn walk<'a>( + node: Node<'a>, + bytes: &[u8], + summary: &FuncSummary, + scope: Node<'a>, + found: &mut bool, +) { + if *found { + return; + } + if node.kind() == "call" + && let Some(func) = node + .child_by_field_name("function") + .and_then(|n| n.utf8_text(bytes).ok()) + && callee_is_xpath_eval(func) + && let Some(args) = node.child_by_field_name("arguments") + && super::subtree_contains_tainted_param(args, bytes, summary, Some(scope)) + { + *found = true; + return; + } + let mut cur = node.walk(); + for child in node.children(&mut cur) { + walk(child, bytes, summary, scope, found); + } +} + impl FrameworkAdapter for XpathPythonAdapter { fn name(&self) -> &'static str { ADAPTER_NAME @@ -46,23 +88,26 @@ impl FrameworkAdapter for XpathPythonAdapter { fn detect( &self, summary: &FuncSummary, - _ast: tree_sitter::Node<'_>, + ast: tree_sitter::Node<'_>, file_bytes: &[u8], ) -> Option { - let matches_call = super::any_callee_matches(summary, callee_is_xpath_eval); - let matches_source = source_imports_lxml(file_bytes); - if matches_call && matches_source { - Some(FrameworkBinding { - adapter: ADAPTER_NAME.to_owned(), - kind: EntryKind::Function, - route: None, - request_params: Vec::new(), - response_writer: None, - middleware: Vec::new(), - }) - } else { - None + if !source_imports_lxml(file_bytes) { + return None; } + if !super::any_callee_matches(summary, callee_is_xpath_eval) { + return None; + } + if !ast_confirms_tainted_xpath(ast, file_bytes, summary) { + return None; + } + Some(FrameworkBinding { + adapter: ADAPTER_NAME.to_owned(), + kind: EntryKind::Function, + route: None, + request_params: Vec::new(), + response_writer: None, + middleware: Vec::new(), + }) } } @@ -77,6 +122,17 @@ mod tests { parser.parse(src, None).unwrap() } + fn summary_for(name: &str, params: &[&str], tainted: &[usize]) -> FuncSummary { + FuncSummary { + name: name.into(), + param_count: params.len(), + param_names: params.iter().map(|s| (*s).to_owned()).collect(), + tainted_sink_params: tainted.to_vec(), + callees: vec![crate::summary::CalleeSite::bare("xpath")], + ..Default::default() + } + } + #[test] fn fires_on_lxml_xpath() { let src: &[u8] = b"from lxml import etree\n\ @@ -84,11 +140,7 @@ mod tests { tree = etree.fromstring(open('xpath_corpus.xml').read())\n\ return tree.xpath(\"//user[@name='\" + name + \"']\")\n"; let tree = parse_python(src); - let summary = FuncSummary { - name: "run".into(), - callees: vec![crate::summary::CalleeSite::bare("xpath")], - ..Default::default() - }; + let summary = summary_for("run", &["name"], &[0]); assert!(XpathPythonAdapter .detect(&summary, tree.root_node(), src) .is_some()); @@ -106,4 +158,18 @@ mod tests { .detect(&summary, tree.root_node(), src) .is_none()); } + + #[test] + fn skips_when_expression_uses_bound_variable() { + let src: &[u8] = b"from lxml import etree\n\ + def run(name):\n\ + tree = etree.fromstring(open('xpath_corpus.xml').read())\n\ + q = etree.XPath(\"//user[@name=$name]\")\n\ + return q(tree, name=name)\n"; + let tree = parse_python(src); + let summary = summary_for("run", &["name"], &[0]); + assert!(XpathPythonAdapter + .detect(&summary, tree.root_node(), src) + .is_none()); + } } diff --git a/tests/dynamic_sandbox_escape.rs b/tests/dynamic_sandbox_escape.rs index f7acd9f1..db92c59f 100644 --- a/tests/dynamic_sandbox_escape.rs +++ b/tests/dynamic_sandbox_escape.rs @@ -509,19 +509,17 @@ mod escape_tests { let opts = escape_opts(); // First run — starts a new container. - let r1 = sandbox::run(&harness, &noop_payload(), &opts); + let r1 = sandbox::run(&harness, noop_payload(), &opts); // Second run — should exec into the running container. - let r2 = sandbox::run(&harness, &noop_payload(), &opts); + let r2 = sandbox::run(&harness, noop_payload(), &opts); // Both should succeed (blocked, not escaped — dns_leak exits 1). // The important thing is neither panics or returns an unexpected error. - match r1 { - Err(SandboxError::BackendUnavailable(_)) => return, - _ => {} + if let Err(SandboxError::BackendUnavailable(_)) = r1 { + return; } - match r2 { - Err(SandboxError::BackendUnavailable(_)) => return, - _ => {} + if let Err(SandboxError::BackendUnavailable(_)) = r2 { + return; } // Verify the container is still running (not torn down between calls). diff --git a/tests/ssti_corpus.rs b/tests/ssti_corpus.rs index ea3da1a6..8ce8d770 100644 --- a/tests/ssti_corpus.rs +++ b/tests/ssti_corpus.rs @@ -248,10 +248,18 @@ fn framework_adapters_detect_ssti_sink() { let mut parser = tree_sitter::Parser::new(); parser.set_language(&ts_lang).unwrap(); let tree = parser.parse(&bytes, None).unwrap(); + // Each vuln fixture's `run` function takes `body` as its + // single param and pipes it into the SSTI engine. Seed the + // summary with `body` at index 0 and mark that index as a + // tainted sink participant so the strengthened AST gate + // (added with the comment-substring FP fix) fires. let mut summary = FuncSummary { name: "run".into(), file_path: fixture.to_owned(), lang: slug(lang).into(), + param_count: 1, + param_names: vec!["body".into()], + tainted_sink_params: vec![0], ..Default::default() }; // Seed the canonical sink callee per language so the diff --git a/tests/xpath_corpus.rs b/tests/xpath_corpus.rs index febd98ac..d2604766 100644 --- a/tests/xpath_corpus.rs +++ b/tests/xpath_corpus.rs @@ -329,10 +329,18 @@ fn framework_adapters_detect_xpath_sink() { let mut parser = tree_sitter::Parser::new(); parser.set_language(&ts_lang).unwrap(); let tree = parser.parse(&bytes, None).unwrap(); + // Each vuln fixture's `run` function takes `name` as its + // single param and concats it into the XPath expression. + // The strengthened adapters (one-hop local-assignment chase + // plus tainted-param participation) need the summary to + // mark index 0 as a tainted sink participant. let mut summary = FuncSummary { name: "run".into(), file_path: fixture.to_owned(), lang: slug(lang).into(), + param_count: 1, + param_names: vec!["name".into()], + tainted_sink_params: vec![0], ..Default::default() }; summary