mirror of
https://github.com/elicpeter/nyx.git
synced 2026-06-18 20:15:14 +02:00
Critical bug fixes and recall improvements (#68)
This commit is contained in:
parent
7d0e7320e2
commit
55247b7fcd
352 changed files with 60069 additions and 900 deletions
|
|
@ -521,10 +521,21 @@ pub(super) fn build_switch<'a>(
|
|||
) -> Vec<NodeIndex> {
|
||||
// Locate the case container. Most grammars expose it as field "body"
|
||||
// (JS/TS, Java, C, C++); Go puts cases as direct children of the switch.
|
||||
//
|
||||
// Per-language gotcha: Go's `expression_case` / `default_case` /
|
||||
// `type_case` / `communication_case` map to `Kind::Block` (so the case
|
||||
// body is iterated by the Block handler), so a naive "first Block
|
||||
// child" fallback latches onto the FIRST case as the container, then
|
||||
// walks the case's interior looking for case-like children, finds none,
|
||||
// and falls through to the empty-cases early return (CFG dead-end:
|
||||
// dispatch If has no False edge, every post-switch statement becomes
|
||||
// unreachable). Skip case-kind nodes when picking the container so
|
||||
// Go's flat "cases-as-direct-children" shape uses `ast` itself.
|
||||
let body = ast.child_by_field_name("body").or_else(|| {
|
||||
let mut c = ast.walk();
|
||||
ast.children(&mut c)
|
||||
.find(|n| matches!(lookup(lang, n.kind()), Kind::Block))
|
||||
ast.children(&mut c).find(|n| {
|
||||
matches!(lookup(lang, n.kind()), Kind::Block) && !is_switch_case_kind(n.kind())
|
||||
})
|
||||
});
|
||||
let container = body.unwrap_or(ast);
|
||||
|
||||
|
|
|
|||
|
|
@ -1202,6 +1202,8 @@ fn clone_preserves_all_sub_structs() {
|
|||
defines: Some("r".into()),
|
||||
uses: vec!["a".into(), "b".into()],
|
||||
extra_defines: vec!["c".into()],
|
||||
array_pattern_indices: smallvec::SmallVec::new(),
|
||||
rhs_array_elements: smallvec::SmallVec::new(),
|
||||
},
|
||||
ast: AstMeta {
|
||||
span: (10, 100),
|
||||
|
|
@ -1501,6 +1503,105 @@ fn rust_println_macro_named_arg_lifted() {
|
|||
assert!(found, "no println! macro_invocation node found");
|
||||
}
|
||||
|
||||
/// A `format!` whose first argument is an identifier naming a top-level
/// `const &str` literal should be treated like a `format!` with an inline
/// literal: the let-binding node for `u` must carry a `string_prefix`, so
/// `is_string_safe_for_ssrf` can lock the host exactly as it does for
/// `format!("https://api/{}", path)`. The bridge only applies when the first
/// non-string token of the macro is an identifier whose matching
/// `const_item` has a string-literal value.
#[test]
fn rust_format_macro_const_first_arg_seeds_string_prefix() {
    let src = b"const URL_FMT: &str = \"https://api.example.com/users/{}\";\n\
                fn f(path: String) { let u = format!(URL_FMT, path); }";
    let (cfg, _entry) = parse_and_build(src, "rust", Language::from(tree_sitter_rust::LANGUAGE));

    // Visit every node that defines `u`; the last one carrying a prefix wins.
    let defining_u = cfg
        .node_indices()
        .map(|idx| &cfg[idx])
        .filter(|info| info.taint.defines.as_deref() == Some("u"));
    let mut prefix: Option<String> = None;
    for info in defining_u {
        if let Some(p) = info.string_prefix.as_deref() {
            prefix = Some(p.to_string());
        }
    }

    assert_eq!(
        prefix.as_deref(),
        Some("https://api.example.com/users/"),
        "expected URL_FMT const to bridge into the format!() string_prefix",
    );
}
|
||||
|
||||
/// Negative counterpart of the const bridge: a const whose initializer is
/// not a string literal (here `const N: usize = 4;`) must never be turned
/// into a `string_prefix` on the node that defines `u`.
#[test]
fn rust_format_macro_const_first_arg_non_string_skipped() {
    let src = b"const N: usize = 4;\n\
                fn f(path: String) { let u = format!(N, path); }";
    let (cfg, _entry) = parse_and_build(src, "rust", Language::from(tree_sitter_rust::LANGUAGE));

    // Every node defining `u` is checked; none may carry a prefix.
    let defining_u = cfg
        .node_indices()
        .map(|idx| &cfg[idx])
        .filter(|info| info.taint.defines.as_deref() == Some("u"));
    for info in defining_u {
        assert!(
            info.string_prefix.is_none(),
            "non-string const must not seed a prefix; got {:?}",
            info.string_prefix
        );
    }
}
|
||||
|
||||
/// `static NAME: &str = "...";` is bridged the same way as a `const_item`:
/// both declaration shapes expose a `name` field and a string-literal
/// `value`, so the first-argument lookup resolves either one identically.
#[test]
fn rust_format_macro_static_first_arg_seeds_string_prefix() {
    let src = b"static API_BASE: &str = \"https://api.example.com/users/{}\";\n\
                fn f(path: String) { let u = format!(API_BASE, path); }";
    let (cfg, _entry) = parse_and_build(src, "rust", Language::from(tree_sitter_rust::LANGUAGE));

    // Visit every node that defines `u`; the last one carrying a prefix wins.
    let defining_u = cfg
        .node_indices()
        .map(|idx| &cfg[idx])
        .filter(|info| info.taint.defines.as_deref() == Some("u"));
    let mut prefix: Option<String> = None;
    for info in defining_u {
        if let Some(p) = info.string_prefix.as_deref() {
            prefix = Some(p.to_string());
        }
    }

    assert_eq!(
        prefix.as_deref(),
        Some("https://api.example.com/users/"),
        "expected static API_BASE to bridge into the format!() string_prefix",
    );
}
|
||||
|
||||
/// Consts declared inside a function body are out of scope for the AST-time
/// prefix bridge. Only file-level `const_item` declarations participate,
/// which keeps the lookup deterministic even though an inner-fn const can
/// shadow a file-level one as the macro's first argument.
#[test]
fn rust_format_macro_inner_const_not_bridged() {
    let src = b"fn f(path: String) {\n\
                const URL_FMT: &str = \"https://api/{}\";\n\
                let u = format!(URL_FMT, path);\n\
                }";
    let (cfg, _entry) = parse_and_build(src, "rust", Language::from(tree_sitter_rust::LANGUAGE));

    // Every node defining `u` is checked; none may carry a prefix.
    let defining_u = cfg
        .node_indices()
        .map(|idx| &cfg[idx])
        .filter(|info| info.taint.defines.as_deref() == Some("u"));
    for info in defining_u {
        assert!(
            info.string_prefix.is_none(),
            "inner-fn const must not bridge; got {:?}",
            info.string_prefix
        );
    }
}
|
||||
|
||||
#[test]
|
||||
fn go_no_import_bindings() {
|
||||
let src = b"package main\nimport alias \"fmt\"\n";
|
||||
|
|
@ -2354,6 +2455,29 @@ fn py_subscript_write_lowers_to_index_set_call() {
|
|||
});
|
||||
}
|
||||
|
||||
#[test]
fn go_selector_expression_call_sets_receiver() {
    // Regression guard for the deferred Phase 15 GORM tuple-return case.
    //
    // In Go, `userDb.Raw(sql)` is a `call_expression` whose `function` field
    // is a `selector_expression` (operand = `userDb`, field = `Raw`). The
    // CFG-side `Kind::CallFn` arm has to surface `userDb` as the receiver so
    // type-qualified resolution can rewrite `userDb.Raw` to `GormDb.Raw` once
    // `userDb`'s SSA value is tagged via
    // `constructor_type(Lang::Go, "gorm.Open")`. Before the fix that arm only
    // understood JS/TS `member_expression`, Python `attribute`, and Rust
    // `field_expression`, so Go calls ended up with `receiver = None`.
    let src = br#"package main
func f(userDb int) {
userDb.Raw("SELECT 1")
}
"#;
    let (cfg, _entry) = parse_and_build(src, "go", Language::from(tree_sitter_go::LANGUAGE));

    let call_node =
        find_node_with_callee(&cfg, "userDb.Raw").expect("go: userDb.Raw node should be present");
    let receiver = call_node.call.receiver.as_deref();
    assert_eq!(receiver, Some("userDb"));
}
|
||||
|
||||
#[test]
|
||||
fn go_index_expr_read_lowers_to_index_get_call() {
|
||||
with_pointer_on(|| {
|
||||
|
|
@ -3217,3 +3341,620 @@ fn js_ternary_branch_subscript_source_classified() {
|
|||
"expected ternary subscript branch defining `x` to carry a Source label"
|
||||
);
|
||||
}
|
||||
|
||||
/// Regression: a Go `switch` without a `default` arm, whose only case body
/// returns, must leave the statements after the switch reachable from entry.
///
/// `expression_case` / `default_case` / `type_case` / `communication_case`
/// all map to `Kind::Block` so the Block handler iterates the case body.
/// `build_switch`'s "first Block child" container fallback would therefore
/// pick the FIRST case as the container, find no case-like children inside
/// it, hit the empty-cases early return, and leave the dispatch If without a
/// False edge. Every post-switch statement then became unreachable, which
/// triggered `cfg-unreachable-sanitizer` on real code (gin's
/// `binding/form_mapping.go::setTimeField`, line 469
/// `if isUTC, _ := strconv.ParseBool(...); isUTC` after a no-default
/// `switch tf := strings.ToLower(timeFormat); tf` on the unix epoch
/// formats).
#[test]
fn go_switch_no_default_keeps_post_switch_reachable() {
    use petgraph::visit::Bfs;
    use std::collections::HashSet;

    let src = br#"package p
func f(x string) bool {
switch tf := x; tf {
case "unix":
return false
}
after()
return true
}
"#;
    let (cfg, entry) = parse_and_build(src, "go", Language::from(tree_sitter_go::LANGUAGE));

    // Drain a breadth-first walk from the entry node into a set.
    let mut walker = Bfs::new(&cfg, entry);
    let reachable: HashSet<NodeIndex> = std::iter::from_fn(|| walker.next(&cfg)).collect();

    let after = cfg
        .node_indices()
        .find(|&idx| cfg[idx].call.callee.as_deref() == Some("after"))
        .expect("expected after() Call node");

    assert!(
        reachable.contains(&after),
        "post-switch `after()` must be reachable from entry; got reachable={:?}",
        reachable
    );
}
|
||||
|
||||
/// In Python, `qs = User.objects` lowers as an `expression_statement`
/// wrapping an `assignment`. The CFG-level `member_field` detector has to
/// look through that wrapper and report `Some("objects")` from the inner
/// RHS, which is what lets the type-fact pass tag the bound value as
/// `DjangoQuerySet`.
#[test]
fn python_member_field_assignment_detected_for_bare_objects() {
    let src = b"def view(req):\n qs = User.objects\n";
    let (cfg, _entry) = parse_and_build(src, "python", Language::from(tree_sitter_python::LANGUAGE));

    // `member_field` of every node that defines `qs`, in node order.
    let detected: Vec<Option<String>> = cfg
        .node_indices()
        .map(|idx| &cfg[idx])
        .filter(|info| info.taint.defines.as_deref() == Some("qs"))
        .map(|info| info.member_field.clone())
        .collect();

    let saw_objects = detected.iter().any(|m| m.as_deref() == Some("objects"));
    assert!(
        saw_objects,
        "expected at least one `qs = ...` CFG node with member_field=Some(\"objects\"); got {:?}",
        detected
    );
}
|
||||
|
||||
/// Negative shape for the `member_field` detector: an assignment from a
/// field other than `objects` (here `User.profile`) must report that field's
/// own name and must never be reported as `Some("objects")`. This guards
/// against the wrapper unwrap picking up the wrong field name.
#[test]
fn python_member_field_assignment_non_objects_does_not_match() {
    let src = b"def view(req):\n qs = User.profile\n";
    let (cfg, _entry) = parse_and_build(src, "python", Language::from(tree_sitter_python::LANGUAGE));

    // `member_field` of every node that defines `qs`, in node order.
    let detected: Vec<Option<String>> = cfg
        .node_indices()
        .map(|idx| &cfg[idx])
        .filter(|info| info.taint.defines.as_deref() == Some("qs"))
        .map(|info| info.member_field.clone())
        .collect();

    let saw_profile = detected.iter().any(|m| m.as_deref() == Some("profile"));
    assert!(
        saw_profile,
        "expected `qs = User.profile` to detect member_field=Some(\"profile\"); got {:?}",
        detected
    );

    let saw_objects = detected.iter().any(|m| m.as_deref() == Some("objects"));
    assert!(
        !saw_objects,
        "must not falsely tag non-`objects` field; got {:?}",
        detected
    );
}
|
||||
|
||||
/// Phase 15 chained-shape closure: a Java local of the form
/// `Session sess = sf.openSession();` registers `(fn_start, "sess")`
/// → `TypeKind::HibernateSession` in the per-file local-receiver-types
/// map, so `find_classifiable_inner_call` can rewrite the chained
/// inner `sess.createNativeQuery(...)` to
/// `HibernateSession.createNativeQuery` when the legacy literal-
/// receiver classify misses.
///
/// The thread-local map is reset before the assertion runs, so a failing
/// assertion cannot leave stale entries behind for a later test that
/// happens to execute on the same thread.
#[test]
fn java_hibernate_session_open_registers_local_receiver_type() {
    let src = br#"
class Foo {
void bar(SessionFactory sf, String sql) {
Session sess = sf.openSession();
sess.createNativeQuery(sql).getResultList();
}
}
"#;
    let ts_lang = Language::from(tree_sitter_java::LANGUAGE);
    let _ = parse_to_file_cfg(src, "java", ts_lang);
    // The TLS map is cleared at the end of `build_cfg`, but the
    // public lookup helper consults it during construction. Re-run
    // population manually for the assertion.
    let mut parser = tree_sitter::Parser::new();
    parser
        .set_language(&Language::from(tree_sitter_java::LANGUAGE))
        .unwrap();
    let tree = parser.parse(src.as_slice(), None).unwrap();
    super::populate_local_receiver_types(&tree, "java", src);

    // Pre-order search for the first `method_declaration`; its start byte is
    // the key the receiver-type map is looked up with below.
    fn find_method_start(node: tree_sitter::Node<'_>) -> Option<usize> {
        if node.kind() == "method_declaration" {
            return Some(node.start_byte());
        }
        let mut c = node.walk();
        for child in node.children(&mut c) {
            if let Some(s) = find_method_start(child) {
                return Some(s);
            }
        }
        None
    }

    let fn_start = find_method_start(tree.root_node()).expect("method_declaration in fixture");
    let got = super::lookup_local_receiver_type(fn_start, "sess");

    // Clean up BEFORE asserting: `assert_eq!` panics on failure, and a
    // cleanup placed after it would be skipped, leaking TLS state into other
    // tests on this thread.
    super::LOCAL_RECEIVER_TYPES.with(|cell| cell.borrow_mut().clear());

    assert_eq!(
        got,
        Some(crate::ssa::type_facts::TypeKind::HibernateSession),
        "local `Session sess = sf.openSession()` should bind to HibernateSession"
    );
}
|
||||
|
||||
/// Same Java per-file map: a local whose RHS is unrelated (no
/// `constructor_type` match) must NOT register. Confirms the
/// recogniser is anchored on `constructor_type`'s callee classifier
/// rather than the declared receiver type, so a generic
/// `Session foo = computeFoo()` doesn't bleed an unrelated method
/// into the type-qualified pool.
///
/// The thread-local map is reset before the assertion runs, so a failing
/// assertion cannot leave stale entries behind for a later test that
/// happens to execute on the same thread.
#[test]
fn java_unrecognised_rhs_does_not_register_local_receiver_type() {
    let src = br#"
class Foo {
void bar() {
Session sess = computeSomethingUnrelated();
sess.doSomething();
}
}
"#;
    let mut parser = tree_sitter::Parser::new();
    parser
        .set_language(&Language::from(tree_sitter_java::LANGUAGE))
        .unwrap();
    let tree = parser.parse(src.as_slice(), None).unwrap();
    super::populate_local_receiver_types(&tree, "java", src);

    // Pre-order search for the first `method_declaration`; its start byte is
    // the key the receiver-type map is looked up with below.
    fn find_method_start(node: tree_sitter::Node<'_>) -> Option<usize> {
        if node.kind() == "method_declaration" {
            return Some(node.start_byte());
        }
        let mut c = node.walk();
        for child in node.children(&mut c) {
            if let Some(s) = find_method_start(child) {
                return Some(s);
            }
        }
        None
    }

    let fn_start = find_method_start(tree.root_node()).expect("method_declaration in fixture");
    let got = super::lookup_local_receiver_type(fn_start, "sess");

    // Clean up BEFORE asserting: `assert_eq!` panics on failure, and a
    // cleanup placed after it would be skipped, leaking TLS state into other
    // tests on this thread.
    super::LOCAL_RECEIVER_TYPES.with(|cell| cell.borrow_mut().clear());

    assert_eq!(
        got, None,
        "unrecognised RHS `computeSomethingUnrelated()` must not register a receiver-type"
    );
}
|
||||
|
||||
/// Exercises `collect_array_pattern_bindings_indexed` on JS/TS
/// `array_pattern` nodes. The helper visits children in source order and
/// yields `(name, position)` for each simple-identifier binding. A skipped
/// slot (two commas with no binding between them) still advances the
/// position counter, so `const [, b]` gives `[("b", 1)]` and `const [a, ,]`
/// gives `[("a", 0)]`. Any complex sub-pattern (`assignment_pattern`,
/// `rest_pattern`, nested `array_pattern`) makes the helper return an empty
/// result so the lowering rewrite falls back to scalar union.
#[test]
fn array_pattern_indexed_bindings_recognise_skip_slots() {
    use super::helpers::collect_array_pattern_bindings_indexed;

    /// First `array_pattern` in pre-order, found with an explicit stack.
    fn first_array_pattern<'t>(root: tree_sitter::Node<'t>) -> Option<tree_sitter::Node<'t>> {
        let mut pending = vec![root];
        while let Some(node) = pending.pop() {
            if node.kind() == "array_pattern" {
                return Some(node);
            }
            let mut cursor = node.walk();
            let kids: Vec<_> = node.children(&mut cursor).collect();
            // Reverse so the leftmost child is popped (visited) first.
            pending.extend(kids.into_iter().rev());
        }
        None
    }

    /// Parses `src` as JavaScript and runs the helper on its first pattern.
    fn run_case(src: &[u8]) -> Vec<(String, usize)> {
        let mut parser = tree_sitter::Parser::new();
        parser
            .set_language(&Language::from(tree_sitter_javascript::LANGUAGE))
            .unwrap();
        let tree = parser.parse(src, None).unwrap();
        let bytes = src.to_vec();
        let pat = first_array_pattern(tree.root_node()).expect("array_pattern in fixture");
        collect_array_pattern_bindings_indexed(pat, &bytes)
            .into_iter()
            .collect()
    }

    /// Builds the owned expectation from borrowed `(name, position)` pairs.
    fn bindings(pairs: &[(&str, usize)]) -> Vec<(String, usize)> {
        pairs
            .iter()
            .map(|&(name, pos)| (name.to_string(), pos))
            .collect()
    }

    assert_eq!(run_case(b"const [a, b] = x;"), bindings(&[("a", 0), ("b", 1)]));
    assert_eq!(run_case(b"const [, b] = x;"), bindings(&[("b", 1)]));
    assert_eq!(run_case(b"const [a, ,] = x;"), bindings(&[("a", 0)]));
    assert_eq!(
        run_case(b"const [a, , c] = x;"),
        bindings(&[("a", 0), ("c", 2)]),
    );

    // A rest element yields nothing; callers then use scalar union.
    assert!(run_case(b"const [a, ...rest] = x;").is_empty());
    // So does a binding with a default value.
    assert!(run_case(b"const [a = 1, b] = x;").is_empty());
    // And so does a nested array pattern.
    assert!(run_case(b"const [[a, b], c] = x;").is_empty());
}
|
||||
|
||||
/// The same helper handles Rust `tuple_pattern` nodes. A `_` wildcard
/// (`_pattern` node) consumes a position without producing a binding, which
/// mirrors how JS skip slots behave. Other complex sub-patterns
/// (tuple-struct, parenthesized) make the helper return an empty result.
#[test]
fn tuple_pattern_indexed_bindings_recognise_rust_wildcards() {
    use super::helpers::collect_array_pattern_bindings_indexed;

    /// First `tuple_pattern` in pre-order, found with an explicit stack.
    fn first_tuple_pattern<'t>(root: tree_sitter::Node<'t>) -> Option<tree_sitter::Node<'t>> {
        let mut pending = vec![root];
        while let Some(node) = pending.pop() {
            if node.kind() == "tuple_pattern" {
                return Some(node);
            }
            let mut cursor = node.walk();
            let kids: Vec<_> = node.children(&mut cursor).collect();
            // Reverse so the leftmost child is popped (visited) first.
            pending.extend(kids.into_iter().rev());
        }
        None
    }

    /// Parses `src` as Rust and runs the helper on its first tuple pattern.
    fn run_case(src: &[u8]) -> Vec<(String, usize)> {
        let mut parser = tree_sitter::Parser::new();
        parser
            .set_language(&Language::from(tree_sitter_rust::LANGUAGE))
            .unwrap();
        let tree = parser.parse(src, None).unwrap();
        let bytes = src.to_vec();
        let pat = first_tuple_pattern(tree.root_node()).expect("tuple_pattern in fixture");
        collect_array_pattern_bindings_indexed(pat, &bytes)
            .into_iter()
            .collect()
    }

    /// Builds the owned expectation from borrowed `(name, position)` pairs.
    fn bindings(pairs: &[(&str, usize)]) -> Vec<(String, usize)> {
        pairs
            .iter()
            .map(|&(name, pos)| (name.to_string(), pos))
            .collect()
    }

    assert_eq!(
        run_case(b"fn f() { let (a, b) = (1, 2); }"),
        bindings(&[("a", 0), ("b", 1)]),
    );
    assert_eq!(
        run_case(b"fn f() { let (_, b) = (1, 2); }"),
        bindings(&[("b", 1)]),
    );
    assert_eq!(
        run_case(b"fn f() { let (a, _) = (1, 2); }"),
        bindings(&[("a", 0)]),
    );
    assert_eq!(
        run_case(b"fn f() { let (a, _, c) = (1, 2, 3); }"),
        bindings(&[("a", 0), ("c", 2)]),
    );
}
|
||||
|
||||
/// The helper also covers Python's `pattern_list` (bare `a, b = ...`) and
/// `tuple_pattern` (parenthesised `(a, b) = ...`). In Python `_` is an
/// ordinary identifier rather than a wildcard, so it is reported with its
/// source position like any other name. A `list_splat_pattern`
/// (`a, *rest`) makes the helper return an empty result so callers fall
/// back to scalar union.
#[test]
fn pattern_list_indexed_bindings_recognise_python_destructure() {
    use super::helpers::collect_array_pattern_bindings_indexed;

    /// First node in pre-order whose kind is listed in `kinds`.
    fn first_pattern<'t>(
        root: tree_sitter::Node<'t>,
        kinds: &[&str],
    ) -> Option<tree_sitter::Node<'t>> {
        let mut pending = vec![root];
        while let Some(node) = pending.pop() {
            if kinds.contains(&node.kind()) {
                return Some(node);
            }
            let mut cursor = node.walk();
            let children: Vec<_> = node.children(&mut cursor).collect();
            // Reverse so the leftmost child is popped (visited) first.
            pending.extend(children.into_iter().rev());
        }
        None
    }

    /// Parses `src` as Python and runs the helper on the first matching node.
    fn run_case(src: &[u8], kinds: &[&str]) -> Vec<(String, usize)> {
        let mut parser = tree_sitter::Parser::new();
        parser
            .set_language(&Language::from(tree_sitter_python::LANGUAGE))
            .unwrap();
        let tree = parser.parse(src, None).unwrap();
        let bytes = src.to_vec();
        let pat = first_pattern(tree.root_node(), kinds)
            .unwrap_or_else(|| panic!("no {kinds:?} in fixture"));
        collect_array_pattern_bindings_indexed(pat, &bytes)
            .into_iter()
            .collect()
    }

    /// Builds the owned expectation from borrowed `(name, position)` pairs.
    fn bindings(pairs: &[(&str, usize)]) -> Vec<(String, usize)> {
        pairs
            .iter()
            .map(|&(name, pos)| (name.to_string(), pos))
            .collect()
    }

    // `a, b = ...` without parentheses is a `pattern_list`.
    assert_eq!(
        run_case(b"a, b = (1, 2)\n", &["pattern_list"]),
        bindings(&[("a", 0), ("b", 1)]),
    );
    // Same shape with three names.
    assert_eq!(
        run_case(b"a, b, c = (1, 2, 3)\n", &["pattern_list"]),
        bindings(&[("a", 0), ("b", 1), ("c", 2)]),
    );
    // `_` binds like any other Python identifier.
    assert_eq!(
        run_case(b"_, b = (1, 2)\n", &["pattern_list"]),
        bindings(&[("_", 0), ("b", 1)]),
    );
    assert_eq!(
        run_case(b"a, _ = (1, 2)\n", &["pattern_list"]),
        bindings(&[("a", 0), ("_", 1)]),
    );
    // A parenthesised target list shows up as `tuple_pattern`.
    assert_eq!(
        run_case(b"(a, b) = (1, 2)\n", &["tuple_pattern"]),
        bindings(&[("a", 0), ("b", 1)]),
    );
    // Splat / rest targets yield nothing: positions no longer map 1:1.
    assert!(run_case(b"a, *rest = (1, 2, 3)\n", &["pattern_list"]).is_empty());
    // Nested destructuring yields nothing: the recogniser stays flat and
    // does not descend into sub-patterns.
    assert!(run_case(b"(a, b), c = ((1, 2), 3)\n", &["pattern_list"]).is_empty());
}
|
||||
|
||||
/// tree-sitter-ruby represents the LHS of `a, b = ...` as a
/// `left_assignment_list`. The helper visits its comma-separated identifier
/// children in source order and yields `(name, position)` for each one.
/// As in Python, Ruby's `_` is an ordinary identifier. `rest_assignment`
/// (`*rest`) and `destructured_left_assignment` (parenthesised nested
/// destructure) take the bail branch, so callers fall back to scalar union
/// for those shapes.
#[test]
fn left_assignment_list_indexed_bindings_recognise_ruby_destructure() {
    use super::helpers::collect_array_pattern_bindings_indexed;

    /// First `left_assignment_list` in pre-order, found with an explicit stack.
    fn first_left_assignment_list<'t>(
        root: tree_sitter::Node<'t>,
    ) -> Option<tree_sitter::Node<'t>> {
        let mut pending = vec![root];
        while let Some(node) = pending.pop() {
            if node.kind() == "left_assignment_list" {
                return Some(node);
            }
            let mut cursor = node.walk();
            let kids: Vec<_> = node.children(&mut cursor).collect();
            // Reverse so the leftmost child is popped (visited) first.
            pending.extend(kids.into_iter().rev());
        }
        None
    }

    /// Parses `src` as Ruby and runs the helper on its first assignment list.
    fn run_case(src: &[u8]) -> Vec<(String, usize)> {
        let mut parser = tree_sitter::Parser::new();
        parser
            .set_language(&Language::from(tree_sitter_ruby::LANGUAGE))
            .unwrap();
        let tree = parser.parse(src, None).unwrap();
        let bytes = src.to_vec();
        let pat =
            first_left_assignment_list(tree.root_node()).expect("left_assignment_list in fixture");
        collect_array_pattern_bindings_indexed(pat, &bytes)
            .into_iter()
            .collect()
    }

    /// Builds the owned expectation from borrowed `(name, position)` pairs.
    fn bindings(pairs: &[(&str, usize)]) -> Vec<(String, usize)> {
        pairs
            .iter()
            .map(|&(name, pos)| (name.to_string(), pos))
            .collect()
    }

    assert_eq!(
        run_case(b"a, b = [x, y]\n"),
        bindings(&[("a", 0), ("b", 1)]),
    );
    assert_eq!(
        run_case(b"a, b, c = [x, y, z]\n"),
        bindings(&[("a", 0), ("b", 1), ("c", 2)]),
    );
    // `_` is a real binding in Ruby: conventionally "unused", yet still
    // resolvable in scope.
    assert_eq!(
        run_case(b"_, b = [x, y]\n"),
        bindings(&[("_", 0), ("b", 1)]),
    );
    assert_eq!(
        run_case(b"a, _ = [x, y]\n"),
        bindings(&[("a", 0), ("_", 1)]),
    );
    // The RHS shape is irrelevant; a call on the right gives the same LHS walk.
    assert_eq!(
        run_case(b"a, b = func()\n"),
        bindings(&[("a", 0), ("b", 1)]),
    );
    // A splat tail yields nothing: `rest_assignment` is a complex sub-pattern.
    assert!(run_case(b"a, *rest = [x, y, z]\n").is_empty());
    // A parenthesised nested destructure yields nothing:
    // `destructured_left_assignment` is not in the simple-identifier
    // whitelist.
    assert!(run_case(b"(a, b) = [x, y]\n").is_empty());
}
|
||||
|
||||
/// Covers the helper behind the bare-array destructure rewrite in
/// `src/ssa/lower.rs`. Given the RHS of a destructure assignment it yields
/// one slot per element in source order, each being `Ident(name)`,
/// `Literal`, or `Complex(inner_uses)`. Shapes that would shift index
/// alignment (spread / list splat) make it return an empty result.
#[test]
fn rhs_array_literal_elements_recognise_per_language_shapes() {
    use super::RhsArraySlot;
    use super::helpers::collect_rhs_array_literal_elements;

    /// Maps a language label to its tree-sitter grammar.
    fn grammar_for(lang_label: &str) -> Language {
        match lang_label {
            "javascript" => Language::from(tree_sitter_javascript::LANGUAGE),
            "typescript" => Language::from(tree_sitter_typescript::LANGUAGE_TYPESCRIPT),
            "python" => Language::from(tree_sitter_python::LANGUAGE),
            "ruby" => Language::from(tree_sitter_ruby::LANGUAGE),
            "rust" => Language::from(tree_sitter_rust::LANGUAGE),
            other => panic!("unsupported lang: {}", other),
        }
    }

    /// First node in pre-order whose kind is listed in `kinds`.
    fn find_first<'t>(
        root: tree_sitter::Node<'t>,
        kinds: &[&str],
    ) -> Option<tree_sitter::Node<'t>> {
        let mut pending = vec![root];
        while let Some(node) = pending.pop() {
            if kinds.contains(&node.kind()) {
                return Some(node);
            }
            let mut cursor = node.walk();
            let children: Vec<_> = node.children(&mut cursor).collect();
            // Reverse so the leftmost child is popped (visited) first.
            pending.extend(children.into_iter().rev());
        }
        None
    }

    /// Parses `src`, locates the first node of one of `rhs_kinds`, and
    /// returns the slots the helper produces for it.
    fn run(lang: &str, src: &[u8], rhs_kinds: &[&str]) -> Vec<RhsArraySlot> {
        let mut parser = tree_sitter::Parser::new();
        let grammar = grammar_for(lang);
        parser.set_language(&grammar).unwrap();
        let tree = parser.parse(src, None).unwrap();
        let bytes = src.to_vec();
        let rhs = find_first(tree.root_node(), rhs_kinds).expect("rhs in fixture");
        collect_rhs_array_literal_elements(rhs, lang, &bytes, None)
            .into_iter()
            .collect()
    }

    /// Shorthand for the common JavaScript `array` RHS case.
    fn js_array(src: &[u8]) -> Vec<RhsArraySlot> {
        run("javascript", src, &["array"])
    }

    fn ident(name: &str) -> RhsArraySlot {
        RhsArraySlot::Ident(name.to_string())
    }

    /// `Complex` slot with the given inner uses and source capability.
    fn complex_with_cap(uses: &[&str], source_cap: crate::labels::Cap) -> RhsArraySlot {
        RhsArraySlot::Complex {
            uses: uses.iter().map(|s| s.to_string()).collect(),
            source_cap,
        }
    }

    /// `Complex` slot that did not classify as a Source.
    fn complex(uses: &[&str]) -> RhsArraySlot {
        complex_with_cap(uses, crate::labels::Cap::empty())
    }

    /// `Complex` slot that classified as a Source.
    fn complex_source(uses: &[&str]) -> RhsArraySlot {
        complex_with_cap(uses, crate::labels::Cap::all())
    }

    // JS/TS `array` literal holding two bare identifiers.
    assert_eq!(
        js_array(b"const _ = [safe, tainted];\n"),
        vec![ident("safe"), ident("tainted")],
    );
    // JS/TS `array` mixing an identifier with a string literal.
    assert_eq!(
        js_array(b"const _ = [tainted, \"ok\"];\n"),
        vec![ident("tainted"), RhsArraySlot::Literal],
    );
    // A call element is reported as `Complex` with its inner idents instead
    // of bailing. `collect_idents_with_paths` lifts both paths and bare
    // idents, so a member access appears as the dotted path (e.g.
    // `req.query.x`) followed by its component idents.
    assert_eq!(
        js_array(b"const _ = [fn(x), 'lit'];\n"),
        vec![complex(&["fn", "x"]), RhsArraySlot::Literal],
    );
    // Member access is `Complex`: dotted path first, then the components.
    // The slot is classified as Source when its subtree holds a
    // member-expression that matches after strip-and-retry
    // (`req.query.x` → strip `.x` → `req.query` matches the JS Source rule).
    assert_eq!(
        js_array(b"const _ = [req.query.x, 'lit'];\n"),
        vec![
            complex_source(&["req.query.x", "req", "query", "x"]),
            RhsArraySlot::Literal,
        ],
    );
    // Sibling precision: one Complex slot that classifies as Source next to
    // one whose subtree does NOT. Before session 0047 the outer-node
    // fallback in `src/ssa/lower.rs` conservatively re-emitted every Complex
    // slot as Source; per-slot classification leaves the safe sibling empty
    // so SSA lowering can emit `Assign(safe)` for it.
    assert_eq!(
        js_array(b"const _ = [process.env.X, helper(local)];\n"),
        vec![
            complex_source(&["process.env.X", "process", "env", "X"]),
            complex(&["helper", "local"]),
        ],
    );
    // A JS/TS spread yields nothing at all (indices would shift).
    assert!(js_array(b"const _ = [...arr, b];\n").is_empty());
    // A JS/TS binary expression is `Complex` carrying the inner identifier.
    assert_eq!(
        js_array(b"const _ = ['log-' + x, 'lit'];\n"),
        vec![complex(&["x"]), RhsArraySlot::Literal],
    );

    // Python `list`.
    assert_eq!(
        run("python", b"a = [safe, tainted]\n", &["list"]),
        vec![ident("safe"), ident("tainted")],
    );
    // Python `expression_list` (the bare-comma RHS of `a, b = x, y`).
    assert_eq!(
        run("python", b"a, b = safe, tainted\n", &["expression_list"]),
        vec![ident("safe"), ident("tainted")],
    );
    // Python parenthesised `tuple`.
    assert_eq!(
        run("python", b"x = (safe, 42)\n", &["tuple"]),
        vec![ident("safe"), RhsArraySlot::Literal],
    );
    // A Python list splat yields nothing.
    assert!(run("python", b"x = [*a, b]\n", &["list"]).is_empty());

    // Ruby `array`.
    assert_eq!(
        run("ruby", b"a, b = [safe, tainted]\n", &["array"]),
        vec![ident("safe"), ident("tainted")],
    );
    // Ruby `array` mixing an identifier with a literal.
    assert_eq!(
        run("ruby", b"a, b = [tainted, \"safe\"]\n", &["array"]),
        vec![ident("tainted"), RhsArraySlot::Literal],
    );

    // Rust `tuple_expression`.
    assert_eq!(
        run(
            "rust",
            b"fn f(safe: &str, tainted: &str) { let _ = (safe, tainted); }\n",
            &["tuple_expression"]
        ),
        vec![ident("safe"), ident("tainted")],
    );

    // A node that is not array-shaped yields nothing (defensive guard).
    assert!(run("javascript", b"const x = tainted;\n", &["identifier"]).is_empty());
}
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@ use super::helpers::first_member_label;
|
|||
use super::{
|
||||
AstMeta, Cfg, EdgeKind, MAX_COND_VARS, MAX_CONDITION_TEXT_LEN, NodeInfo, StmtKind,
|
||||
collect_idents, connect_all, detect_eq_with_const, detect_negation, has_call_descendant,
|
||||
member_expr_text, push_node, text_of,
|
||||
member_expr_text, push_node, text_of, try_lower_jsx_dangerous_html,
|
||||
};
|
||||
use crate::labels::{DataLabel, LangAnalysisRules, classify};
|
||||
use crate::utils::snippet::truncate_at_char_boundary;
|
||||
|
|
@ -378,7 +378,24 @@ pub(super) fn lower_ternary_branch<'a>(
|
|||
}
|
||||
|
||||
connect_all(g, preds, node, pred_edge);
|
||||
vec![node]
|
||||
|
||||
// React JSX `dangerouslySetInnerHTML={{__html: x}}` synthesis when the
|
||||
// branch expression is itself a JSX element (or contains one as a
|
||||
// descendant). Without this, `cond ? <div dangerouslySetInnerHTML=...
|
||||
// /> : null` and similar ternary-RHS shapes never reach the
|
||||
// `Kind::Return` / `Kind::Assignment` arms that own the synthesis hook,
|
||||
// because `build_ternary_diamond` lowers each branch directly.
|
||||
let post_jsx = try_lower_jsx_dangerous_html(
|
||||
branch_ast,
|
||||
&[node],
|
||||
g,
|
||||
lang,
|
||||
code,
|
||||
enclosing_func,
|
||||
call_ordinal,
|
||||
analysis_rules,
|
||||
);
|
||||
post_jsx
|
||||
}
|
||||
|
||||
/// Extract `(lhs_ast, ternary_ast)` when `outer_ast` is an expression-statement
|
||||
|
|
|
|||
|
|
@ -554,3 +554,469 @@ fn collect_ruby_symbol_list(node: Node<'_>, code: &[u8], out: &mut Vec<String>)
|
|||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
/// Extract route-path capture variable names from framework routing decorators
|
||||
/// on a function AST node.
|
||||
///
|
||||
/// Supported languages:
|
||||
/// * Python: walks Flask-style `@app.route("/users/<name>")`,
|
||||
/// blueprint-prefixed `@bp.get("/u/<int:id>")`, and verb-shaped
|
||||
/// `@router.post("/<path:slug>")` decorators. Returns inner names from
|
||||
/// `<name>` / `<conv:name>` brace-segments.
|
||||
/// * Ruby: walks Sinatra `get "/u/:name" do |name| ... end`. The
|
||||
/// `func_node` is the `do_block`; its parent `call` carries the verb
|
||||
/// in the `method` field and the path pattern in the first positional
|
||||
/// string argument. Returns inner names from `:name` colon-segments.
|
||||
///
|
||||
/// Functions without a recognised routing pattern return an empty `Vec`.
|
||||
/// Strict additive: downstream consumers gate the result via
|
||||
/// `param.contains(name)` so empty captures preserve today's behaviour.
|
||||
pub(super) fn extract_route_path_captures<'a>(
|
||||
func_node: Node<'a>,
|
||||
lang: &str,
|
||||
code: &'a [u8],
|
||||
) -> Vec<String> {
|
||||
let mut out: Vec<String> = Vec::new();
|
||||
match lang {
|
||||
"python" => extract_python_route_captures(func_node, code, &mut out),
|
||||
"ruby" => extract_ruby_route_captures(func_node, code, &mut out),
|
||||
_ => {}
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
fn extract_python_route_captures<'a>(func_node: Node<'a>, code: &'a [u8], out: &mut Vec<String>) {
|
||||
let Some(parent) = func_node.parent() else {
|
||||
return;
|
||||
};
|
||||
if parent.kind() != "decorated_definition" {
|
||||
return;
|
||||
}
|
||||
let mut w = parent.walk();
|
||||
for ch in parent.children(&mut w) {
|
||||
if ch.kind() != "decorator" {
|
||||
continue;
|
||||
}
|
||||
let mut dw = ch.walk();
|
||||
let Some(expr) = ch.children(&mut dw).find(|c| c.kind() != "@") else {
|
||||
continue;
|
||||
};
|
||||
if expr.kind() != "call" {
|
||||
continue;
|
||||
}
|
||||
let Some(target) = expr.child_by_field_name("function") else {
|
||||
continue;
|
||||
};
|
||||
if target.kind() != "attribute" {
|
||||
continue;
|
||||
}
|
||||
let Some(attr) = target.child_by_field_name("attribute") else {
|
||||
continue;
|
||||
};
|
||||
let Some(attr_text) = text_of(attr, code) else {
|
||||
continue;
|
||||
};
|
||||
let attr_lower = attr_text.to_ascii_lowercase();
|
||||
let is_route_verb = matches!(
|
||||
attr_lower.as_str(),
|
||||
"route" | "get" | "post" | "put" | "patch" | "delete" | "head" | "options"
|
||||
);
|
||||
if !is_route_verb {
|
||||
continue;
|
||||
}
|
||||
let Some(args) = expr.child_by_field_name("arguments") else {
|
||||
continue;
|
||||
};
|
||||
let Some(pattern) = first_positional_string_arg(args, code) else {
|
||||
continue;
|
||||
};
|
||||
collect_flask_path_captures(&pattern, out);
|
||||
collect_fastapi_path_captures(&pattern, out);
|
||||
}
|
||||
}
|
||||
|
||||
/// Walk up from a Ruby `do_block` / `block` to the enclosing `call`.
|
||||
/// If the call's method is a Sinatra-style HTTP verb and its first
|
||||
/// positional argument is a static string literal, parse Sinatra
|
||||
/// `:name` path captures into `out`.
|
||||
fn extract_ruby_route_captures<'a>(func_node: Node<'a>, code: &'a [u8], out: &mut Vec<String>) {
|
||||
let Some(parent) = func_node.parent() else {
|
||||
return;
|
||||
};
|
||||
if parent.kind() != "call" {
|
||||
return;
|
||||
}
|
||||
let Some(method_node) = parent.child_by_field_name("method") else {
|
||||
return;
|
||||
};
|
||||
let Some(verb) = text_of(method_node, code) else {
|
||||
return;
|
||||
};
|
||||
let verb_lc = verb.to_ascii_lowercase();
|
||||
let is_sinatra_verb = matches!(
|
||||
verb_lc.as_str(),
|
||||
"get" | "post" | "put" | "patch" | "delete" | "head" | "options" | "link" | "unlink"
|
||||
);
|
||||
if !is_sinatra_verb {
|
||||
return;
|
||||
}
|
||||
let Some(args) = parent.child_by_field_name("arguments") else {
|
||||
return;
|
||||
};
|
||||
let Some(pattern) = first_positional_string_arg_ruby(args, code) else {
|
||||
return;
|
||||
};
|
||||
collect_sinatra_path_captures(&pattern, out);
|
||||
}
|
||||
|
||||
/// Return the literal text of the first positional string argument inside a
|
||||
/// Python `argument_list`. Skips keyword args and non-string positionals.
|
||||
fn first_positional_string_arg(args: Node<'_>, code: &[u8]) -> Option<String> {
|
||||
let mut cursor = args.walk();
|
||||
for arg in args.children(&mut cursor) {
|
||||
match arg.kind() {
|
||||
"(" | ")" | "," => continue,
|
||||
"keyword_argument" => continue,
|
||||
"string" => {
|
||||
return python_string_text(arg, code);
|
||||
}
|
||||
_ => return None,
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Strip Python string-literal quoting from a `string` AST node. Rejects
|
||||
/// f-strings (interpolation children present) because the captured pattern
|
||||
/// is not statically known.
|
||||
fn python_string_text(node: Node<'_>, code: &[u8]) -> Option<String> {
|
||||
let mut cursor = node.walk();
|
||||
for ch in node.children(&mut cursor) {
|
||||
if ch.kind() == "interpolation" {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
let raw = text_of(node, code)?;
|
||||
let trimmed = raw.trim();
|
||||
let trimmed = trimmed.trim_start_matches(['r', 'R', 'b', 'B', 'u', 'U', 'f', 'F']);
|
||||
let stripped = trimmed
|
||||
.strip_prefix("\"\"\"")
|
||||
.and_then(|s| s.strip_suffix("\"\"\""))
|
||||
.or_else(|| {
|
||||
trimmed
|
||||
.strip_prefix("'''")
|
||||
.and_then(|s| s.strip_suffix("'''"))
|
||||
})
|
||||
.or_else(|| trimmed.strip_prefix('"').and_then(|s| s.strip_suffix('"')))
|
||||
.or_else(|| {
|
||||
trimmed
|
||||
.strip_prefix('\'')
|
||||
.and_then(|s| s.strip_suffix('\''))
|
||||
})?;
|
||||
Some(stripped.to_string())
|
||||
}
|
||||
|
||||
/// Return the literal text of the first positional string argument inside a
|
||||
/// Ruby `argument_list`. Hash literals (`pair`), block arguments,
|
||||
/// hash-splat arguments, and non-string positionals all return `None`.
|
||||
fn first_positional_string_arg_ruby(args: Node<'_>, code: &[u8]) -> Option<String> {
|
||||
let mut cursor = args.walk();
|
||||
for arg in args.children(&mut cursor) {
|
||||
match arg.kind() {
|
||||
"(" | ")" | "," => continue,
|
||||
"pair" | "hash" | "block_argument" | "hash_splat_argument" => return None,
|
||||
"string" => return ruby_string_text(arg, code),
|
||||
_ => return None,
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Strip Ruby string-literal quoting from a `string` AST node. Rejects
|
||||
/// strings with `#{...}` interpolation (the captured pattern is not
|
||||
/// statically known). Returns the concatenation of `string_content`
|
||||
/// children.
|
||||
fn ruby_string_text(node: Node<'_>, code: &[u8]) -> Option<String> {
|
||||
let mut cursor = node.walk();
|
||||
let mut content = String::new();
|
||||
let mut had_content = false;
|
||||
for ch in node.children(&mut cursor) {
|
||||
match ch.kind() {
|
||||
"interpolation" => return None,
|
||||
"string_content" => {
|
||||
if let Some(t) = text_of(ch, code) {
|
||||
content.push_str(&t);
|
||||
had_content = true;
|
||||
}
|
||||
}
|
||||
_ => continue,
|
||||
}
|
||||
}
|
||||
if had_content { Some(content) } else { None }
|
||||
}
|
||||
|
||||
/// Append the `:name` capture names found in a Sinatra-style route pattern.
///
/// A capture starts at a `:` that is the first byte of the pattern or comes
/// directly after `/`, and extends over the following run of ASCII
/// alphanumerics and underscores. A `:` anywhere else (as in `Foo::Bar`) or
/// one with an empty name is ignored. Names are lowercased and pushed to
/// `out` only if not already present.
fn collect_sinatra_path_captures(pattern: &str, out: &mut Vec<String>) {
    let bytes = pattern.as_bytes();
    let mut idx = 0;
    while idx < bytes.len() {
        let starts_capture = bytes[idx] == b':' && (idx == 0 || bytes[idx - 1] == b'/');
        if !starts_capture {
            idx += 1;
            continue;
        }

        let name_start = idx + 1;
        let name_len = bytes[name_start..]
            .iter()
            .take_while(|b| b.is_ascii_alphanumeric() || **b == b'_')
            .count();
        let name_end = name_start + name_len;

        if name_len > 0 {
            // Both bounds sit next to ASCII bytes, so the slice is on char boundaries.
            let lowered = pattern[name_start..name_end].to_ascii_lowercase();
            if !out.contains(&lowered) {
                out.push(lowered);
            }
        }
        idx = name_end;
    }
}
|
||||
|
||||
/// Append the capture names found in curly-brace route segments
/// (`{name}` or `{name:converter}`).
///
/// For each `{...}` pair, the text before the first `:` is trimmed and
/// accepted when it is non-empty and made only of ASCII alphanumerics and
/// underscores. Accepted names are lowercased and pushed to `out` unless
/// already present. Scanning stops at the first `{` that has no closing `}`.
fn collect_fastapi_path_captures(pattern: &str, out: &mut Vec<String>) {
    let mut rest = pattern;
    while let Some(open) = rest.find('{') {
        let after_open = &rest[open + 1..];
        let Some(close) = after_open.find('}') else {
            break;
        };

        let inner = &after_open[..close];
        // The name comes first here: `{item_id:int}`.
        let name = inner.split(':').next().unwrap_or(inner).trim();
        let is_ident =
            !name.is_empty() && name.chars().all(|c| c.is_ascii_alphanumeric() || c == '_');
        if is_ident {
            let lowered = name.to_ascii_lowercase();
            if !out.contains(&lowered) {
                out.push(lowered);
            }
        }

        rest = &after_open[close + 1..];
    }
}
|
||||
|
||||
/// Append the capture names found in angle-bracket route segments
/// (`<name>` or `<converter:name>`).
///
/// For each `<...>` pair, the text after the last `:` (or the whole segment
/// when there is no `:`) is trimmed and accepted when it is non-empty and
/// made only of ASCII alphanumerics and underscores. Accepted names are
/// lowercased and pushed to `out` unless already present. Scanning stops at
/// the first `<` that has no closing `>`.
fn collect_flask_path_captures(pattern: &str, out: &mut Vec<String>) {
    let bytes = pattern.as_bytes();
    let mut cursor = 0;
    while let Some(offset) = bytes[cursor..].iter().position(|&b| b == b'<') {
        let open = cursor + offset;
        let Some(len) = bytes[open + 1..].iter().position(|&b| b == b'>') else {
            break;
        };
        let close = open + 1 + len;

        // `<` and `>` are ASCII, so slicing between them stays on char boundaries.
        let inner = &pattern[open + 1..close];
        // The converter comes first here: `<int:item_id>`.
        let name = inner.rsplit(':').next().unwrap_or(inner).trim();
        let is_ident =
            !name.is_empty() && name.chars().all(|c| c.is_ascii_alphanumeric() || c == '_');
        if is_ident {
            let lowered = name.to_ascii_lowercase();
            if !out.contains(&lowered) {
                out.push(lowered);
            }
        }

        cursor = close + 1;
    }
}
|
||||
|
||||
#[cfg(test)]
mod path_capture_tests {
    use super::*;

    /// Run `collector` on `pat` with a fresh output vector and return it.
    fn run_collector(collector: fn(&str, &mut Vec<String>), pat: &str) -> Vec<String> {
        let mut captured = Vec::new();
        collector(pat, &mut captured);
        captured
    }

    // --- Angle-bracket (`<name>` / `<conv:name>`) patterns -----------------

    fn collect_for(pat: &str) -> Vec<String> {
        run_collector(collect_flask_path_captures, pat)
    }

    #[test]
    fn extracts_bare_capture() {
        assert_eq!(collect_for("/users/<name>"), ["name"]);
    }

    #[test]
    fn extracts_converter_capture() {
        assert_eq!(collect_for("/items/<int:item_id>"), ["item_id"]);
    }

    #[test]
    fn extracts_path_converter() {
        assert_eq!(collect_for("/x/<path:slug>"), ["slug"]);
    }

    #[test]
    fn extracts_multiple_captures() {
        assert_eq!(collect_for("/u/<uid>/post/<int:pid>"), ["uid", "pid"]);
    }

    #[test]
    fn dedupes_repeated_names() {
        assert_eq!(collect_for("/<a>/<a>"), ["a"]);
    }

    #[test]
    fn rejects_unclosed_brace() {
        assert!(collect_for("/<oops").is_empty());
    }

    #[test]
    fn rejects_non_ident_chars() {
        assert!(collect_for("/<bad name>").is_empty());
        assert!(collect_for("/<name!>").is_empty());
    }

    #[test]
    fn empty_when_no_captures() {
        assert!(collect_for("/static/path").is_empty());
    }

    // --- Colon (`:name`) patterns -------------------------------------------

    fn collect_sinatra_for(pat: &str) -> Vec<String> {
        run_collector(collect_sinatra_path_captures, pat)
    }

    #[test]
    fn sinatra_extracts_bare_capture() {
        assert_eq!(collect_sinatra_for("/users/:name"), ["name"]);
    }

    #[test]
    fn sinatra_extracts_multiple_captures() {
        assert_eq!(collect_sinatra_for("/u/:uid/post/:pid"), ["uid", "pid"]);
    }

    #[test]
    fn sinatra_extracts_leading_capture() {
        assert_eq!(collect_sinatra_for(":root"), ["root"]);
    }

    #[test]
    fn sinatra_dedupes_repeated_names() {
        assert_eq!(collect_sinatra_for("/:a/:a"), ["a"]);
    }

    #[test]
    fn sinatra_ignores_double_colon() {
        assert!(collect_sinatra_for("/Foo::Bar").is_empty());
    }

    #[test]
    fn sinatra_ignores_lone_colon() {
        assert!(collect_sinatra_for("/users/:").is_empty());
    }

    #[test]
    fn sinatra_empty_when_no_captures() {
        assert!(collect_sinatra_for("/static/path").is_empty());
    }

    // --- Curly-brace (`{name}` / `{name:conv}`) patterns --------------------

    fn collect_fastapi_for(pat: &str) -> Vec<String> {
        run_collector(collect_fastapi_path_captures, pat)
    }

    #[test]
    fn fastapi_extracts_bare_capture() {
        assert_eq!(collect_fastapi_for("/items/{item_id}"), ["item_id"]);
    }

    #[test]
    fn fastapi_extracts_converter_capture() {
        assert_eq!(collect_fastapi_for("/items/{item_id:int}"), ["item_id"]);
    }

    #[test]
    fn fastapi_extracts_path_converter() {
        assert_eq!(collect_fastapi_for("/files/{file_path:path}"), ["file_path"]);
    }

    #[test]
    fn fastapi_extracts_multiple_captures() {
        assert_eq!(
            collect_fastapi_for("/u/{uid}/post/{pid:int}"),
            ["uid", "pid"]
        );
    }

    #[test]
    fn fastapi_dedupes_repeated_names() {
        assert_eq!(collect_fastapi_for("/{a}/{a}"), ["a"]);
    }

    #[test]
    fn fastapi_rejects_unclosed_brace() {
        assert!(collect_fastapi_for("/{oops").is_empty());
    }

    #[test]
    fn fastapi_rejects_non_ident_chars() {
        assert!(collect_fastapi_for("/{bad name}").is_empty());
        assert!(collect_fastapi_for("/{name!}").is_empty());
    }

    #[test]
    fn fastapi_empty_when_no_captures() {
        assert!(collect_fastapi_for("/static/path").is_empty());
    }
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
use super::anon_fn_name;
|
||||
use super::conditions::unwrap_parens;
|
||||
use crate::labels::{DataLabel, Kind, classify, lookup};
|
||||
use smallvec::SmallVec;
|
||||
use tree_sitter::Node;
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
|
|
@ -210,7 +211,7 @@ pub(crate) fn first_call_ident_with_span<'a>(
|
|||
.and_then(|f| root_receiver_text(f, lang, code));
|
||||
match (recv, func) {
|
||||
(Some(r), Some(f)) => Some(format!("{r}.{f}")),
|
||||
(_, Some(f)) => Some(f.to_string()),
|
||||
(_, Some(f)) => Some(f),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
|
@ -269,6 +270,11 @@ pub(crate) fn find_classifiable_inner_call<'a>(
|
|||
}
|
||||
match lookup(lang, c.kind()) {
|
||||
Kind::CallFn | Kind::CallMethod | Kind::CallMacro => {
|
||||
// For CallMethod we also remember the bare receiver
|
||||
// identifier so we can try a type-qualified rewrite
|
||||
// when the literal classify misses.
|
||||
let mut method_receiver: Option<String> = None;
|
||||
let mut method_name: Option<String> = None;
|
||||
let ident = match lookup(lang, c.kind()) {
|
||||
Kind::CallFn => c
|
||||
.child_by_field_name("function")
|
||||
|
|
@ -286,6 +292,8 @@ pub(crate) fn find_classifiable_inner_call<'a>(
|
|||
.or_else(|| c.child_by_field_name("receiver"))
|
||||
.or_else(|| c.child_by_field_name("scope"))
|
||||
.and_then(|f| root_receiver_text(f, lang, code));
|
||||
method_receiver = recv.clone();
|
||||
method_name = func.clone();
|
||||
match (recv, func) {
|
||||
(Some(r), Some(f)) => Some(format!("{r}.{f}")),
|
||||
(_, Some(f)) => Some(f),
|
||||
|
|
@ -302,6 +310,36 @@ pub(crate) fn find_classifiable_inner_call<'a>(
|
|||
{
|
||||
return Some((id.clone(), lbl, (c.start_byte(), c.end_byte())));
|
||||
}
|
||||
// Receiver-type rewrite fallback: when the literal
|
||||
// `recv.method` text didn't classify, AND we're inside
|
||||
// a chained call (parent `n` is itself a call), look
|
||||
// up `recv`'s locally-bound type and retry with the
|
||||
// type prefix. E.g. for
|
||||
// `sess.createNativeQuery(sql).getResultList()`, the
|
||||
// inner `sess.createNativeQuery` rewrites to
|
||||
// `HibernateSession.createNativeQuery` (rule fires).
|
||||
//
|
||||
// Gated on `n` being a Call-kind so the rewrite only
|
||||
// fires on chain-hop inner calls. When `n` is an
|
||||
// expression-statement / variable-declarator / etc.
|
||||
// the candidate `c` IS the outermost call of the
|
||||
// statement, and the SSA-time
|
||||
// `resolve_type_qualified_labels` path handles it
|
||||
// with multi-label semantics that single-label
|
||||
// `classify` here would erase.
|
||||
let parent_is_call = matches!(
|
||||
lookup(lang, n.kind()),
|
||||
Kind::CallFn | Kind::CallMethod | Kind::CallMacro
|
||||
);
|
||||
if parent_is_call
|
||||
&& let (Some(recv), Some(method)) = (method_receiver, method_name)
|
||||
&& let Some(prefix) = crate::cfg::local_receiver_type_prefix(c, &recv, lang)
|
||||
{
|
||||
let alt = format!("{prefix}.{method}");
|
||||
if let Some(lbl) = classify(lang, &alt, extra) {
|
||||
return Some((alt, lbl, (c.start_byte(), c.end_byte())));
|
||||
}
|
||||
}
|
||||
// Recurse into arguments of this call
|
||||
if let Some(found) = find_classifiable_inner_call(c, lang, code, extra) {
|
||||
return Some(found);
|
||||
|
|
@ -412,6 +450,16 @@ pub(crate) fn first_member_label(
|
|||
}
|
||||
// PHP/Python/Ruby subscript access: `$_GET['cmd']`, `os.environ['KEY']`, `params[:cmd]`
|
||||
// Try to classify the object (before the `[`) as a source.
|
||||
//
|
||||
// Source-only on the receiver: a subscript reads a value from the
|
||||
// receiver, so a Sink label found on the receiver text (e.g.
|
||||
// `response.headers['content-type']`, where `response.headers`
|
||||
// matches the JS HEADER_INJECTION sink rule) describes the
|
||||
// *target* of a hypothetical write, not this read. Promoting it
|
||||
// would fire phantom sinks at every `body =
|
||||
// response.headers["X"]`-shape line. Sinks/Sanitizers reachable
|
||||
// via callable positions (function-arg, method-receiver) still
|
||||
// flow through the outer recursive walk below.
|
||||
"subscript_expression" | "subscript" | "element_reference" => {
|
||||
if let Some(obj) = n
|
||||
.child_by_field_name("object")
|
||||
|
|
@ -419,15 +467,23 @@ pub(crate) fn first_member_label(
|
|||
.or_else(|| n.child(0))
|
||||
{
|
||||
if let Some(txt) = text_of(obj, code)
|
||||
&& let Some(lbl) = classify(lang, &txt, extra_labels)
|
||||
&& let Some(lbl @ DataLabel::Source(_)) = classify(lang, &txt, extra_labels)
|
||||
{
|
||||
return Some(lbl);
|
||||
}
|
||||
// Recurse into the object for nested member accesses
|
||||
if let Some(lbl) = first_member_label(obj, lang, code, extra_labels) {
|
||||
// Recurse into the object for nested member accesses, but
|
||||
// keep the same Source-only restriction as above by passing
|
||||
// through the dedicated source-only walker.
|
||||
if let Some(lbl @ DataLabel::Source(_)) =
|
||||
first_member_label(obj, lang, code, extra_labels)
|
||||
{
|
||||
return Some(lbl);
|
||||
}
|
||||
}
|
||||
// Suppress further descent into this subscript node, the outer
|
||||
// child-walk loop would otherwise enter the receiver via the
|
||||
// member_expression arm and reattach a value-extraction Sink.
|
||||
return None;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
|
@ -678,6 +734,7 @@ pub(crate) fn collect_idents_with_paths(
|
|||
"identifier"
|
||||
| "field_identifier"
|
||||
| "property_identifier"
|
||||
| "shorthand_property_identifier"
|
||||
| "shorthand_property_identifier_pattern" => {
|
||||
if let Some(txt) = text_of(n, code) {
|
||||
idents.push(txt);
|
||||
|
|
@ -697,16 +754,241 @@ pub(crate) fn collect_idents_with_paths(
|
|||
}
|
||||
}
|
||||
|
||||
/// Walk an array/tuple destructure pattern in source order and return
|
||||
/// each simple-identifier binding paired with its position index.
|
||||
///
|
||||
/// Recognises:
|
||||
/// * JS/TS `array_pattern` — `const [a, b] = ...`, `const [, b] = ...`,
|
||||
/// `const [a, ,] = ...`. Skip slots (commas with no binding between)
|
||||
/// advance the position counter without emitting a binding.
|
||||
/// * Rust `tuple_pattern` — `let (a, _, b) = ...`. `_pattern` (wildcard)
|
||||
/// advances the position counter without emitting a binding.
|
||||
/// * Python `pattern_list` / `tuple_pattern` — `a, b = ...` and
|
||||
/// `(a, b) = ...`. Python `_` is a normal identifier binding (not a
|
||||
/// wildcard), so every `identifier` child emits a (name, position)
|
||||
/// entry.
|
||||
/// * Ruby `left_assignment_list` — `a, b = ...`. Bare comma-list LHS
|
||||
/// produced by `assignment` whose RHS is an array literal, a call
|
||||
/// return, or another tuple-yielding expression. Ruby `_` is a normal
|
||||
/// identifier (matches Python convention; `_` may still be referenced
|
||||
/// later in scope). Splat (`*rest` parsed as `rest_assignment`) and
|
||||
/// parenthesised nested destructure (`destructured_left_assignment`)
|
||||
/// hit the bail branch and fall back to scalar union.
|
||||
///
|
||||
/// Returns an empty `SmallVec` when the pattern is not one of the above
|
||||
/// kinds OR contains complex sub-patterns (`assignment_pattern` for
|
||||
/// `[a = 1, b]`, `rest_pattern` for `[a, ...rest]`, Python
|
||||
/// `list_splat_pattern` for `a, *rest = ...`, Ruby `rest_assignment` for
|
||||
/// `a, *rest = ...`, nested `array_pattern`, `object_pattern`,
|
||||
/// `destructured_left_assignment`). Callers treat the empty return as
|
||||
/// "no position-aware rewrite available; fall back to scalar union".
|
||||
pub(crate) fn collect_array_pattern_bindings_indexed(
|
||||
pat: Node,
|
||||
code: &[u8],
|
||||
) -> SmallVec<[(String, usize); 4]> {
|
||||
let mut out: SmallVec<[(String, usize); 4]> = SmallVec::new();
|
||||
let kind = pat.kind();
|
||||
if !matches!(
|
||||
kind,
|
||||
"array_pattern" | "tuple_pattern" | "pattern_list" | "left_assignment_list"
|
||||
) {
|
||||
return out;
|
||||
}
|
||||
let mut cursor = pat.walk();
|
||||
let mut pos: usize = 0;
|
||||
for child in pat.children(&mut cursor) {
|
||||
match child.kind() {
|
||||
"[" | "]" | "(" | ")" => {}
|
||||
"," => {
|
||||
pos += 1;
|
||||
}
|
||||
"identifier" | "shorthand_property_identifier_pattern" => {
|
||||
if let Some(txt) = text_of(child, code) {
|
||||
out.push((txt, pos));
|
||||
}
|
||||
}
|
||||
// Rust wildcard `_` in tuple_pattern. Advances position counter
|
||||
// without binding; no emit. Tree-sitter-rust models the
|
||||
// wildcard as a leaf node whose `kind()` is literally "_".
|
||||
"_" => {}
|
||||
_ => {
|
||||
// Complex sub-pattern. Bail by clearing — caller treats
|
||||
// empty as "no position-aware rewrite", preserving the
|
||||
// pre-existing scalar-union behavior for these shapes.
|
||||
out.clear();
|
||||
return out;
|
||||
}
|
||||
}
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// Walk an array-literal-shape RHS node and return one slot per source-order
|
||||
/// element. Each slot is one of:
|
||||
/// * `RhsArraySlot::Ident(name)` — bare identifier element.
|
||||
/// * `RhsArraySlot::Literal` — syntactic literal (string, number, bool,
|
||||
/// null/nil).
|
||||
/// * `RhsArraySlot::Complex(uses)` — call / binary / subscript / member
|
||||
/// access / nested array literal / etc. `uses` carries the inner
|
||||
/// identifier names (member-access paths first, bare idents second)
|
||||
/// harvested from the slot's subtree via `collect_idents_with_paths`.
|
||||
///
|
||||
/// Recognised RHS kinds:
|
||||
/// * JS/TS / Ruby `array` — `[a, b]`
|
||||
/// * Python `list` — `[a, b]`
|
||||
/// * Python `tuple` — `(a, b)`
|
||||
/// * Python `expression_list` — bare comma form `a, b`
|
||||
/// * Rust `tuple_expression` — `(a, b)`
|
||||
///
|
||||
/// Bails (returns empty) when the RHS is not one of these kinds OR contains
|
||||
/// a slot whose shape would shift index alignment (spread, list splat).
|
||||
/// Callers treat empty as "no per-element rewrite available; fall back to
|
||||
/// scalar union".
|
||||
pub(crate) fn collect_rhs_array_literal_elements(
|
||||
rhs: Node,
|
||||
lang: &str,
|
||||
code: &[u8],
|
||||
extra_labels: Option<&[crate::labels::RuntimeLabelRule]>,
|
||||
) -> SmallVec<[crate::cfg::RhsArraySlot; 4]> {
|
||||
use crate::cfg::RhsArraySlot;
|
||||
use crate::labels::{Cap, DataLabel};
|
||||
|
||||
// Per-slot source classification: when a slot's own subtree carries a
|
||||
// Source-labeled member-expression / subscript, capture the Cap so the
|
||||
// SSA destructure rewrite emits Source for THIS slot specifically and
|
||||
// lets sibling Complex slots stay slot-scoped Assign. Falls back to
|
||||
// Cap::empty() when no per-slot source is recognised; the lowering
|
||||
// path then consults the outer-node Source flag for conservative
|
||||
// preservation of legacy behavior on shapes whose source pattern
|
||||
// doesn't text-classify (e.g. a subscript on a tainted local).
|
||||
let slot_source_cap = |slot: Node| -> Cap {
|
||||
match first_member_label(slot, lang, code, extra_labels) {
|
||||
Some(DataLabel::Source(c)) => c,
|
||||
_ => Cap::empty(),
|
||||
}
|
||||
};
|
||||
|
||||
let mut out: SmallVec<[RhsArraySlot; 4]> = SmallVec::new();
|
||||
let kind = rhs.kind();
|
||||
if !matches!(
|
||||
kind,
|
||||
"array" | "array_literal" | "list" | "tuple" | "tuple_expression" | "expression_list"
|
||||
) {
|
||||
return out;
|
||||
}
|
||||
let mut cursor = rhs.walk();
|
||||
for child in rhs.named_children(&mut cursor) {
|
||||
let ck = child.kind();
|
||||
match ck {
|
||||
"identifier"
|
||||
| "shorthand_property_identifier"
|
||||
| "shorthand_property_identifier_pattern"
|
||||
| "field_identifier"
|
||||
| "property_identifier" => match text_of(child, code) {
|
||||
Some(txt) => out.push(RhsArraySlot::Ident(txt)),
|
||||
None => {
|
||||
out.clear();
|
||||
return out;
|
||||
}
|
||||
},
|
||||
"variable_name" => match text_of(child, code) {
|
||||
Some(txt) => out.push(RhsArraySlot::Ident(txt.trim_start_matches('$').to_string())),
|
||||
None => {
|
||||
out.clear();
|
||||
return out;
|
||||
}
|
||||
},
|
||||
// Syntactic literal slots: no ident, no taint contribution.
|
||||
// Names follow tree-sitter's per-grammar literal kinds across
|
||||
// the supported languages.
|
||||
"string"
|
||||
| "string_literal"
|
||||
| "raw_string_literal"
|
||||
| "interpreted_string_literal"
|
||||
| "concatenated_string"
|
||||
| "integer"
|
||||
| "integer_literal"
|
||||
| "float"
|
||||
| "float_literal"
|
||||
| "number"
|
||||
| "numeric_literal"
|
||||
| "true"
|
||||
| "false"
|
||||
| "boolean_literal"
|
||||
| "boolean"
|
||||
| "null"
|
||||
| "null_literal"
|
||||
| "nil"
|
||||
| "none"
|
||||
| "None"
|
||||
| "undefined" => {
|
||||
out.push(RhsArraySlot::Literal);
|
||||
}
|
||||
// Spread / list-splat shift index alignment unpredictably
|
||||
// (`[...arr, b]` may expand to N elements at index 0). Bail
|
||||
// so callers fall back to scalar union.
|
||||
"spread_element" | "list_splat" | "list_splat_pattern" | "splat_argument"
|
||||
| "unary_splat" | "splat_expression" => {
|
||||
out.clear();
|
||||
return out;
|
||||
}
|
||||
// Interpolated strings carry inner identifier uses. Treat as
|
||||
// Complex so the slot picks up the contributions from
|
||||
// `${user.id}` etc.
|
||||
"template_string" | "string_interpolation" | "interpolation" | "encapsed_string" => {
|
||||
let mut idents = Vec::new();
|
||||
let mut paths = Vec::new();
|
||||
collect_idents_with_paths(child, code, &mut idents, &mut paths);
|
||||
let mut uses: SmallVec<[String; 4]> = SmallVec::new();
|
||||
for p in paths {
|
||||
uses.push(p);
|
||||
}
|
||||
for ident in idents {
|
||||
if !uses.iter().any(|u| u == &ident) {
|
||||
uses.push(ident);
|
||||
}
|
||||
}
|
||||
let source_cap = slot_source_cap(child);
|
||||
out.push(RhsArraySlot::Complex { uses, source_cap });
|
||||
}
|
||||
// Everything else (call, member access, binary, subscript,
|
||||
// unary, ternary, nested array literal, etc.) is a "complex"
|
||||
// slot. Harvest inner ident uses so the SSA lowering can paint
|
||||
// the binding with this slot's contributions only — not the
|
||||
// union of every ident on the RHS.
|
||||
_ => {
|
||||
let mut idents = Vec::new();
|
||||
let mut paths = Vec::new();
|
||||
collect_idents_with_paths(child, code, &mut idents, &mut paths);
|
||||
let mut uses: SmallVec<[String; 4]> = SmallVec::new();
|
||||
for p in paths {
|
||||
uses.push(p);
|
||||
}
|
||||
for ident in idents {
|
||||
if !uses.iter().any(|u| u == &ident) {
|
||||
uses.push(ident);
|
||||
}
|
||||
}
|
||||
let source_cap = slot_source_cap(child);
|
||||
out.push(RhsArraySlot::Complex { uses, source_cap });
|
||||
}
|
||||
}
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// Recursively collect every identifier that occurs inside `n`.
|
||||
///
|
||||
/// Recognises `identifier` (most languages), `variable_name` (PHP),
|
||||
/// `field_identifier` (Go), `property_identifier` (JS/TS), and
|
||||
/// `shorthand_property_identifier_pattern` (JS/TS destructuring).
|
||||
/// `shorthand_property_identifier` / `shorthand_property_identifier_pattern`
|
||||
/// (JS/TS object-literal shorthand uses and destructuring binding patterns).
|
||||
pub(crate) fn collect_idents(n: Node, code: &[u8], out: &mut Vec<String>) {
|
||||
match n.kind() {
|
||||
"identifier"
|
||||
| "field_identifier"
|
||||
| "property_identifier"
|
||||
| "shorthand_property_identifier"
|
||||
| "shorthand_property_identifier_pattern"
|
||||
// PHP `name`: leaf node carrying the bare identifier text for
|
||||
// function/method names and similar grammar slots. Without this
|
||||
|
|
|
|||
|
|
@ -337,7 +337,7 @@ fn collect_ruby<F: FnMut(String, String)>(root: Node<'_>, code: &[u8], push: &mu
|
|||
&& let Some(t) = text_of(c, code)
|
||||
{
|
||||
let leaf = t.rsplit("::").next().unwrap_or(&t).to_string();
|
||||
push(sub.clone(), leaf);
|
||||
push(sub, leaf);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,8 +1,140 @@
|
|||
use super::{
|
||||
ImportBinding, ImportBindings, PromisifyAlias, PromisifyAliases, member_expr_text, text_of,
|
||||
};
|
||||
use std::collections::HashMap;
|
||||
use tree_sitter::{Node, Tree};
|
||||
|
||||
/// File-local view of every JS/TS import binding: local-name → source-module
|
||||
/// specifier (verbatim from the `import` / `require` site, without `node:`
|
||||
/// stripping). Built once per CFG pass; consumed by the gated-label
|
||||
/// post-pass via [`crate::labels::ClassificationContext::local_imports`].
|
||||
///
|
||||
/// Records every binding regardless of aliasing (the legacy
|
||||
/// [`extract_import_bindings`] only preserves *renamed* bindings, which is
|
||||
/// not enough for Phase 05's `import { readFile } from 'fs/promises'`
|
||||
/// shape where `local_name == imported_name`).
|
||||
///
|
||||
/// Shares its top-level walk with [`crate::resolve::walk_js_top_level_imports`]
|
||||
/// so the import-clause / require-declarator parsing logic only lives in one
|
||||
/// place; this view simply discards the resolver verdict and side-effect-only
|
||||
/// markers.
|
||||
pub(super) fn extract_local_import_view(tree: &Tree, code: &[u8]) -> HashMap<String, String> {
|
||||
let mut out: HashMap<String, String> = HashMap::new();
|
||||
for raw in crate::resolve::walk_js_top_level_imports(tree, code) {
|
||||
if raw.local.is_empty() {
|
||||
continue;
|
||||
}
|
||||
out.insert(raw.local, raw.source_spec);
|
||||
}
|
||||
extend_with_promises_alias(tree, code, &mut out);
|
||||
out
|
||||
}
|
||||
|
||||
/// Recognise top-level `const fsp = fs.promises;` /
|
||||
/// `const fsp = require('fs').promises;` aliasing and add the new local
|
||||
/// name to the import view as `fs/promises` (or `node:fs/promises`,
|
||||
/// whichever the source binding spelt).
|
||||
///
|
||||
/// The Phase 05 `LabelGate::ImportedFromModule(&["fs/promises", ...])`
|
||||
/// only consults `local_imports[leading_identifier(callee)]`. Without
|
||||
/// this extension, `fsp.readFile(x)` evades the gate because `fsp`
|
||||
/// itself is not an import binding — only the underlying `fs`
|
||||
/// namespace is.
|
||||
fn extend_with_promises_alias(tree: &Tree, code: &[u8], out: &mut HashMap<String, String>) {
|
||||
let root = tree.root_node();
|
||||
let mut top_cursor = root.walk();
|
||||
for child in root.children(&mut top_cursor) {
|
||||
if !matches!(child.kind(), "lexical_declaration" | "variable_declaration") {
|
||||
continue;
|
||||
}
|
||||
let mut decl_cursor = child.walk();
|
||||
for decl in child.children(&mut decl_cursor) {
|
||||
if decl.kind() != "variable_declarator" {
|
||||
continue;
|
||||
}
|
||||
let (Some(name_node), Some(value_node)) = (
|
||||
decl.child_by_field_name("name"),
|
||||
decl.child_by_field_name("value"),
|
||||
) else {
|
||||
continue;
|
||||
};
|
||||
if name_node.kind() != "identifier" {
|
||||
continue;
|
||||
}
|
||||
let Some(local_name) = text_of(name_node, code) else {
|
||||
continue;
|
||||
};
|
||||
if value_node.kind() != "member_expression" {
|
||||
continue;
|
||||
}
|
||||
let property = value_node
|
||||
.child_by_field_name("property")
|
||||
.and_then(|p| text_of(p, code));
|
||||
if property.as_deref() != Some("promises") {
|
||||
continue;
|
||||
}
|
||||
let Some(obj) = value_node.child_by_field_name("object") else {
|
||||
continue;
|
||||
};
|
||||
let Some(source) = promises_alias_source(obj, code, out) else {
|
||||
continue;
|
||||
};
|
||||
// Don't override an existing import entry for the same name —
|
||||
// an explicit import of `fsp` from `fs/promises` already says
|
||||
// what we'd be inferring here.
|
||||
out.entry(local_name).or_insert(source);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Resolve the object side of a `<lhs> = <obj>.promises` member-expression
|
||||
/// to a source-module string when `<obj>` is a known `fs` binding.
|
||||
///
|
||||
/// Recognised shapes:
|
||||
/// - identifier `X` where `local_imports[X]` is `fs` or `node:fs`
|
||||
/// - `require('fs')` / `require("node:fs")` call expression
|
||||
fn promises_alias_source(
|
||||
obj: Node,
|
||||
code: &[u8],
|
||||
imports_so_far: &HashMap<String, String>,
|
||||
) -> Option<String> {
|
||||
match obj.kind() {
|
||||
"identifier" => {
|
||||
let id = text_of(obj, code)?;
|
||||
let module = imports_so_far.get(&id)?;
|
||||
map_fs_module_to_promises(module)
|
||||
}
|
||||
"call_expression" => {
|
||||
let func = obj.child_by_field_name("function")?;
|
||||
if text_of(func, code).as_deref() != Some("require") {
|
||||
return None;
|
||||
}
|
||||
let args = obj.child_by_field_name("arguments")?;
|
||||
let mut cursor = args.walk();
|
||||
for arg in args.children(&mut cursor) {
|
||||
if !matches!(arg.kind(), "string" | "template_string") {
|
||||
continue;
|
||||
}
|
||||
let raw = text_of(arg, code)?;
|
||||
let spec = raw.trim_matches(|c: char| c == '\'' || c == '"' || c == '`');
|
||||
return map_fs_module_to_promises(spec);
|
||||
}
|
||||
None
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Map an `fs` module specifier to its promise-API counterpart.
///
/// `fs` becomes `fs/promises` and `node:fs` becomes `node:fs/promises`;
/// the comparison ignores ASCII case. Any other specifier yields `None`.
fn map_fs_module_to_promises(module: &str) -> Option<String> {
    let promises_spec = if module.eq_ignore_ascii_case("fs") {
        "fs/promises"
    } else if module.eq_ignore_ascii_case("node:fs") {
        "node:fs/promises"
    } else {
        return None;
    };
    Some(promises_spec.to_string())
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Import binding extraction
|
||||
// -------------------------------------------------------------------------
|
||||
|
|
@ -360,6 +492,129 @@ fn extract_require_module(node: Node, code: &[u8]) -> Option<String> {
|
|||
None
|
||||
}
|
||||
|
||||
/// Per-file Rust scan: did the file `use` a join-style macro from `tokio` or
|
||||
/// `futures`? Returns the crate prefix to use when the file calls a bare
|
||||
/// `join!` / `try_join!` macro.
|
||||
///
|
||||
/// Rationale: tree-sitter records `tokio::join!(...)` with a fully qualified
|
||||
/// `macro` field text, but `use tokio::join; ... join!(a, b)` records the
|
||||
/// bare leaf. Without this lookup, the SSA-level promise-combinator
|
||||
/// recogniser (`crate::labels::is_promise_combinator`) misses the bare form
|
||||
/// and the macro's argument taint is dropped. Conservative: returns `None`
|
||||
/// when both `tokio::<name>` and `futures::<name>` are imported (ambiguous)
|
||||
/// or when neither is, leaving the bare `join` callee alone.
|
||||
pub(super) fn rust_bare_join_crate_prefix(
|
||||
root: Node,
|
||||
code: &[u8],
|
||||
leaf: &str,
|
||||
) -> Option<&'static str> {
|
||||
if !matches!(leaf, "join" | "try_join") {
|
||||
return None;
|
||||
}
|
||||
let mut cursor = root.walk();
|
||||
let mut tokio_seen = false;
|
||||
let mut futures_seen = false;
|
||||
for child in root.children(&mut cursor) {
|
||||
if child.kind() != "use_declaration" {
|
||||
continue;
|
||||
}
|
||||
if rust_use_decl_imports_leaf(child, code, "tokio", leaf) {
|
||||
tokio_seen = true;
|
||||
}
|
||||
if rust_use_decl_imports_leaf(child, code, "futures", leaf) {
|
||||
futures_seen = true;
|
||||
}
|
||||
}
|
||||
match (tokio_seen, futures_seen) {
|
||||
(true, false) => Some("tokio"),
|
||||
(false, true) => Some("futures"),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// True when `use_decl` brings `<crate_prefix>::<leaf>` into scope.
|
||||
///
|
||||
/// Recognises the common shapes:
|
||||
/// * `use tokio::join;` → leaf at the path tail
|
||||
/// * `use tokio::{join, select};` → leaf inside a use_list
|
||||
/// * `use tokio::join as my_join;` → aliased; we detect the
|
||||
/// original path even though the aliased name is unused (the macro is
|
||||
/// typically invoked under its alias, but if the alias and the bare form
|
||||
/// collide the rewrite is still safe).
|
||||
/// * `use tokio::*;` is NOT recognised — wildcard imports are too permissive
|
||||
/// for the bare-leaf rewrite to stay precise.
|
||||
fn rust_use_decl_imports_leaf(use_decl: Node, code: &[u8], crate_prefix: &str, leaf: &str) -> bool {
|
||||
let mut stack = vec![use_decl];
|
||||
while let Some(node) = stack.pop() {
|
||||
match node.kind() {
|
||||
// `use tokio::join;` — argument is a `scoped_identifier`.
|
||||
"scoped_identifier" => {
|
||||
if scoped_identifier_matches(node, code, crate_prefix, leaf) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
// `use tokio::{join, select};` — the `path` field is `tokio`,
|
||||
// and a `use_list` enumerates leaves.
|
||||
"scoped_use_list" => {
|
||||
let path_ok = node
|
||||
.child_by_field_name("path")
|
||||
.and_then(|p| text_of(p, code))
|
||||
.as_deref()
|
||||
== Some(crate_prefix);
|
||||
if path_ok && let Some(list) = node.child_by_field_name("list") {
|
||||
let mut lc = list.walk();
|
||||
for entry in list.named_children(&mut lc) {
|
||||
match entry.kind() {
|
||||
"identifier" if text_of(entry, code).as_deref() == Some(leaf) => {
|
||||
return true;
|
||||
}
|
||||
"use_as_clause"
|
||||
if entry
|
||||
.child_by_field_name("path")
|
||||
.and_then(|p| text_of(p, code))
|
||||
.as_deref()
|
||||
== Some(leaf) =>
|
||||
{
|
||||
return true;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// `use tokio::join as my_join;` — aliased clause sits directly
|
||||
// under the use_declaration; check the path side.
|
||||
"use_as_clause" => {
|
||||
if let Some(p) = node.child_by_field_name("path")
|
||||
&& p.kind() == "scoped_identifier"
|
||||
&& scoped_identifier_matches(p, code, crate_prefix, leaf)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
// Walk children for nested groups (`use a::{b::{c, d}}`).
|
||||
let mut c = node.walk();
|
||||
for ch in node.children(&mut c) {
|
||||
stack.push(ch);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
fn scoped_identifier_matches(node: Node, code: &[u8], crate_prefix: &str, leaf: &str) -> bool {
|
||||
let path_text = node
|
||||
.child_by_field_name("path")
|
||||
.and_then(|p| text_of(p, code));
|
||||
let leaf_text = node
|
||||
.child_by_field_name("name")
|
||||
.and_then(|n| text_of(n, code));
|
||||
matches!((path_text.as_deref(), leaf_text.as_deref()),
|
||||
(Some(p), Some(l)) if p == crate_prefix && l == leaf)
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// === PUBLIC ENTRY POINT =================================================
|
||||
// -------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -1,22 +1,45 @@
|
|||
use super::conditions::unwrap_parens;
|
||||
use super::helpers::{collect_array_pattern_bindings_indexed, collect_rhs_array_literal_elements};
|
||||
use super::{
|
||||
anon_fn_name, collect_idents, collect_idents_with_paths, find_constructor_type_child,
|
||||
first_call_ident, root_receiver_text, text_of,
|
||||
};
|
||||
use crate::labels::{Cap, Kind, lookup};
|
||||
use smallvec::SmallVec;
|
||||
use tree_sitter::Node;
|
||||
|
||||
/// Find the inner CallFn/CallMethod/CallMacro node within an AST node.
|
||||
/// For direct call nodes, returns the node itself. For wrappers, searches
|
||||
/// up to two levels of children.
|
||||
/// up to two levels of children, transparently descending through
|
||||
/// `await_expression` / `yield_expression` (`Kind::AwaitForward`) wrappers
|
||||
/// so `const x = await foo(y)` reaches the inner `call_expression` at
|
||||
/// effective depth 3 (`lexical_declaration > variable_declarator >
|
||||
/// await_expression > call_expression`).
|
||||
pub(super) fn find_call_node<'a>(n: Node<'a>, lang: &str) -> Option<Node<'a>> {
|
||||
match lookup(lang, n.kind()) {
|
||||
Kind::CallFn | Kind::CallMethod | Kind::CallMacro => Some(n),
|
||||
Kind::AwaitForward => {
|
||||
// Transparent wrapper: descend into the awaited expression.
|
||||
let mut cursor = n.walk();
|
||||
for c in n.children(&mut cursor) {
|
||||
if let Some(found) = find_call_node(c, lang) {
|
||||
return Some(found);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
_ => {
|
||||
let mut cursor = n.walk();
|
||||
for c in n.children(&mut cursor) {
|
||||
match lookup(lang, c.kind()) {
|
||||
Kind::CallFn | Kind::CallMethod | Kind::CallMacro => return Some(c),
|
||||
// Skip past await/yield wrappers without consuming a
|
||||
// recursion level — the wrapper itself is transparent.
|
||||
Kind::AwaitForward => {
|
||||
if let Some(found) = find_call_node(c, lang) {
|
||||
return Some(found);
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
|
@ -25,11 +48,14 @@ pub(super) fn find_call_node<'a>(n: Node<'a>, lang: &str) -> Option<Node<'a>> {
|
|||
for c in n.children(&mut cursor2) {
|
||||
let mut cursor3 = c.walk();
|
||||
for gc in c.children(&mut cursor3) {
|
||||
if matches!(
|
||||
lookup(lang, gc.kind()),
|
||||
Kind::CallFn | Kind::CallMethod | Kind::CallMacro
|
||||
) {
|
||||
return Some(gc);
|
||||
match lookup(lang, gc.kind()) {
|
||||
Kind::CallFn | Kind::CallMethod | Kind::CallMacro => return Some(gc),
|
||||
Kind::AwaitForward => {
|
||||
if let Some(found) = find_call_node(gc, lang) {
|
||||
return Some(found);
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -108,9 +134,43 @@ pub(super) fn extract_destination_field_pairs(
|
|||
raw
|
||||
}
|
||||
}),
|
||||
// Computed keys like `[someVar]` can't be statically
|
||||
// resolved, skip (conservative: not a destination field).
|
||||
"computed_property_name" => continue,
|
||||
// Computed keys: resolve only when the inner expression
|
||||
// is a pure string literal (`['url']`). Dynamic forms
|
||||
// (`[someVar]`, `[`url-${i}`]`, ``[`url`]`` with
|
||||
// interpolation) stay conservative-skip.
|
||||
"computed_property_name" => {
|
||||
let mut inner_cursor = key_node.walk();
|
||||
let inner = key_node.named_children(&mut inner_cursor).find(|c| {
|
||||
!matches!(c.kind(), "comment" | "block_comment" | "line_comment")
|
||||
});
|
||||
match inner.map(|n| (n.kind(), n)) {
|
||||
Some(("string" | "string_literal", n)) => text_of(n, code).map(|raw| {
|
||||
if raw.len() >= 2 {
|
||||
raw[1..raw.len() - 1].to_string()
|
||||
} else {
|
||||
raw
|
||||
}
|
||||
}),
|
||||
// Template strings only when no interpolation
|
||||
// (no `template_substitution` children).
|
||||
Some(("template_string", n))
|
||||
if {
|
||||
let mut tc = n.walk();
|
||||
!n.named_children(&mut tc)
|
||||
.any(|c| c.kind() == "template_substitution")
|
||||
} =>
|
||||
{
|
||||
text_of(n, code).map(|raw| {
|
||||
if raw.len() >= 2 {
|
||||
raw[1..raw.len() - 1].to_string()
|
||||
} else {
|
||||
raw
|
||||
}
|
||||
})
|
||||
}
|
||||
_ => continue,
|
||||
}
|
||||
}
|
||||
_ => text_of(key_node, code),
|
||||
};
|
||||
let Some(key) = key_text else {
|
||||
|
|
@ -144,6 +204,13 @@ pub(super) fn extract_destination_field_pairs(
|
|||
/// `requests.post(url, data=tainted, json=safe)` where `data` and `json` are
|
||||
/// `keyword_argument` siblings of the positional URL.
|
||||
///
|
||||
/// Also covers Ruby, where tree-sitter-ruby emits `pair` nodes (with
|
||||
/// `key`/`value` fields) directly under `argument_list` for the
|
||||
/// `Faraday.new(url: x)` / `Net::HTTP.start(host, port, proxy_addr: prx)`
|
||||
/// kwarg shape. The `key` is typically a `hash_key_symbol` whose text is the
|
||||
/// bare identifier (`url`); `simple_symbol` (`:url`) and string keys are
|
||||
/// normalised by stripping a leading `:` or wrapping quotes.
|
||||
///
|
||||
/// Returns the union of matching kwargs, preserving the kwarg name in the
|
||||
/// `field` slot so callers can still attribute findings per-field. Empty
|
||||
/// when no matching kwargs exist or the call has no `arguments` field.
|
||||
|
|
@ -162,22 +229,38 @@ pub(super) fn extract_destination_kwarg_pairs(
|
|||
let mut cursor = args_node.walk();
|
||||
for child in args_node.named_children(&mut cursor) {
|
||||
let kind = child.kind();
|
||||
if kind != "keyword_argument" && kind != "named_argument" {
|
||||
let (name_node, value_node) = if kind == "keyword_argument" || kind == "named_argument" {
|
||||
let named_count = child.named_child_count();
|
||||
(
|
||||
child
|
||||
.child_by_field_name("name")
|
||||
.or_else(|| child.named_child(0)),
|
||||
child
|
||||
.child_by_field_name("value")
|
||||
.or_else(|| child.named_child(named_count.saturating_sub(1) as u32)),
|
||||
)
|
||||
} else if kind == "pair" {
|
||||
// Ruby `pair` node sits directly under `argument_list` for
|
||||
// kwarg-style call args (`f(url: x)`). `key`/`value` fields
|
||||
// are populated; key text is `hash_key_symbol` ("url"),
|
||||
// `simple_symbol` (":url"), or a string literal.
|
||||
(
|
||||
child.child_by_field_name("key"),
|
||||
child.child_by_field_name("value"),
|
||||
)
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
let named_count = child.named_child_count();
|
||||
let name_node = child
|
||||
.child_by_field_name("name")
|
||||
.or_else(|| child.named_child(0));
|
||||
let value_node = child
|
||||
.child_by_field_name("value")
|
||||
.or_else(|| child.named_child(named_count.saturating_sub(1) as u32));
|
||||
};
|
||||
let (Some(nn), Some(vn)) = (name_node, value_node) else {
|
||||
continue;
|
||||
};
|
||||
let Some(name) = text_of(nn, code) else {
|
||||
let Some(name_raw) = text_of(nn, code) else {
|
||||
continue;
|
||||
};
|
||||
let name = name_raw
|
||||
.trim_start_matches(':')
|
||||
.trim_matches(['"', '\''])
|
||||
.to_string();
|
||||
if !fields.iter().any(|&f| f == name) {
|
||||
continue;
|
||||
}
|
||||
|
|
@ -387,11 +470,9 @@ pub(super) fn extract_const_macro_arg(
|
|||
// C/C++ identifier / PHP `name` node for define-style constants.
|
||||
// Scoped C++ identifiers (`Curl::OPT_POSTFIELDS`) and PHP namespaced
|
||||
// names also surface here so the dangerous_values match catches them.
|
||||
"identifier" | "name" | "qualified_name" | "scoped_identifier" => {
|
||||
text_of(arg, code).map(|s| s.to_string())
|
||||
}
|
||||
"identifier" | "name" | "qualified_name" | "scoped_identifier" => text_of(arg, code),
|
||||
// Ruby bare constant (`NOENT`) — leaf form.
|
||||
"constant" => text_of(arg, code).map(|s| s.to_string()),
|
||||
"constant" => text_of(arg, code),
|
||||
// Ruby scope-qualified constant (`Nokogiri::XML::ParseOptions::NOENT`).
|
||||
// Return only the rightmost `name` segment so the gate's
|
||||
// `dangerous_values` list can stay identifier-bare instead of
|
||||
|
|
@ -400,8 +481,7 @@ pub(super) fn extract_const_macro_arg(
|
|||
"scope_resolution" => arg
|
||||
.child_by_field_name("name")
|
||||
.and_then(|n| text_of(n, code))
|
||||
.map(|s| s.to_string())
|
||||
.or_else(|| text_of(arg, code).map(|s| s.to_string())),
|
||||
.or_else(|| text_of(arg, code)),
|
||||
// Integer literals at the activation arg position. PHP / C / C++
|
||||
// commonly use plain `0` to opt into the safe-default option set
|
||||
// (e.g. `simplexml_load_string($xml, "SimpleXMLElement", 0)`). The
|
||||
|
|
@ -409,7 +489,7 @@ pub(super) fn extract_const_macro_arg(
|
|||
// the literal text lets the comparison fail against `LIBXML_NOENT`
|
||||
// and suppresses the conservative-fire branch.
|
||||
"integer" | "integer_literal" | "number_literal" | "decimal_integer_literal" => {
|
||||
text_of(arg, code).map(|s| s.to_string())
|
||||
text_of(arg, code)
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
|
|
@ -443,7 +523,7 @@ pub(super) fn extract_const_keyword_arg(
|
|||
// distinguish literal-safe from dynamic.
|
||||
return match value_node.kind() {
|
||||
"true" | "false" | "none" | "integer" | "float" | "string" | "string_literal"
|
||||
| "identifier" => text_of(value_node, code).map(|s| s.to_string()),
|
||||
| "identifier" => text_of(value_node, code),
|
||||
_ => None,
|
||||
}
|
||||
.filter(|_| {
|
||||
|
|
@ -537,7 +617,7 @@ pub(super) fn extract_object_arg_property(
|
|||
let val_node = unwrap_parens(val_node);
|
||||
return match val_node.kind() {
|
||||
"true" | "false" | "null" | "undefined" | "number" | "string" | "string_literal" => {
|
||||
text_of(val_node, code).map(|s| s.to_string())
|
||||
text_of(val_node, code)
|
||||
}
|
||||
// JS booleans true/false are their own node kinds (above), but
|
||||
// some grammar versions wrap them as identifier literals; surface
|
||||
|
|
@ -811,7 +891,7 @@ pub(super) fn js_chain_outer_method_for_inner<'a>(
|
|||
if inner_matched {
|
||||
return function
|
||||
.child_by_field_name("property")
|
||||
.and_then(|p| text_of(p, code).map(|s| s.to_string()));
|
||||
.and_then(|p| text_of(p, code));
|
||||
}
|
||||
}
|
||||
// Recurse: outer chain may have more depth (`a.b().c().d()` ,
|
||||
|
|
@ -1518,6 +1598,18 @@ pub(super) fn extract_arg_uses(call_node: Node, code: &[u8]) -> Vec<Vec<String>>
|
|||
return result;
|
||||
}
|
||||
|
||||
// Rust `tokio::join!` / `futures::join!` (and their `try_*` variants).
|
||||
// tree-sitter-rust models macro args as a `token_tree` rather than an
|
||||
// `arguments` field, so a vanilla extraction returns nothing. Walk the
|
||||
// top-level token_tree splitting on `,` separators, lifting identifiers
|
||||
// out of each chunk so the existing PromiseCombinator transfer can union
|
||||
// arg-side taint into the resulting tuple value.
|
||||
if call_node.kind() == "macro_invocation"
|
||||
&& let Some(arg_uses) = extract_rust_macro_join_arg_uses(call_node, code)
|
||||
{
|
||||
return arg_uses;
|
||||
}
|
||||
|
||||
let Some(args_node) = call_node.child_by_field_name("arguments") else {
|
||||
return Vec::new();
|
||||
};
|
||||
|
|
@ -1551,6 +1643,82 @@ pub(super) fn extract_arg_uses(call_node: Node, code: &[u8]) -> Vec<Vec<String>>
|
|||
result
|
||||
}
|
||||
|
||||
/// `tokio::join!` / `futures::join!` (and their `try_*` variants) bundle
|
||||
/// concurrently-awaited futures into a tuple result. tree-sitter-rust
|
||||
/// represents the args as a `token_tree` whose children alternate between
|
||||
/// expressions and `,` separators (`token_tree` itself nests on every
|
||||
/// parenthesised group, e.g. the `(x)` inside `fetch(x)`). Walk the
|
||||
/// top-level token_tree, segment by `,` leaves, and lift identifiers out
|
||||
/// of each chunk so the SSA Call op carries one positional arg per future.
|
||||
///
|
||||
/// Returns `Some(arg_uses)` only when the macro is one of the recognised
|
||||
/// join macros, so `extract_arg_uses` can fall through to its normal
|
||||
/// `arguments`-field path for every other macro shape (`format!`,
|
||||
/// `println!`, custom DSL macros) where arg lifting could disturb existing
|
||||
/// label / SSA flow.
|
||||
pub(super) fn extract_rust_macro_join_arg_uses(
|
||||
call_node: Node,
|
||||
code: &[u8],
|
||||
) -> Option<Vec<Vec<String>>> {
|
||||
let macro_node = call_node.child_by_field_name("macro")?;
|
||||
let macro_text = text_of(macro_node, code)?;
|
||||
if !is_rust_join_macro(¯o_text) {
|
||||
return None;
|
||||
}
|
||||
let tt = match call_node.child_by_field_name("token_tree") {
|
||||
Some(t) => t,
|
||||
None => {
|
||||
let mut cursor = call_node.walk();
|
||||
call_node
|
||||
.children(&mut cursor)
|
||||
.find(|c| c.kind() == "token_tree")?
|
||||
}
|
||||
};
|
||||
let mut chunks: Vec<Vec<Node>> = vec![Vec::new()];
|
||||
let mut cursor = tt.walk();
|
||||
for child in tt.children(&mut cursor) {
|
||||
// Skip the surrounding `(`/`)` punctuation.
|
||||
if !child.is_named() {
|
||||
let kind = child.kind();
|
||||
if kind == "," {
|
||||
chunks.push(Vec::new());
|
||||
continue;
|
||||
}
|
||||
if kind == "(" || kind == ")" {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
chunks.last_mut().unwrap().push(child);
|
||||
}
|
||||
let mut result = Vec::new();
|
||||
for chunk in chunks {
|
||||
if chunk.is_empty() {
|
||||
continue;
|
||||
}
|
||||
let mut idents = Vec::new();
|
||||
let mut paths = Vec::new();
|
||||
for n in chunk {
|
||||
collect_idents_with_paths(n, code, &mut idents, &mut paths);
|
||||
}
|
||||
let mut combined = paths;
|
||||
combined.extend(idents);
|
||||
result.push(combined);
|
||||
}
|
||||
Some(result)
|
||||
}
|
||||
|
||||
/// True when `macro_text` names one of the recognised join macros: `join`
/// or `try_join`, either bare or qualified with `tokio::` / `futures::`.
///
/// The match is exact and case-sensitive; the trailing `!` of an
/// invocation is not part of the expected text.
fn is_rust_join_macro(macro_text: &str) -> bool {
    const JOIN_MACROS: [&str; 6] = [
        "tokio::join",
        "tokio::try_join",
        "futures::join",
        "futures::try_join",
        "join",
        "try_join",
    ];
    JOIN_MACROS.contains(&macro_text)
}
|
||||
|
||||
/// Extract keyword / named argument bindings for a call node.
|
||||
///
|
||||
/// Returns `Vec<(name, uses)>` where `uses` are the identifier references
|
||||
|
|
@ -1891,11 +2059,31 @@ pub(super) fn call_ident_of<'a>(n: Node<'a>, lang: &str, code: &'a [u8]) -> Opti
|
|||
.child_by_field_name("method")
|
||||
.or_else(|| n.child_by_field_name("name"))
|
||||
.and_then(|f| text_of(f, code));
|
||||
let recv = n
|
||||
let recv_node = n
|
||||
.child_by_field_name("object")
|
||||
.or_else(|| n.child_by_field_name("receiver"))
|
||||
.or_else(|| n.child_by_field_name("scope"))
|
||||
.and_then(|f| root_receiver_text(f, lang, code));
|
||||
.or_else(|| n.child_by_field_name("scope"));
|
||||
let recv = recv_node.and_then(|f| root_receiver_text(f, lang, code));
|
||||
// Preserve Java `.getClass()` segment in the chained callee text
|
||||
// so downstream predicates (e.g.
|
||||
// [`crate::ssa::type_facts::is_safe_string_producing_callee`])
|
||||
// can recognise idiomatic `obj.getClass().<accessor>()` chains.
|
||||
// Without this, `root_receiver_text` collapses the chain to
|
||||
// `obj.<accessor>`, indistinguishable from a user-defined method.
|
||||
let recv = if lang == "java"
|
||||
&& let Some(rn) = recv_node
|
||||
&& lookup(lang, rn.kind()) == Kind::CallMethod
|
||||
&& let Some(inner_method) = rn
|
||||
.child_by_field_name("method")
|
||||
.or_else(|| rn.child_by_field_name("name"))
|
||||
.and_then(|f| text_of(f, code))
|
||||
&& inner_method == "getClass"
|
||||
&& let Some(r) = recv
|
||||
{
|
||||
Some(format!("{r}.getClass"))
|
||||
} else {
|
||||
recv
|
||||
};
|
||||
match (recv, func) {
|
||||
(Some(r), Some(f)) => Some(format!("{r}.{f}")),
|
||||
(_, Some(f)) => Some(f),
|
||||
|
|
@ -1984,7 +2172,7 @@ pub(super) fn extract_arg_string_literals(call_node: Node, code: &[u8]) -> Vec<O
|
|||
| "integer"
|
||||
| "number"
|
||||
| "number_literal"
|
||||
| "decimal_literal" => text_of(target, code).map(|s| s.to_string()),
|
||||
| "decimal_literal" => text_of(target, code),
|
||||
_ => None,
|
||||
};
|
||||
result.push(literal);
|
||||
|
|
@ -2003,7 +2191,7 @@ pub(super) fn strip_literal_quotes(raw: &str, node: Node, code: &[u8]) -> Option
|
|||
let mut cursor = node.walk();
|
||||
for child in node.named_children(&mut cursor) {
|
||||
if child.kind() == "string_content" {
|
||||
return text_of(child, code).map(|s| s.to_string());
|
||||
return text_of(child, code);
|
||||
}
|
||||
}
|
||||
if raw.len() >= 2 {
|
||||
|
|
@ -2044,20 +2232,43 @@ pub(super) fn extract_arg_callees(call_node: Node, lang: &str, code: &[u8]) -> V
|
|||
result
|
||||
}
|
||||
|
||||
/// Return `(defines, uses)` for the AST fragment `ast`.
|
||||
/// Returns (defines, uses, extra_defines) where extra_defines captures additional
|
||||
/// bindings from destructuring patterns beyond the primary define.
|
||||
/// Return `(defines, uses, extra_defines, array_pattern_indices,
|
||||
/// rhs_array_elements)` for the AST fragment `ast`.
|
||||
///
|
||||
/// `extra_defines` captures additional bindings from destructuring patterns
|
||||
/// beyond the primary define. `array_pattern_indices`, when non-empty, gives
|
||||
/// the source-order position of each binding in `iter::once(defines).chain(
|
||||
/// extra_defines)` for `array_pattern` / `tuple_pattern` LHS shapes. Empty
|
||||
/// for non-array destructures and for non-skip array patterns where callers
|
||||
/// can derive sequential 0..N indices implicitly.
|
||||
///
|
||||
/// `rhs_array_elements`, when non-empty, gives source-order RHS slots for
|
||||
/// destructure-from-array-literal shapes (`const [a, b] = [safe, tainted]`,
|
||||
/// `let (a, b) = (safe, tainted)`, Python `a, b = safe, tainted`). Each slot
|
||||
/// is `Some(ident)` for a bare-ident element or `None` for a syntactic
|
||||
/// literal. Empty when RHS isn't an array-literal shape or any element is
|
||||
/// too complex; callers fall back to scalar union in that case.
|
||||
#[allow(clippy::type_complexity)]
|
||||
pub(super) fn def_use(
|
||||
ast: Node,
|
||||
lang: &str,
|
||||
code: &[u8],
|
||||
) -> (Option<String>, Vec<String>, Vec<String>) {
|
||||
extra_labels: Option<&[crate::labels::RuntimeLabelRule]>,
|
||||
) -> (
|
||||
Option<String>,
|
||||
Vec<String>,
|
||||
Vec<String>,
|
||||
SmallVec<[usize; 4]>,
|
||||
SmallVec<[crate::cfg::RhsArraySlot; 4]>,
|
||||
) {
|
||||
match lookup(lang, ast.kind()) {
|
||||
// Declaration wrappers (let, var, short_var_declaration, etc.)
|
||||
Kind::CallWrapper => {
|
||||
let mut defs = None;
|
||||
let mut extra_defs = Vec::new();
|
||||
let mut uses = Vec::new();
|
||||
let mut pattern_indices: SmallVec<[usize; 4]> = SmallVec::new();
|
||||
let mut rhs_array_elements: SmallVec<[crate::cfg::RhsArraySlot; 4]> = SmallVec::new();
|
||||
|
||||
// Try direct field names first (Rust `let_declaration`, Go `short_var_declaration`)
|
||||
let def_node = ast
|
||||
|
|
@ -2076,17 +2287,30 @@ pub(super) fn def_use(
|
|||
|
||||
if def_node.is_some() || val_node.is_some() {
|
||||
if let Some(pat) = def_node {
|
||||
let mut idents = Vec::new();
|
||||
let mut paths = Vec::new();
|
||||
collect_idents_with_paths(pat, code, &mut idents, &mut paths);
|
||||
let first = paths.pop().or_else(|| idents.first().cloned());
|
||||
// Remaining idents are extra defines (for destructuring)
|
||||
for ident in &idents {
|
||||
if first.as_ref() != Some(ident) {
|
||||
extra_defs.push(ident.clone());
|
||||
let bindings = collect_array_pattern_bindings_indexed(pat, code);
|
||||
if !bindings.is_empty() {
|
||||
let mut iter = bindings.into_iter();
|
||||
if let Some((first_name, first_idx)) = iter.next() {
|
||||
defs = Some(first_name);
|
||||
pattern_indices.push(first_idx);
|
||||
}
|
||||
for (name, idx) in iter {
|
||||
extra_defs.push(name);
|
||||
pattern_indices.push(idx);
|
||||
}
|
||||
} else {
|
||||
let mut idents = Vec::new();
|
||||
let mut paths = Vec::new();
|
||||
collect_idents_with_paths(pat, code, &mut idents, &mut paths);
|
||||
let first = paths.pop().or_else(|| idents.first().cloned());
|
||||
// Remaining idents are extra defines (for destructuring)
|
||||
for ident in &idents {
|
||||
if first.as_ref() != Some(ident) {
|
||||
extra_defs.push(ident.clone());
|
||||
}
|
||||
}
|
||||
defs = first;
|
||||
}
|
||||
defs = first;
|
||||
}
|
||||
if let Some(val) = val_node {
|
||||
let mut idents = Vec::new();
|
||||
|
|
@ -2099,6 +2323,14 @@ pub(super) fn def_use(
|
|||
// the format-string bytes, not as a separate AST
|
||||
// argument node, so collect_idents misses it.
|
||||
uses.extend(extract_rust_format_macro_named_idents_in(val, code));
|
||||
// When the LHS is a recognised destructure pattern AND
|
||||
// the RHS is a bare array-literal shape (no call), record
|
||||
// per-element idents so the SSA destructure rewrite can
|
||||
// map each binding to its specific RHS slot.
|
||||
if !pattern_indices.is_empty() {
|
||||
rhs_array_elements =
|
||||
collect_rhs_array_literal_elements(val, lang, code, extra_labels);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Try nested declarator pattern (JS/TS `lexical_declaration` → `variable_declarator`,
|
||||
|
|
@ -2135,16 +2367,29 @@ pub(super) fn def_use(
|
|||
if let Some(name_node) = child_name
|
||||
&& defs.is_none()
|
||||
{
|
||||
let mut idents = Vec::new();
|
||||
let mut paths = Vec::new();
|
||||
collect_idents_with_paths(name_node, code, &mut idents, &mut paths);
|
||||
let first = paths.pop().or_else(|| idents.first().cloned());
|
||||
for ident in &idents {
|
||||
if first.as_ref() != Some(ident) {
|
||||
extra_defs.push(ident.clone());
|
||||
let bindings = collect_array_pattern_bindings_indexed(name_node, code);
|
||||
if !bindings.is_empty() {
|
||||
let mut iter = bindings.into_iter();
|
||||
if let Some((first_name, first_idx)) = iter.next() {
|
||||
defs = Some(first_name);
|
||||
pattern_indices.push(first_idx);
|
||||
}
|
||||
for (name, idx) in iter {
|
||||
extra_defs.push(name);
|
||||
pattern_indices.push(idx);
|
||||
}
|
||||
} else {
|
||||
let mut idents = Vec::new();
|
||||
let mut paths = Vec::new();
|
||||
collect_idents_with_paths(name_node, code, &mut idents, &mut paths);
|
||||
let first = paths.pop().or_else(|| idents.first().cloned());
|
||||
for ident in &idents {
|
||||
if first.as_ref() != Some(ident) {
|
||||
extra_defs.push(ident.clone());
|
||||
}
|
||||
}
|
||||
defs = first;
|
||||
}
|
||||
defs = first;
|
||||
}
|
||||
if let Some(val_node) = child_value {
|
||||
let mut idents = Vec::new();
|
||||
|
|
@ -2153,6 +2398,14 @@ pub(super) fn def_use(
|
|||
uses.extend(paths);
|
||||
uses.extend(idents);
|
||||
uses.extend(extract_rust_format_macro_named_idents_in(val_node, code));
|
||||
if !pattern_indices.is_empty() && rhs_array_elements.is_empty() {
|
||||
rhs_array_elements = collect_rhs_array_literal_elements(
|
||||
val_node,
|
||||
lang,
|
||||
code,
|
||||
extra_labels,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -2168,19 +2421,42 @@ pub(super) fn def_use(
|
|||
uses.extend(extract_rust_format_macro_named_idents_in(ast, code));
|
||||
}
|
||||
}
|
||||
(defs, uses, extra_defs)
|
||||
(defs, uses, extra_defs, pattern_indices, rhs_array_elements)
|
||||
}
|
||||
|
||||
// Plain assignment `x = y`
|
||||
// Plain assignment `x = y` or destructuring assignment such as
|
||||
// Python `a, b = await asyncio.gather(...)` whose LHS surfaces as
|
||||
// a `pattern_list` / `tuple_pattern`. When the LHS is a
|
||||
// destructure pattern that the indexed helper recognises, the
|
||||
// primary binding lands in `defs`, the rest land in `extra_defs`,
|
||||
// and `pattern_indices` carries source-order positions so the
|
||||
// SSA lowering's destructure-promise rewrite can paint each
|
||||
// binding from the matching combinator argument.
|
||||
Kind::Assignment => {
|
||||
let mut defs = None;
|
||||
let mut extra_defs = Vec::new();
|
||||
let mut pattern_indices: SmallVec<[usize; 4]> = SmallVec::new();
|
||||
let mut rhs_array_elements: SmallVec<[crate::cfg::RhsArraySlot; 4]> = SmallVec::new();
|
||||
let mut uses = Vec::new();
|
||||
if let Some(lhs) = ast.child_by_field_name("left") {
|
||||
let mut idents = Vec::new();
|
||||
let mut paths = Vec::new();
|
||||
collect_idents_with_paths(lhs, code, &mut idents, &mut paths);
|
||||
// Prefer dotted path (member expression) over last ident
|
||||
defs = paths.pop().or_else(|| idents.pop());
|
||||
let bindings = collect_array_pattern_bindings_indexed(lhs, code);
|
||||
if !bindings.is_empty() {
|
||||
let mut iter = bindings.into_iter();
|
||||
if let Some((first_name, first_idx)) = iter.next() {
|
||||
defs = Some(first_name);
|
||||
pattern_indices.push(first_idx);
|
||||
}
|
||||
for (name, idx) in iter {
|
||||
extra_defs.push(name);
|
||||
pattern_indices.push(idx);
|
||||
}
|
||||
} else {
|
||||
let mut idents = Vec::new();
|
||||
let mut paths = Vec::new();
|
||||
collect_idents_with_paths(lhs, code, &mut idents, &mut paths);
|
||||
// Prefer dotted path (member expression) over last ident
|
||||
defs = paths.pop().or_else(|| idents.pop());
|
||||
}
|
||||
}
|
||||
if let Some(rhs) = ast.child_by_field_name("right") {
|
||||
let mut idents = Vec::new();
|
||||
|
|
@ -2189,8 +2465,16 @@ pub(super) fn def_use(
|
|||
uses.extend(paths);
|
||||
uses.extend(idents);
|
||||
uses.extend(extract_rust_format_macro_named_idents_in(rhs, code));
|
||||
// When the LHS is a recognised destructure pattern AND the
|
||||
// RHS is a bare array-literal shape, record per-element
|
||||
// idents so the SSA destructure rewrite can map each
|
||||
// binding to its specific RHS slot.
|
||||
if !pattern_indices.is_empty() {
|
||||
rhs_array_elements =
|
||||
collect_rhs_array_literal_elements(rhs, lang, code, extra_labels);
|
||||
}
|
||||
}
|
||||
(defs, uses, vec![])
|
||||
(defs, uses, extra_defs, pattern_indices, rhs_array_elements)
|
||||
}
|
||||
|
||||
// if‑let / while‑let, the `let_condition` binds a variable from
|
||||
|
|
@ -2215,7 +2499,7 @@ pub(super) fn def_use(
|
|||
if let Some(val) = c.child_by_field_name("value") {
|
||||
collect_idents(val, code, &mut uses);
|
||||
}
|
||||
return (defs, uses, vec![]);
|
||||
return (defs, uses, vec![], SmallVec::new(), SmallVec::new());
|
||||
}
|
||||
|
||||
let mut idents = Vec::new();
|
||||
|
|
@ -2223,7 +2507,7 @@ pub(super) fn def_use(
|
|||
collect_idents_with_paths(ast, code, &mut idents, &mut paths);
|
||||
let mut uses = paths;
|
||||
uses.extend(idents);
|
||||
(None, uses, vec![])
|
||||
(None, uses, vec![], SmallVec::new(), SmallVec::new())
|
||||
}
|
||||
|
||||
// for-in / for-of / Python `for x in iter:` ─────────────────────────
|
||||
|
|
@ -2267,7 +2551,7 @@ pub(super) fn def_use(
|
|||
collect_idents_with_paths(ast, code, &mut idents, &mut paths);
|
||||
let mut uses = paths;
|
||||
uses.extend(idents);
|
||||
return (None, uses, vec![]);
|
||||
return (None, uses, vec![], SmallVec::new(), SmallVec::new());
|
||||
}
|
||||
|
||||
let mut defs: Option<String> = None;
|
||||
|
|
@ -2293,7 +2577,7 @@ pub(super) fn def_use(
|
|||
uses.extend(paths);
|
||||
uses.extend(idents);
|
||||
}
|
||||
(defs, uses, extra_defs)
|
||||
(defs, uses, extra_defs, SmallVec::new(), SmallVec::new())
|
||||
}
|
||||
|
||||
// everything else – no definition, but may read vars
|
||||
|
|
@ -2303,7 +2587,7 @@ pub(super) fn def_use(
|
|||
collect_idents_with_paths(ast, code, &mut idents, &mut paths);
|
||||
let mut uses = paths;
|
||||
uses.extend(idents);
|
||||
(None, uses, vec![])
|
||||
(None, uses, vec![], SmallVec::new(), SmallVec::new())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
1236
src/cfg/mod.rs
1236
src/cfg/mod.rs
File diff suppressed because it is too large
Load diff
882
src/cfg/safe_fields.rs
Normal file
882
src/cfg/safe_fields.rs
Normal file
|
|
@ -0,0 +1,882 @@
|
|||
//! Per-file extraction of class fields whose `.get(...)` lookups are
|
||||
//! provably safe.
|
||||
//!
|
||||
//! Recognises Java `final` fields whose initializer is `Map.of(K1, V1,
|
||||
//! K2, V2, ...)` with all string-literal arguments. At a downstream
|
||||
//! `<FIELD>.get(taintedKey)` call the result is bounded to the literal
|
||||
//! value set, so the SSA taint engine can suppress propagation from the
|
||||
//! key to the result. Without this pre-pass the engine sees `<FIELD>`
|
||||
//! as a free identifier with no SSA value, fails to resolve the
|
||||
//! container, and falls back to default arg-to-result propagation.
|
||||
//!
|
||||
//! Strictly additive: unrecognised initializer shapes (factory chains,
|
||||
//! `Map.ofEntries`, builders) produce no entry and the engine keeps
|
||||
//! its prior behaviour.
|
||||
|
||||
use std::cell::RefCell;
|
||||
use std::collections::HashMap;
|
||||
|
||||
use tree_sitter::Node;
|
||||
|
||||
use super::helpers::text_of;
|
||||
|
||||
thread_local! {
    /// Per-thread slot holding the safe-lookup field map of the file being
    /// analysed. [`with_safe_lookup_fields`] publishes a view here for the
    /// duration of a closure; [`safe_lookup_field_values`] reads it. The
    /// SSA taint engine's container Load fallback consults this view when
    /// the receiver is a free identifier (no SSA value to resolve against).
    static SAFE_LOOKUP_FIELDS_TLS: RefCell<Option<HashMap<String, Vec<String>>>> =
        const { RefCell::new(None) };
}

/// Run `f` with `fields` published as the per-thread safe-lookup view.
///
/// Passing `None` publishes an empty view, which suppresses the gate for
/// callers that lack a file context: every lookup made inside `f` misses.
///
/// Whatever view was published before the call is put back when `f`
/// returns or unwinds, regardless of whether `fields` was `Some` or `None`,
/// so nested calls compose and an inner `None` scope does not wipe the
/// view of an enclosing scope.
pub fn with_safe_lookup_fields<R>(
    fields: Option<&HashMap<String, Vec<String>>>,
    f: impl FnOnce() -> R,
) -> R {
    // Drop guard: restoring from `Drop` keeps the slot consistent even when
    // `f` panics.
    struct Restore(Option<HashMap<String, Vec<String>>>);

    impl Drop for Restore {
        fn drop(&mut self) {
            SAFE_LOOKUP_FIELDS_TLS.with(|cell| *cell.borrow_mut() = self.0.take());
        }
    }

    let previous = SAFE_LOOKUP_FIELDS_TLS.with(|cell| {
        cell.borrow_mut()
            .replace(fields.cloned().unwrap_or_default())
    });
    let _restore = Restore(previous);
    f()
}

/// Look up the literal value set for a safe field.
///
/// Returns `None` when no view is published, when `name` is not a known
/// safe-lookup field, or when its recorded value list is empty. Otherwise
/// returns a clone of the value list.
pub fn safe_lookup_field_values(name: &str) -> Option<Vec<String>> {
    SAFE_LOOKUP_FIELDS_TLS.with(|cell| {
        cell.borrow()
            .as_ref()
            .and_then(|map| map.get(name))
            .filter(|values| !values.is_empty())
            .cloned()
    })
}
|
||||
|
||||
/// Per-file safe-lookup field map: field name → finite set of literal
|
||||
/// values that `<field>.get(...)` may return. Empty for non-Java files.
|
||||
pub fn collect_safe_lookup_fields(
|
||||
root: Node<'_>,
|
||||
lang: &str,
|
||||
code: &[u8],
|
||||
) -> HashMap<String, Vec<String>> {
|
||||
let mut out: HashMap<String, Vec<String>> = HashMap::new();
|
||||
if lang == "java" {
|
||||
collect_java(root, code, &mut out);
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// Per-file file-level constant scalar map: name → literal value text.
|
||||
///
|
||||
/// Recognises declarations that bind a name to a primitive scalar literal at
|
||||
/// file or class scope, where the per-function SSA const-prop has no view of
|
||||
/// the binding (the name is a free identifier from inside any function body):
|
||||
///
|
||||
/// - Java: `static final TYPE NAME = LITERAL;` fields (any class depth).
|
||||
/// - Python: `NAME = LITERAL` at module scope.
|
||||
/// - Go: `const NAME = LITERAL` and `const NAME TYPE = LITERAL` at package scope.
|
||||
/// - Rust: `const NAME: TYPE = LITERAL;` and `static NAME: TYPE = LITERAL;` at
|
||||
/// crate or module scope.
|
||||
///
|
||||
/// Used by `cfg_analysis::guards` to suppress `cfg-unguarded-sink` when a
|
||||
/// sink's argument is one of these bindings. `LITERAL` covers strings (no
|
||||
/// interpolation), integers in any supported base, floats, booleans, null /
|
||||
/// nil / None, and unary negation / not over those.
|
||||
///
|
||||
/// Empty for unsupported languages. Scalar means single-value, not
|
||||
/// container; the `Map.of(...)` form is captured by
|
||||
/// [`collect_safe_lookup_fields`].
|
||||
pub fn collect_class_constant_scalars(
|
||||
root: Node<'_>,
|
||||
lang: &str,
|
||||
code: &[u8],
|
||||
) -> HashMap<String, String> {
|
||||
let mut out: HashMap<String, String> = HashMap::new();
|
||||
match lang {
|
||||
"java" => collect_java_constant_scalars(root, code, &mut out),
|
||||
"python" => collect_python_constant_scalars(root, code, &mut out),
|
||||
"go" => collect_go_constant_scalars(root, code, &mut out),
|
||||
"rust" => collect_rust_constant_scalars(root, code, &mut out),
|
||||
_ => {}
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
fn collect_java_constant_scalars(root: Node<'_>, code: &[u8], out: &mut HashMap<String, String>) {
|
||||
walk(root, &mut |node| {
|
||||
if node.kind() != "field_declaration" {
|
||||
return;
|
||||
}
|
||||
if !has_static_modifier(node) || !has_final_modifier(node) {
|
||||
return;
|
||||
}
|
||||
// A single `field_declaration` may carry multiple
|
||||
// `variable_declarator` children (`static final int A = 1, B = 2;`).
|
||||
// Iterate every declarator field; tree-sitter exposes them under
|
||||
// the `declarator` field name as repeated entries.
|
||||
let mut cursor = node.walk();
|
||||
for child in node.children_by_field_name("declarator", &mut cursor) {
|
||||
let Some(name_node) = child.child_by_field_name("name") else {
|
||||
continue;
|
||||
};
|
||||
let Some(field_name) = text_of(name_node, code) else {
|
||||
continue;
|
||||
};
|
||||
let Some(value_node) = child.child_by_field_name("value") else {
|
||||
continue;
|
||||
};
|
||||
let Some(literal) = scalar_literal_text(value_node, code) else {
|
||||
continue;
|
||||
};
|
||||
out.insert(field_name, literal);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/// Python: module-level `NAME = LITERAL` assignments. Only top-level
|
||||
/// expression statements are considered; assignments inside function bodies,
|
||||
/// class bodies, or other blocks are out of scope (a per-function SSA pass
|
||||
/// already sees those).
|
||||
fn collect_python_constant_scalars(root: Node<'_>, code: &[u8], out: &mut HashMap<String, String>) {
|
||||
if root.kind() != "module" {
|
||||
return;
|
||||
}
|
||||
let mut cursor = root.walk();
|
||||
for child in root.named_children(&mut cursor) {
|
||||
if child.kind() != "expression_statement" {
|
||||
continue;
|
||||
}
|
||||
let Some(assign) = child.named_child(0) else {
|
||||
continue;
|
||||
};
|
||||
if assign.kind() != "assignment" {
|
||||
continue;
|
||||
}
|
||||
let Some(target) = assign.child_by_field_name("left") else {
|
||||
continue;
|
||||
};
|
||||
if target.kind() != "identifier" {
|
||||
continue;
|
||||
}
|
||||
let Some(name) = text_of(target, code) else {
|
||||
continue;
|
||||
};
|
||||
let Some(value) = assign.child_by_field_name("right") else {
|
||||
continue;
|
||||
};
|
||||
let Some(literal) = python_scalar_literal_text(value, code) else {
|
||||
continue;
|
||||
};
|
||||
out.insert(name, literal);
|
||||
}
|
||||
}
|
||||
|
||||
/// Go: package-level `const NAME = LITERAL` and `const NAME TYPE = LITERAL`,
|
||||
/// including the grouped `const (...)` form. Iterates direct
|
||||
/// `const_declaration` children of the source file, then per-`const_spec`
|
||||
/// reads the `name` list and `value` expression list, binding by position.
|
||||
fn collect_go_constant_scalars(root: Node<'_>, code: &[u8], out: &mut HashMap<String, String>) {
|
||||
if root.kind() != "source_file" {
|
||||
return;
|
||||
}
|
||||
let mut cursor = root.walk();
|
||||
for child in root.named_children(&mut cursor) {
|
||||
if child.kind() != "const_declaration" {
|
||||
continue;
|
||||
}
|
||||
let mut spec_cursor = child.walk();
|
||||
for spec in child.named_children(&mut spec_cursor) {
|
||||
if spec.kind() != "const_spec" {
|
||||
continue;
|
||||
}
|
||||
collect_go_const_spec(spec, code, out);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn collect_go_const_spec(spec: Node<'_>, code: &[u8], out: &mut HashMap<String, String>) {
|
||||
// tree-sitter-go `const_spec`:
|
||||
// name: <identifier> (repeated) — one or more identifiers
|
||||
// value: <expression_list> — list of value expressions
|
||||
// For a multi-target spec `const A, B = 1, 2`, identifiers and values pair
|
||||
// up positionally. The simpler single-target form parses the same way
|
||||
// with one entry per side.
|
||||
let mut name_cursor = spec.walk();
|
||||
let names: Vec<Node<'_>> = spec
|
||||
.children_by_field_name("name", &mut name_cursor)
|
||||
.collect();
|
||||
if names.is_empty() {
|
||||
return;
|
||||
}
|
||||
let Some(value_list) = spec.child_by_field_name("value") else {
|
||||
return;
|
||||
};
|
||||
let mut value_cursor = value_list.walk();
|
||||
let values: Vec<Node<'_>> = value_list.named_children(&mut value_cursor).collect();
|
||||
if values.len() != names.len() {
|
||||
return;
|
||||
}
|
||||
for (name_node, value_node) in names.iter().zip(values.iter()) {
|
||||
if name_node.kind() != "identifier" {
|
||||
continue;
|
||||
}
|
||||
let Some(name) = text_of(*name_node, code) else {
|
||||
continue;
|
||||
};
|
||||
let Some(literal) = go_scalar_literal_text(*value_node, code) else {
|
||||
continue;
|
||||
};
|
||||
out.insert(name, literal);
|
||||
}
|
||||
}
|
||||
|
||||
/// Rust: module-level `const NAME: TYPE = LITERAL;` and `static NAME: TYPE =
|
||||
/// LITERAL;`. Only direct children of `source_file` participate so a `const`
|
||||
/// defined inside a function body does not bleed across scopes.
|
||||
fn collect_rust_constant_scalars(root: Node<'_>, code: &[u8], out: &mut HashMap<String, String>) {
|
||||
if root.kind() != "source_file" {
|
||||
return;
|
||||
}
|
||||
let mut cursor = root.walk();
|
||||
for child in root.named_children(&mut cursor) {
|
||||
if !matches!(child.kind(), "const_item" | "static_item") {
|
||||
continue;
|
||||
}
|
||||
let Some(name_node) = child.child_by_field_name("name") else {
|
||||
continue;
|
||||
};
|
||||
let Some(name) = text_of(name_node, code) else {
|
||||
continue;
|
||||
};
|
||||
let Some(value_node) = child.child_by_field_name("value") else {
|
||||
continue;
|
||||
};
|
||||
let Some(literal) = rust_scalar_literal_text(value_node, code) else {
|
||||
continue;
|
||||
};
|
||||
out.insert(name, literal);
|
||||
}
|
||||
}
|
||||
|
||||
/// `true` when `field_declaration` carries a `static` modifier.
|
||||
fn has_static_modifier(field_decl: Node<'_>) -> bool {
|
||||
let mut cursor = field_decl.walk();
|
||||
for child in field_decl.children(&mut cursor) {
|
||||
if child.kind() != "modifiers" {
|
||||
continue;
|
||||
}
|
||||
let mut sub = child.walk();
|
||||
for mod_child in child.children(&mut sub) {
|
||||
if mod_child.kind() == "static" {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
/// Return the source text when `value` is a primitive scalar literal node.
|
||||
/// Covers the Java grammar's literal kinds. Returns `None` for compound
|
||||
/// expressions, identifier references, method invocations, and other
|
||||
/// non-literal initializers.
|
||||
fn scalar_literal_text(value: Node<'_>, code: &[u8]) -> Option<String> {
|
||||
match value.kind() {
|
||||
"string_literal"
|
||||
| "decimal_integer_literal"
|
||||
| "hex_integer_literal"
|
||||
| "octal_integer_literal"
|
||||
| "binary_integer_literal"
|
||||
| "decimal_floating_point_literal"
|
||||
| "hex_floating_point_literal"
|
||||
| "character_literal"
|
||||
| "true"
|
||||
| "false"
|
||||
| "null_literal" => text_of(value, code),
|
||||
// Unary `-1`, `+0`, `!true` over a literal child still resolve to a
|
||||
// compile-time constant; recurse into the operand.
|
||||
"unary_expression" => {
|
||||
let operand = value.child_by_field_name("operand")?;
|
||||
scalar_literal_text(operand, code)
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Python scalar literal classifier. Rejects f-strings with interpolation
|
||||
/// (`f"x{var}"` parses as `string` with an `interpolation` child); returns
|
||||
/// the source text otherwise.
|
||||
fn python_scalar_literal_text(value: Node<'_>, code: &[u8]) -> Option<String> {
|
||||
match value.kind() {
|
||||
"string" => {
|
||||
if python_string_has_interpolation(value) {
|
||||
None
|
||||
} else {
|
||||
text_of(value, code)
|
||||
}
|
||||
}
|
||||
"integer" | "float" | "true" | "false" | "none" => text_of(value, code),
|
||||
"unary_operator" => {
|
||||
let operand = value.child_by_field_name("argument")?;
|
||||
python_scalar_literal_text(operand, code)
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn python_string_has_interpolation(node: Node<'_>) -> bool {
|
||||
let mut cursor = node.walk();
|
||||
for child in node.children(&mut cursor) {
|
||||
if child.kind() == "interpolation" {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
/// Go scalar literal classifier. `interpreted_string_literal` and
|
||||
/// `raw_string_literal` cover both `"x"` and `` `x` `` forms.
|
||||
fn go_scalar_literal_text(value: Node<'_>, code: &[u8]) -> Option<String> {
|
||||
match value.kind() {
|
||||
"interpreted_string_literal"
|
||||
| "raw_string_literal"
|
||||
| "int_literal"
|
||||
| "float_literal"
|
||||
| "imaginary_literal"
|
||||
| "rune_literal"
|
||||
| "true"
|
||||
| "false"
|
||||
| "nil" => text_of(value, code),
|
||||
"unary_expression" => {
|
||||
let operand = value.child_by_field_name("operand")?;
|
||||
go_scalar_literal_text(operand, code)
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Rust scalar literal classifier. Accepts `string_literal`, `raw_string_literal`
|
||||
/// (both unwrappable to a single text run), integer / float / boolean / char.
|
||||
fn rust_scalar_literal_text(value: Node<'_>, code: &[u8]) -> Option<String> {
|
||||
match value.kind() {
|
||||
"string_literal" | "raw_string_literal" | "integer_literal" | "float_literal"
|
||||
| "char_literal" | "boolean_literal" => text_of(value, code),
|
||||
// `true` / `false` are leaf identifier-ish nodes in some grammars but
|
||||
// tree-sitter-rust gives them the `boolean_literal` kind; defensively
|
||||
// accept the leaf form too in case the grammar is upgraded.
|
||||
"true" | "false" => text_of(value, code),
|
||||
"unary_expression" => {
|
||||
let mut cursor = value.walk();
|
||||
value
|
||||
.named_children(&mut cursor)
|
||||
.find_map(|c| rust_scalar_literal_text(c, code))
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn collect_java(root: Node<'_>, code: &[u8], out: &mut HashMap<String, Vec<String>>) {
|
||||
walk(root, &mut |node| {
|
||||
if node.kind() != "field_declaration" {
|
||||
return;
|
||||
}
|
||||
if !has_final_modifier(node) {
|
||||
return;
|
||||
}
|
||||
let Some(decl) = node.child_by_field_name("declarator") else {
|
||||
return;
|
||||
};
|
||||
let Some(name_node) = decl.child_by_field_name("name") else {
|
||||
return;
|
||||
};
|
||||
let Some(field_name) = text_of(name_node, code) else {
|
||||
return;
|
||||
};
|
||||
let Some(value_node) = decl.child_by_field_name("value") else {
|
||||
return;
|
||||
};
|
||||
let Some(values) = extract_map_of_literal_values(value_node, code) else {
|
||||
return;
|
||||
};
|
||||
out.insert(field_name, values);
|
||||
});
|
||||
}
|
||||
|
||||
/// `true` when `field_declaration` carries a `final` modifier (static or
|
||||
/// instance — both block reassignment after construction).
|
||||
fn has_final_modifier(field_decl: Node<'_>) -> bool {
|
||||
let mut cursor = field_decl.walk();
|
||||
for child in field_decl.children(&mut cursor) {
|
||||
if child.kind() != "modifiers" {
|
||||
continue;
|
||||
}
|
||||
let mut sub = child.walk();
|
||||
for mod_child in child.children(&mut sub) {
|
||||
if mod_child.kind() == "final" {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
/// If `value_node` is `Map.of(LIT, LIT, LIT, LIT, ...)` with at least one
|
||||
/// key/value pair and every argument a `string_literal`, return the
|
||||
/// value-position literals (positions 1, 3, 5, ...).
|
||||
fn extract_map_of_literal_values(value_node: Node<'_>, code: &[u8]) -> Option<Vec<String>> {
|
||||
if value_node.kind() != "method_invocation" {
|
||||
return None;
|
||||
}
|
||||
let object_node = value_node.child_by_field_name("object")?;
|
||||
let method_node = value_node.child_by_field_name("name")?;
|
||||
let method_text = text_of(method_node, code)?;
|
||||
if method_text != "of" {
|
||||
return None;
|
||||
}
|
||||
if !receiver_is_map_class(object_node, code) {
|
||||
return None;
|
||||
}
|
||||
let args_node = value_node.child_by_field_name("arguments")?;
|
||||
let mut cursor = args_node.walk();
|
||||
let args: Vec<Node<'_>> = args_node.named_children(&mut cursor).collect();
|
||||
if args.is_empty() || !args.len().is_multiple_of(2) {
|
||||
return None;
|
||||
}
|
||||
let mut values = Vec::with_capacity(args.len() / 2);
|
||||
for (i, arg) in args.iter().enumerate() {
|
||||
if arg.kind() != "string_literal" {
|
||||
return None;
|
||||
}
|
||||
if i % 2 == 1 {
|
||||
let literal = string_literal_value(*arg, code)?;
|
||||
values.push(literal);
|
||||
}
|
||||
}
|
||||
Some(values)
|
||||
}
|
||||
|
||||
/// `true` when `node` resolves to the `Map` class — either the bare
|
||||
/// identifier `Map` or a `field_access` whose tail segment is `Map`
|
||||
/// (covers `java.util.Map.of(...)`).
|
||||
fn receiver_is_map_class(node: Node<'_>, code: &[u8]) -> bool {
|
||||
match node.kind() {
|
||||
"identifier" => text_of(node, code).as_deref() == Some("Map"),
|
||||
"field_access" => {
|
||||
// tail segment lives on the `field` field
|
||||
let Some(field) = node.child_by_field_name("field") else {
|
||||
return false;
|
||||
};
|
||||
text_of(field, code).as_deref() == Some("Map")
|
||||
}
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Extract the inner content of a Java `string_literal` node. The
|
||||
/// grammar wraps the value in `string_fragment` children between quote
|
||||
/// tokens; concatenate every `string_fragment` so escaped quotes inside
|
||||
/// the literal are not lost. Returns `None` for literals containing
|
||||
/// interpolation / escape-sequence children that do not classify as a
|
||||
/// pure string fragment.
|
||||
fn string_literal_value(node: Node<'_>, code: &[u8]) -> Option<String> {
|
||||
let mut cursor = node.walk();
|
||||
let mut out = String::new();
|
||||
let mut saw_fragment = false;
|
||||
for child in node.named_children(&mut cursor) {
|
||||
match child.kind() {
|
||||
"string_fragment" => {
|
||||
saw_fragment = true;
|
||||
out.push_str(&text_of(child, code)?);
|
||||
}
|
||||
"escape_sequence" => {
|
||||
// A real escape sequence keeps the literal pure-string but
|
||||
// we cannot trivially decode it; return None to be
|
||||
// conservative on header-injection safety.
|
||||
return None;
|
||||
}
|
||||
_ => return None,
|
||||
}
|
||||
}
|
||||
if saw_fragment {
|
||||
Some(out)
|
||||
} else {
|
||||
// Empty literal `""` — has no `string_fragment` children but is
|
||||
// a valid empty string.
|
||||
let raw = text_of(node, code)?;
|
||||
if raw == "\"\"" {
|
||||
Some(String::new())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn walk<'a, F: FnMut(Node<'a>)>(node: Node<'a>, f: &mut F) {
|
||||
f(node);
|
||||
let mut cursor = node.walk();
|
||||
for child in node.named_children(&mut cursor) {
|
||||
walk(child, f);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use tree_sitter::Parser;
|
||||
|
||||
fn collect(src: &str) -> HashMap<String, Vec<String>> {
|
||||
let mut p = Parser::new();
|
||||
p.set_language(&tree_sitter_java::LANGUAGE.into()).unwrap();
|
||||
let tree = p.parse(src, None).unwrap();
|
||||
collect_safe_lookup_fields(tree.root_node(), "java", src.as_bytes())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn static_final_map_of_two_pairs() {
|
||||
let src = r#"
|
||||
class C {
|
||||
private static final java.util.Map<String, String> T = Map.of(
|
||||
"a", "x", "b", "y"
|
||||
);
|
||||
}
|
||||
"#;
|
||||
let out = collect(src);
|
||||
assert_eq!(out.get("T"), Some(&vec!["x".to_string(), "y".to_string()]));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn instance_final_map_of_one_pair() {
|
||||
let src = r#"
|
||||
class C {
|
||||
private final java.util.Map<String, String> T = Map.of("a", "x");
|
||||
}
|
||||
"#;
|
||||
let out = collect(src);
|
||||
assert_eq!(out.get("T"), Some(&vec!["x".to_string()]));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn rejects_non_final_field() {
|
||||
let src = r#"
|
||||
class C {
|
||||
private static java.util.Map<String, String> T = Map.of("a", "x");
|
||||
}
|
||||
"#;
|
||||
let out = collect(src);
|
||||
assert!(out.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn rejects_non_literal_value() {
|
||||
let src = r#"
|
||||
class C {
|
||||
private static final String SAFE = "x";
|
||||
private static final java.util.Map<String, String> T = Map.of("a", SAFE);
|
||||
}
|
||||
"#;
|
||||
let out = collect(src);
|
||||
// SAFE is an identifier, not a string_literal — even though const-
|
||||
// foldable, the syntactic check rejects to stay simple.
|
||||
assert!(!out.contains_key("T"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn rejects_odd_arg_count() {
|
||||
// Compiler would reject this too, but the extractor must not panic.
|
||||
let src = r#"
|
||||
class C {
|
||||
private static final java.util.Map<String, String> T = Map.of("a", "x", "b");
|
||||
}
|
||||
"#;
|
||||
let out = collect(src);
|
||||
assert!(out.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn rejects_empty_map_of() {
|
||||
let src = r#"
|
||||
class C {
|
||||
private static final java.util.Map<String, String> T = Map.of();
|
||||
}
|
||||
"#;
|
||||
let out = collect(src);
|
||||
assert!(out.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fully_qualified_map_of() {
|
||||
let src = r#"
|
||||
class C {
|
||||
private static final java.util.Map<String, String> T = java.util.Map.of(
|
||||
"a", "x", "b", "y"
|
||||
);
|
||||
}
|
||||
"#;
|
||||
let out = collect(src);
|
||||
assert_eq!(out.get("T"), Some(&vec!["x".to_string(), "y".to_string()]));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn rejects_escape_sequence_value() {
|
||||
let src = r#"
|
||||
class C {
|
||||
private static final java.util.Map<String, String> T = Map.of(
|
||||
"a", "with\nnewline"
|
||||
);
|
||||
}
|
||||
"#;
|
||||
let out = collect(src);
|
||||
// `\n` would smuggle a CRLF-style metachar through the static
|
||||
// gate; conservative reject keeps header-injection suppression
|
||||
// honest.
|
||||
assert!(!out.contains_key("T"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ignores_non_java_lang() {
|
||||
let src = "const x = 1;";
|
||||
let mut p = Parser::new();
|
||||
p.set_language(&tree_sitter_javascript::LANGUAGE.into())
|
||||
.unwrap();
|
||||
let tree = p.parse(src, None).unwrap();
|
||||
let out = collect_safe_lookup_fields(tree.root_node(), "javascript", src.as_bytes());
|
||||
assert!(out.is_empty());
|
||||
}
|
||||
|
||||
fn collect_consts(src: &str) -> HashMap<String, String> {
|
||||
let mut p = Parser::new();
|
||||
p.set_language(&tree_sitter_java::LANGUAGE.into()).unwrap();
|
||||
let tree = p.parse(src, None).unwrap();
|
||||
collect_class_constant_scalars(tree.root_node(), "java", src.as_bytes())
|
||||
}
|
||||
|
||||
/// `static final` fields initialised with string, integer, and boolean
/// literals are all captured, keyed by field name, with the literal's source
/// text as the value (string literals keep their quotes).
#[test]
fn class_constants_capture_string_int_bool() {
    let java_src = r#"
        class C {
            private static final String DRIVER = "com.mysql.cj.jdbc.Driver";
            public static final int LIMIT = 100;
            static final boolean DEBUG = false;
        }
    "#;
    let consts = collect_consts(java_src);
    assert_eq!(
        consts.get("DRIVER").map(String::as_str),
        Some("\"com.mysql.cj.jdbc.Driver\"")
    );
    assert_eq!(consts.get("LIMIT").map(String::as_str), Some("100"));
    assert_eq!(consts.get("DEBUG").map(String::as_str), Some("false"));
}
|
||||
|
||||
/// Every declarator in a comma-separated `static final` declaration is
/// captured individually.
#[test]
fn class_constants_capture_multi_declarator() {
    let java_src = r#"
        class C {
            private static final int A = 1, B = 2, C2 = 3;
        }
    "#;
    let consts = collect_consts(java_src);
    for (name, literal) in [("A", "1"), ("B", "2"), ("C2", "3")] {
        assert_eq!(consts.get(name).map(String::as_str), Some(literal));
    }
}
|
||||
|
||||
/// A negated literal initialiser is still captured as a constant.
#[test]
fn class_constants_capture_unary_negation() {
    let java_src = r#"
        class C {
            private static final int OFFSET = -1;
        }
    "#;
    let consts = collect_consts(java_src);
    // The recorded text is that of the operand (`1`), not of the enclosing
    // unary expression (`-1`).
    assert_eq!(consts.get("OFFSET").map(String::as_str), Some("1"));
}
|
||||
|
||||
/// A `final` field that is not `static` is not recorded as a class constant.
#[test]
fn class_constants_reject_non_static() {
    let java_src = r#"
        class C {
            private final String NAME = "x";
        }
    "#;
    let consts = collect_consts(java_src);
    assert!(consts.get("NAME").is_none());
}
|
||||
|
||||
/// A `static` field that is not `final` is not recorded as a class constant.
#[test]
fn class_constants_reject_non_final() {
    let java_src = r#"
        class C {
            private static String NAME = "x";
        }
    "#;
    let consts = collect_consts(java_src);
    assert!(consts.get("NAME").is_none());
}
|
||||
|
||||
/// Initialisers that are a method call or a bare identifier are not literals,
/// so neither field is recorded.
#[test]
fn class_constants_reject_identifier_value() {
    let java_src = r#"
        class C {
            private static final String OTHER = computed();
            private static final String COPY = OTHER;
        }
    "#;
    let consts = collect_consts(java_src);
    assert!(consts.get("OTHER").is_none());
    assert!(consts.get("COPY").is_none());
}
|
||||
|
||||
/// Constants declared in a nested class are captured too.
#[test]
fn class_constants_capture_inside_inner_class() {
    let java_src = r#"
        class Outer {
            static class Inner {
                private static final String DRIVER = "x";
            }
        }
    "#;
    let consts = collect_consts(java_src);
    assert_eq!(consts.get("DRIVER").map(String::as_str), Some("\"x\""));
}
|
||||
|
||||
/// The constant collector returns an empty map for a JavaScript tree.
#[test]
fn class_constants_ignore_non_supported_lang() {
    let js_src = "const x = 1;";
    let mut parser = Parser::new();
    parser
        .set_language(&tree_sitter_javascript::LANGUAGE.into())
        .unwrap();
    let tree = parser.parse(js_src, None).unwrap();
    let root = tree.root_node();
    let consts = collect_class_constant_scalars(root, "javascript", js_src.as_bytes());
    assert!(consts.is_empty());
}
|
||||
|
||||
fn collect_consts_lang(src: &str, lang: &str) -> HashMap<String, String> {
|
||||
let mut p = Parser::new();
|
||||
match lang {
|
||||
"python" => p
|
||||
.set_language(&tree_sitter_python::LANGUAGE.into())
|
||||
.unwrap(),
|
||||
"go" => p.set_language(&tree_sitter_go::LANGUAGE.into()).unwrap(),
|
||||
"rust" => p.set_language(&tree_sitter_rust::LANGUAGE.into()).unwrap(),
|
||||
_ => unreachable!("unsupported lang in test helper: {lang}"),
|
||||
};
|
||||
let tree = p.parse(src, None).unwrap();
|
||||
collect_class_constant_scalars(tree.root_node(), lang, src.as_bytes())
|
||||
}
|
||||
|
||||
/// Top-level Python assignments of string, integer, boolean, and `None`
/// literals are captured with their source text as the value.
#[test]
fn python_module_constants_capture_scalars() {
    let py_src = concat!(
        "DRIVER = \"sqlite3\"\n",
        "LIMIT = 100\n",
        "DEBUG = False\n",
        "NAME = None\n",
    );
    let consts = collect_consts_lang(py_src, "python");
    let expected = [
        ("DRIVER", "\"sqlite3\""),
        ("LIMIT", "100"),
        ("DEBUG", "False"),
        ("NAME", "None"),
    ];
    for (name, literal) in expected {
        assert_eq!(consts.get(name).map(String::as_str), Some(literal));
    }
}
|
||||
|
||||
/// A negated Python literal is captured, recorded as its operand text.
#[test]
fn python_module_constants_capture_unary_negation() {
    // The recogniser descends into the operand and returns that node's text,
    // hence `OFFSET = -1` is stored as `"1"`. The downstream suppression
    // consumer only needs the name binding, not the decoded numeric value.
    let py_src = "OFFSET = -1\n";
    let consts = collect_consts_lang(py_src, "python");
    assert_eq!(consts.get("OFFSET").map(String::as_str), Some("1"));
}
|
||||
|
||||
/// An f-string containing an interpolated expression is not a constant.
#[test]
fn python_module_constants_reject_fstring_with_interpolation() {
    let py_src = concat!("import os\n", "VAR = f\"hi {os.getcwd()}\"\n");
    let consts = collect_consts_lang(py_src, "python");
    assert!(consts.get("VAR").is_none());
}
|
||||
|
||||
/// A top-level assignment whose value is a call is not a constant.
#[test]
fn python_module_constants_reject_call_value() {
    let py_src = concat!("from os import getcwd\n", "PATH = getcwd()\n");
    let consts = collect_consts_lang(py_src, "python");
    assert!(consts.get("PATH").is_none());
}
|
||||
|
||||
/// Assignments nested in a function body are left out of the module map.
#[test]
fn python_module_constants_skip_inside_function_body() {
    // Function-local assignments belong to per-function SSA; only top-level
    // module assignments should end up in the map.
    let py_src = concat!("def f():\n", " INNER = \"x\"\n", " return INNER\n");
    let consts = collect_consts_lang(py_src, "python");
    assert!(consts.get("INNER").is_none());
}
|
||||
|
||||
/// Package-level Go `const` declarations with string, integer, and boolean
/// literals are captured with their source text as the value.
#[test]
fn go_package_constants_capture_scalars() {
    let go_src = concat!(
        "package main\n",
        "const DRIVER = \"postgres\"\n",
        "const LIMIT = 100\n",
        "const FLAG = true\n",
    );
    let consts = collect_consts_lang(go_src, "go");
    let expected = [("DRIVER", "\"postgres\""), ("LIMIT", "100"), ("FLAG", "true")];
    for (name, literal) in expected {
        assert_eq!(consts.get(name).map(String::as_str), Some(literal));
    }
}
|
||||
|
||||
/// Entries of a parenthesised Go `const ( ... )` group are captured,
/// including one that carries an explicit type.
#[test]
fn go_package_constants_capture_grouped_const_block() {
    let go_src = concat!(
        "package main\n",
        "const (\n",
        " A = \"x\"\n",
        " B int = 42\n",
        " C = false\n",
        ")\n",
    );
    let consts = collect_consts_lang(go_src, "go");
    let expected = [("A", "\"x\""), ("B", "42"), ("C", "false")];
    for (name, literal) in expected {
        assert_eq!(consts.get(name).map(String::as_str), Some(literal));
    }
}
|
||||
|
||||
/// A Go `const` initialised from a call expression is not recorded.
#[test]
fn go_package_constants_reject_non_literal() {
    let go_src = concat!("package main\n", "const OTHER = foo()\n");
    let consts = collect_consts_lang(go_src, "go");
    assert!(consts.get("OTHER").is_none());
}
|
||||
|
||||
/// A `const` declared inside a Go function body is not a package constant.
#[test]
fn go_package_constants_skip_inside_function_body() {
    // Function-local `const` is handled by per-function SSA instead.
    let go_src = concat!(
        "package main\n",
        "func f() string { const INNER = \"x\"; return INNER }\n",
    );
    let consts = collect_consts_lang(go_src, "go");
    assert!(consts.get("INNER").is_none());
}
|
||||
|
||||
/// Module-level Rust `const` and `static` items with literal initialisers
/// are captured with their source text as the value.
#[test]
fn rust_module_consts_capture_scalars() {
    let rs_src = concat!(
        "const DRIVER: &str = \"sqlite\";\n",
        "const LIMIT: i32 = 100;\n",
        "static FLAG: bool = false;\n",
    );
    let consts = collect_consts_lang(rs_src, "rust");
    let expected = [("DRIVER", "\"sqlite\""), ("LIMIT", "100"), ("FLAG", "false")];
    for (name, literal) in expected {
        assert_eq!(consts.get(name).map(String::as_str), Some(literal));
    }
}
|
||||
|
||||
/// A Rust `const` initialised from a function call is not recorded.
#[test]
fn rust_module_consts_reject_non_literal() {
    let rs_src = "const VAL: i32 = some_func();\n";
    let consts = collect_consts_lang(rs_src, "rust");
    assert!(consts.get("VAL").is_none());
}
|
||||
|
||||
/// A `const` item nested inside a Rust function body is not recorded.
#[test]
fn rust_module_consts_skip_inside_function_body() {
    let rs_src = "fn f() -> &'static str { const INNER: &str = \"x\"; INNER }\n";
    let consts = collect_consts_lang(rs_src, "rust");
    assert!(consts.get("INNER").is_none());
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue