nyx/src/cfg/literals.rs

use super::conditions::unwrap_parens;
use super::helpers::{collect_array_pattern_bindings_indexed, collect_rhs_array_literal_elements};
use super::{
    anon_fn_name, collect_idents, collect_idents_with_paths, find_constructor_type_child,
    first_call_ident, root_receiver_text, text_of,
};
use crate::labels::{Cap, Kind, lookup};
use smallvec::SmallVec;
use tree_sitter::Node;

/// Locate the call node (`Kind::CallFn` / `Kind::CallMethod` /
/// `Kind::CallMacro`) that `n` either is or wraps.
///
/// * A node that is itself a call is returned unchanged.
/// * An await/yield wrapper (`Kind::AwaitForward`) is transparent: each of
///   its children is searched recursively, in order.
/// * For any other node the direct children are inspected first, then the
///   grandchildren (child by child, in source order).  Wrappers met at
///   either level are descended into without counting as a level, so
///   `const x = await foo(y)` still reaches the inner `call_expression`
///   (`lexical_declaration > variable_declarator > await_expression >
///   call_expression`).
///
/// Returns `None` when no call is found inside that search window.
pub(super) fn find_call_node<'a>(n: Node<'a>, lang: &str) -> Option<Node<'a>> {
    /// First call among the direct children of `parent`, looking through
    /// await/yield wrappers.
    fn call_among_children<'t>(parent: Node<'t>, lang: &str) -> Option<Node<'t>> {
        let mut walker = parent.walk();
        for child in parent.children(&mut walker) {
            let hit = match lookup(lang, child.kind()) {
                Kind::CallFn | Kind::CallMethod | Kind::CallMacro => Some(child),
                // The wrapper is transparent: whatever call it holds counts
                // as if it sat at the wrapper's own position.
                Kind::AwaitForward => find_call_node(child, lang),
                _ => None,
            };
            if hit.is_some() {
                return hit;
            }
        }
        None
    }

    match lookup(lang, n.kind()) {
        Kind::CallFn | Kind::CallMethod | Kind::CallMacro => Some(n),
        Kind::AwaitForward => {
            let mut walker = n.walk();
            for awaited in n.children(&mut walker) {
                if let Some(call) = find_call_node(awaited, lang) {
                    return Some(call);
                }
            }
            None
        }
        _ => {
            // Level 1: direct children of `n`.
            if let Some(call) = call_among_children(n, lang) {
                return Some(call);
            }
            // Level 2: grandchildren (handles
            // `expression_statement > variable_declarator > call`).
            let mut walker = n.walk();
            for child in n.children(&mut walker) {
                if let Some(call) = call_among_children(child, lang) {
                    return Some(call);
                }
            }
            None
        }
    }
}

/// Extract `(field_name, ident_name)` pairs from specified fields of an
/// object-literal argument.
///
/// Returns:
/// * `Some(pairs)` if the positional argument at `index` IS an object literal
///   (JS `object`, TS `object`, Python `dictionary`). Each pair is
///   `(field_name, ident_name)` where `field_name` is the matched key from
///   `fields` and `ident_name` is an identifier lifted from that pair's
///   value expression. When no destination-field pairs are present, returns
///   `Some(vec![])`, the sink is effectively silenced because no destination
///   identifier exists.
/// * `None` if the arg is absent, is not an object literal (plain string
///   / ident / expression), or has splat/spread children that break static
///   per-field reasoning. Callers fall back to the whole-arg positional
///   filter in this case.
pub(super) fn extract_destination_field_pairs(
    call_node: Node,
    arg_index: usize,
    fields: &[&str],
    code: &[u8],
) -> Option<Vec<(String, String)>> {
    if fields.is_empty() {
        return None;
    }
    let args = call_node.child_by_field_name("arguments")?;
    let mut cursor = args.walk();
    let arg = args.named_children(&mut cursor).nth(arg_index)?;

    // Only object / dict literal forms carry per-field destination semantics.
    // For anything else (identifier, member expression, string, call), return
    // None so the caller treats the whole arg as destination.
    if !matches!(arg.kind(), "object" | "dictionary") {
        return None;
    }

    let mut out: Vec<(String, String)> = Vec::new();
    let mut c = arg.walk();
    for child in arg.named_children(&mut c) {
        match child.kind() {
            // `spread_element` (JS/TS) / `dictionary_splat` (Python): we can't
            // statically attribute spread contents to specific fields, so
            // bail out, caller falls back to the whole-arg filter, matching
            // the conservative posture used by arg_uses for splats.
            "spread_element" | "dictionary_splat" => {
                return None;
            }
            // Shorthand property `{ url }` binds the `url` field to a binding
            // also named `url`. Treat as destination iff the name matches.
            "shorthand_property_identifier" | "shorthand_property_identifier_pattern" => {
                let Some(name) = text_of(child, code) else {
                    continue;
                };
                if fields.iter().any(|&f| f == name) && !out.iter().any(|(_, v)| v == &name) {
                    out.push((name.clone(), name));
                }
            }
            "pair" => {
                let Some(key_node) = child.child_by_field_name("key") else {
                    continue;
                };
                let key_text = match key_node.kind() {
                    // Strip quotes from string-literal keys so `"url"` and `url`
                    // both match the configured field list.
                    "string" | "string_literal" => text_of(key_node, code).map(|raw| {
                        if raw.len() >= 2 {
                            raw[1..raw.len() - 1].to_string()
                        } else {
                            raw
                        }
                    }),
                    // Computed keys: resolve only when the inner expression
                    // is a pure string literal (`['url']`).  Dynamic forms
                    // (`[someVar]`, `[`url-${i}`]`, ``[`url`]`` with
                    // interpolation) stay conservative-skip.
                    "computed_property_name" => {
                        let mut inner_cursor = key_node.walk();
                        let inner = key_node.named_children(&mut inner_cursor).find(|c| {
                            !matches!(c.kind(), "comment" | "block_comment" | "line_comment")
                        });
                        match inner.map(|n| (n.kind(), n)) {
                            Some(("string" | "string_literal", n)) => text_of(n, code).map(|raw| {
                                if raw.len() >= 2 {
                                    raw[1..raw.len() - 1].to_string()
                                } else {
                                    raw
                                }
                            }),
                            // Template strings only when no interpolation
                            // (no `template_substitution` children).
                            Some(("template_string", n))
                                if {
                                    let mut tc = n.walk();
                                    !n.named_children(&mut tc)
                                        .any(|c| c.kind() == "template_substitution")
                                } =>
                            {
                                text_of(n, code).map(|raw| {
                                    if raw.len() >= 2 {
                                        raw[1..raw.len() - 1].to_string()
                                    } else {
                                        raw
                                    }
                                })
                            }
                            _ => continue,
                        }
                    }
                    _ => text_of(key_node, code),
                };
                let Some(key) = key_text else {
                    continue;
                };
                if !fields.iter().any(|&f| f == key) {
                    continue;
                }
                let Some(val_node) = child.child_by_field_name("value") else {
                    continue;
                };
                let mut idents: Vec<String> = Vec::new();
                let mut paths: Vec<String> = Vec::new();
                collect_idents_with_paths(val_node, code, &mut idents, &mut paths);
                for name in paths.into_iter().chain(idents) {
                    if !out.iter().any(|(_, v)| v == &name) {
                        out.push((key.clone(), name));
                    }
                }
            }
            _ => {}
        }
    }
    Some(out)
}

/// Collect `(field_name, ident_name)` pairs from keyword-style arguments of
/// a call whose keyword name is listed in `fields`.
///
/// Three argument shapes under the call's `arguments` node are recognised:
/// * `keyword_argument` / `named_argument`, e.g. Python
///   `requests.post(url, data=tainted, json=safe)`.  The `name` / `value`
///   fields are preferred; when absent, the first / last named child is used.
/// * `pair` with `key` / `value` fields, e.g. Ruby `Faraday.new(url: x)`.
///
/// The keyword text is normalised by stripping leading `:` characters and
/// surrounding `"` / `'` characters, so `url`, `:url` and `"url"` all match
/// the field `url`.
///
/// Returns the union of matches with the keyword name in the first slot; an
/// identifier already recorded is not added again.  The result is empty when
/// `fields` is empty, the call has no `arguments` field, or nothing matches.
pub(super) fn extract_destination_kwarg_pairs(
    call_node: Node,
    fields: &[&str],
    code: &[u8],
) -> Vec<(String, String)> {
    let mut pairs: Vec<(String, String)> = Vec::new();
    if fields.is_empty() {
        return pairs;
    }
    let Some(args_node) = call_node.child_by_field_name("arguments") else {
        return pairs;
    };
    let mut cursor = args_node.walk();
    for arg in args_node.named_children(&mut cursor) {
        let (name_node, value_node) = match arg.kind() {
            "keyword_argument" | "named_argument" => {
                let last_index = arg.named_child_count().saturating_sub(1) as u32;
                (
                    arg.child_by_field_name("name")
                        .or_else(|| arg.named_child(0)),
                    arg.child_by_field_name("value")
                        .or_else(|| arg.named_child(last_index)),
                )
            }
            // Ruby kwarg (`f(url: x)`): a `pair` directly under the
            // argument list, keyed by `hash_key_symbol` ("url"),
            // `simple_symbol` (":url"), or a string literal.
            "pair" => (
                arg.child_by_field_name("key"),
                arg.child_by_field_name("value"),
            ),
            _ => continue,
        };
        let (Some(name_node), Some(value_node)) = (name_node, value_node) else {
            continue;
        };
        let Some(raw_name) = text_of(name_node, code) else {
            continue;
        };
        let field = raw_name
            .trim_start_matches(':')
            .trim_matches(['"', '\''])
            .to_string();
        if !fields.iter().any(|&wanted| wanted == field) {
            continue;
        }
        let mut idents: Vec<String> = Vec::new();
        let mut paths: Vec<String> = Vec::new();
        collect_idents_with_paths(value_node, code, &mut idents, &mut paths);
        // Member paths are recorded ahead of bare identifiers.
        for ident in paths.into_iter().chain(idents) {
            if !pairs.iter().any(|(_, seen)| seen == &ident) {
                pairs.push((field.clone(), ident));
            }
        }
    }
    pairs
}

/// True when `call_node` is `Object.create(null)`, where the single argument
/// may be wrapped in parentheses, `await`, or a TypeScript type cast
/// (`null as T`, `<T>null`).
///
/// The match is strict: the callee text must be exactly `Object.create`,
/// there must be exactly one named argument, and after unwrapping at most
/// four wrapper layers that argument must be the literal `null`.  There is
/// no aliasing through intermediate variables.  Caller restricts to JS/TS.
pub(super) fn is_object_create_null_call(call_node: Node, code: &[u8]) -> bool {
    /// First named child of `wrapper` that is neither the cast's
    /// `type_arguments` nor a comment, i.e. the wrapped expression.
    fn first_operand<'t>(wrapper: Node<'t>) -> Option<Node<'t>> {
        let mut cursor = wrapper.walk();
        let operand = wrapper
            .named_children(&mut cursor)
            .find(|c| !matches!(c.kind(), "type_arguments" | "comment"));
        operand
    }

    if call_node.kind() != "call_expression" {
        return false;
    }
    let callee_is_object_create = call_node
        .child_by_field_name("function")
        .and_then(|f| text_of(f, code))
        .is_some_and(|callee| callee == "Object.create");
    if !callee_is_object_create {
        return false;
    }
    let Some(args) = call_node.child_by_field_name("arguments") else {
        return false;
    };
    let mut cursor = args.walk();
    let named: Vec<Node> = args.named_children(&mut cursor).collect();
    if named.len() != 1 {
        return false;
    }
    let mut arg = named[0];
    // Peel off up to four wrapper layers (parens / await / type casts).
    for _ in 0..4 {
        let inner = match arg.kind() {
            "parenthesized_expression" | "as_expression" => arg.named_child(0),
            // Prefer an `argument` field when the grammar provides one and
            // fall back to the operand position otherwise.
            // NOTE(review): tree-sitter-javascript appears to define
            // `await_expression` without field names; confirm.
            "await_expression" => arg
                .child_by_field_name("argument")
                .or_else(|| first_operand(arg)),
            // `<T>expr`: the leading named child is the `type_arguments`
            // node, so the expression has to be picked past it.
            "type_assertion" => first_operand(arg),
            _ => None,
        };
        // Stop as soon as there is nothing left to unwrap.
        let Some(inner) = inner else {
            break;
        };
        arg = inner;
    }
    arg.kind() == "null" || text_of(arg, code).as_deref() == Some("null")
}

/// Extract the constant string content of the positional argument at
/// `index` (0-based).
///
/// Recognised shapes:
/// * plain string literals (`string`, `string_literal`,
///   `interpreted_string_literal`, `raw_string_literal`): the text with its
///   first and last byte (the delimiters) removed;
/// * `true` / `false` nodes: the node kind, verbatim;
/// * `encapsed_string`: the concatenation of its `string_content` /
///   `escape_sequence` children, provided there is no other named child;
/// * `template_string` without any `template_substitution` child;
/// * `binary_expression` whose `left` operand is one of the string shapes
///   above (e.g. PHP `"Location: " . $url`, JS `"Location: " + url`): the
///   left-most literal only, so prefix-driven gates can still activate.
///
/// A single-child `argument` wrapper node around the argument is unwrapped
/// first.  Returns `None` when the call has no `arguments` field, `index`
/// is out of range, or the argument has any other shape.
pub(super) fn extract_const_string_arg(
    call_node: Node,
    index: usize,
    code: &[u8],
) -> Option<String> {
    /// Strip the delimiter byte at each end; `None` for text shorter than
    /// two bytes.
    fn unquote(raw: String) -> Option<String> {
        (raw.len() >= 2).then(|| raw[1..raw.len() - 1].to_string())
    }

    /// Concatenate the literal segments of an `encapsed_string`; `None` as
    /// soon as a non-literal (interpolated) segment shows up.
    fn literal_segments(node: Node, code: &[u8]) -> Option<String> {
        let mut cursor = node.walk();
        let mut text = String::new();
        for part in node.named_children(&mut cursor) {
            if !matches!(part.kind(), "string_content" | "escape_sequence") {
                return None;
            }
            if let Some(segment) = text_of(part, code) {
                text.push_str(&segment);
            }
        }
        Some(text)
    }

    let args = call_node.child_by_field_name("arguments")?;
    let mut cursor = args.walk();
    let positional = args.named_children(&mut cursor).nth(index)?;
    // Some grammars wrap each positional argument in an `argument` node;
    // look through it so the kind match sees the inner literal.
    let arg = if positional.kind() == "argument" && positional.named_child_count() == 1 {
        positional.named_child(0).unwrap_or(positional)
    } else {
        positional
    };

    match arg.kind() {
        "string" | "string_literal" | "interpreted_string_literal" | "raw_string_literal" => {
            unquote(text_of(arg, code)?)
        }
        // Boolean literal nodes are reported by kind so `dangerous_values`
        // matching can detect flag arguments such as
        // `extend(true, target, src)`.
        "true" | "false" => Some(arg.kind().to_string()),
        "encapsed_string" => literal_segments(arg, code),
        "template_string" => {
            let mut tpl_cursor = arg.walk();
            let is_dynamic = arg
                .named_children(&mut tpl_cursor)
                .any(|ch| ch.kind() == "template_substitution");
            if is_dynamic {
                None
            } else {
                unquote(text_of(arg, code)?)
            }
        }
        // Concatenation with a leading literal: report that literal only.
        // Any other leading operand yields `None`, which keeps exact
        // `dangerous_values` matching strict.
        "binary_expression" => {
            let left = arg.child_by_field_name("left")?;
            match left.kind() {
                "string"
                | "string_literal"
                | "interpreted_string_literal"
                | "raw_string_literal" => unquote(text_of(left, code)?),
                "encapsed_string" => literal_segments(left, code),
                _ => None,
            }
        }
        _ => None,
    }
}

/// Extract a constant-like token (macro / `define`d identifier, scoped
/// constant, or integer literal) at argument position `index` (0-based).
///
/// Intended for languages where activation values are symbols such as
/// `CURLOPT_POSTFIELDS` rather than string literals.  A single-child
/// `argument` wrapper node is unwrapped first.
///
/// * `identifier`, `name`, `qualified_name`, `scoped_identifier`,
///   `constant`, and the integer literal kinds return the node text as-is.
///   Integers are returned so a plain `0` compares unequal to an
///   identifier-only `dangerous_values` list instead of being treated as
///   dynamic.
/// * `scope_resolution` (e.g. `Nokogiri::XML::ParseOptions::NOENT`) returns
///   the text of its `name` field, falling back to the full text when that
///   field is missing, so gate lists can stay namespace-free.
///
/// Returns `None` for every other shape (and when the call has no
/// `arguments` field or `index` is out of range), so runtime values keep
/// their dynamic-activation semantics.
pub(super) fn extract_const_macro_arg(
    call_node: Node,
    index: usize,
    code: &[u8],
) -> Option<String> {
    let args = call_node.child_by_field_name("arguments")?;
    let mut cursor = args.walk();
    let positional = args.named_children(&mut cursor).nth(index)?;
    let arg = if positional.kind() == "argument" && positional.named_child_count() == 1 {
        positional.named_child(0).unwrap_or(positional)
    } else {
        positional
    };

    match arg.kind() {
        "identifier"
        | "name"
        | "qualified_name"
        | "scoped_identifier"
        | "constant"
        | "integer"
        | "integer_literal"
        | "number_literal"
        | "decimal_integer_literal" => text_of(arg, code),
        "scope_resolution" => {
            let rightmost = arg
                .child_by_field_name("name")
                .and_then(|segment| text_of(segment, code));
            match rightmost {
                Some(name) => Some(name),
                None => text_of(arg, code),
            }
        }
        _ => None,
    }
}

/// Extract the literal value of the keyword argument `keyword_name` from a
/// call (e.g. Python `shell=True`).
///
/// The first `keyword_argument` / `named_argument` child whose `name` field
/// text equals `keyword_name` decides the result:
/// * literal value kinds (`true`, `false`, `none`, `integer`, `float`,
///   `string`, `string_literal`) yield the value's source text;
/// * an `identifier` value yields its text only when it is `True`, `False`
///   or `None`;
/// * every other value (or a missing `value` field) yields `None`, so the
///   gate can tell literal-safe from dynamic.
///
/// Also returns `None` when the call has no `arguments` field or no keyword
/// argument with that name exists.
pub(super) fn extract_const_keyword_arg(
    call_node: Node,
    keyword_name: &str,
    code: &[u8],
) -> Option<String> {
    let args = call_node.child_by_field_name("arguments")?;
    let mut cursor = args.walk();
    let kwarg = args.named_children(&mut cursor).find(|child| {
        matches!(child.kind(), "keyword_argument" | "named_argument")
            && child
                .child_by_field_name("name")
                .and_then(|name_node| text_of(name_node, code))
                .is_some_and(|name| name == keyword_name)
    })?;
    let value = kwarg.child_by_field_name("value")?;
    match value.kind() {
        "true" | "false" | "none" | "integer" | "float" | "string" | "string_literal" => {
            text_of(value, code)
        }
        // Some grammar versions expose the Python constants as plain
        // identifiers; any other identifier is a runtime value.
        "identifier" => {
            text_of(value, code).filter(|text| matches!(text.as_str(), "True" | "False" | "None"))
        }
        _ => None,
    }
}

/// Report whether the call carries a `keyword_argument` / `named_argument`
/// whose `name` field text equals `keyword_name`, whatever its value is.
///
/// Lets gated-sink classification tell an absent kwarg (language default)
/// apart from a present-but-dynamic one (conservative).  Returns `false`
/// when the call has no `arguments` field.
pub(super) fn has_keyword_arg(call_node: Node, keyword_name: &str, code: &[u8]) -> bool {
    let Some(args) = call_node.child_by_field_name("arguments") else {
        return false;
    };
    let mut cursor = args.walk();
    let present = args
        .named_children(&mut cursor)
        .filter(|child| matches!(child.kind(), "keyword_argument" | "named_argument"))
        .filter_map(|child| child.child_by_field_name("name"))
        .any(|name_node| text_of(name_node, code).as_deref() == Some(keyword_name));
    present
}

/// Extract the literal value of property `prop_name` from the object
/// literal passed as positional argument `arg_index`.
///
/// Serves "options object as kwargs" gates such as
/// `_.template(tpl, { evaluate: false })`.  Parentheses around the argument
/// and around the property value are looked through.  Only `pair` members
/// are considered; string keys are compared without their quotes and
/// computed keys are ignored.  The first pair with a matching key decides:
/// * `true`, `false`, `null`, `undefined`, `number`, `string`,
///   `string_literal` values yield their source text;
/// * an `identifier` value yields its text only when it spells `true`,
///   `false`, `null` or `undefined`;
/// * anything else yields `None`.
///
/// `None` is also returned when the argument is absent, is not an
/// `object` / `dictionary` literal, or has no matching pair — use
/// [`has_object_arg_property`] to distinguish "present but dynamic" from
/// "absent".
pub(super) fn extract_object_arg_property(
    call_node: Node,
    arg_index: usize,
    prop_name: &str,
    code: &[u8],
) -> Option<String> {
    /// Statically known key text: quotes stripped from string keys,
    /// `None` for computed keys.
    fn static_key(key_node: Node, code: &[u8]) -> Option<String> {
        match key_node.kind() {
            "computed_property_name" => None,
            "string" | "string_literal" => text_of(key_node, code).map(|raw| {
                if raw.len() >= 2 {
                    raw[1..raw.len() - 1].to_string()
                } else {
                    raw
                }
            }),
            _ => text_of(key_node, code),
        }
    }

    let args = call_node.child_by_field_name("arguments")?;
    let mut args_cursor = args.walk();
    let options = unwrap_parens(args.named_children(&mut args_cursor).nth(arg_index)?);
    if !matches!(options.kind(), "object" | "dictionary") {
        return None;
    }

    let mut members = options.walk();
    let matching_pair = options.named_children(&mut members).find(|member| {
        member.kind() == "pair"
            && member
                .child_by_field_name("key")
                .and_then(|key| static_key(key, code))
                .as_deref()
                == Some(prop_name)
    })?;
    let value = unwrap_parens(matching_pair.child_by_field_name("value")?);
    match value.kind() {
        "true" | "false" | "null" | "undefined" | "number" | "string" | "string_literal" => {
            text_of(value, code)
        }
        // Keyword literals that a grammar surfaces as plain identifiers.
        "identifier" => text_of(value, code)
            .filter(|text| matches!(text.as_str(), "true" | "false" | "null" | "undefined")),
        _ => None,
    }
}

/// Report whether positional argument `arg_index` is an object literal that
/// contains a property named `prop_name`, regardless of the value's shape.
///
/// Companion to [`extract_object_arg_property`]: together they let
/// gated-sink classification separate "options key absent" (language
/// default) from "options key present with dynamic value" (conservative
/// dangerous).  Shorthand properties (`{ evaluate }`) count as present;
/// string keys are compared without their quotes; computed keys never
/// match.  Parentheses around the argument are looked through.
pub(super) fn has_object_arg_property(
    call_node: Node,
    arg_index: usize,
    prop_name: &str,
    code: &[u8],
) -> bool {
    let Some(args) = call_node.child_by_field_name("arguments") else {
        return false;
    };
    let mut args_cursor = args.walk();
    let Some(positional) = args.named_children(&mut args_cursor).nth(arg_index) else {
        return false;
    };
    let options = unwrap_parens(positional);
    if !matches!(options.kind(), "object" | "dictionary") {
        return false;
    }

    let mut members = options.walk();
    let present = options
        .named_children(&mut members)
        .any(|member| match member.kind() {
            "shorthand_property_identifier" | "shorthand_property_identifier_pattern" => {
                text_of(member, code).as_deref() == Some(prop_name)
            }
            "pair" => {
                let key_text = member
                    .child_by_field_name("key")
                    .and_then(|key| match key.kind() {
                        "computed_property_name" => None,
                        "string" | "string_literal" => text_of(key, code).map(|raw| {
                            if raw.len() >= 2 {
                                raw[1..raw.len() - 1].to_string()
                            } else {
                                raw
                            }
                        }),
                        _ => text_of(key, code),
                    });
                key_text.as_deref() == Some(prop_name)
            }
            _ => false,
        });
    present
}

/// Describe the first named argument of a call: its tree-sitter `kind()`
/// and whether its subtree contains an interpolation node.
///
/// Parentheses are looked through (`(arg0)` is treated as `arg0`).  Returns
/// `None` when the call has no `arguments` field or no named argument.
///
/// Used by per-language shape-aware sink suppression — for example, Ruby
/// ActiveRecord query methods (`where`, `order`, `pluck`, …) are
/// intrinsically parameterised when arg 0 is a hash/symbol/array/
/// non-interpolated string, regardless of taint reaching that argument.
pub(super) fn arg0_kind_and_interpolation(call_node: Node) -> Option<(String, bool)> {
    let args = call_node.child_by_field_name("arguments")?;
    let mut cursor = args.walk();
    let first = unwrap_parens(args.named_children(&mut cursor).next()?);
    Some((first.kind().to_string(), subtree_has_interpolation(first)))
}

/// Follow a Java method chain down its receiver spine until a
/// `method_invocation` named like one of `target_methods` (e.g.
/// `createQuery`, `prepareStatement`) is reached, and return the kind of
/// that call's first argument (parentheses looked through).
///
/// This lets callers verify that the SQL-bearing call further up the chain
/// received a string literal rather than a concatenation or method call.
///
/// Conservative: returns `None` when the chain ends without a match, when a
/// link is not a `method_invocation`, or when the matching call has no
/// `arguments` field / no named argument.  Arguments of unrelated calls are
/// never inspected — only the `object` (receiver) field is followed.
pub(super) fn java_chain_arg0_kind_for_method(
    expr: Node,
    target_methods: &[&str],
    code: &[u8],
) -> Option<String> {
    let mut link = unwrap_parens(expr);
    loop {
        if link.kind() != "method_invocation" {
            return None;
        }
        let is_target = link
            .child_by_field_name("name")
            .and_then(|name_node| text_of(name_node, code))
            .is_some_and(|name| target_methods.iter().any(|wanted| *wanted == name));
        if is_target {
            let args = link.child_by_field_name("arguments")?;
            let mut cursor = args.walk();
            let first = unwrap_parens(args.named_children(&mut cursor).next()?);
            return Some(first.kind().to_string());
        }
        // Step to the receiver, which the Java grammar exposes as `object`.
        link = unwrap_parens(link.child_by_field_name("object")?);
    }
}

/// Walk a Ruby method chain receiver-side looking for the inner call whose
/// method identifier matches one of `target_methods`, then return that
/// inner call's [`arg0_kind_and_interpolation`].
///
/// Needed when the CFG node is a chained expression such as
/// `Model.where(...).preload(...).to_a`: the outermost call (`to_a`) has no
/// arguments, so the shape suppressor must reach down the chain to inspect
/// `where`'s arg 0.
///
/// Search order for a node (after looking through parentheses):
/// 1. if it is a `call` with a matching `method`, its arg 0 description is
///    returned (this may be `None` when the call has no arguments);
/// 2. otherwise its receiver (`receiver` field, else `object`) is searched;
/// 3. finally every remaining named child is searched, which handles
///    wrappers such as an assignment RHS or a begin-end block.
///
/// The receiver searched in step 2 is skipped in step 3.  Searching it
/// twice per call level cannot change the result, but it makes an
/// unmatched chain cost time exponential in its depth.
///
/// Conservative: returns `None` if the chain doesn't contain a matching
/// method, so callers fall through to the no-suppression path.
pub(super) fn ruby_chain_arg0_for_method(
    expr: Node,
    target_methods: &[&str],
    code: &[u8],
) -> Option<(String, bool)> {
    let n = unwrap_parens(expr);
    // Id of the receiver node once its subtree has been searched.
    let mut searched_receiver: Option<usize> = None;
    if n.kind() == "call" {
        let is_target = n
            .child_by_field_name("method")
            .and_then(|method| text_of(method, code))
            .is_some_and(|name| target_methods.iter().any(|wanted| *wanted == name));
        if is_target {
            return arg0_kind_and_interpolation(n);
        }
        // Recurse into the receiver chain (`call.receiver` → next call up).
        if let Some(recv) = n
            .child_by_field_name("receiver")
            .or_else(|| n.child_by_field_name("object"))
        {
            if let Some(found) = ruby_chain_arg0_for_method(recv, target_methods, code) {
                return Some(found);
            }
            searched_receiver = Some(recv.id());
        }
    }
    // Also descend into named children to handle wrapping (assignment RHS,
    // begin-end blocks, parenthesised expressions, etc.).
    let mut cursor = n.walk();
    for c in n.named_children(&mut cursor) {
        if searched_receiver == Some(c.id()) {
            continue;
        }
        if let Some(found) = ruby_chain_arg0_for_method(c, target_methods, code) {
            return Some(found);
        }
    }
    None
}

/// Returns `true` when `n` itself, or any node reachable from it through
/// named children, has kind `interpolation` or `string_interpolation`.
fn subtree_has_interpolation(n: Node) -> bool {
    if matches!(n.kind(), "interpolation" | "string_interpolation") {
        return true;
    }
    let mut cursor = n.walk();
    for child in n.named_children(&mut cursor) {
        if subtree_has_interpolation(child) {
            return true;
        }
    }
    false
}

/// Search the receiver side of a JS/TS call chain for an inner
/// `call_expression` whose callee names one of `target_methods` (e.g.
/// `query`, `execute`) and return the `(kind, has_interp)` pair that
/// `arg0_kind_and_interpolation` reports for that call.
///
/// This recognises ORM-accessor chains in which a labelled SQL sink sits on
/// the receiver side of a parameterised execute method, such as
/// `strapi.db.query('admin::api-token').findOne({...})`: the outer call
/// (`findOne`) is the CFG node, while the inner `db.query` call carries the
/// literal model UID that shows the chain is parameterised.
///
/// A callee "names" a target `m` when any of these hold:
/// * its `property` field text equals `m`,
/// * the text after the last `.` of the whole callee equals `m`,
/// * the whole callee text equals `m`, or
/// * the whole callee text ends with `.m`.
///
/// Conservative: returns `None` when `expr` (parentheses unwrapped) is not a
/// `call_expression`, when a call has no `function` field, or when the
/// `function.object` spine runs out without a match, so callers fall
/// through to the no-suppression path.
pub(super) fn js_chain_arg0_kind_for_method(
    expr: Node,
    target_methods: &[&str],
    code: &[u8],
) -> Option<(String, bool)> {
    let call = unwrap_parens(expr);
    // tree-sitter-typescript / -javascript: call_expression with fields
    // `function` (member_expression / identifier) and `arguments`.
    if call.kind() != "call_expression" {
        return None;
    }
    let callee = call.child_by_field_name("function")?;

    // `property` exists for member expressions; the whole-text forms cover
    // bare-identifier callees.
    let property = callee
        .child_by_field_name("property")
        .and_then(|p| text_of(p, code));
    let whole = text_of(callee, code);
    let last_segment = whole
        .as_ref()
        .map(|s| s.rsplit('.').next().unwrap_or(s).to_string());

    let names_target = target_methods.iter().any(|m| {
        [property.as_deref(), last_segment.as_deref(), whole.as_deref()].contains(&Some(*m))
            || whole
                .as_deref()
                .is_some_and(|s| s.ends_with(&format!(".{m}")))
    });
    if names_target {
        return arg0_kind_and_interpolation(call);
    }

    // Not this call: `function.object` is the previous link of the chain.
    let receiver = callee.child_by_field_name("object")?;
    js_chain_arg0_kind_for_method(receiver, target_methods, code)
}

/// Walk the receiver chain of a JS/TS call looking for an inner call whose
/// callee names one of `target_inner` (e.g. `query`, `execute`), and return
/// the property name of the call applied directly to that inner call's
/// result (e.g. `findOne` in `db.query('x').findOne(...)`).
///
/// Used alongside [`js_chain_arg0_kind_for_method`] to verify the chain
/// shape `<inner>.query(LITERAL).<orm_method>(...)`.  A bare
/// `connection.query("SELECT ...")` yields `None` because no method is
/// chained onto the `query` call.
///
/// The walk stops with `None` as soon as the current node is not a
/// `call_expression`, lacks a `function` field, its callee has no `object`
/// field, or that object is not itself a `call_expression`.  The returned
/// name is `None` as well when the matching link's callee has no
/// `property` field.
pub(super) fn js_chain_outer_method_for_inner<'a>(
    outer: Node<'a>,
    target_inner: &[&str],
    code: &'a [u8],
) -> Option<String> {
    let mut current = unwrap_parens(outer);
    loop {
        if current.kind() != "call_expression" {
            return None;
        }
        let callee = current.child_by_field_name("function")?;
        let receiver = callee.child_by_field_name("object")?;
        // Only a call sitting directly in receiver position continues the
        // chain; any other receiver shape ends the search.
        if receiver.kind() != "call_expression" {
            return None;
        }

        if let Some(inner_callee) = receiver.child_by_field_name("function") {
            let property = inner_callee
                .child_by_field_name("property")
                .and_then(|p| text_of(p, code));
            let whole = text_of(inner_callee, code);
            let last_segment = whole
                .as_ref()
                .map(|s| s.rsplit('.').next().unwrap_or(s).to_string());
            let names_target = target_inner.iter().any(|m| {
                [property.as_deref(), last_segment.as_deref(), whole.as_deref()]
                    .contains(&Some(*m))
                    || whole
                        .as_deref()
                        .is_some_and(|s| s.ends_with(&format!(".{m}")))
            });
            if names_target {
                // `current` is the call chained straight onto the target.
                return callee
                    .child_by_field_name("property")
                    .and_then(|p| text_of(p, code));
            }
        }

        // Deeper chains (`a.b().c().d()`): step one link towards the root
        // and test again.
        current = unwrap_parens(receiver);
    }
}

/// For a chained call (`a.b().c().d()`) or a direct double call
/// (`f()(x)`), find the innermost call of the chain and return it together
/// with its callee text with all whitespace removed (e.g. `"http.get"`).
///
/// Resolution rules, in order:
/// 1. `outer` must classify as `Kind::CallFn` / `Kind::CallMethod` and have
///    a `function` or `method` field, otherwise `None`.
/// 2. Double-call form: when that field is itself a call (lodash-style
///    `_.template(t)(data)`), resolve inside it recursively; if nothing
///    deeper is found, that inner call is the result.
/// 3. Otherwise the callee must expose a receiver through its `object`
///    field (JS/TS, Python) or `value` field (Rust), and the receiver must
///    be a call, otherwise `None`.
/// 4. If the receiver's own callee is member-style (`member_expression`,
///    `attribute`, `field_expression`, `scoped_identifier`,
///    `scope_resolution`) and has an `object`/`value` field, the receiver
///    is a chained method call: resolve inside it recursively
///    (`axios.get(u).then(h).catch(h)` → `axios.get`), falling back to the
///    receiver itself.
/// 5. Otherwise the receiver is a plain function/constructor call (Rust
///    `HttpResponse::Found()`, JS bare `f()`).  Descending would land on a
///    non-method leaf that no gate matcher recognises, so `outer` is
///    reported with its own callee text.
///
/// Whitespace is stripped because multi-line chains (`http\n  .get(...)`)
/// carry formatting whitespace (including `\r` in CRLF sources) in the
/// source slice, while the labels map keys are literal `"http.get"` etc.
///
/// Motivated by CVE-2025-64430 (Parse Server SSRF via
/// `http.get(uri, cb).on('error', e => ...)`): without this, the outer
/// `.on(...)` call swallows classification of the inner gated sink.
pub(super) fn find_chained_inner_call<'a>(
    outer: Node<'a>,
    lang: &str,
    code: &[u8],
) -> Option<(Node<'a>, String)> {
    /// Callee slot of a call node: the `function` field, else `method`.
    fn callee_of<'t>(call: Node<'t>) -> Option<Node<'t>> {
        call.child_by_field_name("function")
            .or_else(|| call.child_by_field_name("method"))
    }

    /// Source text of `node` with every whitespace character removed.
    fn squeezed_text(node: Node, code: &[u8]) -> Option<String> {
        let raw = text_of(node, code)?;
        Some(raw.chars().filter(|ch| !ch.is_whitespace()).collect())
    }

    let is_call =
        |node: Node| matches!(lookup(lang, node.kind()), Kind::CallFn | Kind::CallMethod);

    if !is_call(outer) {
        return None;
    }
    let callee = callee_of(outer)?;

    // Direct double-call form (`f()(x)`): the callee IS a call, with no
    // member expression in between.
    if is_call(callee) {
        if let Some(deeper) = find_chained_inner_call(callee, lang, code) {
            return Some(deeper);
        }
        let inner_callee = callee_of(callee).or_else(|| callee.child_by_field_name("name"))?;
        return Some((callee, squeezed_text(inner_callee, code)?));
    }

    // Member-style callee: its receiver must itself be a call for there to
    // be any chain at all.
    let receiver = callee
        .child_by_field_name("object")
        .or_else(|| callee.child_by_field_name("value"))?;
    if !is_call(receiver) {
        return None;
    }

    let receiver_is_chained_method = callee_of(receiver).is_some_and(|f| {
        matches!(
            f.kind(),
            "member_expression"
                | "attribute"
                | "field_expression"
                | "scoped_identifier"
                | "scope_resolution"
        ) && (f.child_by_field_name("object").is_some()
            || f.child_by_field_name("value").is_some())
    });

    if !receiver_is_chained_method {
        // Receiver is a non-chained call, so the outer level IS the
        // innermost method call; report its own callee text.
        return Some((outer, squeezed_text(callee, code)?));
    }

    // The receiver may itself be chained further in.
    if let Some(deeper) = find_chained_inner_call(receiver, lang, code) {
        return Some(deeper);
    }
    // The receiver is the innermost call of the chain: report its callee
    // text (member expression text → "http.get").
    let inner_callee = callee_of(receiver).or_else(|| receiver.child_by_field_name("name"))?;
    Some((receiver, squeezed_text(inner_callee, code)?))
}

/// Follow the receiver chain of `outer` (a CallFn / CallMethod node) and
/// append to `out` every *named argument* of each inner call met on the
/// way.  The arguments of `outer` itself are NOT appended; per the original
/// design the caller already covers those through its standard pass over
/// `outer.arguments`.
///
/// For `json.NewDecoder(r.Body).Decode(emoji)`:
///   outer  = `.Decode(emoji)`          , caller iterates `emoji`
///   inner  = `json.NewDecoder(r.Body)` , appended arg: `r.Body`
///
/// Only each inner call's `arguments` field is read, never its
/// `function`/`method`/receiver expressions.  Chained source-receivers like
/// `r.URL.Query()` expose a member-text path that classifies as a Source,
/// but the OUTER chain text (`r.URL.Query.Get`) already classifies, so
/// emitting a synthetic source for the inner callee would double-count.
///
/// The receiver of a call is taken from its callee's `object`, `operand`
/// or `value` field (first one present).  The walk ends at the first link
/// that is not a call, has no callee, or has no receiver.
///
/// Used by Go, where chain shapes like `json.NewDecoder(r.Body).Decode`
/// hide source-labeled args inside parens between dots.  The helper is
/// language-neutral, but callers should gate per-language until each
/// language's regression coverage catches up.
pub(super) fn walk_chain_inner_call_args<'a>(outer: Node<'a>, lang: &str, out: &mut Vec<Node<'a>>) {
    let mut current = outer;
    loop {
        if !matches!(
            lookup(lang, current.kind()),
            Kind::CallFn | Kind::CallMethod
        ) {
            return;
        }
        let Some(callee) = current
            .child_by_field_name("function")
            .or_else(|| current.child_by_field_name("method"))
        else {
            return;
        };
        let Some(inner) = callee
            .child_by_field_name("object")
            .or_else(|| callee.child_by_field_name("operand"))
            .or_else(|| callee.child_by_field_name("value"))
        else {
            return;
        };
        if !matches!(lookup(lang, inner.kind()), Kind::CallFn | Kind::CallMethod) {
            return;
        }
        if let Some(arg_list) = inner.child_by_field_name("arguments") {
            let mut cursor = arg_list.walk();
            out.extend(arg_list.named_children(&mut cursor));
        }
        // Continue with the inner call as the new outer link.
        current = inner;
    }
}

/// Depth-limited, pre-order search for the first call node (`Kind::CallFn`,
/// `Kind::CallMethod` or `Kind::CallMacro`) in the subtree rooted at `n`.
///
/// `depth` is the number of levels that may still be inspected, counting
/// `n` itself: `0` inspects nothing and returns `None`, `1` inspects only
/// `n`, and every step into a child consumes one level.  Unlike
/// `find_call_node`, no node kind is treated as a transparent wrapper;
/// callers reach `await`-wrapped calls inside declarations by passing a
/// large enough `depth`.
pub(super) fn find_call_node_deep<'a>(n: Node<'a>, lang: &str, depth: u8) -> Option<Node<'a>> {
    if depth == 0 {
        return None;
    }
    if matches!(
        lookup(lang, n.kind()),
        Kind::CallFn | Kind::CallMethod | Kind::CallMacro
    ) {
        return Some(n);
    }
    let mut cursor = n.walk();
    n.children(&mut cursor)
        .find_map(|child| find_call_node_deep(child, lang, depth - 1))
}

/// Detect whether a call node is a parameterized SQL query.
///
/// Returns `true` when:
/// 1. The first argument (arg 0) is a string literal (including template
///    strings without interpolation) containing SQL placeholder patterns:
///    `$1`..`$N`, `?`, `%s`, or `:identifier`.
/// 2. The call has at least 2 arguments (the second being the params
///    array/tuple).
///
/// This is intentionally conservative: if arg 0 is dynamic (variable,
/// concatenation, template with interpolation), returns `false`.
pub(super) fn is_parameterized_query_call(call_node: Node, code: &[u8]) -> bool {
    let Some(args) = call_node.child_by_field_name("arguments") else {
        return false;
    };
    let mut cursor = args.walk();
    let named: Vec<_> = args.named_children(&mut cursor).collect();
    // Need at least 2 arguments: query string + params
    if named.len() < 2 {
        return false;
    }
    let first_arg = named[0];
    // Extract the raw text of arg 0, must be a string literal or
    // template string without interpolation.
    let query_text = match first_arg.kind() {
        "string" | "string_literal" | "interpreted_string_literal" | "raw_string_literal" => {
            text_of(first_arg, code)
        }
        "template_string" => {
            // Only constant templates (no interpolation)
            let mut c = first_arg.walk();
            if first_arg
                .named_children(&mut c)
                .any(|ch| ch.kind() == "template_substitution")
            {
                return false; // dynamic, not safe
            }
            text_of(first_arg, code)
        }
        // Python concatenated strings: "SELECT" "..." are implicit concat
        "concatenated_string" => {
            // If it's a concatenated_string, get the full text
            text_of(first_arg, code)
        }
        _ => return false, // not a literal
    };
    let Some(qt) = query_text else {
        return false;
    };
    has_sql_placeholders(&qt)
}

/// Report whether `s` contains at least one SQL parameter placeholder.
///
/// Recognised patterns:
/// - `$1`..`$9` followed by anything (PostgreSQL positional): a `$`
///   directly followed by a non-zero ASCII digit.
/// - `?` anywhere (MySQL / SQLite positional).
/// - `%s` (Python DB-API / psycopg2).
/// - `:identifier` (Oracle / named parameters): a `:` that is directly
///   preceded by a space, `=`, `(` or `,` and directly followed by an ASCII
///   letter.  The restriction on the preceding byte avoids matching JS
///   ternaries, object literals and `::` casts; a `:` at the very start of
///   the string never matches.
pub(super) fn has_sql_placeholders(s: &str) -> bool {
    let bytes = s.as_bytes();
    bytes.iter().enumerate().any(|(idx, &byte)| {
        let next = bytes.get(idx + 1).copied();
        match byte {
            b'?' => true,
            // `$0` is deliberately not a placeholder.
            b'$' => matches!(next, Some(d) if d.is_ascii_digit() && d != b'0'),
            b'%' => next == Some(b's'),
            b':' => {
                let introduced =
                    idx > 0 && matches!(bytes[idx - 1], b' ' | b'=' | b'(' | b',');
                introduced && matches!(next, Some(c) if c.is_ascii_alphabetic())
            }
            _ => false,
        }
    })
}

/// Decide, purely from node kinds, whether `node` is a syntactic literal.
///
/// Deliberately conservative: any kind not listed below is treated as
/// non-literal, because missing a suppression opportunity is preferable to
/// suppressing a real tainted flow.
///
/// Accepted shapes:
/// * string kinds without interpolation children, and PHP
///   `encapsed_string` without variable interpolation;
/// * number, boolean and null-like kinds;
/// * `argument`, `unary_expression`, `unary_op` with exactly one named
///   child that is a literal (argument wrappers, `-42`);
/// * `binary_expression` / `concatenated_string` with at least two named
///   children, all literals (`"a" + "b"`, `"a" . "b"`);
/// * `template_string` with no `template_substitution` child;
/// * list/array/tuple containers and `pair` entries whose named children
///   are all literals (an empty container qualifies).
///
/// NOTE: Literal-kind classification also exists in `ast.rs::is_literal_node`.
/// The two must stay aligned across languages. TODO: consider extracting a
/// shared literal-kind helper if a third call site appears.
#[allow(clippy::only_used_in_recursion)]
pub(super) fn is_syntactic_literal(node: Node, code: &[u8]) -> bool {
    // True when every named child of `parent` is a literal (vacuously true
    // when there are none).
    let all_named_children_literal = |parent: Node| {
        let mut cursor = parent.walk();
        parent
            .named_children(&mut cursor)
            .all(|child| is_syntactic_literal(child, code))
    };

    match node.kind() {
        // Scalar strings are literals unless they interpolate
        // (e.g. Ruby `"hello #{name}"`, Python f-strings).
        "string"
        | "string_literal"
        | "interpreted_string_literal"
        | "raw_string_literal"
        | "string_content"
        | "string_fragment" => !has_string_interpolation(node),

        // PHP encapsed_string: literal only without variable interpolation.
        "encapsed_string" => !has_interpolation_cfg(node),

        // Numbers, booleans and null / nil / none.
        "integer" | "integer_literal" | "int_literal" | "float" | "float_literal" | "number"
        | "true" | "false" | "null" | "nil" | "none" | "null_literal" | "boolean"
        | "boolean_literal" => true,

        // Single-child wrappers: the per-argument `argument` node and a
        // unary operator applied to a literal (`-42`).
        "argument" | "unary_expression" | "unary_op" => {
            node.named_child_count() == 1 && all_named_children_literal(node)
        }

        // Concatenation of literals: `"a" + "b"` or `"a" . "b"`.
        "binary_expression" | "concatenated_string" => {
            node.named_child_count() >= 2 && all_named_children_literal(node)
        }

        // JS/TS template string: literal only without substitutions.
        "template_string" => {
            let mut cursor = node.walk();
            node.named_children(&mut cursor)
                .all(|part| part.kind() != "template_substitution")
        }

        // Containers and `{"key": "value"}` style entries: every element
        // must be a literal.
        "list"
        | "array"
        | "array_expression"
        | "array_creation_expression"
        | "tuple"
        | "tuple_expression"
        | "pair" => all_named_children_literal(node),

        _ => false,
    }
}

/// Returns `true` when any direct child of `node` (named or anonymous) has
/// a kind whose name contains `"interpolation"`, as produced for strings
/// like Ruby `"hello #{name}"` or Python f-strings.  Only direct children
/// are inspected; deeper descendants are not.
pub(super) fn has_string_interpolation(node: Node) -> bool {
    let mut cursor = node.walk();
    node.children(&mut cursor)
        .any(|child| child.kind().contains("interpolation"))
}

/// Returns `true` when a PHP `encapsed_string` node interpolates a value:
/// one of its direct children is a `variable_name` or `simple_variable`,
/// or has a kind whose name contains `"interpolation"`.
pub(super) fn has_interpolation_cfg(node: Node) -> bool {
    let mut cursor = node.walk();
    node.children(&mut cursor).any(|child| {
        let kind = child.kind();
        matches!(kind, "variable_name" | "simple_variable") || kind.contains("interpolation")
    })
}

/// Extract the raw literal text from the RHS of a declaration/assignment AST node.
///
/// Walks the same value/right child paths as `def_use` and returns the text
/// if the RHS is a syntactic literal. Used to populate `NodeInfo::const_text`.
pub(super) fn extract_literal_rhs(ast: Node, lang: &str, code: &[u8]) -> Option<String> {
    use crate::labels::lookup;

    // Direct value/right field (Rust let, Go short_var, etc.)
    let val_node = ast
        .child_by_field_name("value")
        .or_else(|| ast.child_by_field_name("right"));

    if let Some(val) = val_node {
        if is_syntactic_literal(val, code) {
            return text_of(val, code);
        }
    }

    // Nested declarator pattern (JS let/const → variable_declarator, etc.)
    if matches!(
        lookup(lang, ast.kind()),
        Kind::CallWrapper | Kind::Assignment
    ) {
        let mut cursor = ast.walk();
        for child in ast.children(&mut cursor) {
            let child_val = child.child_by_field_name("value").or_else(|| {
                if matches!(lookup(lang, child.kind()), Kind::Assignment) {
                    child.child_by_field_name("right")
                } else {
                    None
                }
            });
            if let Some(val) = child_val {
                if is_syntactic_literal(val, code) {
                    return text_of(val, code);
                }
            }
        }
    }

    // Return statement with a literal argument (`return []`, `return {}`).
    // Lets SSA's const-return path ([`crate::ssa::lower`] line ~1066) emit
    // `SsaOp::Const(Some(text))` instead of `Const(None)` so downstream
    // container-literal detection (heap points-to, fresh-alloc summary)
    // can recognise the fresh allocation.
    if matches!(lookup(lang, ast.kind()), Kind::Return) {
        let mut cursor = ast.walk();
        for child in ast.named_children(&mut cursor) {
            if is_syntactic_literal(child, code) {
                return text_of(child, code);
            }
        }
    }

    None
}

/// Returns `true` when the call has at least one argument, every argument
/// is a syntactic literal (per [`is_syntactic_literal`]), and the same
/// holds for the calls nested in its callee chain (checked through
/// [`check_inner_call_args`]).
///
/// Returns `false` when:
/// * the node has no `arguments` field,
/// * the argument list is empty — a zero-argument call is not "all
///   literal", because taint can still flow through a non-literal receiver
///   (e.g. `tainted.readObject()`) and the sink-suppression gate
///   (`info.all_args_literal`) must not skip such calls, or
/// * any argument, here or in a nested call of the chain, is not a literal.
///
/// For method chains like `a("x").b(y).c()`, the outermost call node
/// represents the entire chain, which is why the nested calls are checked
/// as well.
pub(super) fn has_only_literal_args(call_node: Node, code: &[u8]) -> bool {
    let Some(arg_list) = call_node.child_by_field_name("arguments") else {
        return false;
    };
    let mut cursor = arg_list.walk();
    let mut literal_count = 0usize;
    for arg in arg_list.named_children(&mut cursor) {
        if !is_syntactic_literal(arg, code) {
            return false;
        }
        literal_count += 1;
    }
    // An empty argument list never qualifies; otherwise the verdict rests
    // on the calls nested in the callee chain.
    literal_count > 0 && check_inner_call_args(call_node, code)
}

/// Check the calls nested below `node` in a method chain and return `false`
/// as soon as one of them fails [`has_only_literal_args`].
///
/// Every direct child of `node` is visited, except argument lists
/// (`arguments`, `argument_list`, `actual_parameters`), which the caller
/// has already checked.  A child that has an `arguments` field is treated
/// as a call and must pass `has_only_literal_args`; any other child is a
/// structural node (field expression, etc.) and is searched recursively.
/// Returns `true` when no visited child fails.
pub(super) fn check_inner_call_args(node: Node, code: &[u8]) -> bool {
    let mut cursor = node.walk();
    node.children(&mut cursor)
        .filter(|child| {
            !matches!(
                child.kind(),
                "arguments" | "argument_list" | "actual_parameters"
            )
        })
        .all(|child| {
            if child.child_by_field_name("arguments").is_some() {
                has_only_literal_args(child, code)
            } else {
                check_inner_call_args(child, code)
            }
        })
}

/// Collect the identifiers captured through Rust's inline format-argument
/// syntax (`format!("…{name}…")`, stable since 1.58) by a single
/// `macro_invocation` node.  The result lists the names referenced by
/// `{name}` / `{name:fmt-spec}` placeholders in the first `string_literal`
/// or `raw_string_literal` child of the macro's `token_tree`.
///
/// Without this lifting, `let q = format!("...{x}...")` carries no `x` in
/// its `uses`, because `x` lives in the format string's bytes rather than
/// in a separate AST argument node, and taint would stop at the macro
/// boundary.  Mirrors the Python f-string interpolation lifting in
/// `patterns/python.rs`.
///
/// Conservative recognition: the last `::` segment of the macro path must
/// be accepted by `is_rust_format_style_macro`.  The result is empty when
/// `call_node` is not a `macro_invocation`, when the macro is not a
/// format-style macro, or when no token tree or string literal is found.
pub(super) fn extract_rust_format_macro_named_idents(call_node: Node, code: &[u8]) -> Vec<String> {
    if call_node.kind() != "macro_invocation" {
        return Vec::new();
    }
    let Some(macro_text) = call_node
        .child_by_field_name("macro")
        .and_then(|path| text_of(path, code))
    else {
        return Vec::new();
    };
    // `std::format` and `format` are the same macro for our purposes.
    let leaf = macro_text
        .rsplit("::")
        .next()
        .unwrap_or(macro_text.as_str());
    if !is_rust_format_style_macro(leaf) {
        return Vec::new();
    }

    // Prefer the `token_tree` field; fall back to scanning the children
    // for a node of that kind.
    let token_tree = call_node.child_by_field_name("token_tree").or_else(|| {
        let mut cursor = call_node.walk();
        call_node
            .children(&mut cursor)
            .find(|child| child.kind() == "token_tree")
    });
    let Some(token_tree) = token_tree else {
        return Vec::new();
    };

    let mut cursor = token_tree.walk();
    let Some(fmt_lit) = token_tree
        .children(&mut cursor)
        .find(|child| matches!(child.kind(), "string_literal" | "raw_string_literal"))
    else {
        return Vec::new();
    };
    let Some(raw) = text_of(fmt_lit, code) else {
        return Vec::new();
    };
    // Parse the quoted text as-is when the quotes cannot be stripped.
    let content = strip_literal_quotes(&raw, fmt_lit, code).unwrap_or_else(|| raw.clone());
    parse_rust_format_named_idents(&content)
}

/// Gather the named-format-arg identifiers of every Rust
/// `macro_invocation` found at or below `n`, in traversal order.
///
/// This lets the def-use collectors lift `format!("...{x}...")` named args
/// through expression wrapping such as
/// `let q = format!("{x}").to_owned();` or chained method calls on the
/// right-hand side.  The traversal is delegated to
/// `collect_format_macro_idents_recursive`, which bounds how deep below `n`
/// it looks.
pub(super) fn extract_rust_format_macro_named_idents_in(n: Node, code: &[u8]) -> Vec<String> {
    let mut collected = Vec::new();
    collect_format_macro_idents_recursive(n, code, &mut collected, 0);
    collected
}

/// Pre-order traversal behind `extract_rust_format_macro_named_idents_in`.
///
/// Appends to `out` the identifiers that
/// `extract_rust_format_macro_named_idents` reports for `n` when `n` is a
/// `macro_invocation`, then visits every child of `n` (including the
/// children of a macro invocation).  `depth` is the distance from the
/// starting node; nodes deeper than 6 levels are ignored.
fn collect_format_macro_idents_recursive(n: Node, code: &[u8], out: &mut Vec<String>, depth: u32) {
    const MAX_DEPTH: u32 = 6;
    if depth > MAX_DEPTH {
        return;
    }
    if n.kind() == "macro_invocation" {
        out.extend(extract_rust_format_macro_named_idents(n, code));
    }
    let mut cursor = n.walk();
    for child in n.children(&mut cursor) {
        collect_format_macro_idents_recursive(child, code, out, depth + 1);
    }
}

/// Returns `true` when `name` (a macro name without any path prefix) is one
/// of the macros treated as taking a Rust format string: the `std`
/// formatting/printing/writing macros, the panic-family and assertion
/// macros, and the severity macros `info`/`warn`/`error`/`debug`/`trace`.
/// The comparison is exact and case-sensitive.
fn is_rust_format_style_macro(name: &str) -> bool {
    const FORMAT_STYLE_MACROS: &[&str] = &[
        // formatting and output
        "format",
        "print",
        "println",
        "eprint",
        "eprintln",
        "write",
        "writeln",
        "format_args",
        // panics and assertions
        "panic",
        "assert",
        "debug_assert",
        "todo",
        "unimplemented",
        "unreachable",
        // logging severities
        "info",
        "warn",
        "error",
        "debug",
        "trace",
    ];
    FORMAT_STYLE_MACROS.contains(&name)
}

fn parse_rust_format_named_idents(s: &str) -> Vec<String> {
    let bytes = s.as_bytes();
    let mut out: Vec<String> = Vec::new();
    let mut i = 0;
    while i < bytes.len() {
        let b = bytes[i];
        if b == b'{' {
            if i + 1 < bytes.len() && bytes[i + 1] == b'{' {
                i += 2;
                continue;
            }
            let start = i + 1;
            let mut j = start;
            while j < bytes.len() && bytes[j] != b'}' && bytes[j] != b':' {
                j += 1;
            }
            let ident_bytes = &bytes[start..j];
            if is_valid_rust_format_ident(ident_bytes) {
                if let Ok(name) = std::str::from_utf8(ident_bytes) {
                    out.push(name.to_string());
                }
            }
            while j < bytes.len() && bytes[j] != b'}' {
                j += 1;
            }
            i = j + 1;
        } else if b == b'}' && i + 1 < bytes.len() && bytes[i + 1] == b'}' {
            i += 2;
        } else {
            i += 1;
        }
    }
    out
}

/// Returns `true` when `b` is an ASCII identifier usable as a named /
/// captured argument inside a Rust format string.
///
/// Accepted shape: a first byte that is an ASCII letter or `_`, followed by
/// any number of ASCII letters, digits or `_`.  Rejected:
///
/// * the empty slice (`{}` placeholder),
/// * anything starting with a digit (positional placeholders such as `{0}`),
/// * a lone `_`, which rustc refuses as a format argument name and which can
///   never name a captured variable,
/// * anything containing other bytes (including non-ASCII).
fn is_valid_rust_format_ident(b: &[u8]) -> bool {
    match b {
        [] | [b'_'] => false,
        [first, rest @ ..] => {
            (first.is_ascii_alphabetic() || *first == b'_')
                && rest.iter().all(|c| c.is_ascii_alphanumeric() || *c == b'_')
        }
    }
}

/// Collect the identifiers referenced by each positional argument of a call.
///
/// The outer vector has one entry per positional argument, in source order.
/// Each entry lists the dotted paths found in that argument first, followed
/// by the individual identifiers.
///
/// Special shapes:
/// * `subshell` nodes (Ruby backticks) have no `arguments` field; every
///   `interpolation` child that references at least one identifier becomes
///   one positional entry.
/// * `macro_invocation` nodes are first offered to
///   `extract_rust_macro_join_arg_uses`; when it recognises the macro its
///   result is returned as-is.
///
/// Keyword / named arguments are skipped (they are tracked separately as
/// kwargs) so the result stays strictly positional.  An empty vector is
/// returned when the node has no `arguments` field or when any argument is a
/// spread / splat, because positional indices are meaningless in that case.
pub(super) fn extract_arg_uses(call_node: Node, code: &[u8]) -> Vec<Vec<String>> {
    // Identifier references of one sub-tree: dotted paths first, then the
    // individual identifiers as fallback.
    fn uses_of(node: Node, code: &[u8]) -> Vec<String> {
        let mut idents = Vec::new();
        let mut uses = Vec::new();
        collect_idents_with_paths(node, code, &mut idents, &mut uses);
        uses.extend(idents);
        uses
    }

    match call_node.kind() {
        "subshell" => {
            let mut cursor = call_node.walk();
            let lifted: Vec<Vec<String>> = call_node
                .named_children(&mut cursor)
                .filter(|part| part.kind() == "interpolation")
                .map(|part| uses_of(part, code))
                .filter(|uses| !uses.is_empty())
                .collect();
            return lifted;
        }
        "macro_invocation" => {
            // Rust join-style macros keep their arguments in a `token_tree`
            // rather than an `arguments` field; a dedicated extractor
            // handles them.  Any other macro falls through.
            if let Some(joined) = extract_rust_macro_join_arg_uses(call_node, code) {
                return joined;
            }
        }
        _ => {}
    }

    let Some(args_node) = call_node.child_by_field_name("arguments") else {
        return Vec::new();
    };
    let mut per_arg = Vec::new();
    let mut cursor = args_node.walk();
    for arg in args_node.named_children(&mut cursor) {
        match arg.kind() {
            // A splat invalidates the positional mapping entirely.
            "spread_element"
            | "dictionary_splat"
            | "list_splat"
            | "splat_argument"
            | "hash_splat_argument" => return Vec::new(),
            // Named arguments do not occupy a positional slot.
            "keyword_argument" | "named_argument" => {}
            _ => per_arg.push(uses_of(arg, code)),
        }
    }
    per_arg
}

/// Positional argument uses for Rust join-style macros (`tokio::join!`,
/// `futures::join!`, their `try_*` variants and the bare `join!` /
/// `try_join!` spellings).
///
/// The macro arguments live in a `token_tree` child.  Its direct children are
/// split into chunks at every top-level `,` token; the surrounding `(` / `)`
/// tokens are dropped.  Nested groups are separate named `token_tree` nodes,
/// so commas inside them do not split a chunk.  Each non-empty chunk yields
/// one entry: the dotted paths found in the chunk followed by its individual
/// identifiers.
///
/// Returns `None` when the node has no `macro` field, its text cannot be
/// read, the macro is not a recognised join macro, or no `token_tree` child
/// exists — letting `extract_arg_uses` fall back to its regular
/// `arguments`-field path for every other macro.
pub(super) fn extract_rust_macro_join_arg_uses(
    call_node: Node,
    code: &[u8],
) -> Option<Vec<Vec<String>>> {
    let macro_text = text_of(call_node.child_by_field_name("macro")?, code)?;
    if !is_rust_join_macro(&macro_text) {
        return None;
    }

    // Prefer the field lookup; otherwise scan the direct children.
    let tt = call_node.child_by_field_name("token_tree").or_else(|| {
        let mut cursor = call_node.walk();
        let found = call_node
            .children(&mut cursor)
            .find(|c| c.kind() == "token_tree");
        found
    })?;

    // `pending` accumulates the nodes of the chunk currently being read; a
    // top-level comma closes it and starts a new one.
    let mut groups: Vec<Vec<Node>> = Vec::new();
    let mut pending: Vec<Node> = Vec::new();
    let mut cursor = tt.walk();
    for tok in tt.children(&mut cursor) {
        if tok.is_named() {
            pending.push(tok);
            continue;
        }
        match tok.kind() {
            "," => groups.push(std::mem::take(&mut pending)),
            "(" | ")" => {}
            _ => pending.push(tok),
        }
    }
    groups.push(pending);

    let per_future: Vec<Vec<String>> = groups
        .into_iter()
        .filter(|group| !group.is_empty())
        .map(|group| {
            let mut idents = Vec::new();
            let mut uses = Vec::new();
            for node in group {
                collect_idents_with_paths(node, code, &mut idents, &mut uses);
            }
            uses.extend(idents);
            uses
        })
        .collect();
    Some(per_future)
}

/// Reports whether `macro_text` names a join-style macro: `join` or
/// `try_join`, either bare or qualified with exactly one `tokio::` or
/// `futures::` prefix.
fn is_rust_join_macro(macro_text: &str) -> bool {
    let bare = macro_text
        .strip_prefix("tokio::")
        .or_else(|| macro_text.strip_prefix("futures::"))
        .unwrap_or(macro_text);
    bare == "join" || bare == "try_join"
}

/// Extract keyword / named argument bindings for a call node.
///
/// Returns `(name, uses)` pairs in source order.  Two argument shapes
/// contribute:
///
/// * `keyword_argument` / `named_argument` children: `uses` holds the
///   identifier references of the value expression (dotted paths first, then
///   individual identifiers — the same shape as `arg_uses` entries).  When
///   the value references no identifier and is a boolean / numeric / string
///   literal node, its text (with surrounding quotes trimmed) is recorded
///   instead so literal-valued options stay visible.
/// * `object` children (JS/TS object-literal arguments such as
///   `f(x, { a: true, b: 'str' })`): every `pair` becomes
///   `(key, [raw value text])` with quotes trimmed from the key only, and
///   every `shorthand_property_identifier` becomes `(name, [name])`.  This
///   lets consumers that look options up by name (e.g. the xml_config
///   kwarg lookup that checks `processEntities`) see object-literal options.
///
/// Empty when the node has no `arguments` field or none of its arguments has
/// one of the shapes above.
pub(super) fn extract_kwargs(call_node: Node, code: &[u8]) -> Vec<(String, Vec<String>)> {
    // Value-node kinds whose raw text is captured when the value yields no
    // identifier reference.
    const LITERAL_VALUE_KINDS: &[&str] = &[
        "true",
        "false",
        "integer",
        "float",
        "number",
        "string",
        "string_literal",
        "true_constant",
        "false_constant",
    ];

    // Identifier references of one sub-tree: dotted paths first, then the
    // individual identifiers.
    fn uses_of(node: Node, code: &[u8]) -> Vec<String> {
        let mut idents = Vec::new();
        let mut uses = Vec::new();
        collect_idents_with_paths(node, code, &mut idents, &mut uses);
        uses.extend(idents);
        uses
    }

    let Some(args_node) = call_node.child_by_field_name("arguments") else {
        return Vec::new();
    };
    let mut kwargs = Vec::new();
    let mut cursor = args_node.walk();
    for arg in args_node.named_children(&mut cursor) {
        match arg.kind() {
            "object" => {
                let mut members = arg.walk();
                for member in arg.named_children(&mut members) {
                    match member.kind() {
                        "pair" => {
                            let key = member.child_by_field_name("key");
                            let value = member.child_by_field_name("value");
                            let (Some(key), Some(value)) = (key, value) else {
                                continue;
                            };
                            let Some(raw_key) = text_of(key, code) else {
                                continue;
                            };
                            let Some(value_text) = text_of(value, code) else {
                                continue;
                            };
                            // Quoted keys (`{ "a": 1 }`) are reported
                            // without their quotes; the value stays raw.
                            let name = raw_key.trim_matches(['"', '\'']).to_string();
                            kwargs.push((name, vec![value_text.to_string()]));
                        }
                        "shorthand_property_identifier" => {
                            if let Some(name) = text_of(member, code) {
                                kwargs.push((name.to_string(), vec![name.to_string()]));
                            }
                        }
                        _ => {}
                    }
                }
            }
            "keyword_argument" | "named_argument" => {
                // Prefer the `name` / `value` fields; fall back to the first
                // and last named children when the fields are absent.
                let last_named = arg.named_child_count().saturating_sub(1) as u32;
                let name_node = arg
                    .child_by_field_name("name")
                    .or_else(|| arg.named_child(0));
                let value_node = arg
                    .child_by_field_name("value")
                    .or_else(|| arg.named_child(last_named));
                let (Some(name_node), Some(value_node)) = (name_node, value_node) else {
                    continue;
                };
                let Some(name) = text_of(name_node, code) else {
                    continue;
                };

                let mut uses = uses_of(value_node, code);
                // Literal values are not identifiers, so nothing was
                // collected for them; record their text instead.
                let is_literal_value = LITERAL_VALUE_KINDS
                    .iter()
                    .any(|kind| *kind == value_node.kind());
                if uses.is_empty() && is_literal_value {
                    if let Some(txt) = text_of(value_node, code) {
                        uses.push(txt.trim_matches(['"', '\'']).to_string());
                    }
                }
                kwargs.push((name, uses));
            }
            _ => {}
        }
    }
    kwargs
}

/// Caps that a search literal is known to strip, provided the replacement
/// itself does not reintroduce any dangerous sequence.
///
/// The policy is intentionally narrow: a literal earns a cap only when it
/// contains one of the known-dangerous payloads below, so an arbitrary
/// `.replace("foo", "bar")` is never promoted to a sanitizer.
///
/// | payload in `search`          | class           | cap                 |
/// |------------------------------|-----------------|---------------------|
/// | `..`, `/`, `\`               | path traversal  | `Cap::FILE_IO`      |
/// | `<`, `>`                     | HTML metachars  | `Cap::HTML_ESCAPE`  |
/// | `;`, `\|`, `&`, `$`, `` ` `` | shell metachars | `Cap::SHELL_ESCAPE` |
/// | `'`, `"`, `--`               | SQL metachars   | `Cap::SQL_QUERY`    |
///
/// Returns the union of every matching cap (empty when nothing matches).
pub(super) fn caps_stripped_by_literal_pattern(search: &str) -> Cap {
    // Each rule: the substrings that trigger it and the cap it grants.
    let rules: [(&[&str], Cap); 4] = [
        (&["..", "/", "\\"], Cap::FILE_IO),
        (&["<", ">"], Cap::HTML_ESCAPE),
        (&[";", "|", "&", "$", "`"], Cap::SHELL_ESCAPE),
        (&["'", "\"", "--"], Cap::SQL_QUERY),
    ];
    let mut granted = Cap::empty();
    for (needles, cap) in rules {
        if needles.iter().any(|needle| search.contains(*needle)) {
            granted |= cap;
        }
    }
    granted
}

/// Maximum number of `.replace(LIT, LIT)` hops we'll walk on a single chain.
///
/// Bounds the receiver walk in `detect_rust_replace_chain_sanitizer`: a chain
/// that exhausts this budget earns no sanitizer credit (the detector returns
/// `None`).
const MAX_REPLACE_CHAIN_HOPS: usize = 16;

/// Recognise a Rust `param.replace(LIT, LIT)[.replace(LIT, LIT)]*` chain whose
/// receiver bottoms out at a plain identifier, and infer which caps the chain
/// strips.
///
/// In tree-sitter-rust a method call is encoded as a `call_expression` whose
/// `function` field is a `field_expression` (`receiver.method`). Chained method
/// calls therefore nest `call_expression` nodes recursively through the
/// `field_expression.value` slot.  The detector walks that nest, requiring
/// every hop to be a literal-to-literal `replace` / `replacen` call and the
/// innermost receiver to be a bare identifier.
///
/// Crediting rules:
/// * Only `replace` hops earn caps, computed from the search literal by
///   `caps_stripped_by_literal_pattern`.  `replacen` rewrites just the first
///   N matches, so it is tolerated inside a chain but never credited.
/// * Any hop (of either method) whose replacement literal itself contains a
///   dangerous sequence voids the whole chain.
/// * Literals are inspected in full: all `string_content` fragments are
///   joined and `escape_sequence` children are decoded, so text following an
///   escape (`"x\n<"`) is not overlooked.  An escape that cannot be decoded
///   voids the chain.
///
/// Returns the union of caps stripped across the chain when at least one
/// credited literal contains a recognised dangerous pattern, or `None` when
/// the pattern doesn't apply (so the caller falls back to normal
/// unresolved-call propagation).  Chains longer than
/// `MAX_REPLACE_CHAIN_HOPS` hops also yield `None`.
pub(super) fn detect_rust_replace_chain_sanitizer(call_ast: Node, code: &[u8]) -> Option<Cap> {
    fn is_rust_str_literal(k: &str) -> bool {
        matches!(k, "string_literal" | "raw_string_literal")
    }

    // Decode the source text of a single `escape_sequence` node (`\n`,
    // `\\`, `\"`, `\x41`, `\u{1F600}`, …) into the character it denotes.
    // Unknown shapes (e.g. a line-continuation backslash) yield `None` so
    // the caller can refuse to reason about the literal.
    fn decode_rust_escape(esc: &str) -> Option<char> {
        let body = esc.strip_prefix('\\')?;
        match body {
            "n" => Some('\n'),
            "r" => Some('\r'),
            "t" => Some('\t'),
            "0" => Some('\0'),
            "\\" => Some('\\'),
            "\"" => Some('"'),
            "'" => Some('\''),
            _ => {
                if let Some(hex) = body.strip_prefix('x') {
                    u8::from_str_radix(hex, 16).ok().map(char::from)
                } else if let Some(digits) = body
                    .strip_prefix("u{")
                    .and_then(|rest| rest.strip_suffix('}'))
                {
                    let digits: String = digits.chars().filter(|c| *c != '_').collect();
                    u32::from_str_radix(&digits, 16)
                        .ok()
                        .and_then(char::from_u32)
                } else {
                    None
                }
            }
        }
    }

    fn extract_rust_str_content<'a>(n: Node<'a>, code: &'a [u8]) -> Option<String> {
        // A `string_literal` node in tree-sitter-rust holds its unquoted
        // bytes as a sequence of `string_content` and `escape_sequence`
        // children.  Join all of them: stopping at the first fragment would
        // hide everything that follows an escape from the danger checks.
        let mut content = String::new();
        let mut saw_fragment = false;
        let mut cur = n.walk();
        for c in n.named_children(&mut cur) {
            match c.kind() {
                "string_content" => {
                    content.push_str(&text_of(c, code)?);
                    saw_fragment = true;
                }
                "escape_sequence" => {
                    content.push(decode_rust_escape(&text_of(c, code)?)?);
                    saw_fragment = true;
                }
                _ => {}
            }
        }
        if saw_fragment {
            return Some(content);
        }
        // No fragment children (e.g. an empty literal): fall back to
        // whole-node text with outer-character trimming as a last resort.
        let raw = text_of(n, code)?;
        if raw.len() >= 2 {
            Some(
                raw.trim_start_matches('r')
                    .trim_start_matches('#')
                    .trim_end_matches('#')
                    .trim_matches('"')
                    .to_string(),
            )
        } else {
            None
        }
    }

    let mut current = call_ast;
    let mut earned = Cap::empty();

    // A chain of N hops needs N + 1 iterations: N to consume the calls plus
    // one more to inspect the base receiver, hence the inclusive bound.
    for _ in 0..=MAX_REPLACE_CHAIN_HOPS {
        if current.kind() != "call_expression" {
            // Chain base: must be a plain identifier (parameter / local) to
            // qualify.  A base that's another expression (field access,
            // nested non-method call, …) breaks the sanitizer invariant.
            if current.kind() == "identifier" && !earned.is_empty() {
                return Some(earned);
            }
            return None;
        }

        // Must be a method-style call: function is a field_expression whose
        // `field` names a `replace`-like method.
        let func = current.child_by_field_name("function")?;
        if func.kind() != "field_expression" {
            return None;
        }
        let method_ident = func.child_by_field_name("field")?;
        let method_name = text_of(method_ident, code)?;
        // `str::replace` rewrites every match; `str::replacen` only the
        // first N, so matches beyond N survive and no strip can be claimed.
        let replaces_every_match = match &*method_name {
            "replace" => true,
            "replacen" => false,
            _ => return None,
        };

        let args_node = current.child_by_field_name("arguments")?;
        let mut cursor = args_node.walk();
        let positional: Vec<Node<'_>> = args_node
            .named_children(&mut cursor)
            .filter(|c| {
                !matches!(
                    c.kind(),
                    "keyword_argument"
                        | "named_argument"
                        | "spread_element"
                        | "list_splat"
                        | "dictionary_splat"
                        | "splat_argument"
                        | "hash_splat_argument"
                )
            })
            .collect();
        let (arg0, arg1) = match positional.as_slice() {
            [a, b, ..] => (*a, *b),
            _ => return None,
        };
        if !is_rust_str_literal(arg0.kind()) || !is_rust_str_literal(arg1.kind()) {
            return None;
        }
        let search = extract_rust_str_content(arg0, code)?;
        let replacement = extract_rust_str_content(arg1, code)?;

        // If the replacement itself contains a dangerous sequence, this hop
        // can reintroduce the pattern that a later hop tries to strip.  Be
        // conservative: abandon all credit.
        if !caps_stripped_by_literal_pattern(&replacement).is_empty() {
            return None;
        }
        if replaces_every_match {
            earned |= caps_stripped_by_literal_pattern(&search);
        }

        // Walk to receiver via field_expression.value.
        current = func.child_by_field_name("value")?;
    }

    None
}

/// Recognise a Go `strings.ReplaceAll(s, OLD, NEW)` /
/// `strings.Replace(s, OLD, NEW, n)` call that strips one of the
/// known-dangerous metacharacter classes from its first argument.
///
/// Requirements:
/// * the callee is the selector `strings.Replace` or `strings.ReplaceAll`;
/// * `OLD` and `NEW` are constant strings (per `extract_const_string_arg`);
/// * `NEW` contains no dangerous sequence itself;
/// * for `strings.Replace`, the count argument `n` is a negative integer
///   literal (typically `-1`).  A non-negative `n` rewrites only the first
///   `n` occurrences and a non-literal `n` cannot be judged, so neither is
///   credited as a strip.
///
/// Returns the union of caps stripped, or `None` when the pattern doesn't
/// apply (so the caller falls back to normal unresolved-call propagation).
///
/// Mirrors [`detect_rust_replace_chain_sanitizer`] but for the single-call
/// (non-method-chain) Go shape.  The caller wires the resulting cap into
/// the call's [`crate::labels::DataLabel::Sanitizer`] label, which the
/// taint engine consumes via the standard sanitizer pathway: taint flows
/// in on `s`, the matching cap is stripped from the result.
pub(super) fn detect_go_replace_call_sanitizer(call_ast: Node, code: &[u8]) -> Option<Cap> {
    // True when the fourth argument of the call is an integer literal
    // below zero, i.e. `strings.Replace` places no limit on replacements.
    fn replace_count_is_unlimited(call_ast: Node, code: &[u8]) -> bool {
        let Some(args) = call_ast.child_by_field_name("arguments") else {
            return false;
        };
        let mut cursor = args.walk();
        let count_arg = args
            .named_children(&mut cursor)
            .filter(|arg| arg.kind() != "comment")
            .nth(3);
        count_arg
            .and_then(|arg| text_of(arg, code))
            .and_then(|text| text.trim().parse::<i64>().ok())
            .is_some_and(|count| count < 0)
    }

    if call_ast.kind() != "call_expression" {
        return None;
    }
    // The call's `function` field is a `selector_expression`, `operand`
    // is the package ident (`strings`), `field` is the method ident.
    let func = call_ast.child_by_field_name("function")?;
    if func.kind() != "selector_expression" {
        return None;
    }
    let operand = func.child_by_field_name("operand")?;
    if text_of(operand, code).as_deref() != Some("strings") {
        return None;
    }
    let field = func.child_by_field_name("field")?;
    let method_name = text_of(field, code)?;
    let replaces_every_match = match &*method_name {
        "ReplaceAll" => true,
        "Replace" => replace_count_is_unlimited(call_ast, code),
        _ => return None,
    };
    if !replaces_every_match {
        return None;
    }
    // Args layout: (s, old, new[, n]).  Need positional args 1 (old) and
    // 2 (new) to be string literals.
    let old_lit = extract_const_string_arg(call_ast, 1, code)?;
    let new_lit = extract_const_string_arg(call_ast, 2, code)?;

    // If the replacement itself reintroduces a dangerous sequence, don't
    // credit the strip; this matches the Rust chain detector's policy.
    if !caps_stripped_by_literal_pattern(&new_lit).is_empty() {
        return None;
    }
    let caps = caps_stripped_by_literal_pattern(&old_lit);
    if caps.is_empty() { None } else { Some(caps) }
}

/// Resolve the callee name for `n`, treating `n` itself as a possible call.
///
/// `first_call_ident` only searches children, so when `n` IS the call
/// expression (e.g. the argument `sanitize(cmd)`) this function handles it
/// directly, by node kind:
///
/// * C++ `new_expression` / `delete_expression` → `"new"` / `"delete"`.
/// * Function / closure expressions → their `name` field, or the synthetic
///   name from `anon_fn_name(start_byte)` when anonymous.
/// * Function calls → text of the callee node (`function`, `method`, `name`,
///   `type` field, or the constructor type child, in that order); when the
///   parenthesis-stripped callee is itself a function expression, its
///   synthetic anonymous name is returned instead.
/// * Method calls → `"<receiver>.<method>"`, or just the method when no
///   receiver text is available.  For Java, a receiver that is a
///   `getClass()` call keeps that segment (`"<root>.getClass.<method>"`).
/// * Macro calls → text of the `macro` field.
/// * Anything else → delegated to `first_call_ident`.
pub(super) fn call_ident_of<'a>(n: Node<'a>, lang: &str, code: &'a [u8]) -> Option<String> {
    // C++ new/delete: normalize callee before field extraction.
    if lang == "cpp" {
        match n.kind() {
            "new_expression" => return Some("new".to_string()),
            "delete_expression" => return Some("delete".to_string()),
            _ => {}
        }
    }

    match lookup(lang, n.kind()) {
        Kind::Function => {
            // Function/closure expression passed as argument: use the same
            // synthetic anon name as build_sub so callback_bindings and
            // source_to_callback can match it to the extracted BodyCfg.
            let declared = n
                .child_by_field_name("name")
                .and_then(|nm| text_of(nm, code));
            Some(declared.unwrap_or_else(|| anon_fn_name(n.start_byte())))
        }
        Kind::CallFn => {
            let callee = n
                .child_by_field_name("function")
                .or_else(|| n.child_by_field_name("method"))
                .or_else(|| n.child_by_field_name("name"))
                .or_else(|| n.child_by_field_name("type"))
                .or_else(|| find_constructor_type_child(n))?;
            let bare = unwrap_parens(callee);
            if lookup(lang, bare.kind()) == Kind::Function {
                Some(anon_fn_name(bare.start_byte()))
            } else {
                text_of(callee, code)
            }
        }
        Kind::CallMethod => {
            let method = n
                .child_by_field_name("method")
                .or_else(|| n.child_by_field_name("name"))
                .and_then(|f| text_of(f, code));
            let recv_node = n
                .child_by_field_name("object")
                .or_else(|| n.child_by_field_name("receiver"))
                .or_else(|| n.child_by_field_name("scope"));
            let root = recv_node.and_then(|f| root_receiver_text(f, lang, code));

            // Java `obj.getClass().<accessor>()`: keep the `.getClass`
            // segment in the callee text so downstream predicates (e.g.
            // [`crate::ssa::type_facts::is_safe_string_producing_callee`])
            // can tell the chain apart from a user-defined `obj.<accessor>`,
            // which is what the collapsed root receiver would look like.
            let via_get_class = lang == "java"
                && recv_node.is_some_and(|rn| {
                    lookup(lang, rn.kind()) == Kind::CallMethod
                        && rn
                            .child_by_field_name("method")
                            .or_else(|| rn.child_by_field_name("name"))
                            .and_then(|f| text_of(f, code))
                            .is_some_and(|inner| inner == "getClass")
                });
            let recv = match root {
                Some(r) if via_get_class => Some(format!("{r}.getClass")),
                other => other,
            };

            method.map(|m| match recv {
                Some(r) => format!("{r}.{m}"),
                None => m,
            })
        }
        Kind::CallMacro => n
            .child_by_field_name("macro")
            .and_then(|f| text_of(f, code)),
        _ => first_call_ident(n, lang, code),
    }
}

/// For each argument of `call_node`, return `Some(s)` when the argument is a
/// syntactic string literal (unquoted contents) and `None` otherwise.  The
/// returned vector is parallel to [`extract_arg_uses`] / [`extract_arg_callees`].
///
/// Boolean / null / numeric literal tokens are also captured, verbatim.
/// PHP double-quoted strings (`encapsed_string`) are lifted only when they
/// contain no interpolation; an interpolated string has no constant value
/// and yields `None`.
///
/// Bails on splats so that a variadic call (`f(*args)`, `f(...xs)`) produces
/// an empty vector: positional indices past the splat are meaningless and
/// downstream passes already treat an empty vector as "no info".  Also
/// returns an empty vector when the node has no `arguments` field.
pub(super) fn extract_arg_string_literals(call_node: Node, code: &[u8]) -> Vec<Option<String>> {
    let Some(args_node) = call_node.child_by_field_name("arguments") else {
        return Vec::new();
    };
    let mut result = Vec::new();
    let mut cursor = args_node.walk();
    for child in args_node.named_children(&mut cursor) {
        let kind = child.kind();
        // Splat → positional indexing breaks; bail.
        if kind == "spread_element"
            || kind == "dictionary_splat"
            || kind == "list_splat"
            || kind == "splat_argument"
            || kind == "hash_splat_argument"
        {
            return Vec::new();
        }
        // Named / keyword arguments are tracked separately in `kwargs` and
        // don't participate in positional indexing, skip them here so this
        // vector stays aligned with `arg_uses`.
        if kind == "keyword_argument" || kind == "named_argument" {
            continue;
        }
        // PHP wraps each call argument in an `argument` node whose first
        // named child is the actual expression.  Unwrap one level so the
        // string-literal arm below sees the literal directly rather than
        // the wrapper kind, otherwise PHP `f("https://…")` records
        // `None` for arg 0 and downstream prefix-aware suppressions miss.
        let target = if kind == "argument" {
            child.named_child(0).unwrap_or(child)
        } else {
            child
        };
        let target_kind = target.kind();
        let literal = match target_kind {
            "string"
            | "string_literal"
            | "interpreted_string_literal"
            | "raw_string_literal" => {
                text_of(target, code).and_then(|s| strip_literal_quotes(&s, target, code))
            }
            // PHP's double-quoted form (single-quoted maps to `string`).
            // Only safe to lift when the string is constant: any named child
            // other than plain content / escape sequences is an interpolated
            // variable or expression, in which case no literal is reported.
            "encapsed_string" => {
                let mut parts = target.walk();
                let interpolated = target.named_children(&mut parts).any(|part| {
                    !matches!(
                        part.kind(),
                        "string_content" | "string_value" | "escape_sequence"
                    )
                });
                if interpolated {
                    None
                } else {
                    text_of(target, code).and_then(|s| strip_literal_quotes(&s, target, code))
                }
            }
            // Boolean / null / numeric literal tokens — capture verbatim so
            // downstream pattern-aware analysis (e.g. the XXE config-fact
            // pass that needs to read the boolean polarity arg of
            // `setFeature(NAME, true)`) can recover the literal text without
            // re-walking the AST.  Existing string-only consumers (URL
            // prefix matching, etc.) are unaffected: a "true" / "false"
            // token never satisfies their matching predicates.
            "true"
            | "false"
            | "null"
            | "null_literal"
            | "nil"
            | "nil_literal"
            | "none"
            | "boolean_literal"
            | "true_literal"
            | "false_literal"
            | "decimal_integer_literal"
            | "integer_literal"
            | "integer"
            | "number"
            | "number_literal"
            | "decimal_literal" => text_of(target, code),
            _ => None,
        };
        result.push(literal);
    }
    result
}

/// Strip surrounding quotes from a syntactic string literal, resolving
/// `string_content` children for grammars that split a string node into
/// fragments.  Returns the raw inner text (no escape-sequence processing),
/// sufficient for whitelist matching against shell-metachar sets.
///
/// * When `node` has at least one `string_content` child, the result is the
///   first contiguous run of `string_content` / `escape_sequence` children,
///   concatenated as written in the source.  Escape sequences split the
///   content into several fragments, so returning only the first fragment
///   would silently drop everything after the first escape.  The run stops
///   at the first child of any other kind.
/// * Otherwise `raw` must be wrapped in a matching pair of `"` or `'`; the
///   text between them is returned.
/// * `None` when neither applies or a fragment's text cannot be read.
pub(super) fn strip_literal_quotes(raw: &str, node: Node, code: &[u8]) -> Option<String> {
    fn is_literal_fragment(kind: &str) -> bool {
        matches!(kind, "string_content" | "escape_sequence")
    }

    // Prefer the content children so the caller doesn't have to deal with
    // quote pairing for raw strings (`r"..."`, `r#"..."#`, etc.).
    let mut cursor = node.walk();
    let parts: Vec<Node> = node.named_children(&mut cursor).collect();
    if parts.iter().any(|part| part.kind() == "string_content") {
        let mut inner = String::new();
        let literal_run = parts
            .iter()
            .skip_while(|part| !is_literal_fragment(part.kind()))
            .take_while(|part| is_literal_fragment(part.kind()));
        for part in literal_run {
            inner.push_str(&text_of(*part, code)?);
        }
        return Some(inner);
    }
    if raw.len() >= 2 {
        let bytes = raw.as_bytes();
        let first = bytes[0];
        let last = bytes[raw.len() - 1];
        if (first == b'"' && last == b'"') || (first == b'\'' && last == b'\'') {
            return Some(raw[1..raw.len() - 1].to_string());
        }
    }
    None
}

/// For each positional argument of `call_node`, find the callee name if that
/// argument is itself a call expression (e.g. `sanitize(x)` in
/// `os.system(sanitize(x))`).
///
/// Returns a `Vec<Option<String>>` parallel to `extract_arg_uses` output for
/// calls that carry an `arguments` field: keyword / named arguments are
/// skipped (they do not occupy a positional slot), and a spread / splat
/// argument makes the whole result empty because positional indices are
/// meaningless past it.  Also empty when the node has no `arguments` field.
pub(super) fn extract_arg_callees(call_node: Node, lang: &str, code: &[u8]) -> Vec<Option<String>> {
    let Some(args_node) = call_node.child_by_field_name("arguments") else {
        return Vec::new();
    };
    let mut result = Vec::new();
    let mut cursor = args_node.walk();
    for child in args_node.named_children(&mut cursor) {
        match child.kind() {
            // Bail on spread/splat like extract_arg_uses does.
            "spread_element"
            | "dictionary_splat"
            | "list_splat"
            | "splat_argument"
            | "hash_splat_argument" => return Vec::new(),
            // Skip named arguments instead of bailing: bailing here would
            // discard callee information for every positional argument of a
            // call that merely has a keyword argument, and would leave this
            // vector out of step with `extract_arg_uses`.
            "keyword_argument" | "named_argument" => continue,
            _ => result.push(call_ident_of(child, lang, code)),
        }
    }
    result
}

/// Return `(defines, uses, extra_defines, array_pattern_indices,
/// rhs_array_elements)` for the AST fragment `ast`.
///
/// `extra_defines` captures additional bindings from destructuring patterns
/// beyond the primary define. `array_pattern_indices`, when non-empty, gives
/// the source-order position of each binding in `iter::once(defines).chain(
/// extra_defines)` for `array_pattern` / `tuple_pattern` LHS shapes. Empty
/// for non-array destructures and for non-skip array patterns where callers
/// can derive sequential 0..N indices implicitly.
///
/// `rhs_array_elements`, when non-empty, gives source-order RHS slots for
/// destructure-from-array-literal shapes (`const [a, b] = [safe, tainted]`,
/// `let (a, b) = (safe, tainted)`, Python `a, b = safe, tainted`). Each slot
/// is `Some(ident)` for a bare-ident element or `None` for a syntactic
/// literal. Empty when RHS isn't an array-literal shape or any element is
/// too complex; callers fall back to scalar union in that case.
#[allow(clippy::type_complexity)]
pub(super) fn def_use(
    ast: Node,
    lang: &str,
    code: &[u8],
    extra_labels: Option<&[crate::labels::RuntimeLabelRule]>,
) -> (
    Option<String>,
    Vec<String>,
    Vec<String>,
    SmallVec<[usize; 4]>,
    SmallVec<[crate::cfg::RhsArraySlot; 4]>,
) {
    match lookup(lang, ast.kind()) {
        // Declaration wrappers (let, var, short_var_declaration, etc.)
        Kind::CallWrapper => {
            let mut defs = None;
            let mut extra_defs = Vec::new();
            let mut uses = Vec::new();
            let mut pattern_indices: SmallVec<[usize; 4]> = SmallVec::new();
            let mut rhs_array_elements: SmallVec<[crate::cfg::RhsArraySlot; 4]> = SmallVec::new();

            // Try direct field names first (Rust `let_declaration`, Go `short_var_declaration`)
            let def_node = ast
                .child_by_field_name("pattern")
                .or_else(|| ast.child_by_field_name("name"))
                .or_else(|| ast.child_by_field_name("left"))
                // Python `with_item`: value is `as_pattern` whose `alias` holds the target
                .or_else(|| {
                    ast.child_by_field_name("value")
                        .and_then(|v| v.child_by_field_name("alias"))
                });

            let val_node = ast
                .child_by_field_name("value")
                .or_else(|| ast.child_by_field_name("right"));

            if def_node.is_some() || val_node.is_some() {
                if let Some(pat) = def_node {
                    let bindings = collect_array_pattern_bindings_indexed(pat, code);
                    if !bindings.is_empty() {
                        let mut iter = bindings.into_iter();
                        if let Some((first_name, first_idx)) = iter.next() {
                            defs = Some(first_name);
                            pattern_indices.push(first_idx);
                        }
                        for (name, idx) in iter {
                            extra_defs.push(name);
                            pattern_indices.push(idx);
                        }
                    } else {
                        let mut idents = Vec::new();
                        let mut paths = Vec::new();
                        collect_idents_with_paths(pat, code, &mut idents, &mut paths);
                        let first = paths.pop().or_else(|| idents.first().cloned());
                        // Remaining idents are extra defines (for destructuring)
                        for ident in &idents {
                            if first.as_ref() != Some(ident) {
                                extra_defs.push(ident.clone());
                            }
                        }
                        defs = first;
                    }
                }
                if let Some(val) = val_node {
                    let mut idents = Vec::new();
                    let mut paths = Vec::new();
                    collect_idents_with_paths(val, code, &mut idents, &mut paths);
                    uses.extend(paths);
                    uses.extend(idents);
                    // Rust format-string named-arg capture: `let q =
                    // format!("...{x}...")` reads `x`, but `x` lives in
                    // the format-string bytes, not as a separate AST
                    // argument node, so collect_idents misses it.
                    uses.extend(extract_rust_format_macro_named_idents_in(val, code));
                    // When the LHS is a recognised destructure pattern AND
                    // the RHS is a bare array-literal shape (no call), record
                    // per-element idents so the SSA destructure rewrite can
                    // map each binding to its specific RHS slot.
                    if !pattern_indices.is_empty() {
                        rhs_array_elements =
                            collect_rhs_array_literal_elements(val, lang, code, extra_labels);
                    }
                }
            } else {
                // Try nested declarator pattern (JS/TS `lexical_declaration` → `variable_declarator`,
                // Java `local_variable_declaration` → `variable_declarator`,
                // C/C++ `declaration` → `init_declarator`,
                // Python/Ruby `expression_statement` → `assignment`)
                let mut cursor = ast.walk();
                for child in ast.children(&mut cursor) {
                    // Only use left/right fields for actual assignment nodes, binary
                    // expressions also have left/right but are not definitions.
                    let is_assign = matches!(lookup(lang, child.kind()), Kind::Assignment);
                    let child_name = child
                        .child_by_field_name("name")
                        .or_else(|| child.child_by_field_name("declarator"))
                        .or_else(|| {
                            if is_assign {
                                child.child_by_field_name("left")
                            } else {
                                None
                            }
                        });
                    let child_value = child.child_by_field_name("value").or_else(|| {
                        if is_assign {
                            child.child_by_field_name("right")
                        } else {
                            None
                        }
                    });

                    // Only treat this child as a declarator if it has BOTH a name
                    // and a value (or at least a value). This prevents method_invocation
                    // nodes (which have a `name` field) from being misinterpreted.
                    if child_value.is_some() {
                        if let Some(name_node) = child_name
                            && defs.is_none()
                        {
                            let bindings = collect_array_pattern_bindings_indexed(name_node, code);
                            if !bindings.is_empty() {
                                let mut iter = bindings.into_iter();
                                if let Some((first_name, first_idx)) = iter.next() {
                                    defs = Some(first_name);
                                    pattern_indices.push(first_idx);
                                }
                                for (name, idx) in iter {
                                    extra_defs.push(name);
                                    pattern_indices.push(idx);
                                }
                            } else {
                                let mut idents = Vec::new();
                                let mut paths = Vec::new();
                                collect_idents_with_paths(name_node, code, &mut idents, &mut paths);
                                let first = paths.pop().or_else(|| idents.first().cloned());
                                for ident in &idents {
                                    if first.as_ref() != Some(ident) {
                                        extra_defs.push(ident.clone());
                                    }
                                }
                                defs = first;
                            }
                        }
                        if let Some(val_node) = child_value {
                            let mut idents = Vec::new();
                            let mut paths = Vec::new();
                            collect_idents_with_paths(val_node, code, &mut idents, &mut paths);
                            uses.extend(paths);
                            uses.extend(idents);
                            uses.extend(extract_rust_format_macro_named_idents_in(val_node, code));
                            if !pattern_indices.is_empty() && rhs_array_elements.is_empty() {
                                rhs_array_elements = collect_rhs_array_literal_elements(
                                    val_node,
                                    lang,
                                    code,
                                    extra_labels,
                                );
                            }
                        }
                    }
                }

                // Fallback: if still nothing found, collect all idents as uses.
                // This handles expression_statement wrappers.
                if defs.is_none() && uses.is_empty() {
                    let mut idents = Vec::new();
                    let mut paths = Vec::new();
                    collect_idents_with_paths(ast, code, &mut idents, &mut paths);
                    uses.extend(paths);
                    uses.extend(idents);
                    uses.extend(extract_rust_format_macro_named_idents_in(ast, code));
                }
            }
            (defs, uses, extra_defs, pattern_indices, rhs_array_elements)
        }

        // Plain assignment `x = y` or destructuring assignment such as
        // Python `a, b = await asyncio.gather(...)` whose LHS surfaces as
        // a `pattern_list` / `tuple_pattern`. When the LHS is a
        // destructure pattern that the indexed helper recognises, the
        // primary binding lands in `defs`, the rest land in `extra_defs`,
        // and `pattern_indices` carries source-order positions so the
        // SSA lowering's destructure-promise rewrite can paint each
        // binding from the matching combinator argument.
        Kind::Assignment => {
            let mut defs = None;
            let mut extra_defs = Vec::new();
            let mut pattern_indices: SmallVec<[usize; 4]> = SmallVec::new();
            let mut rhs_array_elements: SmallVec<[crate::cfg::RhsArraySlot; 4]> = SmallVec::new();
            let mut uses = Vec::new();
            if let Some(lhs) = ast.child_by_field_name("left") {
                let bindings = collect_array_pattern_bindings_indexed(lhs, code);
                if !bindings.is_empty() {
                    let mut iter = bindings.into_iter();
                    if let Some((first_name, first_idx)) = iter.next() {
                        defs = Some(first_name);
                        pattern_indices.push(first_idx);
                    }
                    for (name, idx) in iter {
                        extra_defs.push(name);
                        pattern_indices.push(idx);
                    }
                } else {
                    let mut idents = Vec::new();
                    let mut paths = Vec::new();
                    collect_idents_with_paths(lhs, code, &mut idents, &mut paths);
                    // Prefer dotted path (member expression) over last ident
                    defs = paths.pop().or_else(|| idents.pop());
                }
            }
            if let Some(rhs) = ast.child_by_field_name("right") {
                let mut idents = Vec::new();
                let mut paths = Vec::new();
                collect_idents_with_paths(rhs, code, &mut idents, &mut paths);
                uses.extend(paths);
                uses.extend(idents);
                uses.extend(extract_rust_format_macro_named_idents_in(rhs, code));
                // When the LHS is a recognised destructure pattern AND the
                // RHS is a bare array-literal shape, record per-element
                // idents so the SSA destructure rewrite can map each
                // binding to its specific RHS slot.
                if !pattern_indices.is_empty() {
                    rhs_array_elements =
                        collect_rhs_array_literal_elements(rhs, lang, code, extra_labels);
                }
            }
            (defs, uses, extra_defs, pattern_indices, rhs_array_elements)
        }

        // if‑let / while‑let, the `let_condition` binds a variable from
        // the value expression.  E.g. `if let Ok(cmd) = env::var("CMD")`
        // defines `cmd` and uses `env`, `var`, `CMD`.
        Kind::If | Kind::While => {
            let cond = ast.child_by_field_name("condition");
            if let Some(c) = cond
                && c.kind() == "let_condition"
            {
                let mut defs = None;
                let mut uses = Vec::new();

                if let Some(pat) = c.child_by_field_name("pattern") {
                    let mut tmp = Vec::<String>::new();
                    collect_idents(pat, code, &mut tmp);
                    // The first plain identifier in the pattern is the binding.
                    // Skip type identifiers (e.g. "Ok" in Ok(cmd)), take the
                    // last ident which is the inner binding name.
                    defs = tmp.into_iter().last();
                }
                if let Some(val) = c.child_by_field_name("value") {
                    collect_idents(val, code, &mut uses);
                }
                return (defs, uses, vec![], SmallVec::new(), SmallVec::new());
            }

            let mut idents = Vec::new();
            let mut paths = Vec::new();
            collect_idents_with_paths(ast, code, &mut idents, &mut paths);
            let mut uses = paths;
            uses.extend(idents);
            (None, uses, vec![], SmallVec::new(), SmallVec::new())
        }

        // for-in / for-of / Python `for x in iter:` ─────────────────────────
        //
        // Tree-sitter classifies these as `Kind::For` with a `left`/`right`
        // field pair (binding pattern + iterable).  Without an explicit
        // arm here, the default branch collects every ident as a `use` and
        // never registers the iteration binding as a `define`, so taint
        // entering the iterable does not propagate into the body's
        // references to the binding (`for (const [a, b] of obj) { sink(a) }`
        // would lose the flow at `a`).
        //
        // C-style `for_statement` has no `left`/`right` fields (it uses
        // `initializer`/`condition`/`increment`), so this path falls through
        // to the default-collecting behaviour for those, preserving today's
        // semantics.
        //
        // Go's `for ident := range iter` shape places the binding pattern
        // and iterable on a `range_clause` child of the `for_statement`
        // rather than as direct fields.  Without the range_clause lookup
        // below, taint from the iterable never reaches the loop binding
        // (CVE-2026-41422 daptin: `c.QueryArray("col")` loop var `project`
        // flows into `goqu.L(project)` SQL_QUERY sink).
        Kind::For => {
            let mut left = ast.child_by_field_name("left");
            let mut right = ast.child_by_field_name("right");
            if left.is_none() && right.is_none() {
                let mut cursor = ast.walk();
                for child in ast.children(&mut cursor) {
                    if child.kind() == "range_clause" {
                        left = child.child_by_field_name("left");
                        right = child.child_by_field_name("right");
                        break;
                    }
                }
            }
            if left.is_none() && right.is_none() {
                // C-style for, defer to default ident collection.
                let mut idents = Vec::new();
                let mut paths = Vec::new();
                collect_idents_with_paths(ast, code, &mut idents, &mut paths);
                let mut uses = paths;
                uses.extend(idents);
                return (None, uses, vec![], SmallVec::new(), SmallVec::new());
            }

            let mut defs: Option<String> = None;
            let mut extra_defs: Vec<String> = Vec::new();
            let mut uses: Vec<String> = Vec::new();

            if let Some(pat) = left {
                let mut idents = Vec::new();
                let mut paths = Vec::new();
                collect_idents_with_paths(pat, code, &mut idents, &mut paths);
                let first = paths.pop().or_else(|| idents.first().cloned());
                for ident in &idents {
                    if first.as_ref() != Some(ident) {
                        extra_defs.push(ident.clone());
                    }
                }
                defs = first;
            }
            if let Some(val) = right {
                let mut idents = Vec::new();
                let mut paths = Vec::new();
                collect_idents_with_paths(val, code, &mut idents, &mut paths);
                uses.extend(paths);
                uses.extend(idents);
            }
            (defs, uses, extra_defs, SmallVec::new(), SmallVec::new())
        }

        // everything else – no definition, but may read vars
        _ => {
            let mut idents = Vec::new();
            let mut paths = Vec::new();
            collect_idents_with_paths(ast, code, &mut idents, &mut paths);
            let mut uses = paths;
            uses.extend(idents);
            (None, uses, vec![], SmallVec::new(), SmallVec::new())
        }
    }
}

/// One match from [`extract_shell_array_payload_idents`].
///
/// Each value describes a single shell-execution array literal
/// (`[<shell>, <command-flag>, <payload>...]`) found at a call site.
///
/// Matches produced by [`extract_shell_array_payload_idents`] always carry a
/// non-empty `payload_idents`: an array whose payload references no
/// identifiers (a constant command string) is treated as benign and yields no
/// match at all, since no SHELL_ESCAPE finding is possible for it.
#[derive(Debug, Clone)]
pub(super) struct ShellArrayMatch {
    /// Index, among the named children of the call's `arguments` node, of the
    /// argument in which the shell-array literal was found. For an array
    /// nested in an object-literal argument this is the index of that object.
    pub arg_position: usize,
    /// Union of the identifiers and dotted paths lifted from the array's
    /// payload elements (positions 2+, i.e. everything after the shell name
    /// and its command flag), deduplicated in first-seen order.
    pub payload_idents: Vec<String>,
}

/// Find inline shell-execution array literals among the arguments of a call.
///
/// A shell-array has the shape `[<shell>, "-c", <payload>]` (POSIX shells) or
/// `[<cmd-shell>, "/c"|"/C", <payload>]` (Windows `cmd.exe`). It is looked
/// for in two places:
///   * directly as a positional argument of `call_node`, and
///   * as the value of any `pair` inside an object-literal positional
///     argument (the `container.exec({Cmd: ["bash", "-c", x]})` form).
///
/// One [`ShellArrayMatch`] is returned per detected array; the result is
/// empty when there is none, when the call has no `arguments` field, or when
/// any argument is a spread/splat (positional indexing is then unreliable).
/// Keyword / named arguments are skipped without being inspected.
///
/// Detection is deliberately narrow: element 0 must be a known shell name
/// and element 1 the matching "run this string" flag, so benign arrays like
/// `["ls", "-la"]` or `["git", "rev-parse", "HEAD"]` never match.
///
/// Payload identifiers (elements 2+) are gathered with
/// [`collect_idents_with_paths`], so template-literal interpolations
/// (`` `echo ${x}` ``), member expressions (`obj.field`) and bare idents are
/// all captured, with duplicates removed across payload elements.
pub(super) fn extract_shell_array_payload_idents(
    call_node: Node,
    code: &[u8],
) -> Vec<ShellArrayMatch> {
    let Some(arg_list) = call_node.child_by_field_name("arguments") else {
        return Vec::new();
    };
    let mut found = Vec::new();
    let mut walker = arg_list.walk();
    for (position, arg) in arg_list.named_children(&mut walker).enumerate() {
        match arg.kind() {
            // Splats break positional indexing; give up on the whole call.
            "spread_element"
            | "dictionary_splat"
            | "list_splat"
            | "splat_argument"
            | "hash_splat_argument" => return Vec::new(),
            "keyword_argument" | "named_argument" => continue,
            _ => {}
        }

        // Case 1: the argument itself is a shell-array literal.
        if let Some(payload_idents) = shell_array_payload_idents_of(arg, code) {
            found.push(ShellArrayMatch {
                arg_position: position,
                payload_idents,
            });
            continue;
        }

        // Case 2: an object literal with a shell-array among its field
        // values. The field name is not restricted to `Cmd` / `cmd`; the
        // shell shape of the array is the only gate.
        if !matches!(arg.kind(), "object" | "dictionary") {
            continue;
        }
        let mut field_walker = arg.walk();
        found.extend(
            arg.named_children(&mut field_walker)
                .filter(|entry| entry.kind() == "pair")
                .filter_map(|entry| entry.child_by_field_name("value"))
                .filter_map(|value| shell_array_payload_idents_of(unwrap_parens(value), code))
                .map(|payload_idents| ShellArrayMatch {
                    arg_position: position,
                    payload_idents,
                }),
        );
    }
    found
}

/// Inspect `node` (after stripping parentheses) as a potential shell-array.
///
/// Returns `Some(idents)` only when the node is an `array` with at least
/// three elements, element 0 is a constant string naming a known shell,
/// element 1 is a constant string holding that shell's command flag, and the
/// payload elements (positions 2+) reference at least one identifier or
/// dotted path. `idents` lists dotted paths first, then bare identifiers,
/// deduplicated in first-seen order.
///
/// Returns `None` in every other case, including a matching array whose
/// payload is entirely constant: no taint can reach a literal, so there is
/// nothing for the caller to report.
fn shell_array_payload_idents_of(node: Node, code: &[u8]) -> Option<Vec<String>> {
    let array = unwrap_parens(node);
    if array.kind() != "array" {
        return None;
    }
    // Named children only, so commas and other anonymous tokens are skipped.
    let mut walker = array.walk();
    let elements: Vec<Node> = array.named_children(&mut walker).collect();
    let [shell_node, flag_node, payload @ ..] = elements.as_slice() else {
        return None;
    };
    if payload.is_empty() {
        return None;
    }

    let shell = const_string_value(*shell_node, code)?;
    if !is_known_shell(&shell) {
        return None;
    }
    let flag = const_string_value(*flag_node, code)?;
    if !is_shell_command_flag(&shell, &flag) {
        return None;
    }

    // Gather identifiers from every payload element; constants add nothing.
    let mut idents: Vec<String> = Vec::new();
    let mut paths: Vec<String> = Vec::new();
    for element in payload {
        collect_idents_with_paths(*element, code, &mut idents, &mut paths);
    }

    // Paths before idents, keeping only the first occurrence of each name.
    let mut seen = std::collections::HashSet::new();
    let unique: Vec<String> = paths
        .into_iter()
        .chain(idents)
        .filter(|name| seen.insert(name.clone()))
        .collect();

    // An empty list means a fully static payload, reported as "no match".
    if unique.is_empty() {
        None
    } else {
        Some(unique)
    }
}

/// Return the text between the delimiters of a constant string node.
///
/// After stripping parentheses, the node must be one of `string`,
/// `string_literal`, `interpreted_string_literal`, `raw_string_literal`, or a
/// `template_string` without any `template_substitution` child. The result
/// is the node's source text with its first and last byte removed (the
/// quotes / backticks of the JS/TS forms); no escape processing is done.
///
/// Returns `None` for any other node kind, for interpolated templates, when
/// the text cannot be read, or when it is shorter than two bytes.
fn const_string_value(node: Node, code: &[u8]) -> Option<String> {
    let literal = unwrap_parens(node);
    match literal.kind() {
        "string" | "string_literal" | "interpreted_string_literal" | "raw_string_literal" => {}
        "template_string" => {
            // A template with `${...}` parts is dynamic, not a constant.
            let mut walker = literal.walk();
            let interpolated = literal
                .named_children(&mut walker)
                .any(|part| part.kind() == "template_substitution");
            if interpolated {
                return None;
            }
        }
        _ => return None,
    }

    let raw = text_of(literal, code)?;
    if raw.len() < 2 {
        return None;
    }
    Some(raw[1..raw.len() - 1].to_string())
}

/// Report whether `name` refers to a shell executable that activates the
/// shell-array detector.
///
/// Only the final path component is compared, so `/bin/bash` matches as
/// `bash` and `C:\Windows\System32\cmd.exe` matches as `cmd.exe`. Both `/`
/// and `\` are accepted as directory separators because the list includes
/// Windows command interpreters, whose paths are normally written with
/// backslashes.
///
/// The list is scoped narrowly to POSIX shells and Windows command
/// interpreters under their canonical names, so benign arrays like
/// `["ls", ...]`, `["git", ...]`, or `["python", ...]` do not match. The
/// comparison is case-sensitive.
fn is_known_shell(name: &str) -> bool {
    // `rsplit` always yields at least one item, so the fallback is only a
    // formality; the first item is the text after the last separator.
    let leaf = name.rsplit(['/', '\\']).next().unwrap_or(name);
    matches!(
        leaf,
        "bash"
            | "sh"
            | "zsh"
            | "dash"
            | "ksh"
            | "fish"
            | "ash"
            | "tcsh"
            | "csh"
            | "cmd"
            | "cmd.exe"
            | "powershell"
            | "powershell.exe"
            | "pwsh"
            | "pwsh.exe"
    )
}

/// True when `flag` is the "execute the following string as a shell command"
/// switch for the given `shell`.
///
/// * `cmd` / `cmd.exe`: `/c` or `/k`, in either case.
/// * `powershell` / `pwsh` (with or without `.exe`): `-c`, `-Command` or
///   `-EncodedCommand`, compared case-insensitively because PowerShell
///   command-line parameters are not case-sensitive.
/// * Any other shell is treated as a POSIX shell and must use exactly `-c`.
///
/// `shell` may carry a directory prefix written with `/` or `\`; only the
/// final path component decides which family applies, so
/// `C:\Windows\System32\cmd.exe` is handled as `cmd.exe`.
fn is_shell_command_flag(shell: &str, flag: &str) -> bool {
    let leaf = shell.rsplit(['/', '\\']).next().unwrap_or(shell);
    match leaf {
        "cmd" | "cmd.exe" => matches!(flag, "/c" | "/C" | "/k" | "/K"),
        "powershell" | "powershell.exe" | "pwsh" | "pwsh.exe" => {
            ["-c", "-command", "-encodedcommand"]
                .iter()
                .any(|known| flag.eq_ignore_ascii_case(known))
        }
        // POSIX shells.
        _ => flag == "-c",
    }
}