nyx/src/ast.rs

//! Tree-sitter parsing and two-pass analysis for all supported languages.
//!
//! The core type is `ParsedSource`, a thin wrapper around a parsed tree-sitter
//! tree that carries the source bytes and language. Parsing reuses a thread-local
//! [`tree_sitter::Parser`] so each worker thread keeps one live parser instance.
//!
//! ## Two-pass pipeline
//!
//! **Pass 1** (`extract_summaries_from_file`): builds the CFG, lowers to SSA,
//! and extracts a [`crate::summary::FuncSummary`] per function. Summaries
//! describe boundary behaviour: which arguments flow to sinks, which sources
//! the function reads, what taint it strips, and what it returns.
//!
//! **Pass 2** (`run_rules_on_file`): reanalyses each file with the merged
//! [`crate::summary::GlobalSummaries`] from pass 1. The taint engine runs a
//! forward dataflow worklist over SSA, resolving cross-file calls via summaries.
//!
//! Parse timeouts are tracked per-thread via [`take_last_parse_timeout_ms`]
//! so callers can surface the event as an informational diagnostic instead
//! of silently skipping the file.

#![allow(clippy::only_used_in_recursion, clippy::type_complexity)]

use crate::auth_analysis;
use crate::cfg::{Cfg, FileCfg, FuncSummaries, build_cfg, export_summaries};
use crate::cfg_analysis;
use crate::commands::scan::Diag;
use crate::errors::{NyxError, NyxResult};
use crate::evidence::{Evidence, FlowStep, SpanEvidence, StateEvidence};
use crate::labels::{
    Cap, DataLabel, LangAnalysisRules, build_lang_rules, severity_for_source_kind,
};
use crate::patterns::{FindingCategory, PatternCategory, Severity};
use crate::state;
use crate::summary::ssa_summary::SsaFuncSummary;
use crate::summary::{FuncSummary, GlobalSummaries};
use crate::symbol::Lang;
use crate::utils::config::AnalysisMode;
use crate::utils::ext::lowercase_ext;
use crate::utils::{Config, query_cache};
use petgraph::graph::NodeIndex;
use std::borrow::Cow;
use std::cell::{OnceCell, RefCell};
use std::collections::{HashMap, HashSet};
use std::ops::ControlFlow;
use std::path::Path;
use std::time::Instant;
use tree_sitter::{Language, QueryCursor, StreamingIterator};

thread_local! {
    /// One [`tree_sitter::Parser`] per thread, held in a `RefCell`.
    static PARSER: RefCell<tree_sitter::Parser> = RefCell::new(tree_sitter::Parser::new());
    /// Records the timeout budget (in ms) when a tree-sitter parse is
    /// aborted due to [`parse_timeout_ms`].  Callers that want to surface
    /// the event as a synthetic informational [`Diag`] read this slot
    /// immediately after [`ParsedSource::try_new`] returns `Ok(None)`
    /// and clear it with `take_last_parse_timeout_ms`.
    ///
    /// Starts out as `None` on every thread.
    static LAST_PARSE_TIMEOUT_MS: std::cell::Cell<Option<u64>> = const {
        std::cell::Cell::new(None)
    };
}

/// Take the parse-timeout event recorded on the current thread, if any.
///
/// Returns the stored budget (in ms) and resets the thread-local slot to
/// `None`, so a second call without an intervening timeout yields `None`.
/// The slot is documented as being set by `ParsedSource::try_new`; callers
/// use the value to build a synthetic [`Diag`] carrying an
/// [`crate::engine_notes::EngineNote::ParseTimeout`].
pub fn take_last_parse_timeout_ms() -> Option<u64> {
    LAST_PARSE_TIMEOUT_MS.with(|slot| slot.replace(None))
}

/// Build the informational diagnostic that reports a parse timeout for `path`.
///
/// The same human-readable sentence is stored both as the diag `message` and
/// as an evidence note, and an
/// [`crate::engine_notes::EngineNote::ParseTimeout`] is attached so downstream
/// tooling can tell "found nothing" apart from "parse was aborted before we
/// could look".
///
/// `timeout_ms` is clamped to `u32::MAX` for the engine note; the textual
/// message keeps the full `u64` value.
fn parse_timeout_diag(path: &Path, timeout_ms: u64) -> Diag {
    let summary =
        format!("tree-sitter parse exceeded timeout budget ({timeout_ms} ms); file skipped");
    // Saturate rather than wrap when the budget does not fit in 32 bits.
    let clamped_ms = timeout_ms.min(u64::from(u32::MAX)) as u32;

    let mut evidence = Evidence::default();
    evidence.notes.push(summary.clone());
    evidence
        .engine_notes
        .push(crate::engine_notes::EngineNote::ParseTimeout {
            timeout_ms: clamped_ms,
        });

    Diag {
        path: path.to_string_lossy().into_owned(),
        // No source position: the file was never parsed.
        line: 0,
        col: 0,
        severity: Severity::Low,
        id: "engine.parse_timeout".into(),
        category: FindingCategory::Quality,
        path_validated: false,
        guard_kind: None,
        message: Some(summary),
        labels: Vec::new(),
        confidence: None,
        evidence: Some(evidence),
        rank_score: None,
        rank_reason: None,
        suppressed: false,
        suppression: None,
        rollup: None,
        finding_id: String::new(),
        alternative_finding_ids: Vec::new(),
        stable_hash: 0,
    }
}

/// Return the parse-timeout budget, in milliseconds, from the current
/// analysis options.
///
/// Tree-sitter is generally fast, but adversarially-crafted inputs (deeply
/// ambiguous grammar constructs, pathological backtracking) can drive it into
/// slow parses; the default 10 s ceiling lets a 10 000-file scan survive even
/// if every file is hostile.  The value is configured via
/// `analysis.engine.parse_timeout_ms` in `nyx.conf` (or `--parse-timeout-ms`
/// on the CLI); `0` disables the cap.
fn parse_timeout_ms() -> u64 {
    let options = crate::utils::analysis_options::current();
    options.parse_timeout_ms
}

/// Test-only panic injection keyed on the `NYX_TEST_FORCE_PANIC_PATH` env var.
///
/// When the variable is set to a non-empty value and the lossy string form of
/// `path` contains that value as a substring, this function panics with a
/// deterministic message.  In every other case (variable unset, unreadable,
/// empty, or not matching) it returns normally.
///
/// `tests/panic_recovery_tests.rs` relies on this to exercise per-file panic
/// behaviour in the scan pipeline.  The variable is re-read on every call so
/// successive tests in the same process can toggle injection; `std::env::var`
/// is an in-memory lookup on supported platforms so the overhead is
/// negligible.
fn maybe_inject_test_panic(path: &Path) {
    let marker = match std::env::var("NYX_TEST_FORCE_PANIC_PATH") {
        Ok(value) if !value.is_empty() => value,
        // Unset, non-UTF-8, or empty: injection is disabled.
        _ => return,
    };

    let rendered = path.to_string_lossy();
    if rendered.contains(marker.as_str()) {
        panic!(
            "NYX_TEST_FORCE_PANIC_PATH injection: {} matches {:?}",
            path.display(),
            marker
        );
    }
}

/// Map a byte offset in the parsed source to a tree-sitter [`tree_sitter::Point`].
///
/// Looks up the descendant of the root node covering the empty range
/// `byte..byte` and returns that node's *start* position (not the exact
/// position of `byte` inside the node).  When no such descendant exists the
/// origin `(row 0, column 0)` is returned.
fn byte_offset_to_point(tree: &tree_sitter::Tree, byte: usize) -> tree_sitter::Point {
    match tree.root_node().descendant_for_byte_range(byte, byte) {
        Some(node) => node.start_position(),
        None => tree_sitter::Point { row: 0, column: 0 },
    }
}

use crate::utils::snippet::line_snippet as extract_line_snippet;

/// Turn a `file_rel` (relative to `scan_root` per the
/// [`normalize_namespace`] convention) back into the path string the
/// diagnostic pipeline expects.
///
/// Resolution rules, checked in order:
///
/// 1. `file_rel` is empty: single-file scans normalize every namespace to
///    `""`, so this means "the file under analysis"; the lossy string form of
///    `fallback` is returned.
/// 2. `scan_root` is `Some`: `file_rel` is joined onto the root.
/// 3. `scan_root` is `None`: there is no workspace root to resolve against,
///    so `file_rel` is returned verbatim (it may already be absolute).
fn resolve_file_rel(file_rel: &str, scan_root: Option<&Path>, fallback: &Path) -> String {
    match (file_rel.is_empty(), scan_root) {
        (true, _) => fallback.to_string_lossy().into_owned(),
        (false, Some(root)) => root.join(file_rel).to_string_lossy().into_owned(),
        (false, None) => file_rel.to_owned(),
    }
}

/// Build a [`Diag`] from a taint [`Finding`], the CFG that produced it,
/// the parsed tree (for byte→line/col conversion) and the file path.
///
/// Parameters:
///
/// * `finding`: the taint finding to render.
/// * `cfg_graph`: CFG of the body the finding was reported in; node indices
///   carried by `finding` are looked up here.
/// * `tree`: parsed tree for `path`, used to turn byte offsets into points.
/// * `path`: file under analysis (the caller side of the finding).
/// * `src`: source bytes of `path`, used for per-step line snippets.
/// * `scan_root`: workspace root used to resolve a cross-file
///   `primary_location`; `None` for rootless scans.
///
/// Returns `None` when source-sensitivity gating fully suppresses the
/// finding (the canonical case is a multi-gate `DATA_EXFIL` event whose
/// contributing source is plain user input — see the
/// `effective_caps` strip below).
///
/// `finding.source` and every flow step's `cfg_node` are looked up with
/// `node_weight`, so an index that does not belong to `cfg_graph` degrades
/// the diagnostic (the step is omitted from the rendered trace) instead of
/// panicking.  `finding.sink` is indexed directly and must be a node of
/// `cfg_graph`.
fn build_taint_diag(
    finding: &crate::taint::Finding,
    cfg_graph: &crate::cfg::Cfg,
    tree: &tree_sitter::Tree,
    path: &Path,
    src: &[u8],
    scan_root: Option<&Path>,
) -> Option<Diag> {
    let call_site_byte = cfg_graph[finding.sink].classification_span().0;
    let call_site_point = byte_offset_to_point(tree, call_site_byte);
    // `finding.source` should be a NodeIndex valid in this body's CFG, but
    // cross-body / cross-file inline analysis has historically leaked
    // callee-NodeIndex origins (see `extract_inline_return_taint`).  Guard
    // the lookup so a stray out-of-bounds index degrades the diagnostic
    // rather than panicking the worker thread.
    let source_info = cfg_graph.node_weight(finding.source);
    // The reconstructed flow path is the authoritative view of where the
    // taint started *in this body*. When present, prefer its first step's
    // CFG span over `finding.source_span`, which can be stale across
    // multi-hop cross-body remaps (e.g. JS two-level solve where a
    // callee-interior source gets its span rewritten to the enclosing
    // body's entry node). Fall back to `source_span`, then to the source
    // NodeIndex, then finally to the sink byte.
    let source_byte = finding
        .flow_steps
        .first()
        .and_then(|s| {
            cfg_graph
                .node_weight(s.cfg_node)
                .map(|i| i.classification_span().0)
        })
        .or(finding.source_span)
        .or_else(|| source_info.map(|i| i.classification_span().0))
        .unwrap_or(call_site_byte);
    let source_point = byte_offset_to_point(tree, source_byte);

    // Prefer the source CFG node's callee string when it's a call expression
    // (e.g. `os.getenv("X")`). For property-access sources like
    // `navigator.userAgent` there is no callee, fall back to the first flow
    // step's `variable` (the SSA var name, e.g. "userAgent"), then to the
    // source node's `taint.defines` / first `taint.uses` entry, before
    // finally giving up and rendering "(unknown)".
    let source_callee = source_info
        .and_then(|i| i.call.callee.as_deref())
        .map(sanitize_desc)
        .or_else(|| {
            finding
                .flow_steps
                .first()
                .and_then(|s| s.var_name.as_deref())
                .map(sanitize_desc)
        })
        .or_else(|| {
            source_info
                .and_then(|i| i.taint.defines.as_deref())
                .map(sanitize_desc)
        })
        .or_else(|| {
            source_info
                .and_then(|i| i.taint.uses.first().map(String::as_str))
                .map(sanitize_desc)
        })
        .unwrap_or_else(|| "(unknown)".into());
    // Sink-callee attribution: when the sink node is an *argument* of a call
    // (e.g. PHP `header("location: " . $_GET['x'])` — the `$_GET[...]` subscript
    // carries `callee = "$_GET"` but `outer_callee = "header"`), the enclosing
    // call is the real sink and should be displayed, not the source token.
    // `outer_callee` is only populated for nested/argument positions, so for a
    // plain call node it is None and we fall back to the node's own callee.
    let call_site_callee = cfg_graph[finding.sink]
        .call
        .outer_callee
        .as_deref()
        .or(cfg_graph[finding.sink].call.callee.as_deref())
        .map(sanitize_desc)
        .unwrap_or_else(|| "(unknown)".into());
    let kind_label = source_kind_label(finding.source_kind);

    let file_path_owned = path.to_string_lossy().into_owned();

    // Primary-location attribution: when the sink was resolved via a
    // callee summary that carried a [`SinkSite`], `finding.primary_location`
    // names the dangerous instruction inside the callee body.  Use those
    // coordinates as the diag's primary (file, line, col); otherwise fall
    // back to the caller's call-site position.
    let (primary_path, primary_line, primary_col, primary_snippet_hint) =
        if let Some(loc) = finding.primary_location.as_ref() {
            let abs = resolve_file_rel(&loc.file_rel, scan_root, path);
            if abs != file_path_owned {
                tracing::debug!(
                    caller_file = %file_path_owned,
                    primary_file = %abs,
                    primary_line = loc.line,
                    "taint finding attributed to a cross-file primary sink location",
                );
            }
            let snippet = if loc.snippet.is_empty() {
                None
            } else {
                Some(loc.snippet.clone())
            };
            (abs, loc.line as usize, loc.col as usize, snippet)
        } else {
            (
                file_path_owned.clone(),
                call_site_point.row + 1,
                call_site_point.column + 1,
                None,
            )
        };

    let short_source = crate::fmt::shorten_callee(&source_callee);
    let short_call_site = crate::fmt::shorten_callee(&call_site_callee);
    let sink_display = primary_snippet_hint
        .as_deref()
        .map(crate::fmt::shorten_callee)
        .unwrap_or_else(|| short_call_site.clone());
    let sink_label_display = if finding.primary_location.is_some() {
        format!("{call_site_callee} \u{2192} {sink_display}")
    } else {
        call_site_callee.clone()
    };

    let mut labels = vec![
        (
            "Source".into(),
            format!(
                "{source_callee} ({}:{})",
                source_point.row + 1,
                source_point.column + 1
            ),
        ),
        ("Sink".into(), sink_label_display),
    ];
    if let Some(guard) = finding.guard_kind {
        labels.push(("Path guard".into(), format!("{guard:?}")));
    }

    let mut evidence_notes = Vec::new();
    if finding.path_validated {
        evidence_notes.push("path_validated".into());
    }
    evidence_notes.push(format!("source_kind:{:?}", finding.source_kind));
    evidence_notes.push(format!("hop_count:{}", finding.hop_count));
    evidence_notes.push(format!("cap_specificity:{}", finding.cap_specificity));
    if finding.uses_summary {
        evidence_notes.push("uses_summary".into());
    }

    // Convert raw flow steps to display FlowSteps.  When the finding has a
    // primary_location distinct from the call site, the last raw step is
    // really the Call, reclassify it and append a synthetic Sink step
    // pointing at the callee-internal dangerous instruction so analysts
    // see both the call site and the final sink in the trace.
    //
    // Steps whose `cfg_node` does not resolve in this CFG (the same leaked
    // cross-body indices guarded against for `finding.source` above) are
    // omitted rather than indexed, which would panic.  Numbering is applied
    // after that filter so displayed step numbers stay contiguous; when
    // every node resolves the result is the plain 1..=n sequence.
    let mut flow_steps: Vec<FlowStep> = finding
        .flow_steps
        .iter()
        .filter_map(|raw| {
            cfg_graph
                .node_weight(raw.cfg_node)
                .map(|info| (raw, info))
        })
        .enumerate()
        .map(|(i, (raw, info))| {
            let step_byte = info.classification_span().0;
            let point = byte_offset_to_point(tree, step_byte);
            let snippet = extract_line_snippet(src, step_byte);
            let callee = info.call.callee.clone();
            let function = info.ast.enclosing_func.clone();
            FlowStep {
                step: (i + 1) as u32,
                kind: raw.op_kind.clone(),
                file: file_path_owned.clone(),
                line: (point.row + 1) as u32,
                col: (point.column + 1) as u32,
                snippet,
                variable: raw.var_name.clone(),
                callee,
                function,
                is_cross_file: false,
            }
        })
        .collect();

    if let Some(loc) = finding.primary_location.as_ref() {
        if let Some(last) = flow_steps.last_mut()
            && matches!(last.kind, crate::evidence::FlowStepKind::Sink)
        {
            last.kind = crate::evidence::FlowStepKind::Call;
        }
        let is_cross_file = primary_path != file_path_owned;
        let synthetic_snippet = if loc.snippet.is_empty() {
            None
        } else {
            Some(loc.snippet.clone())
        };
        let next_step = (flow_steps.len() + 1) as u32;
        flow_steps.push(FlowStep {
            step: next_step,
            kind: crate::evidence::FlowStepKind::Sink,
            file: primary_path.clone(),
            line: loc.line,
            col: loc.col,
            snippet: synthetic_snippet,
            variable: None,
            callee: None,
            function: None,
            is_cross_file,
        });
    }

    let sink_evidence_snippet = primary_snippet_hint.or(Some(short_call_site));

    // Resolved sink capability bits, used by deduplication to distinguish
    // sinks with different cap types on the same source line (e.g.
    // `sink_sql(x); sink_shell(x);`).
    //
    // Prefer the per-finding `effective_sink_caps` (set by the SSA dispatch
    // when receiver-type qualification, gated rules, or other late-binding
    // resolvers contribute caps that the CFG node's static labels do not
    // carry).  Fall back to the union of `Sink(cap)` labels on the CFG
    // node when the SSA dispatch did not narrow.
    let sink_caps_bits: u32 = if !finding.effective_sink_caps.is_empty() {
        finding.effective_sink_caps.bits()
    } else {
        cfg_graph[finding.sink]
            .taint
            .labels
            .iter()
            .filter_map(|l| match l {
                crate::labels::DataLabel::Sink(c) => Some(c.bits()),
                _ => None,
            })
            .fold(0u32, |acc, b| acc | b)
    };

    // Cap-specific rule-id routing.
    //
    // 1. `UNAUTHORIZED_ID`: namespace alongside the standalone `auth_analysis`
    //    subsystem's output so cross-tool aggregation lines up.
    // 2. `DATA_EXFIL`: route to `taint-data-exfiltration` so SARIF surfaces a
    //    distinct rule id from SSRF, the two share callees (e.g. `fetch`)
    //    but represent different vulnerability classes.
    //
    // Prefer the per-finding `effective_sink_caps` (set by the multi-gate
    // SSA dispatch) when populated; fall back to the union of all sink-label
    // caps on the CFG node so legacy paths that build findings without
    // setting `effective_sink_caps` still pick the right rule id.
    let mut effective_caps = if finding.effective_sink_caps.is_empty() {
        crate::labels::Cap::from_bits_truncate(sink_caps_bits)
    } else {
        finding.effective_sink_caps
    };

    // Source-sensitivity gate for `DATA_EXFIL`.  Plain attacker input echoed
    // back into an outbound request body / headers / json is not data
    // exfiltration, the user already controls the value, surfacing it as a
    // leak is noise (the canonical false-positive class for API gateways
    // and telemetry forwarders that proxy `req.body`).  A `DATA_EXFIL`
    // finding requires the contributing source to be at least `Sensitive`
    // (cookies, headers, env, db rows, file reads).  Plain user-input
    // sources have the cap stripped so the finding either drops entirely
    // or downgrades to whatever non-`DATA_EXFIL` cap also applies (e.g.
    // SSRF on the URL position of the same `fetch` call).
    if effective_caps.contains(crate::labels::Cap::DATA_EXFIL)
        && finding.source_kind.sensitivity() < crate::labels::Sensitivity::Sensitive
    {
        effective_caps.remove(crate::labels::Cap::DATA_EXFIL);
        // The multi-gate dispatch produces one finding per (source, sink-cap)
        // pair, a body-flow finding's `effective_sink_caps` is exactly the
        // cap that fired (e.g. `DATA_EXFIL`).  When that single cap is the
        // sensitivity-stripped one, the finding has no surviving rationale
        // and we drop it entirely rather than reroute it to the generic
        // `taint-unsanitised-flow` bucket (which would just re-emit the same
        // false positive under a different rule id).  Findings with a
        // multi-cap `effective_sink_caps` keep their non-DATA_EXFIL caps and
        // are routed normally below.
        if finding.effective_sink_caps == crate::labels::Cap::DATA_EXFIL {
            return None;
        }
    }

    // DATA_EXFIL routing.
    //
    // Multi-gate dispatch (JS / Go) emits one event per cap, so by this
    // point each finding's `effective_sink_caps` carries exactly one bit
    // and the simple `DATA_EXFIL && !SSRF` test routes correctly.  Flat-
    // rule paths (Java HTTP clients where type-qualified resolution
    // attaches both `SSRF` and `DATA_EXFIL` Sink labels to the same call,
    // e.g. `client.send(req)` covering both URL and body channels of the
    // request value) produce a single dual-cap event.  Disambiguate using
    // the flow path: when a body-bind verb (`.body(`, `.json(`, `.form(`,
    // `.multipart(`, `BodyPublishers`, `setEntity`, `bodyValue`, etc.)
    // appears anywhere in the SSA flow steps or the sink chain text, the
    // taint reached an outbound payload field, route to DATA_EXFIL.  When
    // no body-bind verb is on the path (Sensitive-tier source flowing
    // straight into the URL position via `.get`/`.post`/`.send`), this is
    // a real SSRF and routes to taint-unsanitised-flow regardless of
    // source sensitivity.  Source sensitivity is still required for the
    // DATA_EXFIL route, plain user input echoed into a request body is
    // not exfiltration.
    let flow_has_body_bind = {
        let body_bind_substrings = [
            ".body(",
            ".json(",
            ".form(",
            ".multipart(",
            ".bodyvalue(",
            ".setentity(",
            "bodypublishers",
            "body_string",
            "body_json",
            "body_bytes",
            "send_string",
            "send_json",
            "send_form",
            // Spring RestTemplate one-shot verbs that take a body argument
            // inline (no separate `BodyPublishers` / `setEntity` step in the
            // chain).  Method-name suffixes are unique enough that bare
            // substring matching is safe.
            "postforobject",
            "postforentity",
            "patchforobject",
        ];
        let chain_lower = call_site_callee.to_ascii_lowercase();
        let in_sink = body_bind_substrings.iter().any(|m| chain_lower.contains(m));
        // Guarded lookup: a flow step whose node is not in this CFG simply
        // contributes no callee text instead of panicking on the index.
        let in_steps = finding.flow_steps.iter().any(|step| {
            cfg_graph
                .node_weight(step.cfg_node)
                .and_then(|info| info.call.callee.as_deref())
                .map(|c| {
                    let lc = c.to_ascii_lowercase();
                    body_bind_substrings.iter().any(|m| lc.contains(m))
                })
                .unwrap_or(false)
        });
        in_sink || in_steps
    };
    // Java HTTP-client builder pattern hides the body-bind step inside a
    // builder chain whose intermediate calls collapse to `HttpRequest.build`
    // in the flow.  When the source is unambiguously credential-bearing
    // (cookies, session attributes, caught exceptions carrying stack
    // frames) and the sink fires DATA_EXFIL, treat that as exfil even
    // when no body-bind verb is visible in the flow.  Env vars stay
    // ambiguous (they often carry URL config) so they still require an
    // explicit body-bind hit on the path.
    let source_is_credential_bearing = matches!(
        finding.source_kind,
        crate::labels::SourceKind::Cookie | crate::labels::SourceKind::CaughtException
    );
    let is_data_exfil_rule = effective_caps.contains(crate::labels::Cap::DATA_EXFIL)
        && !effective_caps.contains(crate::labels::Cap::UNAUTHORIZED_ID)
        && (!effective_caps.contains(crate::labels::Cap::SSRF)
            || (finding.source_kind.sensitivity() >= crate::labels::Sensitivity::Sensitive
                && (flow_has_body_bind || source_is_credential_bearing)));

    // Cap classes whose rule id and severity come from the rule registry.
    // Order matters: the first cap present in `effective_caps` wins.  Kept
    // in one place so the rule-id and severity lookups below cannot drift.
    let registry_routed_caps = [
        crate::labels::Cap::LDAP_INJECTION,
        crate::labels::Cap::XPATH_INJECTION,
        crate::labels::Cap::HEADER_INJECTION,
        crate::labels::Cap::OPEN_REDIRECT,
        crate::labels::Cap::SSTI,
        crate::labels::Cap::XXE,
        crate::labels::Cap::PROTOTYPE_POLLUTION,
    ];
    let registry_meta = || {
        registry_routed_caps
            .iter()
            .find(|c| effective_caps.contains(**c))
            .and_then(|c| crate::labels::cap_rule_meta(*c))
    };

    // Cap-specific rule routing.  Auth-as-taint and data-exfil keep their
    // pre-existing branches so the routing rules they encode (auth-finding
    // namespace alignment; body-bind / source-sensitivity gate) stay
    // exactly as before.  New cap classes (LDAP / XPath / Header / Open
    // redirect / SSTI / XXE / Prototype pollution) route through
    // `cap_rule_meta()` so the canonical rule ids in the registry are the
    // single source of truth.  Legacy generic taint findings continue to
    // emit `taint-unsanitised-flow`.
    let diag_id = if effective_caps.contains(crate::labels::Cap::UNAUTHORIZED_ID) {
        "rs.auth.missing_ownership_check.taint".to_string()
    } else if is_data_exfil_rule {
        format!(
            "taint-data-exfiltration (source {}:{})",
            source_point.row + 1,
            source_point.column + 1
        )
    } else if let Some(meta) = registry_meta() {
        format!(
            "{} (source {}:{})",
            meta.rule_id,
            source_point.row + 1,
            source_point.column + 1
        )
    } else {
        format!(
            "taint-unsanitised-flow (source {}:{})",
            source_point.row + 1,
            source_point.column + 1
        )
    };

    // For `DATA_EXFIL` rules, look up which destination object-literal field
    // (`body` / `headers` / `json`) the tainted value reached.  Each
    // [`crate::cfg::GateFilter`] carries `destination_uses` (var names) in
    // parallel with `destination_fields` (the field each var was bound to),
    // so we walk the gate filter whose `label_caps` includes `DATA_EXFIL`
    // and match the tainted var name from the last flow step.  Falls back
    // to the first non-empty destination field on the matching filter when
    // the var-name match fails (e.g. the SSA sink event is reported on a
    // copy-propagated value whose name no longer matches the original
    // destination ident).  `None` when the sink wasn't a destination-aware
    // gate (no object literal, or non-fetch sink).
    let data_exfil_field: Option<String> = if is_data_exfil_rule {
        let last_var = finding
            .flow_steps
            .last()
            .and_then(|s| s.var_name.as_deref());
        let filters = &cfg_graph[finding.sink].call.gate_filters;
        filters
            .iter()
            .find(|f| f.label_caps.contains(crate::labels::Cap::DATA_EXFIL))
            .and_then(|f| {
                if let (Some(uses), Some(var)) = (f.destination_uses.as_ref(), last_var)
                    && let Some(idx) = uses.iter().position(|u| u == var)
                {
                    return f.destination_fields.get(idx).cloned();
                }
                f.destination_fields.first().cloned()
            })
    } else {
        None
    };

    // DATA_EXFIL severity calibration (Phase: detector ranking).
    //
    // Generic taint severity comes from `severity_for_source_kind`, which
    // maps Cookie/Header/Env to High because those sources are spicy
    // *as taint roots*.  For `DATA_EXFIL` we are scoring the leak class,
    // not the source itself: not every Sensitive-tier source is a Secret.
    // Cookies and env carry credential / session material whose leakage
    // is an immediate disclosure (Secret-tier); request headers, file
    // reads, db rows, and caught exceptions are Sensitive but not
    // automatically secret, so they downgrade to Medium.  Plain user
    // input is already stripped above by the source-sensitivity gate, so
    // the `_` arm here is reached only by Sensitive sources that are not
    // explicit secrets.
    let severity = if is_data_exfil_rule {
        match finding.source_kind {
            crate::labels::SourceKind::Cookie | crate::labels::SourceKind::EnvironmentConfig => {
                crate::patterns::Severity::High
            }
            _ => crate::patterns::Severity::Medium,
        }
    } else if let Some(meta) = registry_meta() {
        // New cap classes draw severity from the rule registry so a single
        // edit to `CAP_RULE_REGISTRY` cascades through SARIF, the dashboard,
        // and the integration suite without per-language source-kind nudges.
        meta.severity
    } else {
        severity_for_source_kind(finding.source_kind)
    };

    // DATA_EXFIL: surface the destination field in the message so analysts
    // see at a glance whether the leak reached the request body, headers,
    // or json payload.  Generic taint findings stay on the existing
    // "unsanitised … flows from … → …" template.
    let message = if is_data_exfil_rule {
        let suffix = data_exfil_field
            .as_deref()
            .map(|f| format!(" ({f} field)"))
            .unwrap_or_default();
        format!("sensitive data flows from {short_source} \u{2192} {sink_display}{suffix}")
    } else {
        format!("unsanitised {kind_label} flows from {short_source} \u{2192} {sink_display}")
    };

    let mut diag = Diag {
        path: primary_path.clone(),
        line: primary_line,
        col: primary_col,
        severity,
        id: diag_id,
        category: FindingCategory::Security,
        path_validated: finding.path_validated,
        guard_kind: finding.guard_kind.map(|k| format!("{k:?}")),
        message: Some(message),
        labels,
        confidence: None,
        evidence: Some(Evidence {
            source: Some(SpanEvidence {
                path: file_path_owned,
                line: (source_point.row + 1) as u32,
                col: (source_point.column + 1) as u32,
                kind: "source".into(),
                snippet: Some(short_source),
            }),
            sink: Some(SpanEvidence {
                path: primary_path.clone(),
                line: primary_line as u32,
                col: primary_col as u32,
                kind: "sink".into(),
                snippet: sink_evidence_snippet,
            }),
            guards: finding
                .guard_kind
                .map(|g| {
                    vec![SpanEvidence {
                        path: primary_path.clone(),
                        line: primary_line as u32,
                        col: 0,
                        kind: "guard".into(),
                        snippet: Some(format!("{g:?}")),
                    }]
                })
                .unwrap_or_default(),
            sanitizers: vec![],
            state: None,
            notes: evidence_notes,
            source_kind: Some(finding.source_kind),
            hop_count: Some(finding.hop_count),
            uses_summary: finding.uses_summary,
            cap_specificity: Some(finding.cap_specificity),
            flow_steps,
            symbolic: finding.symbolic.clone(),
            sink_caps: sink_caps_bits,
            engine_notes: finding.engine_notes.clone(),
            data_exfil_field,
            ..Default::default()
        }),
        rank_score: None,
        rank_reason: None,
        suppressed: false,
        suppression: None,
        rollup: None,
        finding_id: finding.finding_id.clone(),
        alternative_finding_ids: finding.alternative_finding_ids.to_vec(),
        stable_hash: 0,
    };

    // Post-fill explanation and confidence limiters.  Both are computed from
    // the fully-populated diag, so they run after construction and are then
    // written back into its evidence.
    let explanation = crate::evidence::generate_explanation(&diag);
    let limiters = crate::evidence::compute_confidence_limiters(&diag);
    if let Some(ref mut ev) = diag.evidence {
        ev.explanation = explanation;
        ev.confidence_limiters = limiters;
    }

    Some(diag)
}

/// Return the language slug (e.g. `"rust"`, `"javascript"`) that
/// `lang_for_path` associates with `path`, or `None` when it recognises no
/// language for that path.
///
/// Public façade for callers that only need the slug and not the
/// tree-sitter grammar, used by the debug API to look up per-language rule
/// enablement without re-parsing the file.
pub fn lang_slug_for_path(path: &Path) -> Option<&'static str> {
    let (_, slug) = lang_for_path(path)?;
    Some(slug)
}

/// Resolve a file extension to a (tree‑sitter Language, slug) pair.
fn lang_for_path(path: &Path) -> Option<(Language, &'static str)> {
    // Distinguish `.tsx` from `.ts` before normalising via `lowercase_ext` —
    // the latter merges both into the `"ts"` slug, which would lose the
    // information needed to pick the JSX-aware TSX grammar.  The slug returned
    // here stays `"typescript"` for both so all downstream KINDS / RULES /
    // PARAM_CONFIG entries apply uniformly.
    let raw_ext = path
        .extension()
        .and_then(|s| s.to_str())
        .map(|s| s.to_ascii_lowercase());
    if matches!(raw_ext.as_deref(), Some("tsx")) {
        return Some((
            Language::from(tree_sitter_typescript::LANGUAGE_TSX),
            "typescript",
        ));
    }
    if matches!(raw_ext.as_deref(), Some("jsx")) {
        return Some((
            Language::from(tree_sitter_javascript::LANGUAGE),
            "javascript",
        ));
    }
    match lowercase_ext(path) {
        Some("rs") => Some((Language::from(tree_sitter_rust::LANGUAGE), "rust")),
        Some("c") => Some((Language::from(tree_sitter_c::LANGUAGE), "c")),
        // Real-world C++ codebases (gRPC, rocksdb, LLVM, …) overwhelmingly
        // use `.cc` / `.cxx` / `.hpp` / `.hh` / `.h++` rather than the
        // `.cpp` synthetic-fixture extension.  Without these mappings,
        // the scanner silently skipped them.  Headers (`.h` is omitted
        // intentionally, it's also valid C and disambiguating without a
        // build system is brittle).
        Some("cpp" | "cc" | "cxx" | "c++" | "hpp" | "hxx" | "hh" | "h++") => {
            Some((Language::from(tree_sitter_cpp::LANGUAGE), "cpp"))
        }
        Some("java") => Some((Language::from(tree_sitter_java::LANGUAGE), "java")),
        Some("go") => Some((Language::from(tree_sitter_go::LANGUAGE), "go")),
        Some("php") => Some((Language::from(tree_sitter_php::LANGUAGE_PHP), "php")),
        Some("py") => Some((Language::from(tree_sitter_python::LANGUAGE), "python")),
        Some("ts") => Some((
            Language::from(tree_sitter_typescript::LANGUAGE_TYPESCRIPT),
            "typescript",
        )),
        Some("js") => Some((
            Language::from(tree_sitter_javascript::LANGUAGE),
            "javascript",
        )),
        Some("rb") => Some((Language::from(tree_sitter_ruby::LANGUAGE), "ruby")),
        _ => None,
    }
}

/// Table of every language slug the scanner can parse, each paired with the
/// (lower-case) file extensions that resolve to it.
///
/// This is the single source of truth shared with [`lang_for_path`]; the
/// `supported_extensions_resolve_to_their_slug` test asserts the two stay in
/// sync.
pub(crate) const SUPPORTED_LANGUAGE_EXTENSIONS: &[(&str, &[&str])] = &[
    ("rust", &["rs"]),
    ("c", &["c"]),
    (
        "cpp",
        &["cpp", "cc", "cxx", "c++", "hpp", "hxx", "hh", "h++"],
    ),
    ("java", &["java"]),
    ("go", &["go"]),
    ("php", &["php"]),
    ("python", &["py"]),
    ("typescript", &["ts", "tsx"]),
    ("javascript", &["js", "jsx"]),
    ("ruby", &["rb"]),
];

/// Look up the file extensions registered for a language slug.
///
/// The slug comparison ignores ASCII case.  An unknown slug yields an empty
/// slice rather than an error.
pub fn extensions_for_lang(slug: &str) -> &'static [&'static str] {
    for (name, exts) in SUPPORTED_LANGUAGE_EXTENSIONS {
        if name.eq_ignore_ascii_case(slug) {
            return *exts;
        }
    }
    &[]
}

/// Fast binary-file guard: returns `true` when strictly more than 1% of
/// `bytes` are NUL.
///
/// The ratio is tested by cross-multiplication (`nul * 100 > len`) instead of
/// integer division: dividing first truncates, so a file with e.g. 1.5% NUL
/// bytes would round down to 1 and slip past a `> 1` comparison.  Empty input
/// is never classified as binary.
fn is_binary(bytes: &[u8]) -> bool {
    let nul_count = bytes.iter().filter(|&&b| b == 0).count();
    // `saturating_mul` keeps the comparison meaningful even if the product
    // would not fit in `usize` on a narrow target.
    nul_count.saturating_mul(100) > bytes.len()
}

/// Decide whether a path names a test file.
///
/// Recognition is filename-driven (suffix, prefix, or exact-name conventions
/// for the languages the engine supports), with one directory-based rule:
/// any path component equal to `__tests__`, the JS/TS tooling convention.
///
/// Broad directory names (`test/`, `tests/`, `fixtures/`) are deliberately
/// not consulted here because they match far too much when scanning absolute
/// paths; the severity downgrade for those lives in [`is_nonprod_path`].
pub(crate) fn is_test_file(path: &Path) -> bool {
    // Filename endings that mark a test module.  Every entry carries its
    // `.<ext>` so each rule stays tied to one language's naming convention;
    // matching is on the filename only, never on class declarations.
    const SUFFIX_MARKERS: &[&str] = &[
        // JS / TS
        ".test.js",
        ".test.ts",
        ".test.jsx",
        ".test.tsx",
        ".test.mjs",
        ".test.cjs",
        ".spec.js",
        ".spec.ts",
        ".spec.jsx",
        ".spec.tsx",
        ".spec.mjs",
        ".spec.cjs",
        // Python (`pytest` and `unittest` conventions)
        "_test.py",
        "_tests.py",
        // Java (JUnit / TestNG)
        "Test.java",
        "Tests.java",
        "IT.java",
        // PHP (PHPUnit)
        "Test.php",
        // Ruby (RSpec / Minitest)
        "_spec.rb",
        "_test.rb",
        // Go
        "_test.go",
        // Rust (uncommon but used by some crates)
        "_test.rs",
        "_tests.rs",
        // C / C++ (varies; cover the common shapes)
        "_test.c",
        "_test.cc",
        "_test.cpp",
        "_test.cxx",
        "_test.h",
        "_test.hpp",
    ];

    // Leading markers, for conventions that put `test_` at the front of the
    // filename (Python `pytest`, C / C++ test runners).
    const PREFIX_MARKERS: &[&str] = &["test_"];

    // A leading marker only counts when the file also has one of these
    // endings, so e.g. `test_helper.rb` is not classified as a test.
    const PREFIX_GATED_ENDINGS: &[&str] = &[".py", ".c", ".cc", ".cpp", ".cxx"];

    // Filenames that are test infrastructure by definition
    // (`conftest.py` is the pytest fixture entry point).
    const EXACT_NAMES: &[&str] = &["conftest.py"];

    let filename_says_test = path
        .file_name()
        .and_then(|n| n.to_str())
        .is_some_and(|name| {
            let by_suffix = SUFFIX_MARKERS.iter().any(|s| name.ends_with(s));
            let by_prefix = PREFIX_MARKERS.iter().any(|p| name.starts_with(p))
                && PREFIX_GATED_ENDINGS.iter().any(|e| name.ends_with(e));
            by_suffix || by_prefix || EXACT_NAMES.contains(&name)
        });
    if filename_says_test {
        return true;
    }

    // `__tests__` (React/Jest) is specific enough to match as a directory.
    // Other test directories (`tests/`, `test/`, `spec/`) overlap with
    // production code in real projects (e.g. django apps shipping a `tests`
    // submodule inside the package), so they are handled only by
    // [`is_nonprod_path`] for severity downgrade.
    path.components()
        .any(|part| matches!(part, std::path::Component::Normal(seg) if seg == "__tests__"))
}

/// Detect bundled or minified third-party assets that the engine should not
/// analyse.  Such files come out of build tooling or ship verbatim from
/// upstream packages, so the codebase author cannot remediate findings in
/// them and reporting against them is pure noise.
///
/// Any one of the following is sufficient (all comparisons ignore ASCII case):
///   * Filename ends in `.min.js`, `.min.css`, `.bundle.js`, `.bundled.js`,
///     `.umd.js`, `.umd.min.js`, `.iife.js`, or `.iife.min.js`.
///   * A path component is `bower_components` (legacy front-end package dir).
///   * A path component is `vendor` or `vendors` AND the file has a front-end
///     asset extension (`js`, `mjs`, `cjs`, `jsx`, `ts`, `tsx`, `css`,
///     `scss`, `less`).  The extension gate keeps Go module vendoring
///     (`vendor/<pkg>/*.go`) from being suppressed.
///
/// The check is intentionally conservative: hand-authored vendored plugins
/// with no `.min` suffix that live outside `vendor/` (e.g.
/// `webapp/.../scripts/jquery-ui-plugin.js`) are still parsed, and their
/// findings go through `is_nonprod_path` for severity downgrade instead.
pub(crate) fn is_vendored_asset_path(path: &Path) -> bool {
    const BUNDLE_SUFFIXES: &[&str] = &[
        ".min.js",
        ".min.css",
        ".bundle.js",
        ".bundled.js",
        ".umd.js",
        ".umd.min.js",
        ".iife.js",
        ".iife.min.js",
    ];
    const WEB_ASSET_EXTS: &[&str] = &[
        "js", "mjs", "cjs", "jsx", "ts", "tsx", "css", "scss", "less",
    ];

    // Rule 1: the filename itself advertises a bundled/minified artefact.
    let bundled_by_name = path
        .file_name()
        .and_then(|n| n.to_str())
        .map(str::to_ascii_lowercase)
        .is_some_and(|lowered| BUNDLE_SUFFIXES.iter().any(|sfx| lowered.ends_with(sfx)));
    if bundled_by_name {
        return true;
    }

    // Rules 2 and 3: walk the UTF-8 path segments once.  `bower_components`
    // decides immediately; a vendor directory is only remembered, because it
    // still needs the extension gate below.
    let utf8_segments = path.components().filter_map(|part| match part {
        std::path::Component::Normal(os) => os.to_str(),
        _ => None,
    });
    let mut under_vendor_dir = false;
    for seg in utf8_segments {
        if seg.eq_ignore_ascii_case("bower_components") {
            return true;
        }
        under_vendor_dir |=
            seg.eq_ignore_ascii_case("vendor") || seg.eq_ignore_ascii_case("vendors");
    }
    if !under_vendor_dir {
        return false;
    }

    path.extension()
        .and_then(|e| e.to_str())
        .is_some_and(|ext| WEB_ASSET_EXTS.iter().any(|web| ext.eq_ignore_ascii_case(web)))
}

/// Report whether a pattern ID belongs to a class of findings that is
/// noise-prone inside test files (fixture credentials, non-crypto
/// randomness, weak hashes, plain HTTP in test harnesses).
fn is_test_suppressible_pattern(id: &str) -> bool {
    // Matching is by suffix so one entry covers every per-language prefix
    // (`js.`, `ts.`, `go.`, `php.`, `py.`, `rb.`, `java.`).  Each entry names
    // a finding class that is informational at best in a test module:
    // hardcoded test API tokens, weak hashes used for fast deterministic test
    // data, insecure RNG used for fixture seeding.
    const NOISY_IN_TESTS: &[&str] = &[
        ".secrets.hardcoded_secret",
        ".secrets.hardcoded_key",
        ".crypto.hardcoded_key",
        ".crypto.math_random",
        ".crypto.insecure_random",
        ".crypto.weak_digest",
        ".crypto.weak_algorithm",
        ".crypto.md5",
        ".crypto.sha1",
        ".crypto.rand",
        ".transport.fetch_http",
    ];
    NOISY_IN_TESTS.iter().any(|suffix| id.ends_with(suffix))
}

/// Decide whether a path sits in a non-production context (tests, vendor,
/// benchmarks, build scripts, and so on).  Callers use this to downgrade the
/// severity of findings in places unlikely to be real attack surface.
///
/// A path qualifies when its filename is `build.rs` or ends in `.min.js`, or
/// when any path component exactly (case-sensitively) equals one of the
/// listed directory names.
fn is_nonprod_path(path: &Path) -> bool {
    const DIR_MARKERS: &[&str] = &[
        "tests",
        "test",
        "__tests__",
        "benches",
        "benchmarks",
        "examples",
        "build",
        "scripts",
        "docs",
        "js_tests",
        "fixtures",
        "vendor",
    ];
    const FILE_MARKERS: &[&str] = &["build.rs"];

    let flagged_by_filename = path
        .file_name()
        .and_then(|n| n.to_str())
        .is_some_and(|name| FILE_MARKERS.contains(&name) || name.ends_with(".min.js"));
    if flagged_by_filename {
        return true;
    }

    // Only ordinary, UTF-8 components participate; roots, prefixes and
    // `.` / `..` are ignored.
    path.components().any(|part| match part {
        std::path::Component::Normal(os) => {
            os.to_str().is_some_and(|seg| DIR_MARKERS.contains(&seg))
        }
        _ => false,
    })
}

/// Normalize a callee description for display.
///
/// Thin alias for `crate::fmt::normalize_snippet`; the actual normalisation
/// rules are defined there, so this function adds no behaviour of its own.
fn sanitize_desc(s: &str) -> String {
    crate::fmt::normalize_snippet(s)
}

/// Human-readable label for a `SourceKind`.
///
/// Returns a short, lower-case noun phrase as a `&'static str`.  The match
/// has no wildcard arm, so adding a `SourceKind` variant is a compile error
/// here until a label is chosen for it.
fn source_kind_label(sk: crate::labels::SourceKind) -> &'static str {
    use crate::labels::SourceKind;
    match sk {
        SourceKind::UserInput => "user input",
        SourceKind::Cookie => "cookie value",
        SourceKind::Header => "request header",
        SourceKind::EnvironmentConfig => "environment config",
        SourceKind::FileSystem => "file system data",
        SourceKind::Database => "database result",
        SourceKind::CaughtException => "caught exception",
        // Fallback wording when the origin of the taint is not classified.
        SourceKind::Unknown => "tainted data",
    }
}

/// Step a severity one tier toward `Low`: `High` becomes `Medium`, while
/// `Medium` and `Low` both yield `Low` (the floor).
fn downgrade_severity(s: Severity) -> Severity {
    match s {
        Severity::High => Severity::Medium,
        Severity::Medium | Severity::Low => Severity::Low,
    }
}

//  ParsedSource + ParsedFile: shared parse/CFG pipeline

/// Level 1: parsed tree + lang info. No CFG construction.
///
/// Borrows the source bytes and path for `'a`; built by
/// `ParsedSource::try_new`.
struct ParsedSource<'a> {
    /// Tree-sitter syntax tree produced from `bytes`.
    tree: tree_sitter::Tree,
    /// Tree-sitter grammar the tree was parsed with (from `lang_for_path`).
    ts_lang: Language,
    /// Language slug paired with `ts_lang` (e.g. `"rust"`, `"typescript"`).
    lang_slug: &'static str,
    /// Raw source bytes that were parsed.
    bytes: &'a [u8],
    /// Path of the file the bytes came from.
    path: &'a Path,
    /// `path.to_string_lossy()`, computed once at construction.
    file_path_str: Cow<'a, str>,
}

impl<'a> ParsedSource<'a> {
    /// Parse bytes into a tree-sitter AST. Returns `Ok(None)` for vendored /
    /// bundled assets, binary files, unsupported languages, and parse
    /// timeouts.  File-size filtering is
    /// handled at the walker boundary via
    /// [`ScannerConfig::max_file_size_mb`]; the timeout check here defends
    /// against hostile inputs (pathological grammar ambiguities) that could
    /// tie up a worker indefinitely even for files within the size cap.
    ///
    /// # Errors
    ///
    /// Propagates a failure from `Parser::set_language`, and returns
    /// `NyxError::Other("tree-sitter failed")` when the parser yields no tree
    /// for a reason other than the timeout.
    fn try_new(bytes: &'a [u8], path: &'a Path) -> NyxResult<Option<Self>> {
        // Clear any stale parse-timeout signal from a prior `try_new` on
        // this thread that the caller did not consume.  Ensures the slot
        // always reflects "this parse" by the time we return.
        LAST_PARSE_TIMEOUT_MS.with(|c| c.set(None));
        // Cheap path/content filters first, so skipped files never touch the
        // parser.
        if is_vendored_asset_path(path) {
            return Ok(None);
        }
        if is_binary(bytes) {
            return Ok(None);
        }
        let Some((ts_lang, lang_slug)) = lang_for_path(path) else {
            return Ok(None);
        };
        let timeout_ms = parse_timeout_ms();
        let start = Instant::now();
        // Set by the progress callback; distinguishes "gave up on time" from
        // any other reason the parser might return no tree.
        let mut timed_out = false;
        let parsed = PARSER.with(|cell| -> NyxResult<Option<tree_sitter::Tree>> {
            let mut parser = cell.borrow_mut();
            // The thread-local parser is shared across files, so the grammar
            // is (re)selected on every call.
            parser.set_language(&ts_lang)?;
            // A timeout of 0 means "no timeout": parse directly without a
            // progress callback.
            if timeout_ms == 0 {
                return Ok(parser.parse(bytes, None));
            }
            let len = bytes.len();
            // Input callback: hand the parser everything from byte offset
            // `i` onward, or an empty slice once `i` reaches the end.
            let mut input = |i: usize, _pt: tree_sitter::Point| -> &[u8] {
                if i < len { &bytes[i..] } else { &[] }
            };
            // Progress callback: abort the parse once the wall-clock budget
            // is spent, recording that the abort was ours.
            let mut progress = |_state: &tree_sitter::ParseState| -> ControlFlow<()> {
                if start.elapsed().as_millis() as u64 >= timeout_ms {
                    timed_out = true;
                    ControlFlow::Break(())
                } else {
                    ControlFlow::Continue(())
                }
            };
            let options = tree_sitter::ParseOptions::new().progress_callback(&mut progress);
            Ok(parser.parse_with_options(&mut input, None, Some(options)))
        })?;
        let Some(tree) = parsed else {
            if timed_out {
                tracing::warn!(
                    file = %path.display(),
                    timeout_ms,
                    "tree-sitter parse timed out; skipping file",
                );
                // Publish the timeout on the per-thread slot so it can be
                // surfaced later instead of the file being skipped silently.
                LAST_PARSE_TIMEOUT_MS.with(|c| c.set(Some(timeout_ms)));
                return Ok(None);
            }
            return Err(NyxError::Other("tree-sitter failed".into()));
        };
        let file_path_str = path.to_string_lossy();
        Ok(Some(Self {
            tree,
            ts_lang,
            lang_slug,
            bytes,
            path,
            file_path_str,
        }))
    }

    /// Run AST pattern queries and return diagnostics.
    ///
    /// Every compiled query for this file's language is matched against the
    /// tree.  A match becomes a `Diag` located at the start of capture 0
    /// unless the rule is filtered out by `cfg.scanner.min_severity`, is a
    /// noise-prone pattern in a test file, or is rejected by one of the
    /// false-positive suppression layers (A–F) below.  The returned
    /// diagnostics are in query/match order and are not yet sorted or
    /// de-duplicated.
    fn run_ast_queries(&self, cfg: &Config) -> Vec<Diag> {
        let root = self.tree.root_node();
        let compiled = query_cache::for_lang(self.lang_slug, self.ts_lang.clone());
        let mut cursor = QueryCursor::new();
        let mut out = Vec::new();
        // Computed once per file; only consulted for noise-prone rule IDs.
        let in_test_file = is_test_file(self.path);

        for cq in compiled.iter() {
            // Skip rules that rank after the configured severity floor
            // (`finalize_diags` documents the ordering High < Medium < Low).
            if cq.meta.severity > cfg.scanner.min_severity {
                continue;
            }
            // Suppress noise-prone patterns in test files
            if in_test_file && is_test_suppressible_pattern(cq.meta.id) {
                continue;
            }
            let mut matches = cursor.matches(&cq.query, root, self.bytes);
            while let Some(m) = matches.next() {
                // Only capture index 0 is used as the reported node; matches
                // without it produce no diagnostic.
                if let Some(cap) = m.captures.iter().find(|c| c.index == 0) {
                    // Layer A: suppress Security findings on calls with all-literal args.
                    //
                    // Carve-outs for categories where the literal argument IS
                    // the bug (algorithm choice, hardcoded secret, insecure
                    // protocol scheme, unsafe config flag): suppression would
                    // silence the actual signal.  Hash algorithms picked from
                    // string literals (`MessageDigest.getInstance("MD5")`,
                    // `hashlib.md5(b"…")`) are weak regardless of caller-side
                    // data flow.
                    if cq.meta.category.finding_category() == FindingCategory::Security
                        && !matches!(
                            cq.meta.category,
                            PatternCategory::Crypto
                                | PatternCategory::Secrets
                                | PatternCategory::InsecureConfig
                                | PatternCategory::InsecureTransport
                        )
                        && is_call_all_args_literal(cap.node, self.bytes, self.lang_slug)
                    {
                        continue;
                    }
                    // Layer B: PHP `include $var` where $var is a formal parameter
                    // of the immediately enclosing function/method/closure and is
                    // not reassigned before the include.  This is the canonical
                    // PHP autoloader / scope-isolated-include shape (composer's
                    // ClassLoader, PSR-4 loaders, route-file loaders); the
                    // pattern rule is heuristic without taint and over-fires
                    // here.  A taint-aware sink check (the engine's
                    // taint-unsanitised-flow rule) still catches the case where
                    // a tainted value reaches the parameter at the call site.
                    if cq.meta.id == "php.path.include_variable"
                        && self.lang_slug == "php"
                        && is_php_include_param_passthrough(cap.node, self.bytes)
                    {
                        continue;
                    }
                    // Layer C: PHP `unserialize($x, ['allowed_classes' => [...]])`
                    // or `unserialize($x, ['allowed_classes' => false])`,
                    // PHP 7+ structural mitigation against object injection.
                    // When the call passes an `allowed_classes` option set to
                    // either `false` (no class instantiation) or an array
                    // literal of explicit class names, the deserialised data
                    // cannot construct arbitrary user classes.  Skip
                    // `allowed_classes => true` (the unsafe default) and
                    // dynamic / variable values (let those fire).
                    if cq.meta.id == "php.deser.unserialize"
                        && self.lang_slug == "php"
                        && is_php_unserialize_allowed_classes_restricted(cap.node, self.bytes)
                    {
                        continue;
                    }
                    // Layer C2: PHP `Serializable::unserialize($input)` magic
                    // method body — `public function unserialize($x) { ...
                    // unserialize($x) ... }`.  This is the legacy
                    // `Serializable` interface contract (deprecated since PHP
                    // 8.1).  PHP itself invokes the method when restoring an
                    // instance, so the body's `\unserialize($x)` call cannot
                    // be removed without breaking the interface.  The
                    // actionable signal is at the class level (the class
                    // implements Serializable — fix is to migrate to
                    // `__serialize` / `__unserialize`), not at this call
                    // site.  Genuine deserialization sinks (free-function
                    // `unserialize($_GET[..])`, helpers reading from session
                    // / cache, etc.) keep firing because they are not inside
                    // a method declaration named `unserialize` with a single
                    // formal parameter passed straight to the call.
                    if cq.meta.id == "php.deser.unserialize"
                        && self.lang_slug == "php"
                        && is_php_unserialize_magic_method_passthrough(cap.node, self.bytes)
                    {
                        continue;
                    }
                    // Layer C3: PHP `unserialize($x)` inside a PHPUnit
                    // assertion of the form
                    // `$this->assertSame(LITERAL, unserialize($x))`
                    // (or `assertEquals` / `assertNull` / static / self
                    // / parent dispatch variants).  The literal expected
                    // value bounds the unserialize result so the
                    // call-site cannot release attacker-controlled
                    // object graphs into the test process — failed
                    // assertions abort the test rather than leak side
                    // effects.  Drupal / Joomla / Nextcloud each carry
                    // tens of these `Serializable` round-trip
                    // assertions in their test trees and every firing
                    // is noise.
                    if cq.meta.id == "php.deser.unserialize"
                        && self.lang_slug == "php"
                        && is_php_unserialize_inside_phpunit_assertion(cap.node, self.bytes)
                    {
                        continue;
                    }
                    // Layer C4: Python `pickle.loads` / `yaml.load` /
                    // `shelve.open` / kindred deserialization sinks
                    // wrapped in a `unittest.TestCase` assertion whose
                    // other argument is a literal expected value (or
                    // whose verb itself constrains the result, e.g.
                    // `assertIsNone(pickle.loads(blob))`).  The
                    // assertion bounds the deser result so attacker-
                    // controlled blobs would fail loudly rather than
                    // leak side effects out of the test boundary.
                    // Mirrors the PHP Layer C3 recogniser; deferred
                    // note in `project_realrepo_*.md` flagged the same
                    // FP shape on Python test trees.
                    if matches!(
                        cq.meta.id,
                        "py.deser.pickle_loads" | "py.deser.yaml_load" | "py.deser.shelve_open"
                    ) && self.lang_slug == "python"
                        && is_python_deser_inside_unittest_assertion(cap.node, self.bytes)
                    {
                        continue;
                    }
                    // Layer C5: Ruby `Marshal.load` / `YAML.load` /
                    // `Psych.load` wrapped in a Minitest assertion
                    // (`assert_equal LIT, deser`, `assert_nil deser`,
                    // `assert deser`, `refute_equal LIT, deser`, ...) or
                    // an RSpec matcher chain (`expect(deser).to eq(LIT)`,
                    // `expect(deser).to be_nil`, `be_a(TYPE)`, ...).
                    // Same bounding semantics as the PHP / Python paths:
                    // a poisoned blob fails the assertion loudly rather
                    // than leak object-injection side effects out of
                    // the test boundary.
                    if matches!(cq.meta.id, "rb.deser.marshal_load" | "rb.deser.yaml_load")
                        && self.lang_slug == "ruby"
                        && is_ruby_deser_inside_test_assertion(cap.node, self.bytes)
                    {
                        continue;
                    }
                    // Layer D: C/C++ buffer-overflow pattern rules
                    // (`{c,cpp}.memory.strcpy`, `strcat`, `sprintf`) fire
                    // syntactically on every call regardless of argument
                    // bounds.  The pattern's stated danger ("no bounds
                    // checking on destination buffer" / "no length limit on
                    // output buffer") is only realisable when the source /
                    // format-string contributes attacker-controlled length.
                    // When the source argument is a string literal (or a
                    // ternary of two string literals), the contributed length
                    // is statically bounded, so there is no overflow vector
                    // for an attacker even if the destination buffer is
                    // mis-sized.  Same principle for `sprintf` when the
                    // format string is a literal containing no bare `%s`
                    // (only width-bounded numeric / char specifiers, or
                    // precision-bounded `%.<N>s` / `%.*s`).
                    if (self.lang_slug == "c" || self.lang_slug == "cpp")
                        && is_c_buffer_call_literal_safe(cq.meta.id, cap.node, self.bytes)
                    {
                        continue;
                    }
                    // Layer E: C++ `reinterpret_cast<T>(x)` when T is a
                    // type explicitly defined as safe by the C++ aliasing
                    // rules — byte-pointer family (`char*`, `unsigned
                    // char*`, `uint8_t*`, `std::byte*`, etc., per
                    // [basic.lval]/11), `void*`, the integer round-trip
                    // types `uintptr_t` / `intptr_t`, and the BSD-socket
                    // `sockaddr` family (POSIX intentionally type-puns
                    // `sockaddr*` <-> `sockaddr_in*` etc.).  A pattern
                    // rule cannot tell these from genuinely dangerous
                    // strict-aliasing UB casts, so it over-fires
                    // dramatically on serialization, hashing, and
                    // socket-API code where the cast is the canonical
                    // (and standard-blessed) idiom.
                    if self.lang_slug == "cpp"
                        && is_cpp_cast_target_type_safe(cq.meta.id, cap.node, self.bytes)
                    {
                        continue;
                    }
                    // Layer F: PHP `md5()` / `sha1()` flagged as weak hash
                    // functions, but used in a non-cryptographic context
                    // (ETag generation, cache-key / array-index hashing,
                    // identifier fingerprinting, deduplication).  The
                    // pattern rule cannot distinguish weak-hash crypto
                    // misuse from these idiomatic uses, so it over-fires
                    // on every `md5(...)` callsite regardless of the
                    // surrounding consuming context.  Suppress when the
                    // call's *consuming context* yields a name that
                    // matches a recognised non-cryptographic identifier
                    // pattern (variable / field / array-key / method
                    // suffix).  Genuine weak-hash crypto misuse —
                    // `$password_hash = md5(...)`, `$signature = md5(...)`,
                    // `$tokenHash = md5(...)` — keeps firing because the
                    // name contains an excluded crypto-keyword substring.
                    if (cq.meta.id == "php.crypto.md5" || cq.meta.id == "php.crypto.sha1")
                        && self.lang_slug == "php"
                        && is_php_weak_hash_non_crypto_use(cap.node, self.bytes)
                    {
                        continue;
                    }
                    // Tree-sitter rows/columns are zero-based; diagnostics
                    // are reported one-based, hence the `+ 1` below.
                    let point = cap.node.start_position();
                    out.push(Diag {
                        path: self.path.to_string_lossy().into_owned(),
                        line: point.row + 1,
                        col: point.column + 1,
                        severity: cq.meta.severity,
                        id: cq.meta.id.to_owned(),
                        category: cq.meta.category.finding_category(),
                        path_validated: false,
                        guard_kind: None,
                        message: Some(cq.meta.description.to_owned()),
                        labels: vec![],
                        confidence: Some(cq.meta.confidence),
                        // Pattern findings carry a sink span only: there is
                        // no source, guard, or sanitizer evidence here.
                        evidence: Some(Evidence {
                            source: None,
                            sink: Some(SpanEvidence {
                                path: self.path.to_string_lossy().into_owned(),
                                line: (point.row + 1) as u32,
                                col: (point.column + 1) as u32,
                                kind: "sink".into(),
                                snippet: None,
                            }),
                            guards: vec![],
                            sanitizers: vec![],
                            state: None,
                            notes: vec![],
                            ..Default::default()
                        }),
                        rank_score: None,
                        rank_reason: None,
                        suppressed: false,
                        suppression: None,
                        rollup: None,
                        finding_id: String::new(),
                        alternative_finding_ids: Vec::new(),
                        stable_hash: 0,
                    });
                }
            }
        }
        out
    }

    /// Canonicalise one file's diagnostics: order them, drop duplicates, and
    /// (unless `cfg.scanner.include_nonprod` is set) soften every severity by
    /// one tier when the file lives on a non-production path.
    ///
    /// Duplicates are identified by `(line, col, id)`, mirroring the `issues`
    /// table PRIMARY KEY `(file_id, rule_id, line, col)`; severity is NOT
    /// part of that key.  Two diags agreeing on (line, col, id) but differing
    /// in severity (e.g. a pattern-rule finding plus a taint-pipeline finding
    /// on the same call) would otherwise both survive and crash the indexer
    /// with a UNIQUE constraint violation.  Severity is the last sort key, in
    /// ascending order (Severity::High < Medium < Low), so the first element
    /// of each duplicate run, which is the one `dedup_by` keeps, carries the
    /// highest severity.
    fn finalize_diags(&self, out: &mut Vec<Diag>, cfg: &Config) {
        out.sort_by(|lhs, rhs| {
            lhs.line
                .cmp(&rhs.line)
                .then_with(|| lhs.col.cmp(&rhs.col))
                .then_with(|| lhs.id.cmp(&rhs.id))
                .then_with(|| lhs.severity.cmp(&rhs.severity))
        });
        // `dedup_by` passes (later, earlier) and removes `later` on `true`.
        out.dedup_by(|later, earlier| {
            later.line == earlier.line && later.col == earlier.col && later.id == earlier.id
        });

        if cfg.scanner.include_nonprod || !is_nonprod_path(self.path) {
            return;
        }
        for diag in out.iter_mut() {
            diag.severity = downgrade_severity(diag.severity);
        }
    }
}

/// Level 2: adds CFG graph, summaries, lang rules on top of ParsedSource.
///
/// Constructed by `ParsedFile::from_source`, which consumes the
/// `ParsedSource` and builds the language rules for its slug.
struct ParsedFile<'a> {
    /// The level-1 parse result (tree, grammar, bytes, path) this file wraps.
    source: ParsedSource<'a>,
    /// File-level CFG data (`crate::cfg::FileCfg`).
    file_cfg: FileCfg,
    /// Analysis rules for `source.lang_slug`; `from_source` augments them
    /// with framework rules detected from in-file imports.
    lang_rules: LangAnalysisRules,
    // NOTE(review): presumably records whether any language rules exist for
    // this file's language — confirm where `from_source` sets it.
    has_lang_rules: bool,
    /// Per-body SSA + const-prop + type-fact cache, lazily populated on first
    /// request and indexed by `BodyId.0`.  Was being recomputed 2-3× per body
    /// across `run_cfg_analyses_with_lowered` (cfg analyses + state analyses)
    /// and `run_auth_analyses` (`collect_file_var_types`); on the gin profile
    /// `build_body_const_facts` accounted for 13.6% of wall-clock and a
    /// single-pass cache collapses that to ~4.5%.
    body_const_facts_cache: OnceCell<Vec<Option<cfg_analysis::BodyConstFacts>>>,
}

impl<'a> ParsedFile<'a> {
    /// Build the file-level analysis state (language rules + CFG) for an
    /// already-parsed source.
    ///
    /// Steps, in order:
    /// 1. Load the language rules for `source.lang_slug` from `cfg`.
    /// 2. Add framework-conditional rules for frameworks detected from the
    ///    file's own contents that are not yet active.
    /// 3. Build the `FileCfg`, passing the rules only when they are non-empty.
    /// 4. When `cfg` carries a module graph, resolve the file's imports
    ///    against it and record them on both the graph and the `FileCfg`.
    fn from_source(source: ParsedSource<'a>, cfg: &Config) -> Self {
        let slug = source.lang_slug;
        let mut lang_rules = build_lang_rules(cfg, slug);

        // A single-file scan rarely sits next to a package.json, so the
        // project-level `FrameworkContext` can miss frameworks the file
        // plainly imports (e.g. `import fastify from 'fastify'`).  Pick up
        // the framework-conditional rules keyed off in-file import
        // specifiers.  Frameworks already activated by the manifest pass are
        // skipped, which keeps this step idempotent.
        let mut absent = Vec::new();
        for fw in crate::utils::project::detect_in_file_frameworks(source.bytes, slug) {
            if !lang_rules.frameworks.contains(&fw) {
                absent.push(fw);
            }
        }
        if !absent.is_empty() {
            let aug_ctx = crate::utils::project::FrameworkContext {
                frameworks: absent.clone(),
                inspected_langs: std::collections::HashSet::new(),
            };
            let conditional_rules = crate::labels::framework_rules_for_lang_pub(slug, &aug_ctx);
            lang_rules.extra_labels.extend(conditional_rules);
            lang_rules.frameworks.extend(absent);
        }

        // The rules only count as "present" when at least one of the three
        // rule lists has content.
        let has_lang_rules = !(lang_rules.extra_labels.is_empty()
            && lang_rules.terminators.is_empty()
            && lang_rules.event_handlers.is_empty());
        let mut file_cfg = build_cfg(
            &source.tree,
            source.bytes,
            slug,
            &source.file_path_str,
            has_lang_rules.then_some(&lang_rules),
        );

        // Phase 04: if the scan paths produced a project ModuleGraph, resolve
        // this file's imports against it and store the bindings twice: on
        // the FileCfg for local consumers, and on the global per-file
        // ImportTable for cross-file lookups in phases 05/09/10.  This is a
        // no-op for non-JS/TS files and for direct callers of
        // `analyse_file_fused` whose `Config` has no resolver (e.g. unit
        // tests).
        if let Some(graph) = cfg.module_graph.as_deref() {
            let resolved = crate::resolve::extract_resolved_imports(
                &source.tree,
                source.bytes,
                source.path,
                graph,
                slug,
            );
            if !resolved.is_empty() {
                graph.record_imports_for_file(source.path.to_path_buf(), resolved.clone());
                file_cfg.resolved_imports = resolved;
            }
        }

        Self {
            source,
            file_cfg,
            lang_rules,
            has_lang_rules,
            body_const_facts_cache: OnceCell::new(),
        }
    }

    /// Const facts for every body of this file, built on the first call and
    /// served from the cache afterwards.
    ///
    /// Entries follow the order of `file_cfg.bodies`; callers index the
    /// slice by `BodyId.0`.  A body whose facts could not be built is `None`.
    fn body_const_facts_all(&self) -> &[Option<cfg_analysis::BodyConstFacts>] {
        let build_all = || {
            // An unrecognised slug falls back to `Lang::Rust`.
            let lang = Lang::from_slug(self.source.lang_slug).unwrap_or(Lang::Rust);
            let mut facts = Vec::new();
            for body in self.file_cfg.bodies.iter() {
                facts.push(cfg_analysis::build_body_const_facts(body, lang));
            }
            facts
        };
        self.body_const_facts_cache.get_or_init(build_all).as_slice()
    }

    /// Cached const facts for one body, found by using the body's
    /// `BodyId.0` as an index into the per-file cache.
    ///
    /// Returns `None` when the index is out of range or when no facts were
    /// produced for that body.
    fn body_const_facts(
        &self,
        body: &crate::cfg::BodyCfg,
    ) -> Option<&cfg_analysis::BodyConstFacts> {
        let slot = body.meta.id.0 as usize;
        self.body_const_facts_all().get(slot)?.as_ref()
    }

    /// CFG graph of the file's top-level body (kept for backward-compatible
    /// access).
    fn cfg_graph(&self) -> &Cfg {
        let toplevel = self.file_cfg.toplevel();
        &toplevel.graph
    }

    /// Entry node of the file's top-level body.
    #[allow(dead_code)]
    fn entry(&self) -> NodeIndex {
        let toplevel = self.file_cfg.toplevel();
        toplevel.entry
    }

    fn local_summaries(&self) -> &FuncSummaries {
        &self.file_cfg.summaries
    }

    /// The language rules, or `None` when `has_lang_rules` is false.
    fn rules_ref(&self) -> Option<&LangAnalysisRules> {
        self.has_lang_rules.then_some(&self.lang_rules)
    }

    fn export_summaries(&self) -> Vec<FuncSummary> {
        self.export_summaries_with_root(None)
    }

    /// Export this file's function summaries and enrich each one with
    /// file-level metadata.
    ///
    /// On top of the plain export, every summary receives:
    /// * a copy of the file's `hierarchy_edges` (when there are any),
    /// * its `entry_kind`, looked up by the span of the matching body (when
    ///   the file has any entry kinds),
    /// * for Rust files only: the module path derived from `scan_root` plus
    ///   the file's `use` aliases and wildcard imports.
    fn export_summaries_with_root(&self, scan_root: Option<&Path>) -> Vec<FuncSummary> {
        let mut out = export_summaries(
            self.local_summaries(),
            &self.source.file_path_str,
            self.source.lang_slug,
        );

        // Every `FuncSummary` exported from this file carries a copy of the
        // file's `hierarchy_edges`, so the inheritance / impl / implements
        // relationships persist through SQLite round-trips and re-merge into
        // `crate::callgraph::TypeHierarchyIndex` at call-graph build time.
        // Cheap (one clone per summary) and strictly additive:
        // `merge_summaries` deduplicates downstream.
        let file_edges = &self.file_cfg.hierarchy_edges;
        if !file_edges.is_empty() {
            for summary in out.iter_mut() {
                summary.hierarchy_edges = file_edges.clone();
            }
        }

        // Phase 10: annotate entry-point summaries.  Each summary is matched
        // to its body span and that span is looked up in the per-file
        // `entry_kinds` table, so the tag survives SQLite round-trips and
        // cross-file consumption.
        if !self.file_cfg.entry_kinds.is_empty() {
            // (name, container, disambig) -> body span, collected from the
            // file's function bodies that carry a `func_key`.
            let mut span_of: std::collections::HashMap<
                (String, String, Option<u32>),
                (usize, usize),
            > = std::collections::HashMap::new();
            for body in self.file_cfg.function_bodies() {
                let Some(key) = body.meta.func_key.as_ref() else {
                    continue;
                };
                let identity = (key.name.clone(), key.container.clone(), key.disambig);
                span_of.insert(identity, body.meta.span);
            }
            for summary in out.iter_mut() {
                let identity = (
                    summary.name.clone(),
                    summary.container.clone(),
                    summary.disambig,
                );
                // Summaries without a matching body keep their current tag.
                if let Some(span) = span_of.get(&identity) {
                    summary.entry_kind = self.file_cfg.entry_kinds.get(span).cloned();
                }
            }
        }

        // Everything below is Rust-specific.  Non-Rust files, and files that
        // exported no summaries, leave the Rust-only fields untouched.
        if self.source.lang_slug != "rust" || out.is_empty() {
            return out;
        }

        // Derive the crate-relative module path for this file and parse every
        // top-level `use` declaration into an alias map.  This lets the call
        // graph resolve same-name functions across modules, and is cheap
        // enough to compute once per file and duplicate across its summaries.
        let module_path = crate::rust_resolve::derive_module_path(self.source.path, scan_root);
        let use_map = crate::rust_resolve::parse_rust_use_map(self.source.bytes, &self.source.tree);

        // Empty maps are stored as `None` rather than as empty collections.
        let aliases = Some(use_map.aliases).filter(|found| !found.is_empty());
        let wildcards = Some(use_map.wildcards).filter(|found| !found.is_empty());

        for summary in out.iter_mut() {
            summary.module_path = module_path.clone();
            summary.rust_use_map = aliases.clone();
            summary.rust_wildcards = wildcards.clone();
        }

        out
    }

    /// Extract the SSA summaries and the eligible callee bodies for every
    /// function in this file in a single lowering pass.
    ///
    /// Both returned vectors are keyed by canonical
    /// [`crate::symbol::FuncKey`].  The `FuncKey` identity preserves `(lang,
    /// namespace, container, name, arity, disambig, kind)`, so two same-name
    /// definitions in this file (e.g. a free `process` and a
    /// `Worker::process`, or overloads with different arities) get distinct
    /// entries instead of the later one shadowing the earlier one.
    fn extract_ssa_artifacts(
        &self,
        global_summaries: Option<&GlobalSummaries>,
        scan_root: Option<&Path>,
        module_graph: Option<&crate::resolve::ModuleGraph>,
    ) -> (
        Vec<(crate::symbol::FuncKey, SsaFuncSummary)>,
        Vec<(
            crate::symbol::FuncKey,
            crate::taint::ssa_transfer::CalleeSsaBody,
        )>,
    ) {
        // An unrecognised slug falls back to `Lang::Rust`.
        let lang = Lang::from_slug(self.source.lang_slug).unwrap_or(Lang::Rust);
        let root_text = scan_root.map(|root| root.to_string_lossy());
        let root_str = root_text.as_deref();
        let namespace =
            crate::symbol::namespace_with_package(&self.source.file_path_str, root_str, module_graph);

        // Go through the FileCfg path (the same one `analyse_file` uses at
        // taint time) so the SSA summaries stored cross-file match exactly
        // what pass 2 will resolve against: no NodeIndex-space or
        // entry-detection drift.
        let sink_locator = crate::summary::SinkSiteLocator {
            tree: &self.source.tree,
            bytes: self.source.bytes,
            file_rel: &namespace,
        };
        let (summary_map, callee_bodies) = crate::taint::extract_ssa_artifacts_from_file_cfg(
            &self.file_cfg,
            lang,
            &namespace,
            self.local_summaries(),
            global_summaries,
            Some(&sink_locator),
            root_str,
            module_graph,
        );

        let summaries: Vec<_> = summary_map.into_iter().collect();
        (summaries, callee_bodies)
    }

    /// Lower every function body of this file to SSA exactly once.
    ///
    /// [`analyse_file_fused`] uses the result twice: for the taint engine
    /// ([`run_cfg_analyses_with_lowered`]) and for the SSA artifact filter
    /// ([`build_eligible_bodies_from_lowered`]).  The previous code path
    /// lowered twice (once inside `analyse_file`, once inside
    /// `extract_ssa_artifacts_from_file_cfg`), which accounted for ~24% of
    /// the pass-2 wall-clock on the bench corpus.
    ///
    /// # Locator policy
    ///
    /// A [`crate::summary::SinkSiteLocator`] is attached so intra-file
    /// summaries record concrete sink coordinates, plus a `from_chain` flag
    /// that tells chain-hop markers apart from this body's own locator span.
    /// Pass-2 emission promotes a site into `Finding.primary_location` only
    /// when `from_chain || file_rel != caller_namespace`, see
    /// [`crate::taint::ssa_transfer::should_promote_sink_site`].
    ///
    /// * Same-file single-hop helpers keep surfacing the flow finding at the
    ///   call site: their site has `from_chain=false` and lives in the
    ///   caller's namespace, so the gate fails.
    /// * Multi-hop chains promote, because `summary_extract` sets
    ///   `from_chain=true` on every site that arrived via
    ///   `event.primary_sink_site` (the callee already pierced at least one
    ///   summary boundary to record the deepest coordinates).
    /// * Cross-file callees promote, because `file_rel` differs.
    ///
    /// This keeps the closure-capture / lambda / helper-with-internal-sink
    /// fixture shape (two findings: deep + call-site) while adding deep-line
    /// attribution on multi-hop chains that have no per-frame intermediate
    /// finding to dedup with.  See "Multi-hop intra-file sink attribution
    /// gap" in deferred.md for the design tradeoff.
    fn lower_ssa_for_fused(
        &self,
        global_summaries: Option<&GlobalSummaries>,
        scan_root: Option<&Path>,
        module_graph: Option<&crate::resolve::ModuleGraph>,
    ) -> (
        std::collections::HashMap<
            crate::symbol::FuncKey,
            crate::summary::ssa_summary::SsaFuncSummary,
        >,
        std::collections::HashMap<
            crate::symbol::FuncKey,
            crate::taint::ssa_transfer::CalleeSsaBody,
        >,
    ) {
        // An unrecognised slug falls back to `Lang::Rust`.
        let lang = Lang::from_slug(self.source.lang_slug).unwrap_or(Lang::Rust);
        let root_text = scan_root.map(|root| root.to_string_lossy());
        let root_str = root_text.as_deref();
        let namespace =
            crate::symbol::namespace_with_package(&self.source.file_path_str, root_str, module_graph);
        let sink_locator = crate::summary::SinkSiteLocator {
            tree: &self.source.tree,
            bytes: self.source.bytes,
            file_rel: &namespace,
        };
        crate::taint::lower_all_functions_from_bodies(
            &self.file_cfg,
            lang,
            &namespace,
            self.local_summaries(),
            global_summaries,
            Some(&sink_locator),
            root_str,
            module_graph,
        )
    }

    /// Run taint analysis, the CFG structural analyses, and the state-model
    /// analysis for this file.
    ///
    /// This is the standalone path: it lowers SSA itself and then delegates
    /// to [`run_cfg_analyses_with_lowered`].  Callers that already hold a
    /// pre-lowered result (today: only [`analyse_file_fused`]) should call
    /// the `_with_lowered` variant directly to avoid lowering twice.
    fn run_cfg_analyses(
        &self,
        cfg: &Config,
        global_summaries: Option<&GlobalSummaries>,
        scan_root: Option<&Path>,
    ) -> Vec<Diag> {
        // Both span sets must be cleared BEFORE lowering: probes during
        // lowering may publish path-safe-suppressed sink spans that state
        // analysis consumes, and the SSA engine may publish all-validated
        // sink spans that AST-pattern suppression consumes.  See the
        // equivalent resets in `analyse_file_fused`.
        crate::taint::ssa_transfer::reset_path_safe_suppressed_spans();
        crate::taint::ssa_transfer::reset_all_validated_spans();

        let module_graph = cfg.module_graph.as_deref();
        let lowered = self.lower_ssa_for_fused(global_summaries, scan_root, module_graph);
        let (ssa_summaries, callee_bodies) = &lowered;
        self.run_cfg_analyses_with_lowered(
            cfg,
            global_summaries,
            scan_root,
            ssa_summaries,
            callee_bodies,
        )
    }

    /// Like [`run_cfg_analyses`] but takes pre-lowered SSA summaries +
    /// callee bodies and threads them into [`taint::analyse_file_with_lowered`].
    /// Used by [`analyse_file_fused`] to share the lowering with the SSA
    /// artifact extractor.
    ///
    /// The work happens in three stages, each appending to the same output:
    /// 1. taint analysis over the whole file,
    /// 2. CFG structural analyses, once per body,
    /// 3. state-model analysis, once per body, only when
    ///    `cfg.scanner.enable_state_analysis` is set.
    ///
    /// # Arguments
    ///
    /// * `cfg` - scan configuration; supplies the module graph and the
    ///   state / auth analysis switches.
    /// * `global_summaries` - merged cross-file summaries, if available.
    /// * `scan_root` - root of the scan, used for namespace derivation and
    ///   forwarded to `build_taint_diag`.
    /// * `ssa_summaries` / `callee_bodies` - the pre-lowered SSA artifacts
    ///   for this file.
    ///
    /// # Returns
    ///
    /// The collected diagnostics in emission order.  No sorting or
    /// deduplication is done here.
    #[allow(clippy::too_many_arguments)]
    fn run_cfg_analyses_with_lowered(
        &self,
        cfg: &Config,
        global_summaries: Option<&GlobalSummaries>,
        scan_root: Option<&Path>,
        ssa_summaries: &std::collections::HashMap<
            crate::symbol::FuncKey,
            crate::summary::ssa_summary::SsaFuncSummary,
        >,
        callee_bodies: &std::collections::HashMap<
            crate::symbol::FuncKey,
            crate::taint::ssa_transfer::CalleeSsaBody,
        >,
    ) -> Vec<Diag> {
        let mut out = Vec::new();
        // An unrecognised language slug falls back to `Lang::Rust`.
        let caller_lang = Lang::from_slug(self.source.lang_slug).unwrap_or(Lang::Rust);

        // ── Taint analysis ──────────────────────────────────────────────
        tracing::debug!("Running taint analysis on: {}", self.source.path.display());
        tracing::debug!("Func summaries: {:?}", self.local_summaries());
        let scan_root_str = scan_root.map(|p| p.to_string_lossy());
        let namespace = crate::symbol::namespace_with_package(
            &self.source.file_path_str,
            scan_root_str.as_deref(),
            cfg.module_graph.as_deref(),
        );
        // Pass the extra labels only when there are any.
        let extra = if self.lang_rules.extra_labels.is_empty() {
            None
        } else {
            Some(self.lang_rules.extra_labels.as_slice())
        };
        // Phase-09 cross-package import lookup. Built per-file from the
        // resolver's verdict; consumed by `resolve_callee_full` step 0.7
        // when a flat-name lookup would otherwise miss.
        let cross_package_imports = crate::taint::build_cross_package_func_keys(
            &self.file_cfg.resolved_imports,
            scan_root_str.as_deref(),
            cfg.module_graph.as_deref(),
            caller_lang,
        );
        let cross_package_imports_ref = if cross_package_imports.is_empty() {
            None
        } else {
            Some(&cross_package_imports)
        };
        let taint_results = crate::taint::analyse_file_with_lowered(
            &self.file_cfg,
            self.local_summaries(),
            global_summaries,
            caller_lang,
            &namespace,
            &[],
            extra,
            ssa_summaries,
            callee_bodies,
            cross_package_imports_ref,
        );
        // Drain the path-safe-suppressed sink-span set published by the
        // SSA taint engine.  Used below by the state-analysis pass to
        // suppress `state-unauthed-access` on sinks the taint engine has
        // already proved cannot reach a privileged location.
        let path_safe_suppressed_spans =
            crate::taint::ssa_transfer::take_path_safe_suppressed_spans();
        for finding in &taint_results {
            let body_cfg = &self.file_cfg.body(finding.body_id).graph;

            // Suppress internal redirect taint findings: res.redirect(`/path/...`)
            // with a path-prefix argument is server-relative, not an open redirect.
            let sink_info = &body_cfg[finding.sink];
            let sink_has_ssrf = sink_info
                .taint
                .labels
                .iter()
                .any(|l| matches!(l, DataLabel::Sink(c) if c.contains(Cap::SSRF)));
            if sink_has_ssrf
                && let Some(ref callee) = sink_info.call.callee
                && (callee.ends_with("redirect") || callee.ends_with("Redirect"))
                && crate::cfg_analysis::guards::has_redirect_path_prefix(
                    self.source.bytes,
                    sink_info.ast.span,
                )
            {
                continue;
            }

            // `build_taint_diag` may decline to produce a diagnostic; such
            // findings are dropped from the output.
            if let Some(diag) = build_taint_diag(
                finding,
                body_cfg,
                &self.source.tree,
                self.source.path,
                self.source.bytes,
                scan_root,
            ) {
                out.push(diag);
            }
        }

        // ── CFG structural analyses (per body) ─────────────────────────
        // Taint counts as active when cross-file summaries were supplied or
        // when this file produced at least one taint finding.
        let taint_active = global_summaries.is_some() || !taint_results.is_empty();
        // Pre-compute, per body, the set of variable names whose
        // release / close calls live in a NESTED closure body inside
        // that body (e.g. `socket.on("close", () => ws.close())`).
        // Both the structural ResourceMisuse pass and the state-model
        // leak pass consult it to suppress findings whose cleanup is
        // registered as a callback the per-body CFG can't follow.
        // Only descendants count — sibling methods on the same class
        // don't share resource ownership.
        let closure_released_per_body =
            state::collect_closure_released_var_names(&self.file_cfg.bodies, caller_lang);
        // Shared fallback for bodies that have no entry in the map above.
        let empty_set: std::collections::HashSet<String> = std::collections::HashSet::new();
        for body in &self.file_cfg.bodies {
            // Only the taint findings that belong to this body.
            let body_taint: Vec<_> = taint_results
                .iter()
                .filter(|f| f.body_id == body.meta.id)
                .cloned()
                .collect();
            let body_const_facts = self.body_const_facts(body);
            let cfg_ctx = cfg_analysis::AnalysisContext {
                cfg: &body.graph,
                entry: body.entry,
                lang: caller_lang,
                source_bytes: self.source.bytes,
                func_summaries: self.local_summaries(),
                global_summaries,
                ssa_summaries: Some(ssa_summaries),
                taint_findings: &body_taint,
                analysis_rules: self.rules_ref(),
                taint_active,
                body_const_facts,
                type_facts: body_const_facts.map(|f| &f.type_facts),
                auth_decorators: &body.meta.auth_decorators,
                closure_released_var_names: Some(
                    closure_released_per_body
                        .get(&body.meta.id)
                        .unwrap_or(&empty_set),
                ),
                class_constant_scalars: Some(&self.file_cfg.class_constant_scalars),
            };
            for cf in cfg_analysis::run_all(&cfg_ctx) {
                // Layer C4 mirror at the CFG-emission point: Python
                // `pickle.loads` / `yaml.load` / `shelve.open` calls
                // wrapped inside a `unittest.TestCase` literal-bound
                // assertion fire `cfg-unguarded-sink` because the
                // structural rule has no taint context.  Apply the
                // same recogniser used by the AST-pattern layer so
                // both sides agree on what counts as test-bound deser.
                if cf.rule_id == "cfg-unguarded-sink"
                    && self.source.lang_slug == "python"
                    && let Some(node) = self
                        .source
                        .tree
                        .root_node()
                        .descendant_for_byte_range(cf.span.0, cf.span.1)
                    && is_python_deser_inside_unittest_assertion(node, self.source.bytes)
                {
                    continue;
                }
                // Layer C5 mirror: Ruby `Marshal.load` / `YAML.load` /
                // `Psych.load` inside Minitest / RSpec assertions also
                // fire `cfg-unguarded-sink` from the structural rule
                // (which has no taint context).  Apply the same
                // recogniser used by the AST-pattern layer so both
                // sides agree on what counts as test-bound deser.
                if cf.rule_id == "cfg-unguarded-sink"
                    && self.source.lang_slug == "ruby"
                    && let Some(node) = self
                        .source
                        .tree
                        .root_node()
                        .descendant_for_byte_range(cf.span.0, cf.span.1)
                    && is_ruby_deser_inside_test_assertion(node, self.source.bytes)
                {
                    continue;
                }
                // The diagnostic is anchored at the start of the finding's
                // span; row/column are zero-based here and reported 1-based.
                let point = byte_offset_to_point(&self.source.tree, cf.span.0);
                let cfg_confidence = Some(match cf.confidence {
                    cfg_analysis::Confidence::High => crate::evidence::Confidence::High,
                    cfg_analysis::Confidence::Medium => crate::evidence::Confidence::Medium,
                    cfg_analysis::Confidence::Low => crate::evidence::Confidence::Low,
                });
                // Carry the sink node's resolved Sink caps onto the structural
                // finding's evidence so downstream cap-classification (and the
                // eval `cap_of`) buckets `cfg-unguarded-sink` under its real cap
                // (sqli/cmdi/ssrf/…) instead of the catch-all `other`. Without
                // this every taint-less structural sink finding fell through to
                // `other`, hiding real recall (e.g. dvpwa `cur.execute` SQLi)
                // and inflating the `other` bucket. Non-sink structural findings
                // (resource-leak, auth-gap) carry no Sink label, so this is 0.
                let cf_sink_caps: u32 = cf
                    .evidence
                    .first()
                    .map(|&n| {
                        cfg_ctx.cfg[n].taint.labels.iter().fold(0u32, |acc, l| {
                            if let crate::labels::DataLabel::Sink(c) = l {
                                acc | c.bits()
                            } else {
                                acc
                            }
                        })
                    })
                    .unwrap_or(0);
                // Computed before `cf.rule_id` is moved into the `Diag` below.
                let cf_category = FindingCategory::for_structural_rule(&cf.rule_id);
                out.push(Diag {
                    path: self.source.path.to_string_lossy().into_owned(),
                    line: point.row + 1,
                    col: point.column + 1,
                    severity: cf.severity,
                    id: cf.rule_id,
                    category: cf_category,
                    path_validated: false,
                    guard_kind: None,
                    message: Some(cf.message),
                    labels: vec![],
                    confidence: cfg_confidence,
                    evidence: Some(Evidence {
                        source: None,
                        sink: Some(SpanEvidence {
                            path: self.source.path.to_string_lossy().into_owned(),
                            line: (point.row + 1) as u32,
                            col: (point.column + 1) as u32,
                            kind: "sink".into(),
                            snippet: None,
                        }),
                        sink_caps: cf_sink_caps,
                        guards: vec![],
                        sanitizers: vec![],
                        state: None,
                        notes: vec![],
                        ..Default::default()
                    }),
                    rank_score: None,
                    rank_reason: None,
                    suppressed: false,
                    suppression: None,
                    rollup: None,
                    finding_id: String::new(),
                    alternative_finding_ids: Vec::new(),
                    stable_hash: 0,
                });
            }
        } // end for body in bodies (CFG structural analyses)

        // ── State-model dataflow analysis (per body) ─────────────────────
        if cfg.scanner.enable_state_analysis {
            let resource_method_summaries =
                state::build_resource_method_summaries(&self.file_cfg.bodies, caller_lang);
            // Accumulates the state findings of every body for the
            // line-based suppression after the loop.
            let mut all_state_findings = Vec::new();
            for body in &self.file_cfg.bodies {
                // When `NYX_POINTER_ANALYSIS=1` is set, derive a
                // `var_name → PtrProxyHint` map from the body's
                // points-to facts so the proxy-acquire transfer can
                // suppress SymbolId attribution on field-aliased
                // receivers (e.g. `m := c.mu; m.Lock()`).
                let body_pointer_hints = self.body_const_facts(body).and_then(|f| {
                    f.pointer_facts
                        .as_ref()
                        .map(|pf| pf.name_proxy_hints(&f.ssa))
                });
                let state_findings = state::run_state_analysis(
                    &body.graph,
                    body.entry,
                    caller_lang,
                    self.source.bytes,
                    self.local_summaries(),
                    global_summaries,
                    cfg.scanner.enable_auth_analysis,
                    &resource_method_summaries,
                    &body.meta.auth_decorators,
                    &path_safe_suppressed_spans,
                    body_pointer_hints.as_ref(),
                    Some(
                        closure_released_per_body
                            .get(&body.meta.id)
                            .unwrap_or(&empty_set),
                    ),
                );

                for sf in &state_findings {
                    let point = byte_offset_to_point(&self.source.tree, sf.span.0);
                    // State findings are emitted without a confidence value.
                    out.push(Diag {
                        path: self.source.path.to_string_lossy().into_owned(),
                        line: point.row + 1,
                        col: point.column + 1,
                        severity: sf.severity,
                        id: sf.rule_id.clone(),
                        category: FindingCategory::for_structural_rule(&sf.rule_id),
                        path_validated: false,
                        guard_kind: None,
                        message: Some(sf.message.clone()),
                        labels: vec![],
                        confidence: None,
                        evidence: Some(Evidence {
                            source: None,
                            sink: Some(SpanEvidence {
                                path: self.source.path.to_string_lossy().into_owned(),
                                line: (point.row + 1) as u32,
                                col: (point.column + 1) as u32,
                                kind: "sink".into(),
                                snippet: None,
                            }),
                            guards: vec![],
                            sanitizers: vec![],
                            state: Some(StateEvidence {
                                machine: sf.machine.into(),
                                subject: sf.subject.clone(),
                                from_state: sf.from_state.into(),
                                to_state: sf.to_state.into(),
                            }),
                            notes: vec![],
                            ..Default::default()
                        }),
                        rank_score: None,
                        rank_reason: None,
                        suppressed: false,
                        suppression: None,
                        rollup: None,
                        finding_id: String::new(),
                        alternative_finding_ids: Vec::new(),
                        stable_hash: 0,
                    });
                }

                all_state_findings.extend(state_findings);
            } // end for body in bodies (state analysis)

            // Suppress cfg-resource-leak / cfg-auth-gap when state analysis
            // already covers the same line (state analysis is more precise).
            // `state_lines` holds 1-based line numbers, matching `Diag::line`.
            let state_lines: std::collections::HashSet<usize> = all_state_findings
                .iter()
                .map(|sf| byte_offset_to_point(&self.source.tree, sf.span.0).row + 1)
                .collect();
            if !all_state_findings.is_empty() {
                out.retain(|d| {
                    !((d.id == "cfg-resource-leak" || d.id == "cfg-auth-gap")
                        && state_lines.contains(&d.line))
                });
            }
        }

        out
    }

    /// Run the AST-backed authorization analyses, which do not need a CFG to
    /// be constructed.
    ///
    /// The SSA-derived variable types of the whole file are gathered first
    /// and handed to `run_auth_analysis`, so it can refine sink
    /// classification by receiver type (e.g. `HttpClient::send` →
    /// `OutboundNetwork`, a `HashMap::new`-bound variable → `InMemoryLocal`).
    fn run_auth_analyses(
        &self,
        cfg: &Config,
        global_summaries: Option<&GlobalSummaries>,
        scan_root: Option<&Path>,
    ) -> Vec<Diag> {
        let inferred_types = self.collect_file_var_types();
        let src = &self.source;
        auth_analysis::run_auth_analysis(
            &src.tree,
            src.bytes,
            src.lang_slug,
            src.path,
            cfg,
            inferred_types.as_ref(),
            global_summaries,
            scan_root,
        )
    }

    /// Merge the SSA type facts of every body into one per-file
    /// `var_name → TypeKind` map.
    ///
    /// Values without a variable name, without a type fact, or typed
    /// `Unknown` are ignored.  When two bodies disagree on a name's type the
    /// name is dropped for good; a missing entry is safe because the auth
    /// sink gate falls back to syntactic heuristics.
    ///
    /// Returns `None` when no body contributes a typed variable.
    fn collect_file_var_types(&self) -> Option<auth_analysis::VarTypes> {
        let mut agreed: std::collections::HashMap<String, crate::ssa::type_facts::TypeKind> =
            std::collections::HashMap::new();
        // Names seen with conflicting types; they never re-enter `agreed`.
        let mut conflicted: std::collections::HashSet<String> = std::collections::HashSet::new();

        for body in &self.file_cfg.bodies {
            let Some(facts) = self.body_const_facts(body) else {
                continue;
            };
            // Lazily pair each named SSA value with its type fact, if any.
            let typed_names = facts
                .ssa
                .value_defs
                .iter()
                .enumerate()
                .filter_map(|(idx, def)| {
                    let name = def.var_name.as_ref()?;
                    let ty = facts.type_facts.get_type(crate::ssa::SsaValue(idx as u32))?;
                    Some((name, ty))
                });
            for (name, ty) in typed_names {
                if matches!(ty, crate::ssa::type_facts::TypeKind::Unknown)
                    || conflicted.contains(name)
                {
                    continue;
                }
                match agreed.get(name).map(|seen| seen == ty) {
                    // Same type as before: nothing to record.
                    Some(true) => {}
                    // Disagreement: forget the name and blacklist it.
                    Some(false) => {
                        agreed.remove(name);
                        conflicted.insert(name.clone());
                    }
                    // First sighting of this name.
                    None => {
                        agreed.insert(name.clone(), ty.clone());
                    }
                }
            }
        }

        (!agreed.is_empty()).then_some(agreed)
    }
}

//  Pass 1: Extract function summaries (no taint analysis)

/// Extract function summaries from bytes the caller has already read.
///
/// This is the core **pass 1** implementation; callers holding the file
/// contents use it to avoid a redundant `fs::read`. When
/// `ParsedSource::try_new` yields `Ok(None)` the result is an empty list;
/// its errors are propagated unchanged.
pub fn extract_summaries_from_bytes(
    bytes: &[u8],
    path: &Path,
    cfg: &Config,
) -> NyxResult<Vec<FuncSummary>> {
    let _span = tracing::debug_span!("extract_summaries", file = %path.display()).entered();
    match ParsedSource::try_new(bytes, path)? {
        Some(source) => Ok(ParsedFile::from_source(source, cfg).export_summaries()),
        None => Ok(Vec::new()),
    }
}

/// Variant of [`extract_summaries_from_bytes`] that hands `scan_root` to
/// `export_summaries_with_root`, so Rust summaries carry their
/// crate-relative module path.
///
/// Yields an empty list when `ParsedSource::try_new` returns `Ok(None)`.
pub fn extract_summaries_from_bytes_with_root(
    bytes: &[u8],
    path: &Path,
    cfg: &Config,
    scan_root: Option<&Path>,
) -> NyxResult<Vec<FuncSummary>> {
    let _span = tracing::debug_span!("extract_summaries", file = %path.display()).entered();
    match ParsedSource::try_new(bytes, path)? {
        Some(source) => {
            Ok(ParsedFile::from_source(source, cfg).export_summaries_with_root(scan_root))
        }
        None => Ok(Vec::new()),
    }
}

/// Read `path` from disk and run [`extract_summaries_from_bytes`] on its
/// contents. I/O failures are converted into the `NyxResult` error type.
#[allow(dead_code)] // used by benchmarks and lib consumers
pub fn extract_summaries_from_file(path: &Path, cfg: &Config) -> NyxResult<Vec<FuncSummary>> {
    let contents = std::fs::read(path)?;
    extract_summaries_from_bytes(contents.as_slice(), path, cfg)
}

/// Read and parse `path`, then return the resulting [`FileCfg`] together
/// with the file's [`Lang`].
///
/// Returns `Ok(None)` when `ParsedSource::try_new` has nothing to parse
/// (binary files or unsupported languages). If the language slug is not
/// recognised by `Lang::from_slug`, the language falls back to `Lang::C`.
/// Intended for benchmarks and isolated testing of state analysis.
pub fn build_cfg_for_file(path: &Path, cfg: &Config) -> NyxResult<Option<(FileCfg, Lang)>> {
    let contents = std::fs::read(path)?;
    let built = ParsedSource::try_new(&contents, path)?.map(|source| {
        // Resolve the language before `source` is moved into the parsed file.
        let lang = Lang::from_slug(source.lang_slug).unwrap_or(Lang::C);
        let parsed = ParsedFile::from_source(source, cfg);
        (parsed.file_cfg, lang)
    });
    Ok(built)
}

/// Parse a file and return its `AuthorizationModel` for debug inspection.
///
/// Only the auth-extraction pipeline runs here: the file is parsed, but no
/// `ParsedFile` (and therefore no CFG or taint pass) is built. Outcomes:
///
/// - `Ok(None)`: `ParsedSource::try_new` had nothing to parse (binary files
///   or unsupported languages).
/// - `Ok(Some(default model))`: auth rules are disabled for this language.
/// - `Ok(Some(model))`: the extracted model.
///
/// Used by the `/api/debug/auth` route to surface the structured
/// authorization model (routes, units, sensitive operations, auth checks)
/// in the debug UI.
pub fn extract_auth_model_for_debug(
    path: &Path,
    cfg: &Config,
) -> NyxResult<Option<auth_analysis::model::AuthorizationModel>> {
    let contents = std::fs::read(path)?;
    let source = match ParsedSource::try_new(&contents, path)? {
        Some(source) => source,
        None => return Ok(None),
    };
    let rules = auth_analysis::config::build_auth_rules(cfg, source.lang_slug);
    let model = if rules.enabled {
        auth_analysis::extract::extract_authorization_model(
            source.lang_slug,
            cfg.framework_ctx.as_ref(),
            &source.tree,
            source.bytes,
            source.path,
            &rules,
            None,
        )
    } else {
        auth_analysis::model::AuthorizationModel::default()
    };
    Ok(Some(model))
}

/// Production-equivalent fused-path stage timing.
///
/// Returns `[parse+CFG, shared_lower, taint_flow, build_eligible,
///           ast_queries, suppression, auth, run_cfg_state]` in µs, plus
/// the per-substage breakdown of `shared_lower` from the thread-local
/// timers in `taint::perf_lower_timings_*`.
///
/// Mirrors `analyse_file_fused`'s control flow so each chunk is timed
/// without the double-lowering overcount that `perf_stage_breakdown`
/// suffers (the latter calls `run_cfg_analyses` and
/// `extract_ssa_artifacts` separately, both of which lower).
///
/// Returns `None` when `ParsedSource::try_new` fails or yields no source.
/// The last slot (`run_cfg_state`) is currently always `0`; that work is
/// counted inside `taint_flow`. The sub-stage array falls back to all
/// zeros when `perf_lower_timings_take` returns `None`.
#[doc(hidden)]
pub fn perf_stage_breakdown_fused(
    bytes: &[u8],
    path: &Path,
    cfg: &Config,
    global_summaries: Option<&crate::summary::GlobalSummaries>,
    scan_root: Option<&Path>,
) -> Option<([u128; 8], [u128; 7])> {
    use std::time::Instant;
    // Stage 1: parse + CFG construction. `.ok()??` collapses both a parse
    // error and an `Ok(None)` (nothing to parse) into `None`.
    let s_parse = Instant::now();
    let source = ParsedSource::try_new(bytes, path).ok()??;
    let parsed = ParsedFile::from_source(source, cfg);
    let t_parse_cfg = s_parse.elapsed().as_micros();

    // NOTE(review): presumably clears per-thread state left over from a
    // previous file and arms the lowering sub-stage timers — confirm in
    // `taint::ssa_transfer` / `taint`. Done outside any timed window.
    crate::taint::ssa_transfer::reset_path_safe_suppressed_spans();
    crate::taint::ssa_transfer::reset_all_validated_spans();
    crate::taint::perf_lower_timings_start();

    // Stage 2: shared SSA lowering; the sub-stage timers are collected
    // right after so they cover exactly this call.
    let s_lower = Instant::now();
    let (lowered_summaries, lowered_bodies) =
        parsed.lower_ssa_for_fused(global_summaries, scan_root, cfg.module_graph.as_deref());
    let t_lower = s_lower.elapsed().as_micros();
    let lower_breakdown = crate::taint::perf_lower_timings_take().unwrap_or([0; 7]);

    // Stage 3: CFG analyses reusing the already-lowered artifacts.
    let s_taint = Instant::now();
    let taint_diags = parsed.run_cfg_analyses_with_lowered(
        cfg,
        global_summaries,
        scan_root,
        &lowered_summaries,
        &lowered_bodies,
    );
    let t_taint_flow = s_taint.elapsed().as_micros();

    // Stage 4: eligible-body construction. `lowered_bodies` is moved here,
    // so this must come after the last borrow above. The result is
    // discarded; only the elapsed time matters.
    let s_eligible = Instant::now();
    let _ = crate::taint::build_eligible_bodies(&parsed.file_cfg, lowered_bodies);
    let t_eligible = s_eligible.elapsed().as_micros();

    // Stage 5: AST pattern queries.
    let s_ast = Instant::now();
    let ast_findings = parsed.source.run_ast_queries(cfg);
    let t_ast = s_ast.elapsed().as_micros();

    // Stage 6: build the suppression context and apply it to the AST
    // findings. `_filtered` exists only so the filtering cost is measured.
    let s_suppr = Instant::now();
    let suppression =
        TaintSuppressionCtx::build(&parsed.file_cfg, &parsed.source.tree, &taint_diags);
    let _filtered: Vec<_> = ast_findings
        .into_iter()
        .filter(|d| {
            !suppression.should_suppress(&d.id, d.line)
                && !suppression.is_redundant_ast_pattern(&d.id, d.line)
        })
        .collect();
    let t_suppr = s_suppr.elapsed().as_micros();

    // Stage 7: auth analyses.
    let s_auth = Instant::now();
    let _ = parsed.run_auth_analyses(cfg, global_summaries, scan_root);
    let t_auth = s_auth.elapsed().as_micros();

    // 8th slot reserved (state-analysis breakdown if needed later);
    // currently included in t_taint_flow.
    let t_state = 0u128;

    Some((
        [
            t_parse_cfg,
            t_lower,
            t_taint_flow,
            t_eligible,
            t_ast,
            t_suppr,
            t_auth,
            t_state,
        ],
        lower_breakdown,
    ))
}

/// Diagnostic stage-timing helper for the perf audit.
///
/// Runs the pass-2 stages one after another and reports how long each took,
/// in µs, as `[parse+CFG, cfg_analyses, suppression_build, ast_queries,
/// auth, ssa_artifacts]`. Returns `None` when `ParsedSource::try_new` fails
/// or yields no source (e.g. unsupported languages). Not used in
/// production; it exists so `tests/perf_breakdown.rs` can attribute time
/// inside `run_rules_on_bytes` without touching the hot path.
#[doc(hidden)]
pub fn perf_stage_breakdown(
    bytes: &[u8],
    path: &Path,
    cfg: &Config,
    global_summaries: Option<&crate::summary::GlobalSummaries>,
    scan_root: Option<&Path>,
) -> Option<[u128; 6]> {
    use std::time::Instant;

    let parse_clock = Instant::now();
    let parsed = ParsedFile::from_source(ParsedSource::try_new(bytes, path).ok()??, cfg);
    let parse_cfg_us = parse_clock.elapsed().as_micros();

    let taint_clock = Instant::now();
    let taint_diags = parsed.run_cfg_analyses(cfg, global_summaries, scan_root);
    let taint_us = taint_clock.elapsed().as_micros();

    let suppression_clock = Instant::now();
    let _ = TaintSuppressionCtx::build(&parsed.file_cfg, &parsed.source.tree, &taint_diags);
    let suppression_us = suppression_clock.elapsed().as_micros();

    // Bound to a name (not `_`) so the findings are dropped at function
    // exit rather than inside the timed window.
    let ast_clock = Instant::now();
    let _ast_findings = parsed.source.run_ast_queries(cfg);
    let ast_us = ast_clock.elapsed().as_micros();

    let auth_clock = Instant::now();
    let _ = parsed.run_auth_analyses(cfg, global_summaries, scan_root);
    let auth_us = auth_clock.elapsed().as_micros();

    let ssa_clock = Instant::now();
    let _ = parsed.extract_ssa_artifacts(global_summaries, scan_root, cfg.module_graph.as_deref());
    let ssa_us = ssa_clock.elapsed().as_micros();

    Some([
        parse_cfg_us,
        taint_us,
        suppression_us,
        ast_us,
        auth_us,
        ssa_us,
    ])
}

/// Shared pass-1 pipeline for indexed scans: parse once, build the CFG once,
/// and return every per-file artifact derived from it.
///
/// The returned tuple holds, in order:
///
/// 1. the `FuncSummary` list (exported with `scan_root`),
/// 2. the `SsaFuncSummary` entries keyed by `FuncKey`,
/// 3. the `CalleeSsaBody` entries keyed by `FuncKey`,
/// 4. the auth-check summaries keyed by `FuncKey`,
/// 5. `Some((namespace, func_keys))` when the file has resolved imports and
///    they map to at least one cross-package function key, else `None`.
///
/// When `ParsedSource::try_new` yields `Ok(None)` every list is empty and
/// the last element is `None`. Uses the same `ParsedFile` pipeline as
/// `analyse_file_fused`, no divergent extraction path.
pub fn extract_all_summaries_from_bytes(
    bytes: &[u8],
    path: &Path,
    cfg: &Config,
    scan_root: Option<&Path>,
) -> NyxResult<(
    Vec<FuncSummary>,
    Vec<(crate::symbol::FuncKey, SsaFuncSummary)>,
    Vec<(
        crate::symbol::FuncKey,
        crate::taint::ssa_transfer::CalleeSsaBody,
    )>,
    Vec<(
        crate::symbol::FuncKey,
        auth_analysis::model::AuthCheckSummary,
    )>,
    Option<(
        String,
        std::sync::Arc<HashMap<String, crate::symbol::FuncKey>>,
    )>,
)> {
    let _span = tracing::debug_span!("extract_all_summaries", file = %path.display()).entered();
    let source = match ParsedSource::try_new(bytes, path)? {
        Some(source) => source,
        None => return Ok((Vec::new(), Vec::new(), Vec::new(), Vec::new(), None)),
    };
    // Captured before `source` is moved into the parsed file.
    let lang_slug = source.lang_slug;
    let parsed = ParsedFile::from_source(source, cfg);
    let module_graph = cfg.module_graph.as_deref();

    let func_summaries = parsed.export_summaries_with_root(scan_root);
    let (ssa_summaries, ssa_bodies) = parsed.extract_ssa_artifacts(None, scan_root, module_graph);
    let auth_summaries = auth_analysis::extract_auth_summaries_by_key(
        &parsed.source.tree,
        parsed.source.bytes,
        lang_slug,
        parsed.source.path,
        cfg,
        scan_root,
    );

    let resolved_imports = &parsed.file_cfg.resolved_imports;
    let cross_package_imports = if resolved_imports.is_empty() {
        None
    } else {
        let root_str = scan_root.map(|p| p.to_string_lossy());
        let namespace = crate::symbol::namespace_with_package(
            &parsed.source.file_path_str,
            root_str.as_deref(),
            module_graph,
        );
        let caller_lang = Lang::from_slug(parsed.source.lang_slug).unwrap_or(Lang::Rust);
        let func_keys = crate::taint::build_cross_package_func_keys(
            resolved_imports,
            root_str.as_deref(),
            module_graph,
            caller_lang,
        );
        // An empty key map carries no information, so report `None` instead.
        (!func_keys.is_empty()).then(|| (namespace, std::sync::Arc::new(func_keys)))
    };

    Ok((
        func_summaries,
        ssa_summaries,
        ssa_bodies,
        auth_summaries,
        cross_package_imports,
    ))
}

//  Constant-argument suppression helper

/// Returns `true` when the captured call node has only literal arguments
/// (string, number, boolean, null/nil/none), or identifier arguments that
/// resolve to a file-level scalar constant (`const NAME = "x"` at module
/// scope and equivalent in Java / Go / Python / Rust).  Used to suppress
/// AST pattern findings on provably-constant calls like
/// `os.system(DEFAULT_CMD)` where `DEFAULT_CMD = "ls -la"`.
///
/// Conservative: returns `false` whenever the tree structure is unclear or
/// any argument is non-literal (including interpolated strings).
fn is_call_all_args_literal(node: tree_sitter::Node, bytes: &[u8], lang_slug: &str) -> bool {
    // Walk upwards from the captured node to find the closest call_expression
    // (or similar) ancestor, then locate its argument list child.
    let call_node = find_enclosing_call(node);
    let call_node = match call_node {
        Some(n) => n,
        None => return false,
    };

    // Find the argument_list / arguments child of the call node.
    let arg_list = find_arg_list(call_node);
    let arg_list = match arg_list {
        Some(n) => n,
        None => return false,
    };

    // Build the file-level scalar binding set lazily: only resolve once per
    // call, never if every arg is a syntactic literal.  Cheap: walks the
    // file root's direct children for const / module-level assignment forms.
    let scalars = file_level_scalar_bindings(node, bytes, lang_slug);

    let mut has_any_arg = false;
    for i in 0..arg_list.named_child_count() as u32 {
        let child = match arg_list.named_child(i) {
            Some(c) => c,
            None => continue,
        };
        has_any_arg = true;
        if !is_literal_or_named_scalar(child, bytes, &scalars) {
            return false;
        }
    }

    // If the argument list is empty (no args), we conservatively do NOT
    // suppress, the danger may come from side effects, not arguments.
    has_any_arg
}

/// Climb from `node` to the root of its tree and return the names of the
/// scalar bindings that `collect_class_constant_scalars` finds there for
/// `lang_slug`.  Per the original note, the set is empty for languages
/// without a recognised binding form (JS/TS, Ruby, PHP, C/C++).
fn file_level_scalar_bindings(
    node: tree_sitter::Node,
    bytes: &[u8],
    lang_slug: &str,
) -> std::collections::HashSet<String> {
    // The last element of the parent chain is the root; a node without a
    // parent is its own root.
    let root = std::iter::successors(Some(node), |n| n.parent())
        .last()
        .unwrap_or(node);
    let bindings = crate::cfg::safe_fields::collect_class_constant_scalars(root, lang_slug, bytes);
    bindings.into_keys().collect()
}

/// Superset of [`is_literal_node`]: additionally accepts identifiers whose
/// text is a member of `scalars` (the file-level constant string / number /
/// bool bindings), and wrapper / operator nodes built purely from such
/// operands.
fn is_literal_or_named_scalar(
    node: tree_sitter::Node,
    bytes: &[u8],
    scalars: &std::collections::HashSet<String>,
) -> bool {
    if is_literal_node(node, bytes) {
        return true;
    }
    match node.kind() {
        // Identifier forms vary across grammars (`variable_name` vs. bare
        // `identifier`); either way the node text must name a known scalar.
        "identifier" | "variable_name" => std::str::from_utf8(&bytes[node.byte_range()])
            .is_ok_and(|text| scalars.contains(text)),
        // An `argument` wrapper (PHP / Go) lifts a single child, and a unary
        // form over a scalar binding is still a compile-time value: in both
        // cases the verdict is that of the first named child.
        "argument" | "unary_expression" | "unary_op" => node
            .named_child(0)
            .is_some_and(|inner| is_literal_or_named_scalar(inner, bytes, scalars)),
        // Binary forms need at least two operands, all of them acceptable.
        "binary_expression" | "concatenated_string" => {
            let operand_count = node.named_child_count();
            operand_count >= 2
                && (0..operand_count as u32).all(|i| {
                    node.named_child(i)
                        .is_some_and(|operand| is_literal_or_named_scalar(operand, bytes, scalars))
                })
        }
        _ => false,
    }
}

/// Locate the call-expression-like node that the captured `node` belongs to.
///
/// The captured node may already be the call, or it may be the callee
/// identifier inside one, so the node itself and up to three of its
/// ancestors are inspected.  The search gives up (`None`) as soon as it
/// reaches a scope / statement boundary, so an unrelated outer call is
/// never returned.
fn find_enclosing_call(node: tree_sitter::Node) -> Option<tree_sitter::Node> {
    for candidate in std::iter::successors(Some(node), |n| n.parent()).take(4) {
        let kind = candidate.kind();

        let named_like_call = kind.contains("call") && !kind.contains("callee");
        // Java / PHP / C-family kinds that don't have "call" in their name
        // but represent the same call shape for arg-list inspection.
        let call_shaped = matches!(
            kind,
            "function_call_expression"
                | "method_invocation"
                | "object_creation_expression"
                | "explicit_constructor_invocation"
        );
        if named_like_call || call_shaped {
            return Some(candidate);
        }

        // Stop at scope/statement boundaries, don't cross into outer calls.
        let at_boundary = kind.contains("block")
            || kind.contains("body")
            || matches!(kind, "program" | "module" | "expression_statement");
        if at_boundary {
            return None;
        }
    }
    None
}

/// Find the argument-list child of a call node across languages.
fn find_arg_list(call: tree_sitter::Node) -> Option<tree_sitter::Node> {
    for i in 0..call.child_count() as u32 {
        if let Some(child) = call.child(i) {
            let kind = child.kind();
            // Common argument list node kinds across languages:
            // Python/JS/TS/Java/Go/C/C++/Rust: argument_list / arguments
            // PHP: arguments
            // Ruby: argument_list
            if kind == "argument_list" || kind == "arguments" || kind == "actual_parameters" {
                return Some(child);
            }
        }
    }
    None
}

/// Decide whether a tree-sitter node is a literal value.
///
/// `bytes` is only threaded through the recursive calls.
fn is_literal_node(node: tree_sitter::Node, bytes: &[u8]) -> bool {
    match node.kind() {
        // String-like nodes.  Python's `string` node also covers f-strings,
        // which carry `interpolation` children, and PHP's `encapsed_string`
        // may embed `$` variables.  A string with interpolation embeds
        // arbitrary expressions and is *not* a literal, so a sink call like
        // `cursor.execute(f"…{x}")` must not be suppressed under the
        // "all-literal args" shortcut.
        "string"
        | "string_literal"
        | "interpreted_string_literal"
        | "raw_string_literal"
        | "string_content"
        | "string_fragment"
        | "encapsed_string" => !has_interpolation(node),

        // Numeric literals, booleans, and null / nil / none.
        "integer" | "integer_literal" | "int_literal" | "float" | "float_literal" | "number"
        | "true" | "false" | "null" | "nil" | "none" | "null_literal" | "boolean"
        | "boolean_literal" => true,

        // Single-child shapes: the `argument` wrapper (PHP / Go) and unary
        // operators such as the minus in `-42`.  Literal iff the only named
        // child is.
        "argument" | "unary_expression" | "unary_op" => {
            node.named_child_count() == 1
                && node
                    .named_child(0)
                    .is_some_and(|inner| is_literal_node(inner, bytes))
        }

        // Concatenation of literals: `"a" + "b"` or `"a" . "b"`.  Needs at
        // least two operands and every one of them must be a literal.
        "binary_expression" | "concatenated_string" => {
            let operand_count = node.named_child_count();
            operand_count >= 2
                && (0..operand_count as u32).all(|i| {
                    node.named_child(i)
                        .is_some_and(|operand| is_literal_node(operand, bytes))
                })
        }

        _ => false,
    }
}

/// PHP-only: returns `true` when the captured `include_expression` node is
/// `include $var` (or `require $var`, etc.) and `$var` is a formal parameter
/// of the immediately enclosing function / method / closure / arrow function,
/// with no assignment to `$var` between the function body start and the
/// include site.  This is the canonical PHP autoloader / scope-isolated
/// `Closure::bind(static function ($file) { include $file; }, ...)` shape;
/// composer's `ClassLoader::initializeIncludeClosure`, PSR-4 loaders, and
/// route-file loaders all match this.  The pattern rule is intentionally
/// heuristic (no taint), so a parameter pass-through is the broadest
/// safe-suppression boundary; if the caller passes a tainted value, the
/// engine's separate taint-unsanitised-flow rule still fires.
fn is_php_include_param_passthrough(include_node: tree_sitter::Node, bytes: &[u8]) -> bool {
    // tree-sitter-php shape:
    //   include_expression
    //     variable_name
    //       name "<param>"
    let var_node = include_node.named_child(0);
    let Some(var_node) = var_node else {
        return false;
    };
    if var_node.kind() != "variable_name" {
        return false;
    }
    let name_node = var_node.named_child(0);
    let Some(name_node) = name_node else {
        return false;
    };
    let var_name = match std::str::from_utf8(&bytes[name_node.byte_range()]) {
        Ok(s) => s,
        Err(_) => return false,
    };

    // Walk up to the enclosing function/method/closure.
    let mut cur = include_node;
    while let Some(parent) = cur.parent() {
        match parent.kind() {
            "method_declaration"
            | "function_definition"
            | "anonymous_function"
            | "anonymous_function_creation_expression"
            | "arrow_function" => {
                let params = parent
                    .child_by_field_name("parameters")
                    .or_else(|| find_named_child_of_kind(parent, "formal_parameters"));
                let Some(params) = params else {
                    return false;
                };
                if !param_list_contains_name(params, var_name, bytes) {
                    return false;
                }
                // Reassignment guard: if the variable is reassigned inside the
                // function body before the include, the parameter-pass-through
                // assumption breaks down.
                let body = parent
                    .child_by_field_name("body")
                    .or_else(|| find_named_child_of_kind(parent, "compound_statement"));
                let body_start = body.map(|b| b.start_byte()).unwrap_or(parent.start_byte());
                if is_var_reassigned_before(
                    body.unwrap_or(parent),
                    var_name,
                    include_node.start_byte(),
                    body_start,
                    bytes,
                ) {
                    return false;
                }
                return true;
            }
            // Stop at class/program scope without a matching function, bare
            // top-level `include $var` does not benefit from this guard.
            "program" | "class_declaration" | "trait_declaration" | "interface_declaration" => {
                return false;
            }
            _ => {}
        }
        cur = parent;
    }
    false
}

fn find_named_child_of_kind<'a>(
    parent: tree_sitter::Node<'a>,
    kind: &str,
) -> Option<tree_sitter::Node<'a>> {
    for i in 0..parent.named_child_count() as u32 {
        if let Some(child) = parent.named_child(i)
            && child.kind() == kind
        {
            return Some(child);
        }
    }
    None
}

fn param_list_contains_name(params: tree_sitter::Node, target_name: &str, bytes: &[u8]) -> bool {
    for i in 0..params.named_child_count() as u32 {
        let Some(param) = params.named_child(i) else {
            continue;
        };
        if !matches!(
            param.kind(),
            "simple_parameter"
                | "variadic_parameter"
                | "property_promotion_parameter"
                | "promoted_constructor_parameter"
        ) {
            continue;
        }
        // simple_parameter has a `variable_name` child whose `name` child is the bare ident.
        let var_node = param
            .child_by_field_name("name")
            .or_else(|| find_named_child_of_kind(param, "variable_name"));
        let Some(var_node) = var_node else {
            continue;
        };
        let name_node = if var_node.kind() == "variable_name" {
            var_node.named_child(0)
        } else {
            Some(var_node)
        };
        let Some(name_node) = name_node else {
            continue;
        };
        if let Ok(name) = std::str::from_utf8(&bytes[name_node.byte_range()])
            && name == target_name
        {
            return true;
        }
    }
    false
}

/// Walk the function body looking for any `assignment_expression` whose LHS
/// names `target_name`, between `body_start` (inclusive) and `before_byte`
/// (exclusive).  Crosses nested scopes (closures inside the function are
/// rare in this idiom, and reassignment inside them wouldn't shadow the
/// outer parameter).
fn is_var_reassigned_before(
    root: tree_sitter::Node,
    target_name: &str,
    before_byte: usize,
    body_start: usize,
    bytes: &[u8],
) -> bool {
    let mut stack = vec![root];
    while let Some(node) = stack.pop() {
        if node.start_byte() >= before_byte {
            continue;
        }
        if node.end_byte() <= body_start {
            continue;
        }
        if node.kind() == "assignment_expression" {
            // LHS is the first named child (or the `left` field in newer grammars).
            let lhs = node
                .child_by_field_name("left")
                .or_else(|| node.named_child(0));
            if let Some(lhs) = lhs
                && lhs.kind() == "variable_name"
                && let Some(n) = lhs.named_child(0)
                && let Ok(s) = std::str::from_utf8(&bytes[n.byte_range()])
                && s == target_name
            {
                return true;
            }
        }
        for i in 0..node.named_child_count() as u32 {
            if let Some(c) = node.named_child(i) {
                stack.push(c);
            }
        }
    }
    false
}

/// PHP-only: returns `true` when the captured `function_call_expression`
/// node is `unserialize($x, [..., 'allowed_classes' => <ARRAY|false>, ...])`.
/// This is the canonical PHP 7+ structural mitigation against object
/// injection, explicitly restricting which classes the deserialiser may
/// instantiate.  Only suppress when the option is either:
///
///   - `'allowed_classes' => false`           (no class instantiation), or
///   - `'allowed_classes' => [Foo::class]`    (an array literal allow-list).
///
/// `'allowed_classes' => true` (the unsafe default) and dynamic values
/// (`'allowed_classes' => $opts`) leave the finding in place.
fn is_php_unserialize_allowed_classes_restricted(
    cap_node: tree_sitter::Node,
    bytes: &[u8],
) -> bool {
    // The pattern captures `@n` (the function name) at index 0, so walk up
    // to the enclosing function_call_expression.
    let call_node = if cap_node.kind() == "function_call_expression" {
        cap_node
    } else {
        let mut cur = cap_node;
        let mut found = None;
        for _ in 0..4 {
            if cur.kind() == "function_call_expression" {
                found = Some(cur);
                break;
            }
            match cur.parent() {
                Some(p) => cur = p,
                None => break,
            }
        }
        match found {
            Some(c) => c,
            None => return false,
        }
    };
    let arg_list = find_named_child_of_kind(call_node, "arguments");
    let Some(arg_list) = arg_list else {
        return false;
    };
    // arg 0 is the data; arg 1 is the options array.
    let mut args = Vec::new();
    for i in 0..arg_list.named_child_count() as u32 {
        if let Some(c) = arg_list.named_child(i)
            && c.kind() == "argument"
        {
            args.push(c);
        }
    }
    if args.len() < 2 {
        return false;
    }
    // Unwrap the `argument` wrapper to its inner expression.
    let opts = args[1].named_child(0);
    let Some(opts) = opts else { return false };
    if opts.kind() != "array_creation_expression" {
        return false;
    }
    // Walk array_element_initializer children looking for the
    // 'allowed_classes' key.
    for i in 0..opts.named_child_count() as u32 {
        let Some(elem) = opts.named_child(i) else {
            continue;
        };
        if elem.kind() != "array_element_initializer" {
            continue;
        }
        // Two named children: key, value.
        if elem.named_child_count() < 2 {
            continue;
        }
        let key = elem.named_child(0);
        let value = elem.named_child(1);
        let (Some(key), Some(value)) = (key, value) else {
            continue;
        };
        if !is_string_literal_with_text(key, "allowed_classes", bytes) {
            continue;
        }
        // Accept structural mitigation forms.  The intent signal is
        // "developer explicitly set allowed_classes to something other than
        // `true`":
        //   - boolean `false`            , no class instantiation at all
        //   - array literal              , explicit allow-list
        //   - class-constant reference   , `self::ALLOWED_CLASSES` /
        //                                    `Foo::CONSTANTS` resolved to
        //                                    a const array; engine cannot
        //                                    statically inspect, but the
        //                                    explicit option already
        //                                    distinguishes safe usage from
        //                                    the unsafe default.
        match value.kind() {
            "boolean" => {
                if let Ok(s) = std::str::from_utf8(&bytes[value.byte_range()])
                    && s.eq_ignore_ascii_case("false")
                {
                    return true;
                }
            }
            "array_creation_expression"
            | "class_constant_access_expression"
            | "scoped_property_access_expression" => return true,
            _ => {}
        }
    }
    false
}

/// PHP-only: returns `true` when the captured `function_call_expression`
/// is the canonical `Serializable::unserialize($input)` magic-method
/// pass-through — i.e. the call is inside a `method_declaration` named
/// exactly `unserialize` (PHP method names are case-insensitive) with
/// one formal parameter, and the call's single argument is the bare
/// parameter variable.
///
/// **Why this is a non-actionable site for `php.deser.unserialize`:**
/// `Serializable::unserialize($input)` is an interface contract method
/// that PHP itself invokes when restoring an instance via the runtime
/// `\unserialize($bytes)` machinery.  The implementation MUST decode
/// `$input` (the body's `\unserialize(...)` call) — there is no
/// "safer" rewrite that preserves the contract.  The actionable signal
/// is at the class level (the class implements the deprecated
/// `Serializable` interface — fix is to migrate to `__serialize` /
/// `__unserialize`), not at this call site.
///
/// Conservative recognition:
/// - method must be a `method_declaration` (NOT a free `function_definition` —
///   the magic semantics only apply to instance methods)
/// - method name == `unserialize` (case-insensitive)
/// - exactly 1 formal parameter
/// - call has exactly 1 argument
/// - argument's inner expression is a `variable_name` whose name equals the
///   formal parameter's name
///
/// Genuine deserialization sinks (free `unserialize($_GET[...])`, helpers
/// reading from session/cache and passing through, etc.) keep firing
/// because they are not inside a method declaration named `unserialize`.
fn is_php_unserialize_magic_method_passthrough(cap_node: tree_sitter::Node, bytes: &[u8]) -> bool {
    // The pattern captures `@n` (the function name); locate the enclosing
    // function_call_expression.
    let call_node = if cap_node.kind() == "function_call_expression" {
        cap_node
    } else {
        let mut cur = cap_node;
        let mut found = None;
        for _ in 0..4 {
            if cur.kind() == "function_call_expression" {
                found = Some(cur);
                break;
            }
            match cur.parent() {
                Some(p) => cur = p,
                None => break,
            }
        }
        match found {
            Some(c) => c,
            None => return false,
        }
    };

    // Walk up to the nearest method_declaration.  Stop at any other
    // function-introducing scope (free function, closure, arrow) — those
    // are not the Serializable contract.
    let mut cur = call_node;
    let method = loop {
        let Some(parent) = cur.parent() else {
            return false;
        };
        match parent.kind() {
            "method_declaration" => break parent,
            "function_definition"
            | "anonymous_function"
            | "anonymous_function_creation_expression"
            | "arrow_function"
            | "program" => return false,
            _ => {}
        }
        cur = parent;
    };

    // Method name must be exactly `unserialize` (case-insensitive).
    let Some(name_node) = method
        .child_by_field_name("name")
        .or_else(|| find_named_child_of_kind(method, "name"))
    else {
        return false;
    };
    let Ok(method_name) = std::str::from_utf8(&bytes[name_node.byte_range()]) else {
        return false;
    };
    if !method_name.eq_ignore_ascii_case("unserialize") {
        return false;
    }

    // Method must have exactly 1 formal parameter; capture its bare name.
    let Some(params) = method
        .child_by_field_name("parameters")
        .or_else(|| find_named_child_of_kind(method, "formal_parameters"))
    else {
        return false;
    };
    let mut formal_params: Vec<tree_sitter::Node> = Vec::new();
    for i in 0..params.named_child_count() as u32 {
        if let Some(p) = params.named_child(i)
            && matches!(
                p.kind(),
                "simple_parameter"
                    | "variadic_parameter"
                    | "property_promotion_parameter"
                    | "promoted_constructor_parameter"
            )
        {
            formal_params.push(p);
        }
    }
    if formal_params.len() != 1 {
        return false;
    }
    let param = formal_params[0];
    let var_node = param
        .child_by_field_name("name")
        .or_else(|| find_named_child_of_kind(param, "variable_name"));
    let Some(var_node) = var_node else {
        return false;
    };
    let inner_name_node = if var_node.kind() == "variable_name" {
        var_node.named_child(0)
    } else {
        Some(var_node)
    };
    let Some(inner_name_node) = inner_name_node else {
        return false;
    };
    let Ok(param_name) = std::str::from_utf8(&bytes[inner_name_node.byte_range()]) else {
        return false;
    };

    // Call must have exactly 1 argument that is the bare parameter variable.
    let Some(arg_list) = find_named_child_of_kind(call_node, "arguments") else {
        return false;
    };
    let mut args: Vec<tree_sitter::Node> = Vec::new();
    for i in 0..arg_list.named_child_count() as u32 {
        if let Some(c) = arg_list.named_child(i)
            && c.kind() == "argument"
        {
            args.push(c);
        }
    }
    if args.len() != 1 {
        return false;
    }
    let inner = args[0].named_child(0);
    let Some(inner) = inner else { return false };
    if inner.kind() != "variable_name" {
        return false;
    }
    let Some(arg_name_node) = inner.named_child(0) else {
        return false;
    };
    let Ok(arg_name) = std::str::from_utf8(&bytes[arg_name_node.byte_range()]) else {
        return false;
    };
    arg_name == param_name
}

/// PHP-only Layer C3: returns `true` when an `unserialize($x)` call
/// site is the second (or later) argument of a PHPUnit assertion call
/// whose first (expected) argument is a literal expression
/// (scalar, array literal, class constant access, or unary on a
/// literal).
///
/// **Why this is a non-actionable site for `php.deser.unserialize`:**
/// PHPUnit's `assertSame($expected, $actual)` /
/// `assertEquals(...)` / `assertNull(...)` family bound the
/// `unserialize` result to the literal expected value: if the
/// `$blob` argument were attacker-controlled and produced a
/// different shape, the assertion would fail loudly rather than
/// permit any object-injection side effect to escape the test
/// boundary.  Drupal, Joomla, and Nextcloud each carry tens of
/// these `Serializable` / cache / session round-trip tests and
/// every firing is noise; the actionable signal lives at the
/// production call sites that thread real input through
/// `unserialize` without an assertion sandwich.
///
/// Conservative recognition:
/// - the `unserialize(...)` call must be wrapped in an `argument`
///   node whose parent is `arguments`
/// - the enclosing call must be a `member_call_expression`,
///   `nullsafe_member_call_expression`, `scoped_call_expression`,
///   or `function_call_expression` with a method/function name
///   starting with `assert` (case-insensitive) — covers the entire
///   PHPUnit assertion family
/// - the assertion must have at least two argument slots (an
///   expected/actual pair)
/// - the first argument's inner expression must be a literal: a
///   string / number / boolean / null literal, an
///   `array_creation_expression` whose elements are recursively
///   literal, a `class_constant_access_expression`, or a unary
///   sign on one of the above
///
/// Genuine production sites (`unserialize($_GET[...])`, helpers
/// reading from session/cache and handing the value to caller
/// code) keep firing because they are not wrapped in a PHPUnit
/// assertion.  Single-argument assertions (`assertNotNull($x)`)
/// and assertions whose expected value is itself dynamic
/// (`assertEquals($computed, unserialize($blob))`) keep firing
/// because the bound is not statically verifiable.
fn is_php_unserialize_inside_phpunit_assertion(cap_node: tree_sitter::Node, bytes: &[u8]) -> bool {
    // The pattern captures `@n` (the function name); locate the enclosing
    // function_call_expression.  Mirrors the magic-method recogniser.
    let call_node = if cap_node.kind() == "function_call_expression" {
        cap_node
    } else {
        let mut cur = cap_node;
        let mut found = None;
        for _ in 0..4 {
            if cur.kind() == "function_call_expression" {
                found = Some(cur);
                break;
            }
            match cur.parent() {
                Some(p) => cur = p,
                None => break,
            }
        }
        match found {
            Some(c) => c,
            None => return false,
        }
    };

    // The unserialize call must sit directly inside an `argument` wrapper
    // that is itself inside an `arguments` list.  Reject any wrapping
    // expression (binary, conditional, etc.) — those break the literal
    // bounding the assertion provides.
    let Some(arg_wrapper) = call_node.parent() else {
        return false;
    };
    if arg_wrapper.kind() != "argument" {
        return false;
    }
    let Some(arguments) = arg_wrapper.parent() else {
        return false;
    };
    if arguments.kind() != "arguments" {
        return false;
    }
    let Some(assertion_call) = arguments.parent() else {
        return false;
    };
    if !matches!(
        assertion_call.kind(),
        "member_call_expression"
            | "nullsafe_member_call_expression"
            | "scoped_call_expression"
            | "function_call_expression"
    ) {
        return false;
    }

    // Method/function name must start with `assert` (case-insensitive).
    let name_node = assertion_call
        .child_by_field_name("name")
        .or_else(|| find_named_child_of_kind(assertion_call, "name"));
    let Some(name_node) = name_node else {
        return false;
    };
    let Ok(method_name) = std::str::from_utf8(&bytes[name_node.byte_range()]) else {
        return false;
    };
    if !method_name
        .chars()
        .take(6)
        .collect::<String>()
        .eq_ignore_ascii_case("assert")
    {
        return false;
    }

    // Collect the assertion's argument wrappers.
    let mut args: Vec<tree_sitter::Node> = Vec::new();
    for i in 0..arguments.named_child_count() as u32 {
        if let Some(c) = arguments.named_child(i)
            && c.kind() == "argument"
        {
            args.push(c);
        }
    }
    if args.is_empty() {
        return false;
    }

    // Single-arg assertions: the verb itself bounds the result
    // (`assertNull`, `assertIsArray`, `assertTrue`, ...).  Restrict to
    // a curated set so generic `assertSomething(unserialize($x))`
    // helpers without a documented bound don't qualify.
    if args.len() == 1 {
        return is_phpunit_single_arg_bounding_verb(method_name);
    }

    // Multi-arg assertions: the first argument is the expected /
    // literal-pinned value (PHPUnit's documented `$expected, $actual`
    // order).  The expected must be a static literal expression.
    let Some(first_inner) = args[0].named_child(0) else {
        return false;
    };
    is_php_assertion_literal_expected(first_inner, bytes)
}

/// Reports whether `name` is a PHPUnit single-argument assertion verb
/// whose name alone pins the inspected value to a known type or
/// constant.  When `unserialize($x)` is the sole argument to one of
/// these, a failed assertion aborts the test rather than letting an
/// object-injection side effect escape.
///
/// The comparison ignores ASCII case; only exact verb names from the
/// table match (no prefix matching).
fn is_phpunit_single_arg_bounding_verb(name: &str) -> bool {
    // Stored lowercase; matched case-insensitively below.
    const BOUNDING_VERBS: [&str; 33] = [
        "assertnull",
        "assertnotnull",
        "assertempty",
        "assertnotempty",
        "asserttrue",
        "assertfalse",
        "assertnan",
        "assertfinite",
        "assertinfinite",
        "assertisarray",
        "assertisnotarray",
        "assertisbool",
        "assertisnotbool",
        "assertiscallable",
        "assertisnotcallable",
        "assertisfloat",
        "assertisnotfloat",
        "assertisint",
        "assertisnotint",
        "assertisiterable",
        "assertisnotiterable",
        "assertisnumeric",
        "assertisnotnumeric",
        "assertisobject",
        "assertisnotobject",
        "assertisresource",
        "assertisnotresource",
        "assertisclosedresource",
        "assertisnotclosedresource",
        "assertisstring",
        "assertisnotstring",
        "assertisscalar",
        "assertisnotscalar",
    ];
    BOUNDING_VERBS
        .iter()
        .any(|verb| name.eq_ignore_ascii_case(verb))
}

/// PHP-only helper: decides whether `node` is a statically literal
/// expression that can serve as the "expected" argument of a PHPUnit
/// assertion.
///
/// Accepted shapes:
/// - scalar literal node kinds (`string`, `integer`, `float`,
///   `boolean`, `null`, `true`, `false`)
/// - `class_constant_access_expression` and
///   `scoped_property_access_expression` (e.g. `Foo::BAR`), which are
///   treated as build-time pinned values
/// - `encapsed_string` with no interpolation
/// - `unary_op_expression` whose first named child is itself literal
/// - `array_creation_expression` in which every named child is an
///   `array_element_initializer` whose named children (value, or key
///   and value) are all recursively literal
///
/// Anything else is rejected.
fn is_php_assertion_literal_expected(node: tree_sitter::Node, bytes: &[u8]) -> bool {
    let kind = node.kind();

    if kind == "encapsed_string" {
        return !has_interpolation(node);
    }

    if kind == "unary_op_expression" {
        return node
            .named_child(0)
            .is_some_and(|operand| is_php_assertion_literal_expected(operand, bytes));
    }

    if kind == "array_creation_expression" {
        // Every element must be an initializer, and every part of the
        // initializer (one value, or key + value) must be literal.
        return (0..node.named_child_count() as u32).all(|i| {
            node.named_child(i).is_some_and(|element| {
                element.kind() == "array_element_initializer"
                    && (0..element.named_child_count() as u32).all(|j| {
                        element
                            .named_child(j)
                            .is_some_and(|part| is_php_assertion_literal_expected(part, bytes))
                    })
            })
        });
    }

    matches!(
        kind,
        "string"
            | "integer"
            | "float"
            | "boolean"
            | "null"
            | "true"
            | "false"
            | "class_constant_access_expression"
            | "scoped_property_access_expression"
    )
}

/// Python-only Layer C4: returns `true` when a deserialization call
/// (`pickle.loads`, `yaml.load`, `shelve.open`, etc.) sits inside a
/// test assertion that bounds the result to a literal-expected shape.
///
/// Two assertion idioms are recognised:
/// 1. `unittest.TestCase` style — `self.assertEqual(LITERAL, pickle.loads(b))`,
///    `self.assertIsNone(pickle.loads(b))`, etc.
/// 2. pytest plain `assert` — `assert pickle.loads(b) == LITERAL`,
///    `assert pickle.loads(b) is None`, `assert isinstance(pickle.loads(b),
///    dict)`, `assert pickle.loads(b)` (truthy), `assert not
///    pickle.loads(b)` (falsy).
///
/// **Why this is a non-actionable site:** the assertion bounds the
/// deser result to a literal expected; if the blob argument were
/// attacker-controlled and produced a different shape, the assertion
/// would fail loudly rather than permit any object-injection side
/// effect to escape the test boundary.  Python projects ship
/// round-trip tests for every pickled / YAML-loaded data class, and
/// every firing on those test bodies is noise.
///
/// Dispatch, based on the nearest enclosing call of `cap_node` (as
/// found by `find_enclosing_call`):
/// - if that call is itself a recognised deser call, it is accepted
///   when either its direct parent is the `argument_list` of a `call`
///   that `python_assertion_bounds_deser` accepts, or
///   `python_pytest_assert_bounds_deser` accepts it (plain `assert`
///   reached through allowed wrappers only);
/// - otherwise the enclosing call is treated as a candidate assertion:
///   it is accepted when one of its direct arguments is a deser call
///   and `python_assertion_bounds_deser` accepts that pairing.
fn is_python_deser_inside_unittest_assertion(cap_node: tree_sitter::Node, bytes: &[u8]) -> bool {
    let Some(nearest_call) = find_enclosing_call(cap_node) else {
        return false;
    };

    if !is_python_deser_call(nearest_call, bytes) {
        // unittest CFG-emission shape: the capture is somewhere inside
        // the OUTER assertion call (`self.assertEqual(...)`), so look for
        // a deser sub-call among its direct arguments.
        return python_find_direct_deser_arg(nearest_call, bytes)
            .is_some_and(|deser| python_assertion_bounds_deser(nearest_call, deser, bytes));
    }

    // unittest AST-pattern shape: the deser call is a direct argument of
    // an outer call; let the unittest bounding check judge that call.
    let outer_assertion = nearest_call
        .parent()
        .filter(|list| list.kind() == "argument_list")
        .and_then(|list| list.parent())
        .filter(|outer| outer.kind() == "call");
    if outer_assertion
        .is_some_and(|outer| python_assertion_bounds_deser(outer, nearest_call, bytes))
    {
        return true;
    }

    // pytest plain-assert shape: walk up to an `assert_statement`
    // through allowed bounding wrappers.
    python_pytest_assert_bounds_deser(nearest_call, bytes)
}

/// Search the assertion call's argument_list for a direct child that
/// is a recognised deserialization call.  Direct child only — wrapped
/// expressions (binary, conditional, parenthesized) break the literal
/// bound and must keep firing.
fn python_find_direct_deser_arg<'tree>(
    assertion_call: tree_sitter::Node<'tree>,
    bytes: &[u8],
) -> Option<tree_sitter::Node<'tree>> {
    let arg_list = assertion_call.child_by_field_name("arguments")?;
    if arg_list.kind() != "argument_list" {
        return None;
    }
    for i in 0..arg_list.named_child_count() as u32 {
        let Some(c) = arg_list.named_child(i) else {
            continue;
        };
        if c.kind() == "call" && is_python_deser_call(c, bytes) {
            return Some(c);
        }
    }
    None
}

/// Core bounding check: given an assertion `call` node and the
/// deser sub-call inside its arg list, decide whether the assertion
/// bounds the deser result so the call is non-actionable.
fn python_assertion_bounds_deser(
    assertion_call: tree_sitter::Node,
    deser_call: tree_sitter::Node,
    bytes: &[u8],
) -> bool {
    let Some(func) = assertion_call.child_by_field_name("function") else {
        return false;
    };
    let name_node = match func.kind() {
        "attribute" => func
            .child_by_field_name("attribute")
            .or_else(|| find_named_child_of_kind(func, "identifier")),
        "identifier" => Some(func),
        _ => return false,
    };
    let Some(name_node) = name_node else {
        return false;
    };
    let Ok(verb) = std::str::from_utf8(&bytes[name_node.byte_range()]) else {
        return false;
    };
    let lowered = verb.to_ascii_lowercase();
    if !(lowered.starts_with("assert") || lowered.starts_with("fail")) {
        return false;
    }

    let Some(arg_list) = assertion_call.child_by_field_name("arguments") else {
        return false;
    };
    if arg_list.kind() != "argument_list" {
        return false;
    }
    let mut pos_args: Vec<tree_sitter::Node> = Vec::new();
    let mut deser_pos: Option<usize> = None;
    for i in 0..arg_list.named_child_count() as u32 {
        let Some(c) = arg_list.named_child(i) else {
            continue;
        };
        if c.kind() == "keyword_argument" {
            continue;
        }
        if c.id() == deser_call.id() {
            deser_pos = Some(pos_args.len());
        }
        pos_args.push(c);
    }
    let Some(deser_pos) = deser_pos else {
        return false;
    };
    if pos_args.is_empty() {
        return false;
    }

    if pos_args.len() == 1 {
        return is_python_unittest_single_arg_bounding_verb(verb);
    }

    if matches!(verb, "assertIsInstance" | "assertNotIsInstance") {
        let type_pos = if deser_pos == 0 { 1 } else { 0 };
        if let Some(type_arg) = pos_args.get(type_pos)
            && is_python_type_reference(*type_arg)
        {
            return true;
        }
    }

    if !is_python_unittest_multi_arg_bounding_verb(verb) {
        return false;
    }
    for (i, arg) in pos_args.iter().enumerate() {
        if i == deser_pos {
            continue;
        }
        if is_python_assertion_literal_expected(*arg, bytes) {
            return true;
        }
    }
    false
}

/// pytest plain-`assert` bounding check.
///
/// Starting at `deser_call` (expected to be the recognised deser
/// invocation), climbs at most eight parent links.  Each parent must
/// be one of the allowed wrappers, otherwise the check fails:
/// - `assert_statement` — terminal: succeeds only when the chain
///   climbed so far is the statement's first named child (the asserted
///   expression, NOT the optional message);
/// - `comparison_operator` whose other operands are all literal;
/// - `unary_operator`, `not_operator`, `parenthesized_expression`;
/// - the `argument_list` of a bare-identifier call to
///   `isinstance(chain, TYPE)` (chain first, exactly one other
///   positional argument, which must be a type reference) or to
///   `bool` / `len` / `type` / `id` with exactly one positional
///   argument.
///
/// Boolean operators and conditional expressions break the bound (they
/// can short-circuit past the assertion) and, like every other
/// unlisted parent kind, cause rejection.
fn python_pytest_assert_bounds_deser(deser_call: tree_sitter::Node, bytes: &[u8]) -> bool {
    /// Positional (non-keyword) named children of an `argument_list`.
    /// `None` when any named child cannot be fetched.
    fn positional_args<'t>(arg_list: tree_sitter::Node<'t>) -> Option<Vec<tree_sitter::Node<'t>>> {
        let mut positional = Vec::new();
        for i in 0..arg_list.named_child_count() as u32 {
            let child = arg_list.named_child(i)?;
            if child.kind() != "keyword_argument" {
                positional.push(child);
            }
        }
        Some(positional)
    }

    let mut chain = deser_call;
    let mut hops_left = 8;
    while hops_left > 0 {
        hops_left -= 1;
        let Some(up) = chain.parent() else {
            return false;
        };
        chain = match up.kind() {
            // Asserted expression sits at named_child(0); the optional
            // message sits at named_child(1).
            "assert_statement" => {
                return up.named_child(0).is_some_and(|first| first.id() == chain.id());
            }
            "comparison_operator"
                if python_comparison_other_side_is_literal(up, chain, bytes) =>
            {
                up
            }
            // `not deser` parses as `not_operator`; `+/-/~ deser` as
            // `unary_operator`.  Both keep the chain as the sole operand;
            // parentheses are transparent.
            "unary_operator" | "not_operator" | "parenthesized_expression" => up,
            "argument_list" => {
                let Some(wrapper_call) = up.parent() else {
                    return false;
                };
                if wrapper_call.kind() != "call" {
                    return false;
                }
                let Some(callee) = wrapper_call.child_by_field_name("function") else {
                    return false;
                };
                if callee.kind() != "identifier" {
                    return false;
                }
                let Ok(callee_name) = std::str::from_utf8(&bytes[callee.byte_range()]) else {
                    return false;
                };
                let bounded = match callee_name {
                    // isinstance(chain, TYPE): chain at positional index
                    // 0, exactly one other positional argument, and that
                    // argument is a type reference.
                    "isinstance" => positional_args(up).is_some_and(|pos| {
                        pos.len() == 2
                            && pos[0].id() == chain.id()
                            && is_python_type_reference(pos[1])
                    }),
                    // bool(chain) / len(chain) / type(chain) / id(chain):
                    // single-positional scalar wrappers.
                    "bool" | "len" | "type" | "id" => {
                        positional_args(up).is_some_and(|pos| pos.len() == 1)
                    }
                    _ => false,
                };
                if !bounded {
                    return false;
                }
                wrapper_call
            }
            // Includes `boolean_operator` and `conditional_expression`,
            // which can short-circuit and let a poisoned blob's side
            // effect run before the assertion fires, as well as a
            // `comparison_operator` with a non-literal counterpart.
            _ => return false,
        };
    }
    false
}

/// `comparison_operator` bounding check.
///
/// Succeeds when `deser_side` is one of `cmp`'s named children and
/// every other named child is a literal expression according to
/// `is_python_assertion_literal_expected`.  Named children whose kind
/// is `is` / `is_not` / `in` / `not_in` are skipped as operators.
/// Requiring `deser_side` to be present defends against unrelated
/// chained comparisons.
fn python_comparison_other_side_is_literal(
    cmp: tree_sitter::Node,
    deser_side: tree_sitter::Node,
    bytes: &[u8],
) -> bool {
    let mut deser_seen = false;
    let others_are_literal = (0..cmp.named_child_count() as u32).all(|i| {
        match cmp.named_child(i) {
            None => false,
            // Operator tokens that appear as named children.
            Some(c) if matches!(c.kind(), "is" | "is_not" | "in" | "not_in") => true,
            Some(c) if c.id() == deser_side.id() => {
                deser_seen = true;
                true
            }
            Some(c) => is_python_assertion_literal_expected(c, bytes),
        }
    });
    others_are_literal && deser_seen
}

/// Reports whether `call_node` is a Python `call` whose callee is a
/// recognised deserialization function.
///
/// Attribute callees are matched on the exact source text of their
/// `object` and `attribute` fields:
/// - `pickle` / `cPickle` / `marshal` with `loads` or `load`
/// - `yaml` with `load` or `unsafe_load`
/// - `shelve` with `open`
///
/// Plain identifier callees (`loads(blob)` after
/// `from pickle import loads`) are recognised by leaf name alone —
/// `loads`, `load`, or `unsafe_load` — to match the import-shape
/// ambiguity.  Any other callee shape is rejected.
fn is_python_deser_call(call_node: tree_sitter::Node, bytes: &[u8]) -> bool {
    let Some(callee) = call_node.child_by_field_name("function") else {
        return false;
    };
    match callee.kind() {
        "attribute" => {
            let (Some(object), Some(attribute)) = (
                callee.child_by_field_name("object"),
                callee.child_by_field_name("attribute"),
            ) else {
                return false;
            };
            let Ok(module) = std::str::from_utf8(&bytes[object.byte_range()]) else {
                return false;
            };
            let Ok(member) = std::str::from_utf8(&bytes[attribute.byte_range()]) else {
                return false;
            };
            match module {
                "pickle" | "cPickle" | "marshal" => matches!(member, "loads" | "load"),
                "yaml" => matches!(member, "load" | "unsafe_load"),
                "shelve" => member == "open",
                _ => false,
            }
        }
        "identifier" => std::str::from_utf8(&bytes[callee.byte_range()])
            .is_ok_and(|leaf| matches!(leaf, "loads" | "load" | "unsafe_load")),
        _ => false,
    }
}

/// Reports whether `name` is a single-argument `unittest.TestCase`
/// assertion verb whose name itself constrains the inspected value.
/// When the deser call is the sole positional argument to one of
/// these, a failed assertion aborts the test rather than letting an
/// object-injection side effect escape.
///
/// Matching is exact and case-sensitive.
fn is_python_unittest_single_arg_bounding_verb(name: &str) -> bool {
    const SINGLE_ARG_VERBS: [&str; 9] = [
        "assertIsNone",
        "assertIsNotNone",
        "assertTrue",
        "assertFalse",
        "assertNotNone",
        "assertNone",
        "failIf",
        "failUnless",
        "assert_",
    ];
    SINGLE_ARG_VERBS.contains(&name)
}

/// Reports whether `name` is a multi-argument `unittest.TestCase`
/// assertion verb that performs a literal-comparable bound on its
/// value positions (equality, ordering, membership, regex match,
/// container equality).
///
/// Matching is exact and case-sensitive.
fn is_python_unittest_multi_arg_bounding_verb(name: &str) -> bool {
    const MULTI_ARG_VERBS: [&str; 30] = [
        // equality / identity
        "assertEqual",
        "assertEquals",
        "assertNotEqual",
        "assertNotEquals",
        "assert_equal",
        "assert_not_equal",
        "assertIs",
        "assertIsNot",
        "assertAlmostEqual",
        "assertNotAlmostEqual",
        // ordering
        "assertGreater",
        "assertGreaterEqual",
        "assertLess",
        "assertLessEqual",
        // container equality
        "assertListEqual",
        "assertTupleEqual",
        "assertDictEqual",
        "assertSetEqual",
        "assertSequenceEqual",
        "assertMultiLineEqual",
        "assertCountEqual",
        "assertItemsEqual",
        // membership / regex
        "assertIn",
        "assertNotIn",
        "assertRegex",
        "assertNotRegex",
        "assertRegexpMatches",
        "assertNotRegexpMatches",
        // legacy aliases
        "failUnlessEqual",
        "failIfEqual",
    ];
    MULTI_ARG_VERBS.iter().any(|verb| *verb == name)
}

/// Decides whether `node` looks like a Python type reference suitable
/// as the second argument of `assertIsInstance(value, type)` /
/// `isinstance(value, type)`.
///
/// Accepts `identifier` (builtin or user class), `attribute` (dotted
/// access such as `module.Type`), `subscript` (generic such as
/// `list[int]`), and a `tuple` whose named children are all type
/// references themselves.
fn is_python_type_reference(node: tree_sitter::Node) -> bool {
    let kind = node.kind();
    if kind != "tuple" {
        return matches!(kind, "identifier" | "attribute" | "subscript");
    }
    // Tuple-of-types: every member must qualify.
    (0..node.named_child_count() as u32).all(|i| {
        node.named_child(i)
            .is_some_and(|member| is_python_type_reference(member))
    })
}

/// Decides whether `node` is a Python literal expression suitable as
/// the "expected" argument of a `unittest.TestCase.assertEqual`-family
/// assertion.
///
/// Accepted shapes:
/// - `string` without interpolation
/// - `integer`, `float`, `true`, `false`, `none`, `ellipsis`
/// - `unary_operator` whose first named child is literal
/// - `concatenated_string`, `list`, `tuple`, `set` whose named children
///   are all literal
/// - `dictionary` whose named children are all `pair` nodes with a
///   literal `key` and a literal `value`
///
/// Identifier references and attribute access do NOT count (those
/// could resolve to dynamic values); neither does any other node kind.
fn is_python_assertion_literal_expected(node: tree_sitter::Node, bytes: &[u8]) -> bool {
    let child_count = node.named_child_count() as u32;
    match node.kind() {
        "integer" | "float" | "true" | "false" | "none" | "ellipsis" => true,
        "string" => !has_python_string_interpolation(node),
        "unary_operator" => node
            .named_child(0)
            .is_some_and(|operand| is_python_assertion_literal_expected(operand, bytes)),
        // Sequence-like containers: every element must be literal.
        "concatenated_string" | "list" | "tuple" | "set" => (0..child_count).all(|i| {
            node.named_child(i)
                .is_some_and(|item| is_python_assertion_literal_expected(item, bytes))
        }),
        // Mappings: only plain `key: value` pairs with literal halves.
        "dictionary" => (0..child_count).all(|i| {
            node.named_child(i).is_some_and(|entry| {
                entry.kind() == "pair"
                    && entry
                        .child_by_field_name("key")
                        .is_some_and(|k| is_python_assertion_literal_expected(k, bytes))
                    && entry
                        .child_by_field_name("value")
                        .is_some_and(|v| is_python_assertion_literal_expected(v, bytes))
            })
        }),
        _ => false,
    }
}

/// Python f-strings are `string` nodes with `interpolation` children.
/// Treat them as non-literal because the interpolated value is
/// dynamic.
fn has_python_string_interpolation(node: tree_sitter::Node) -> bool {
    for i in 0..node.named_child_count() as u32 {
        if let Some(c) = node.named_child(i)
            && c.kind() == "interpolation"
        {
            return true;
        }
    }
    false
}

/// Ruby Layer C5: `true` when a `Marshal.load` / `YAML.load` /
/// `Psych.load` call is a direct argument of a Minitest assertion or of an
/// RSpec `expect(...)` whose matcher bounds the result.  The rationale is
/// the same non-actionability argument as for the Python and PHP
/// recognisers above: a round-trip test pins the deserialized value to a
/// literal, a poisoned blob would fail the assertion, and no
/// object-injection side effect escapes the test boundary.
///
/// Conservative recognition:
/// - Minitest: `assert_equal LIT, deser`, `assert_nil deser`,
///   `assert deser` (truthy), and the `refute_*` mirrors.
/// - RSpec: `expect(deser).to eq(LIT)`, `expect(deser).to be_nil`,
///   `expect(deser).to be_a(TYPE)`, `be_truthy`, `not_to`/`to_not`.
/// - Old-style `.should ==` chains are NOT recognised (they're
///   discouraged in modern RSpec and the AST shape parses as a
///   `binary` rather than the receiver-method-arguments shape).
///
/// `cap_node` is the captured node of the pattern hit; `bytes` is the
/// source text the tree was parsed from.
fn is_ruby_deser_inside_test_assertion(cap_node: tree_sitter::Node, bytes: &[u8]) -> bool {
    let Some(deser_call) = find_enclosing_call(cap_node) else {
        return false;
    };
    if !is_ruby_deser_call(deser_call, bytes) {
        return false;
    }

    // The deser call must sit directly in the `argument_list` of a `call`.
    let outer_call = match deser_call.parent() {
        Some(args) if args.kind() == "argument_list" => match args.parent() {
            Some(parent_call) if parent_call.kind() == "call" => parent_call,
            _ => return false,
        },
        _ => return false,
    };
    // Both `assert_*` and `expect` are invoked without an explicit receiver.
    if outer_call.child_by_field_name("receiver").is_some() {
        return false;
    }
    let Some(name) = outer_call
        .child_by_field_name("method")
        .and_then(|m| std::str::from_utf8(&bytes[m.byte_range()]).ok())
    else {
        return false;
    };

    // Minitest family: hand over to the positional-argument checker.
    let is_type_check_verb = matches!(
        name,
        "assert_kind_of" | "assert_instance_of" | "refute_kind_of" | "refute_instance_of"
    );
    if is_type_check_verb
        || is_ruby_minitest_single_arg_bounding_verb(name)
        || is_ruby_minitest_multi_arg_bounding_verb(name)
    {
        return ruby_minitest_assertion_bounds_deser(outer_call, deser_call, bytes);
    }

    if name != "expect" {
        return false;
    }

    // RSpec: `expect(deser)` has to be the receiver of `.to` / `.not_to` /
    // `.to_not`, and the matcher passed to that verb decides the outcome.
    let Some(expectation) = outer_call.parent() else {
        return false;
    };
    if expectation.kind() != "call" {
        return false;
    }
    match expectation.child_by_field_name("receiver") {
        Some(receiver) if receiver.id() == outer_call.id() => {}
        _ => return false,
    }
    let Some(verb) = expectation
        .child_by_field_name("method")
        .and_then(|m| std::str::from_utf8(&bytes[m.byte_range()]).ok())
    else {
        return false;
    };
    if !matches!(verb, "to" | "not_to" | "to_not") {
        return false;
    }
    match expectation.child_by_field_name("arguments") {
        Some(matcher_args) => ruby_rspec_matcher_bounds_deser(matcher_args, bytes),
        None => false,
    }
}

/// Shape recogniser for Ruby deserialization entry points written as a
/// `Module.method` chain:
///
/// - `Marshal.load`, `Marshal.restore`
/// - `YAML.load`, `YAML.unsafe_load`, `YAML.load_file`
/// - `Psych.load`, `Psych.unsafe_load`, `Psych.load_file`
///
/// Only the canonical form with a `constant` receiver is accepted —
/// bare-leaf `load(b)` is ambiguous in Ruby and not flagged as a pattern
/// hit, so there is no need to handle it here.
fn is_ruby_deser_call(call_node: tree_sitter::Node, bytes: &[u8]) -> bool {
    let (Some(receiver), Some(method)) = (
        call_node.child_by_field_name("receiver"),
        call_node.child_by_field_name("method"),
    ) else {
        return false;
    };
    if receiver.kind() != "constant" {
        return false;
    }
    let Ok(module) = std::str::from_utf8(&bytes[receiver.byte_range()]) else {
        return false;
    };
    let Ok(func) = std::str::from_utf8(&bytes[method.byte_range()]) else {
        return false;
    };
    match module {
        "Marshal" => matches!(func, "load" | "restore"),
        "YAML" | "Psych" => matches!(func, "load" | "unsafe_load" | "load_file"),
        _ => false,
    }
}

fn ruby_minitest_assertion_bounds_deser(
    call: tree_sitter::Node,
    deser_call: tree_sitter::Node,
    bytes: &[u8],
) -> bool {
    let Some(method) = call.child_by_field_name("method") else {
        return false;
    };
    let Ok(name) = std::str::from_utf8(&bytes[method.byte_range()]) else {
        return false;
    };
    let Some(arg_list) = call.child_by_field_name("arguments") else {
        return false;
    };
    let mut pos_args: Vec<tree_sitter::Node> = Vec::new();
    let mut deser_pos: Option<usize> = None;
    for i in 0..arg_list.named_child_count() as u32 {
        let Some(c) = arg_list.named_child(i) else {
            continue;
        };
        // Minitest verbs accept a trailing message argument as last
        // positional; both that and the value positions are checked
        // through the literal tester so kwargs and hash splats are
        // the only kinds that need to be stripped here.
        if matches!(c.kind(), "pair" | "hash_splat_argument") {
            continue;
        }
        if c.id() == deser_call.id() {
            deser_pos = Some(pos_args.len());
        }
        pos_args.push(c);
    }
    let Some(deser_pos) = deser_pos else {
        return false;
    };
    if pos_args.is_empty() {
        return false;
    }

    if pos_args.len() == 1 {
        return is_ruby_minitest_single_arg_bounding_verb(name);
    }

    if matches!(
        name,
        "assert_kind_of" | "assert_instance_of" | "refute_kind_of" | "refute_instance_of"
    ) {
        let type_pos = if deser_pos == 0 { 1 } else { 0 };
        if let Some(type_arg) = pos_args.get(type_pos)
            && is_ruby_type_reference(*type_arg)
        {
            return true;
        }
    }

    if !is_ruby_minitest_multi_arg_bounding_verb(name) {
        return false;
    }
    for (i, arg) in pos_args.iter().enumerate() {
        if i == deser_pos {
            continue;
        }
        if is_ruby_assertion_literal_expected(*arg, bytes) {
            return true;
        }
    }
    false
}

fn ruby_rspec_matcher_bounds_deser(args_node: tree_sitter::Node, bytes: &[u8]) -> bool {
    let Some(matcher) = args_node.named_child(0) else {
        return false;
    };
    match matcher.kind() {
        "identifier" => {
            // Bare-name matchers: be_nil, be_truthy, be_falsey, etc.
            let Ok(name) = std::str::from_utf8(&bytes[matcher.byte_range()]) else {
                return false;
            };
            is_ruby_rspec_bare_matcher(name)
        }
        "call" => {
            let Some(method) = matcher.child_by_field_name("method") else {
                return false;
            };
            let Ok(name) = std::str::from_utf8(&bytes[method.byte_range()]) else {
                return false;
            };
            let Some(matcher_args) = matcher.child_by_field_name("arguments") else {
                return false;
            };
            match name {
                "eq" | "eql" | "equal" | "match_array" | "contain_exactly" => {
                    let mut any = false;
                    for i in 0..matcher_args.named_child_count() as u32 {
                        let Some(c) = matcher_args.named_child(i) else {
                            return false;
                        };
                        if !is_ruby_assertion_literal_expected(c, bytes) {
                            return false;
                        }
                        any = true;
                    }
                    any
                }
                "be_a" | "be_an" | "be_kind_of" | "be_instance_of" | "be_a_kind_of" => {
                    let Some(c) = matcher_args.named_child(0) else {
                        return false;
                    };
                    is_ruby_type_reference(c)
                }
                "be" => {
                    // `be(LITERAL)` — `be == LIT` shape isn't representable here,
                    // accept a single literal arg.
                    let Some(c) = matcher_args.named_child(0) else {
                        return false;
                    };
                    is_ruby_assertion_literal_expected(c, bytes)
                }
                _ => false,
            }
        }
        _ => false,
    }
}

/// Minitest verbs that bound their subject on their own, i.e. when the
/// deserialized value is the only positional argument.
fn is_ruby_minitest_single_arg_bounding_verb(name: &str) -> bool {
    const SINGLE_ARG_VERBS: [&str; 6] = [
        "assert",
        "assert_nil",
        "refute",
        "refute_nil",
        "assert_empty",
        "refute_empty",
    ];
    SINGLE_ARG_VERBS.contains(&name)
}

/// Minitest verbs that compare the subject against at least one other
/// positional argument.
fn is_ruby_minitest_multi_arg_bounding_verb(name: &str) -> bool {
    const MULTI_ARG_VERBS: [&str; 15] = [
        "assert_equal",
        "assert_not_equal",
        "refute_equal",
        "assert_in_delta",
        "assert_in_epsilon",
        "assert_includes",
        "refute_includes",
        "assert_match",
        "refute_match",
        "assert_operator",
        "refute_operator",
        "assert_predicate",
        "refute_predicate",
        "assert_same",
        "refute_same",
    ];
    MULTI_ARG_VERBS.contains(&name)
}

/// RSpec matchers that are written as a bare identifier (no argument
/// list) and are accepted as bounding the value under test.
fn is_ruby_rspec_bare_matcher(name: &str) -> bool {
    const BARE_MATCHERS: [&str; 9] = [
        "be_nil",
        "be_truthy",
        "be_falsey",
        "be_falsy",
        "be_empty",
        "be_present",
        "be_zero",
        "be_positive",
        "be_negative",
    ];
    BARE_MATCHERS.contains(&name)
}

/// `true` when the node's kind is one of the shapes accepted as a type
/// operand: `constant`, `scope_resolution`, or `identifier`.
fn is_ruby_type_reference(node: tree_sitter::Node) -> bool {
    ["constant", "scope_resolution", "identifier"].contains(&node.kind())
}

/// Recursive classifier for Ruby "literal expected value" expressions.
///
/// Accepted shapes:
/// - strings without `interpolation` children (`"hello"` yes, `"#{x}"` no);
/// - `%w[...]` / `%i[...]` style `string_array` / `symbol_array` nodes;
/// - integers, floats, rationals, complex numbers, booleans, `nil`,
///   symbols, hash-key symbols and regexes;
/// - a `unary` expression whose first named child is literal (e.g. a
///   negative number);
/// - arrays and ranges whose named children are all literal;
/// - hashes made only of `pair`s with literal key and literal value.
fn is_ruby_assertion_literal_expected(node: tree_sitter::Node, bytes: &[u8]) -> bool {
    match node.kind() {
        "integer" | "float" | "true" | "false" | "nil" | "simple_symbol" | "hash_key_symbol"
        | "rational" | "complex" | "regex" | "string_array" | "symbol_array" => true,
        "string" => !has_ruby_string_interpolation(node),
        "unary" => match node.named_child(0) {
            Some(operand) => is_ruby_assertion_literal_expected(operand, bytes),
            None => false,
        },
        // Element-wise containers: every named child has to be literal.
        "array" | "range" => (0..node.named_child_count() as u32).all(|idx| {
            node.named_child(idx)
                .is_some_and(|item| is_ruby_assertion_literal_expected(item, bytes))
        }),
        // Hashes: any entry that is not a `pair` refuses, and both sides
        // of each pair are checked.
        "hash" => (0..node.named_child_count() as u32).all(|idx| {
            let Some(entry) = node.named_child(idx) else {
                return false;
            };
            if entry.kind() != "pair" {
                return false;
            }
            match (
                entry.child_by_field_name("key"),
                entry.child_by_field_name("value"),
            ) {
                (Some(key), Some(value)) => {
                    is_ruby_assertion_literal_expected(key, bytes)
                        && is_ruby_assertion_literal_expected(value, bytes)
                }
                _ => false,
            }
        }),
        _ => false,
    }
}

fn has_ruby_string_interpolation(node: tree_sitter::Node) -> bool {
    for i in 0..node.named_child_count() as u32 {
        if let Some(c) = node.named_child(i)
            && c.kind() == "interpolation"
        {
            return true;
        }
    }
    false
}

/// C/C++-only Layer D: structural suppression of buffer-overflow pattern
/// rules when the source / format-string argument is a literal whose
/// contributed length is statically bounded.
///
/// **Policy (vulnerability detection, not style):** the pattern rules
/// `c.memory.strcpy` / `c.memory.strcat` / `c.memory.sprintf` (and the
/// `cpp.memory.*` mirrors) flag the call syntactically; their stated
/// danger is "no bounds checking on destination buffer" / "no length limit
/// on output buffer".  That danger is realised only when the source
/// argument can carry attacker-controlled length.  A string-literal source
/// has a compile-time bound and cannot overflow due to attacker input: a
/// too-small destination is a fixed developer bug (caught by compiler
/// warnings / `-fstack-protector` / clang-tidy / ASan), not an exploitable
/// channel.  Suppressing these literal-source calls is a deliberate noise /
/// false-positive reduction aligned with Nyx's scope (vulnerability
/// detection over style enforcement).
///
/// **Test coverage convention:**
/// - Negative cases (suppression correct) live alongside other state /
///   lifecycle fixtures and are recorded as soft expectations
///   (`must_match: false`) in `*.expect.json`.  The notes there
///   reference this function so future authors can trace why the AST
///   pattern doesn't fire.  Examples:
///     - `tests/fixtures/real_world/c/state/malloc_lifecycle.expect.json`
///     - `tests/fixtures/real_world/cpp/state/new_delete.expect.json`
///     - `tests/fixtures/real_world/cpp/state/malloc_branches.expect.json`
/// - Positive cases (suppression must NOT fire, source is a parameter
///   or other attacker-reachable value) live as hard expectations
///   (`must_match: true`) in the taint fixtures:
///     - `tests/fixtures/real_world/c/taint/buffer_overflow.c`
///     - `tests/fixtures/real_world/cpp/taint/gets_strcpy.cpp`
///
/// Neither fixture set would catch this function being removed or its
/// predicate being weakened; the unit tests below are what pin it.
///
/// Shapes recognised:
///   - `strcpy(dst, "literal")`            → suppress
///   - `strcpy(dst, COND ? "a" : "b")`     → suppress (ternary of two
///     string-literal branches; the postgres `formatting.c` shape)
///   - `strcat(dst, "literal")`            → same
///   - `sprintf(dst, "format")` where the format string is a literal
///     containing no bare `%s` (only width/precision-bounded specifiers
///     like `%d`, `%lld`, `%c`, `%.*s`, `%.5s`)
///     → suppress
///
/// Conservative refusals:
///   - any other `rule_id`, no enclosing call / argument list, or fewer
///     than two named arguments → keep firing
///   - source / format is an identifier (could be tainted, e.g.
///     `sprintf(buf, fmt, …)`) → keep firing
///   - format is `concatenated_string` containing identifier macros (e.g.
///     `"%" PRId64`): the macro cannot be statically expanded, so refuse
///   - bare `%s` in format → keep firing (could read unbounded length)
fn is_c_buffer_call_literal_safe(rule_id: &str, cap_node: tree_sitter::Node, bytes: &[u8]) -> bool {
    let kind = match rule_id {
        "c.memory.strcpy" | "cpp.memory.strcpy" | "c.memory.strcat" | "cpp.memory.strcat" => {
            CBufferRule::StrcpyOrCat
        }
        "c.memory.sprintf" | "cpp.memory.sprintf" => CBufferRule::Sprintf,
        _ => return false,
    };
    let Some(call) = find_enclosing_call(cap_node) else {
        return false;
    };
    let Some(arg_list) = find_arg_list(call) else {
        return false;
    };
    // The second named argument is the copy source (strcpy / strcat) or
    // the format string (sprintf).
    let Some(src) = (0..arg_list.named_child_count() as u32)
        .filter_map(|idx| arg_list.named_child(idx))
        .nth(1)
    else {
        return false;
    };
    match kind {
        CBufferRule::StrcpyOrCat => is_c_string_literal_or_lit_ternary(src, bytes),
        CBufferRule::Sprintf => {
            // Format must be a single string literal with safe specifiers.
            // Refuse identifiers and concatenated_string (PRI* macros).
            let is_single_literal = matches!(
                src.kind(),
                "string_literal" | "raw_string_literal" | "string"
            );
            is_single_literal
                && c_string_literal_payload(src, bytes)
                    .is_some_and(|format| sprintf_format_is_safe(&format))
        }
    }
}

/// Which family of buffer-writing pattern rule is being evaluated by
/// `is_c_buffer_call_literal_safe`.
#[derive(Copy, Clone)]
enum CBufferRule {
    /// `c.memory.strcpy` / `c.memory.strcat` and their `cpp.memory.*`
    /// mirrors: the second argument is the copy source.
    StrcpyOrCat,
    /// `c.memory.sprintf` / `cpp.memory.sprintf`: the second argument is
    /// the format string.
    Sprintf,
}

/// True for a C/C++ string literal, OR for a `conditional_expression`
/// whose consequence and alternative are each either a string literal or
/// an ALL_CAPS identifier (the canonical preprocessor-macro naming
/// convention for string-constant `#define`s such as `P_M_STR`, `A_M_STR`,
/// `BG_NAME`, used pervasively in postgres' `formatting.c::DCH_a_m`).
/// Parenthesised forms are unwrapped, both around the whole expression and
/// around each branch.
///
/// The ALL_CAPS heuristic recognises identifiers whose every character is
/// in `[A-Z0-9_]` and which contain at least one alphabetic letter.
/// Variables in C/C++ are conventionally lower / camelCase; macros are
/// SHOUTING_SNAKE.  False acceptance of an actual variable is possible but
/// extraordinarily rare in real codebases.
fn is_c_string_literal_or_lit_ternary(node: tree_sitter::Node, bytes: &[u8]) -> bool {
    let expr = unwrap_c_paren(node);
    let kind = expr.kind();
    if matches!(kind, "string_literal" | "raw_string_literal" | "string") {
        return true;
    }
    if kind != "conditional_expression" {
        return false;
    }
    // tree-sitter-c shape: condition, consequence, alternative as named
    // children.  The first named child (the condition) is skipped; the
    // second and third are taken as the branches.  Fewer than three named
    // children refuses.
    let mut branches = (0..expr.named_child_count() as u32)
        .filter_map(|idx| expr.named_child(idx))
        .skip(1);
    let (Some(conseq), Some(alt)) = (branches.next(), branches.next()) else {
        return false;
    };
    is_c_lit_or_macro_branch(unwrap_c_paren(conseq), bytes)
        && is_c_lit_or_macro_branch(unwrap_c_paren(alt), bytes)
}

fn is_c_lit_or_macro_branch(node: tree_sitter::Node, bytes: &[u8]) -> bool {
    match node.kind() {
        "string_literal" | "raw_string_literal" | "string" => true,
        "identifier" => {
            let Ok(name) = std::str::from_utf8(&bytes[node.byte_range()]) else {
                return false;
            };
            is_all_caps_macro_name(name)
        }
        _ => false,
    }
}

/// `true` when `s` consists solely of ASCII uppercase letters, ASCII
/// digits and underscores, and contains at least one uppercase letter.
/// The empty string, digit-only and underscore-only names are rejected.
fn is_all_caps_macro_name(s: &str) -> bool {
    let in_macro_alphabet =
        |ch: char| ch.is_ascii_uppercase() || ch.is_ascii_digit() || ch == '_';
    s.chars().all(in_macro_alphabet) && s.chars().any(|ch| ch.is_ascii_uppercase())
}

fn unwrap_c_paren(mut node: tree_sitter::Node) -> tree_sitter::Node {
    for _ in 0..4 {
        if node.kind() == "parenthesized_expression"
            && let Some(inner) = node.named_child(0)
        {
            node = inner;
            continue;
        }
        break;
    }
    node
}

/// Extract the textual payload of a C/C++ string literal node, stripping
/// the surrounding double-quotes and the optional encoding prefix
/// (`L"..."`, `u8"..."`, `R"(...)"`).  Returns `None` if the bytes are not
/// valid UTF-8 or the literal cannot be decoded.
fn c_string_literal_payload(node: tree_sitter::Node, bytes: &[u8]) -> Option<String> {
    // Prefer a `string_content` child if tree-sitter exposes one.
    for i in 0..node.named_child_count() as u32 {
        if let Some(c) = node.named_child(i)
            && c.kind() == "string_content"
            && let Ok(s) = std::str::from_utf8(&bytes[c.byte_range()])
        {
            return Some(s.to_string());
        }
    }
    // Fall back: strip the surrounding quotes from the full literal text.
    let raw = std::str::from_utf8(&bytes[node.byte_range()]).ok()?;
    let trimmed = raw.trim();
    // Drop optional encoding prefix.
    let after_prefix = trimmed
        .trim_start_matches('L')
        .trim_start_matches("u8")
        .trim_start_matches('u')
        .trim_start_matches('U');
    let s = after_prefix
        .strip_prefix('"')
        .and_then(|s| s.strip_suffix('"'));
    s.map(|s| s.to_string())
}

/// Returns `true` when a `printf`-family format string can never overflow a
/// destination buffer due to attacker-controlled length.
///
/// Every `%` conversion in `fmt` is parsed as
/// `% [flags] [width] [.precision] [length] conversion` and judged:
/// - `%%` is a literal percent sign and is skipped;
/// - numeric / char / pointer conversions (and `%n`) are accepted;
/// - `%s` is accepted only with a precision: width-bounded `%5s` is
///   unbounded (width is a *minimum*), but precision-bounded `%.5s` /
///   `%.*s` is safe (precision caps the maximum);
/// - a truncated specifier (format ends mid-conversion) or any unknown
///   conversion character refuses.
///
/// A format without any `%` is trivially safe.
pub(crate) fn sprintf_format_is_safe(fmt: &str) -> bool {
    /// Advances `pos` past every byte satisfying `pred`.
    fn skip_while(buf: &[u8], mut pos: usize, pred: impl Fn(u8) -> bool) -> usize {
        while pos < buf.len() && pred(buf[pos]) {
            pos += 1;
        }
        pos
    }

    /// Skips a width / precision value: either a single `*` or a run of digits.
    fn skip_star_or_digits(buf: &[u8], pos: usize) -> usize {
        if buf.get(pos) == Some(&b'*') {
            pos + 1
        } else {
            skip_while(buf, pos, |b| b.is_ascii_digit())
        }
    }

    let buf = fmt.as_bytes();
    let mut pos = 0;
    loop {
        // Jump to the next `%`; none left means every specifier passed.
        let Some(offset) = buf[pos..].iter().position(|&b| b == b'%') else {
            return true;
        };
        pos += offset + 1;

        match buf.get(pos).copied() {
            // trailing `%`, malformed, refuse to suppress
            None => return false,
            Some(b'%') => {
                pos += 1;
                continue;
            }
            Some(_) => {}
        }

        // Flags, then width.
        pos = skip_while(buf, pos, |b| {
            matches!(b, b'-' | b'+' | b'#' | b' ' | b'0' | b'\'')
        });
        pos = skip_star_or_digits(buf, pos);

        // Optional precision.
        let has_precision = buf.get(pos) == Some(&b'.');
        if has_precision {
            pos = skip_star_or_digits(buf, pos + 1);
        }

        // Length modifiers: h hh l ll L q z j t
        pos = skip_while(buf, pos, |b| {
            matches!(b, b'h' | b'l' | b'L' | b'q' | b'z' | b'j' | b't')
        });

        let Some(conv) = buf.get(pos).copied() else {
            return false;
        };
        pos += 1;

        let bounded = match conv {
            // Numeric / char / pointer specifiers, bounded output for any input
            b'd' | b'i' | b'u' | b'o' | b'x' | b'X' | b'c' | b'e' | b'E' | b'f' | b'F' | b'g'
            | b'G' | b'a' | b'A' | b'p' | b'n' => true,
            // String specifier: only safe when precision-bounded
            b's' => has_precision,
            // Unknown conversion (e.g. `%S` wide-char on Windows is
            // unbounded) → conservative refuse.
            _ => false,
        };
        if !bounded {
            return false;
        }
    }
}

fn is_string_literal_with_text(node: tree_sitter::Node, text: &str, bytes: &[u8]) -> bool {
    if node.kind() != "string" && node.kind() != "encapsed_string" {
        return false;
    }
    // Look for a single string_content / string_value child.
    let mut payload = None;
    for i in 0..node.named_child_count() as u32 {
        if let Some(c) = node.named_child(i)
            && (c.kind() == "string_content" || c.kind() == "string_value")
        {
            payload = Some(c);
            break;
        }
    }
    let Some(payload) = payload else {
        // Fall back: PHP single-quoted strings sometimes inline the content.
        if let Ok(s) = std::str::from_utf8(&bytes[node.byte_range()]) {
            let trimmed = s.trim_matches(|c| c == '\'' || c == '"');
            return trimmed == text;
        }
        return false;
    };
    if let Ok(s) = std::str::from_utf8(&bytes[payload.byte_range()]) {
        return s == text;
    }
    false
}

/// C++-only Layer E: structural suppression of `cpp.memory.reinterpret_cast`
/// when the cast's target type is explicitly defined as safe by the C++
/// aliasing rules.
///
/// `reinterpret_cast<T>(x)` is *not* always undefined behaviour — the C++
/// standard ([basic.lval]/11) explicitly permits accessing any object
/// representation through a pointer to `char`, `unsigned char`, or
/// `std::byte` (and, by long-standing convention, `int8_t` / `uint8_t`).
/// `void*` is similarly safe because reads / writes are illegal through it
/// (the program must always cast back before dereferencing).  The integer
/// round-trip `uintptr_t` / `intptr_t` is guaranteed lossless by the
/// standard.  POSIX additionally type-puns the `sockaddr` family — the
/// BSD-socket API takes `struct sockaddr *` and the program must cast from
/// `sockaddr_in*` / `sockaddr_in6*` / `sockaddr_un*` / `sockaddr_storage*`,
/// which is the API's intended use.
///
/// The pattern rule `cpp.memory.reinterpret_cast` cannot distinguish these
/// well-defined casts from genuinely dangerous strict-aliasing UB casts
/// (`reinterpret_cast<MyStruct*>(buf)`), so it over-fires by ~70% on
/// real-repo serialization, hashing, IPC, and socket-API code where the
/// cast is the canonical (and standard-blessed) idiom.  Suppressing the
/// well-defined target-type set is a layer-2 structural fix (per the
/// bughunt depth hierarchy): the engine recognises the property
/// (well-defined target type) that makes the cast safe in C++ and
/// suppresses based on it.  Genuine strict-aliasing risk casts (target is
/// a user struct / class type) keep firing.
///
/// Shapes recognised (classification is done by
/// [`cpp_cast_target_type_is_safe`] on the text between `<` and `>`):
///   - exactly one level of pointer indirection to a byte-view type:
///     `char*`, `signed char*`, `unsigned char*`, `wchar_t*`, `uint8_t*`,
///     `int8_t*`, `std::byte*`, `byte*`
///   - `void*`
///   - `sockaddr*`, `sockaddr_in*`, `sockaddr_in6*`, `sockaddr_un*`,
///     `sockaddr_storage*`, each optionally spelled with a leading
///     `struct`
///   - the value types `uintptr_t`, `std::uintptr_t`, `intptr_t`,
///     `std::intptr_t` (no `*` at all — only the pointer<->integer
///     round-trip is accepted, not a pointer to these types)
///
/// Conservative refusals (kept firing): pointer-to-pointer targets such as
/// `char**`, user-defined struct / class pointer targets, template type
/// parameters (`T*`), and any target the normaliser cannot identify.
fn is_cpp_cast_target_type_safe(rule_id: &str, cap_node: tree_sitter::Node, bytes: &[u8]) -> bool {
    if rule_id != "cpp.memory.reinterpret_cast" {
        return false;
    }
    // `cap_node` is the `(identifier) @n` "reinterpret_cast" capture (the
    // pattern's index-0 capture, by query-string order — see Layer A's
    // `c.index == 0` selection in `run_ast_queries`).  From the enclosing
    // `call_expression`, the `function` field must be a `template_function`
    // whose `arguments` field is the `template_argument_list` carrying the
    // target type.
    let template_args = find_enclosing_call(cap_node)
        .and_then(|call| call.child_by_field_name("function"))
        .filter(|func| func.kind() == "template_function")
        .and_then(|func| func.child_by_field_name("arguments"))
        .filter(|targs| targs.kind() == "template_argument_list");
    let Some(template_args) = template_args else {
        return false;
    };
    match std::str::from_utf8(&bytes[template_args.byte_range()]) {
        Ok(text) => {
            // Drop the angle brackets surrounding the target type.
            let target = text
                .trim()
                .trim_start_matches('<')
                .trim_end_matches('>')
                .trim();
            cpp_cast_target_type_is_safe(target)
        }
        Err(_) => false,
    }
}

/// Normalise a C++ cast target type string and report whether it names a
/// well-defined-by-aliasing-rules type per the policy in
/// [`is_cpp_cast_target_type_safe`].  Public to the module so the unit
/// tests can pin the canonical and adversarial shapes.
///
/// The verdict depends on the pointer depth (number of `*` characters in
/// the whitespace-normalised text) and on the base type left after
/// pointer / cv stripping:
/// - depth 0: only `uintptr_t` / `intptr_t` (optionally `std::`-qualified);
/// - depth 1: the byte-view types, `void`, and the `sockaddr` family;
/// - depth >= 2: never safe.
pub(crate) fn cpp_cast_target_type_is_safe(s: &str) -> bool {
    // Collapse all whitespace runs (tabs, newlines, multiple spaces) to
    // single spaces and drop leading / trailing whitespace, so the
    // normalised form is `const char *` with one space between tokens.
    let normalised = s.split_whitespace().collect::<Vec<_>>().join(" ");

    let Some(base) = strip_pointer_and_cv(&normalised) else {
        return false;
    };

    // Pointer-indirection depth = count of `*` in the normalised form
    // (compound forms with parens / brackets / templates are filtered by
    // `strip_pointer_and_cv`).
    match normalised.matches('*').count() {
        // Value cast: only the pointer<->integer round-trip types are
        // well-defined.  Aliasing *through* a `uintptr_t*` / `intptr_t*`
        // is **not** covered by the standard exemption — only converting
        // a pointer value to/from the integer type is defined behaviour
        // ([basic.compound]/3) — so these names are accepted here only.
        0 => matches!(
            base.as_str(),
            "uintptr_t" | "intptr_t" | "std::uintptr_t" | "std::intptr_t"
        ),
        // Single indirection: standard aliasing exemption for byte-view
        // access plus POSIX socket type-punning and the opaque `void*`
        // target.
        1 => matches!(
            base.as_str(),
            "char"
                | "signed char"
                | "unsigned char"
                | "wchar_t"
                | "uint8_t"
                | "int8_t"
                | "std::byte"
                | "byte"
                | "void"
                | "sockaddr"
                | "struct sockaddr"
                | "sockaddr_in"
                | "sockaddr_in6"
                | "sockaddr_un"
                | "sockaddr_storage"
                | "struct sockaddr_in"
                | "struct sockaddr_in6"
                | "struct sockaddr_un"
                | "struct sockaddr_storage"
        ),
        // Pointer-to-pointer and beyond is never safe: the
        // [basic.lval]/11 aliasing exemption is for accessing an object's
        // representation as bytes through a single pointer indirection.
        // Reading a `char*` object through a `char**` is a strict-aliasing
        // violation, and the same logic applies to `void**`, `uint8_t**`,
        // etc.
        _ => false,
    }
}

/// Strip a single C++ cast target's leading/trailing `const`/`volatile`
/// qualifiers and trailing `*` characters (any depth).  Returns the bare
/// base type identifier on success.  Returns `None` if anything left over
/// after pointer/cv stripping is not a plain identifier or scoped name
/// (e.g. function-pointer `void(*)(int)` or template `vector<int>`).
///
/// Trailing qualifiers are recognised both after whitespace (`T* const`,
/// `T const*`) and directly after a `*` with no separating space
/// (`T*const`, `T*volatile`).  A qualifier keyword that is merely the tail
/// of a longer identifier (`myconst`) is left untouched.
fn strip_pointer_and_cv(s: &str) -> Option<String> {
    // Remove one trailing `const` / `volatile` keyword, provided it is a
    // standalone token: the character before it must be a space or a `*`.
    // Returns the remainder (which may still end in whitespace or `*`).
    fn strip_trailing_cv(t: &str) -> Option<&str> {
        let t = t.trim_end();
        ["const", "volatile"].iter().find_map(|kw| {
            t.strip_suffix(*kw)
                .filter(|rest| rest.ends_with(' ') || rest.ends_with('*'))
        })
    }

    let mut t: &str = s.trim();

    // Strip leading `const` / `volatile`, possibly multiple.
    while let Some(rest) = t
        .strip_prefix("const ")
        .or_else(|| t.strip_prefix("volatile "))
    {
        t = rest.trim_start();
    }

    // Repeatedly strip trailing cv-qualifiers and trailing `*` in either
    // order — `T*`, `T* const`, `T*const`, `T const*`, `T**`, `const T*`
    // are all reachable.  Every successful strip shortens `t`, so the loop
    // terminates once neither suffix matches.
    loop {
        let mut progressed = false;
        while let Some(rest) = strip_trailing_cv(t) {
            t = rest;
            progressed = true;
        }
        // One `*` per outer iteration; qualifiers between stars are picked
        // up by the next pass of the inner loop.
        if let Some(rest) = t.trim_end().strip_suffix('*') {
            t = rest;
            progressed = true;
        }
        if !progressed {
            break;
        }
    }

    let base = t.trim();
    if base.is_empty() {
        return None;
    }

    // Refuse anything that contains characters typical of compound type
    // forms we don't want to reason about: parens (function pointer), angle
    // brackets (template instantiation), brackets (array), commas (multiple
    // arguments).  Accept identifier characters, `_`, `:` (for `std::byte`)
    // and spaces (for `unsigned char` / `struct sockaddr`).
    let is_plain = base
        .chars()
        .all(|ch| ch.is_ascii_alphanumeric() || matches!(ch, '_' | ':' | ' '));
    is_plain.then(|| base.to_string())
}

/// PHP-only Layer F: structural suppression of `php.crypto.md5` /
/// `php.crypto.sha1` when the call's *consuming context* yields a name
/// that matches a recognised non-cryptographic identifier pattern.
///
/// The pattern rule fires syntactically on every `md5(...)` /
/// `sha1(...)` callsite regardless of how the result is used.  In real
/// PHP code these functions are pervasively used for non-cryptographic
/// purposes — ETag generation (HTTP cache validators), array/cache-key
/// hashing, dedup fingerprints, content addressing for templates — and
/// those uses do not realise the "weak hash function" risk the rule
/// names.  Suppress only when the consuming context yields a name from
/// a recognised non-crypto suffix set, while keeping every callsite
/// whose name contains a crypto-keyword substring (`password`,
/// `secret`, `token`, `signature`, `hmac`, `digest`, `salt`, …).
///
/// Consuming contexts inspected (walk up through transparent wrappers
/// — `binary_expression` for concat / equality, `parenthesized_expression`,
/// `conditional_expression`, `argument`):
///   - `assignment_expression` (covers `=`, `??=`, `+=`, …) — resolve
///     the LHS to a final identifier (variable name, member-access
///     property name, or string-literal subscript index).
///   - `array_element_initializer` — the key is a string literal whose
///     contents are the consuming name.
///   - `subscript_expression` where the call sits in the index position
///     — using a hash as an array index is intrinsically non-crypto.
///   - `return_statement` — resolve the enclosing
///     `function_definition` / `method_declaration` name (with the
///     conventional `get` prefix stripped).
///
/// All other consuming forms (bare expression statements, comparison
/// operands without an LHS, lambda returns, arguments to user-defined
/// helpers) keep firing.
fn is_php_weak_hash_non_crypto_use(cap_node: tree_sitter::Node, bytes: &[u8]) -> bool {
    let call = if cap_node.kind() == "function_call_expression" {
        cap_node
    } else {
        let mut cur = cap_node;
        let mut found = None;
        for _ in 0..4 {
            if cur.kind() == "function_call_expression" {
                found = Some(cur);
                break;
            }
            match cur.parent() {
                Some(p) => cur = p,
                None => break,
            }
        }
        match found {
            Some(c) => c,
            None => return false,
        }
    };

    let mut cur = call;
    let mut steps = 0u32;
    while let Some(parent) = cur.parent() {
        if steps > 16 {
            return false;
        }
        steps += 1;
        match parent.kind() {
            // Transparent wrappers — keep walking to find the
            // consumer.  These node kinds preserve the value flowing
            // out of the md5/sha1 call without transforming its
            // semantics, so we let the OUTER context (LHS name,
            // array key, return method, etc.) classify the use.
            //
            // - `binary_expression`: concat (`'foo_' . md5($x)`),
            //   equality (`md5($x) === $stored`), arithmetic.
            // - `parenthesized_expression`: redundant parens.
            // - `conditional_expression`: `$cond ? md5($x) : ''`.
            // - `argument` / `arguments`: positional / wrapped arg
            //   lists — the enclosing call (`substr(md5($x), 0, 8)`,
            //   `$q->createNamedParameter(md5($x))`) is what matters.
            // - `function_call_expression`: identity-shaped wrappers
            //   such as `substr(...)`, `strtolower(...)`,
            //   `urlencode(...)` which propagate the hash to its
            //   real consumer.
            // - `encapsed_string`: `"prefix-{md5($x)}"` interpolation.
            //
            // `member_call_expression` / `nullsafe_member_call_expression`
            // are NOT in this transparent set — they have their own
            // arm below that performs lookup-verb classification on
            // the method name (`->get(md5($k))`, `->set(...)`, …)
            // before optionally falling through to the outer
            // consumer.
            "binary_expression"
            | "parenthesized_expression"
            | "conditional_expression"
            | "argument"
            | "arguments"
            | "function_call_expression"
            | "encapsed_string" => {}
            "assignment_expression" | "augmented_assignment_expression" => {
                let lhs = parent
                    .child_by_field_name("left")
                    .or_else(|| parent.named_child(0));
                let Some(lhs) = lhs else {
                    return false;
                };
                return resolve_php_lvalue_name(lhs, bytes)
                    .map(|n| name_is_non_crypto(&n))
                    .unwrap_or(false);
            }
            "array_element_initializer" => {
                if parent.named_child_count() < 2 {
                    return false;
                }
                let key = parent.named_child(0);
                let Some(key) = key else {
                    return false;
                };
                let Some(key_text) = string_literal_text(key, bytes) else {
                    return false;
                };
                return name_is_non_crypto(&key_text);
            }
            "subscript_expression" => {
                // tree-sitter-php: subscript_expression has the receiver as
                // the first named child and the index as the second.  If our
                // call sits past the receiver's end byte, we are the index.
                let r0 = parent.named_child(0);
                let Some(r0) = r0 else {
                    cur = parent;
                    continue;
                };
                if call.start_byte() >= r0.end_byte() {
                    return true;
                }
                // Otherwise we're inside the receiver chain; the surrounding
                // `assignment_expression` (if any) will resolve the LHS name.
            }
            "member_call_expression" | "nullsafe_member_call_expression" => {
                // The md5/sha1 result is being passed as an argument to a
                // method call.  When the method name is a recognised
                // key/cache/lookup verb (`get`, `set`, `has`, `delete`,
                // `fetch`, `store`, `find`, `getItem`, `setItem`, …), the
                // result is being used as a non-cryptographic lookup key —
                // canonical for cache backends, hash maps, and storage
                // adapters where the developer is hashing arbitrary input
                // to a fixed-length, character-safe key.  Genuine
                // crypto-comparison wrappers (`hash_equals`, `verify`,
                // `password_verify`) keep firing because their method
                // name does not match the verb set.
                let name_node = parent.child_by_field_name("name").or_else(|| {
                    // Fallback: last named child is the method name.
                    let count = parent.named_child_count();
                    if count == 0 {
                        None
                    } else {
                        parent.named_child(count as u32 - 1)
                    }
                });
                if let Some(nn) = name_node
                    && nn.kind() == "name"
                    && let Ok(method) = std::str::from_utf8(&bytes[nn.byte_range()])
                    && method_is_lookup_verb(method)
                {
                    return true;
                }
                // Otherwise treat as transparent so the OUTER consumer can
                // classify (`$x = $cache->get(sha1($k))` resolves LHS `x`).
            }
            "return_statement" => {
                let mut p = parent;
                for _ in 0..10 {
                    let Some(pp) = p.parent() else {
                        return false;
                    };
                    p = pp;
                    let kind = p.kind();
                    if kind == "method_declaration" || kind == "function_definition" {
                        let Some(nn) = p
                            .child_by_field_name("name")
                            .or_else(|| find_named_child_of_kind(p, "name"))
                        else {
                            return false;
                        };
                        let Ok(name) = std::str::from_utf8(&bytes[nn.byte_range()]) else {
                            return false;
                        };
                        return method_name_is_non_crypto(name);
                    }
                    if kind == "anonymous_function"
                        || kind == "arrow_function"
                        || kind == "anonymous_function_creation_expression"
                    {
                        return false;
                    }
                }
                return false;
            }
            // Halt at scope / statement boundaries we cannot resolve through.
            "expression_statement"
            | "compound_statement"
            | "method_declaration"
            | "function_definition"
            | "anonymous_function"
            | "anonymous_function_creation_expression"
            | "arrow_function"
            | "program" => return false,
            _ => return false,
        }
        cur = parent;
    }
    false
}

/// Reduce a PHP l-value expression to the identifier that names it, for
/// classification by [`name_is_non_crypto`].
///
/// Redundant parentheses are peeled first (via `unwrap_php_paren`).
/// Recognised shapes:
///   - `variable_name` → text of its first named child.
///   - `member_access_expression` → text of the `name` field (falling back
///     to the last named child), only when that node has kind `name`.
///   - `subscript_expression` → the string-literal index when there is one;
///     otherwise the resolution of the receiver (first named child), so
///     `$columnNamesHashes[$col]` yields `columnNamesHashes`.
///   - `scoped_property_access_expression` → resolution of the last named
///     child.
///
/// Every other shape yields `None`; the caller then keeps the finding.
fn resolve_php_lvalue_name(lhs: tree_sitter::Node, bytes: &[u8]) -> Option<String> {
    // Source text covered by `n`, if it is valid UTF-8.
    fn node_text(n: tree_sitter::Node, bytes: &[u8]) -> Option<String> {
        std::str::from_utf8(&bytes[n.byte_range()])
            .ok()
            .map(String::from)
    }

    let target = unwrap_php_paren(lhs);
    match target.kind() {
        "variable_name" => target.named_child(0).and_then(|n| node_text(n, bytes)),
        "member_access_expression" => {
            let property =
                target
                    .child_by_field_name("name")
                    .or_else(|| match target.named_child_count() {
                        0 => None,
                        count => target.named_child(count as u32 - 1),
                    })?;
            // Only a bare `name` is resolvable; a dynamic property
            // (`variable_name` etc.) is not.
            if property.kind() != "name" {
                return None;
            }
            node_text(property, bytes)
        }
        "subscript_expression" => {
            let literal_key = if target.named_child_count() >= 2 {
                string_literal_text(target.named_child(1)?, bytes)
            } else {
                None
            };
            match literal_key {
                Some(key) => Some(key),
                // Dynamic / non-literal index: name the receiver instead.
                None => resolve_php_lvalue_name(target.named_child(0)?, bytes),
            }
        }
        "scoped_property_access_expression" => match target.named_child_count() {
            0 => None,
            // The last named child is the property; recurse to extract
            // its bare name.
            count => resolve_php_lvalue_name(target.named_child(count as u32 - 1)?, bytes),
        },
        _ => None,
    }
}

/// Return the textual contents of a PHP string literal node (`string`
/// or `encapsed_string`), stripping surrounding quotes.  Returns `None`
/// for any non-string node and for interpolated `encapsed_string`s
/// containing template variables.
fn string_literal_text(node: tree_sitter::Node, bytes: &[u8]) -> Option<String> {
    if node.kind() != "string" && node.kind() != "encapsed_string" {
        return None;
    }
    if has_interpolation(node) {
        return None;
    }
    for i in 0..node.named_child_count() as u32 {
        if let Some(c) = node.named_child(i)
            && (c.kind() == "string_content" || c.kind() == "string_value")
        {
            return std::str::from_utf8(&bytes[c.byte_range()])
                .ok()
                .map(String::from);
        }
    }
    if let Ok(s) = std::str::from_utf8(&bytes[node.byte_range()]) {
        let trimmed = s.trim_matches(|c| c == '\'' || c == '"');
        return Some(trimmed.to_string());
    }
    None
}

fn unwrap_php_paren(mut node: tree_sitter::Node) -> tree_sitter::Node {
    for _ in 0..4 {
        if node.kind() == "parenthesized_expression"
            && let Some(inner) = node.named_child(0)
        {
            node = inner;
            continue;
        }
        break;
    }
    node
}

/// Decide by name alone whether a PHP identifier denotes a
/// non-cryptographic use of a hash value.
///
/// Evaluation order:
///   1. The empty name is rejected.
///   2. Any name whose ASCII-lowercased form *contains* a crypto keyword
///      (`password`, `secret`, `token`, `hmac`, …) is rejected.  Substring
///      matching keeps compound names such as `hashedPassword` /
///      `tokenHash` conservatively flagged.
///   3. `sig` and `mac` are rejected only in boundary positions (the whole
///      name, a `_sig` / `_mac` ending, or a `sig_` ending) because the
///      bare substrings occur in harmless words (`signal`, `design`,
///      `magic`).
///   4. Otherwise the name is accepted when it ends in a recognised safe
///      suffix and one of the following holds: the suffix is the whole
///      name; the byte before it is an ASCII non-letter (underscore,
///      digit, …); the suffix starts with an uppercase letter in the
///      original casing (camelCase boundary); or the suffix is at least
///      four bytes long.
///
/// False rejections of safe names are acceptable; false acceptances of
/// crypto names are not.
pub(crate) fn name_is_non_crypto(name: &str) -> bool {
    // Substrings that always mark a name as security-relevant.
    const CRYPTO_KEYWORDS: &[&str] = &[
        "password",
        "passwd",
        "pw_hash",
        "pwhash",
        "pwdhash",
        "pwd_hash",
        "passhash",
        "pass_hash",
        "secret",
        "token",
        "signature",
        "signed",
        "hmac",
        "digest",
        "verifier",
        "challenge",
        "csrf",
        "salt",
        "nonce_secret",
        "auth_code",
        "authcode",
        "auth_key",
        "authkey",
        "private",
        "credential",
        "creds",
        "encryption",
        "decryption",
        "encryptkey",
        "decryptkey",
        "encrypt_key",
        "decrypt_key",
        "apikey",
        "api_key",
    ];
    // Endings that indicate an identifier / cache-key / versioning use.
    const SAFE_ENDINGS: &[&str] = &[
        "hash",
        "hashes",
        "etag",
        "etags",
        "md5",
        "sha1",
        "fingerprint",
        "fingerprints",
        "cachekey",
        "cache_key",
        "cacheid",
        "cache_id",
        "id",
        "uid",
        "uuid",
        "guid",
        "name_hash",
        "checksum",
        "slot",
        "bucket",
        "seed",
        "marker",
        "tag",
        "gravatar",
        "hashid",
        "opaque",
        "shortid",
        "short_id",
        "fnv",
        "fingerprintkey",
        "anchor",
        "version",
        "buster",
        "cachebuster",
        "cache_buster",
        "revision",
        "rev",
    ];

    if name.is_empty() {
        return false;
    }

    // ASCII-only folding keeps byte offsets aligned with `name`, which the
    // camelCase check below relies on.
    let folded = name.to_ascii_lowercase();

    if CRYPTO_KEYWORDS.iter().any(|kw| folded.contains(*kw)) {
        return false;
    }

    let sig_at_boundary =
        folded == "sig" || folded.ends_with("_sig") || folded.ends_with("sig_");
    let mac_at_boundary = folded == "mac" || folded.ends_with("_mac");
    if sig_at_boundary || mac_at_boundary {
        return false;
    }

    let raw = name.as_bytes();
    SAFE_ENDINGS.iter().any(|ending| {
        let Some(stem) = folded.strip_suffix(*ending) else {
            return false;
        };
        // The ending is the entire name.
        if stem.is_empty() {
            return true;
        }
        let split = stem.len();
        // Non-ASCII bytes (UTF-8 lead / continuation) count as identifier
        // letters, i.e. no boundary, so `ëid`-style names are not accepted
        // on the strength of a short ending.
        let before = raw[split - 1];
        let after_non_letter = before.is_ascii() && !before.is_ascii_alphabetic();
        // `storageId`, `tableHash`, `sqlMd5`.
        let camel_case_break = raw[split].is_ascii_uppercase();
        // Endings of four or more bytes are accepted without a boundary.
        let long_ending = ending.len() >= 4;
        after_non_letter || camel_case_break || long_ending
    })
}

/// Method-name variant of [`name_is_non_crypto`] that understands the
/// `getETag` / `getHash` / `getCacheKey` accessor convention.
///
/// The name is first classified with one leading `get` (or `Get`) removed.
/// If that does not qualify, the unmodified name is classified as well.
/// The original casing is forwarded in both attempts so camelCase boundary
/// detection keeps working downstream.
fn method_name_is_non_crypto(name: &str) -> bool {
    let without_accessor_prefix = ["get", "Get"]
        .iter()
        .find_map(|prefix| name.strip_prefix(*prefix))
        .unwrap_or(name);
    name_is_non_crypto(without_accessor_prefix) || name_is_non_crypto(name)
}

/// Recognise PHP method names that signal a lookup / cache / store /
/// container key-or-value operation.  An `md5(...)` / `sha1(...)` result
/// passed to such a method is treated as a content-addressed key rather
/// than something relied on for cryptographic strength.
///
/// Matching is ASCII case-insensitive and has two tiers:
///   1. The whole name equals one of a fixed set of verbs (`get`, `set`,
///      `has`, `fetch`, `getItem`, `incrBy`, …).
///   2. The name starts with `get`, `set`, `has`, `create` or `build`
///      (first match in that order) and the remainder ends in a key-like
///      suffix (`key`, `id`, `hash`, `etag`, `uid`, `tag`, `fingerprint`,
///      `cachekey`) — e.g. `getCacheKey`, `setUserId`, `buildEtag`.
///
/// The set is intentionally narrow, so comparison helpers such as
/// `hash_equals`, `verify`, `password_verify` and `decryptWith` do not
/// match.
fn method_is_lookup_verb(method: &str) -> bool {
    const EXACT_VERBS: &[&str] = &[
        "get",
        "set",
        "has",
        "delete",
        "remove",
        "fetch",
        "store",
        "put",
        "save",
        "exists",
        "find",
        "lookup",
        "getitem",
        "setitem",
        "hasitem",
        "deleteitem",
        "addtag",
        "addtotag",
        "key",
        "keyfor",
        "containskey",
        "haskey",
        "loadbykey",
        "fetchbykey",
        "getbykey",
        "setbykey",
        "deletebykey",
        "incr",
        "incrby",
        "decr",
        "decrby",
        "expire",
        "ttl",
        "namespacekey",
        "cachekey",
    ];
    // Tried in order; only the first prefix that matches is stripped.
    const VERB_PREFIXES: &[&str] = &["get", "set", "has", "create", "build"];
    const KEY_LIKE_SUFFIXES: &[&str] = &[
        "cachekey",
        "key",
        "id",
        "hash",
        "etag",
        "uid",
        "tag",
        "fingerprint",
    ];

    let folded = method.to_ascii_lowercase();
    if EXACT_VERBS.contains(&folded.as_str()) {
        return true;
    }

    VERB_PREFIXES
        .iter()
        .find_map(|prefix| folded.strip_prefix(*prefix))
        .is_some_and(|tail| KEY_LIKE_SUFFIXES.iter().any(|hint| tail.ends_with(*hint)))
}

/// Report whether a string node has an interpolated part among its direct
/// children (e.g. PHP `"Hello $name"`).
///
/// A child counts as interpolation when its kind is `variable_name` or
/// `simple_variable`, or when its kind contains the substring
/// `interpolation`.  Only direct children are inspected.
fn has_interpolation(node: tree_sitter::Node) -> bool {
    let child_total = node.child_count() as u32;
    (0..child_total)
        .filter_map(|idx| node.child(idx))
        .map(|child| child.kind())
        .any(|kind| {
            matches!(kind, "variable_name" | "simple_variable") || kind.contains("interpolation")
        })
}

//  Layer B: AST pattern suppression when taint confirms safety

/// Translate the category segment of a pattern ID — the second
/// `.`-separated part, e.g. `cmdi` in `py.cmdi.os_system` — into the `Cap`
/// that taint analysis models for it.
///
/// Returns `None` when the ID has no second segment, or when the category
/// is one taint cannot subsume (deser, memory safety, crypto, …), so those
/// patterns are never suppressed.
fn pattern_category_cap(pattern_id: &str) -> Option<Cap> {
    let mut segments = pattern_id.split('.');
    // Skip the first segment; only the category is needed.
    segments.next()?;
    let cap = match segments.next()? {
        "cmdi" => Cap::SHELL_ESCAPE,
        "xss" => Cap::HTML_ESCAPE,
        "sqli" => Cap::SQL_QUERY,
        "code_exec" => Cap::CODE_EXEC,
        "ssrf" => Cap::SSRF,
        "path" => Cap::FILE_IO,
        // Structural categories with no taint-modelled capability.
        _ => return None,
    };
    Some(cap)
}

/// Suppression context built from CFG + taint results. Used to decide whether
/// an AST pattern finding can be safely suppressed because taint analysis
/// evaluated the data flow and found it safe.
///
/// Function scopes are keyed by `Option<String>` throughout.
/// NOTE(review): `None` presumably denotes top-level code outside any named
/// function — confirm against the code that populates these maps.
struct TaintSuppressionCtx {
    /// For each function scope, the set of lines containing Source-labeled nodes.
    source_lines_by_func: HashMap<Option<String>, HashSet<usize>>,
    /// For each function scope, the set of lines containing Sanitizer-labeled
    /// nodes.  Presence of an explicit sanitizer is the structural signal
    /// that taint analysis successfully evaluated (and cleared) the flow,
    /// so AST-pattern suppression is safe even when no taint findings
    /// fired in the function.
    sanitizer_lines_by_func: HashMap<Option<String>, HashSet<usize>>,
    /// For each sink node line, its enclosing function scope.
    sink_func_at_line: HashMap<usize, Option<String>>,
    /// Lines where taint emitted a `taint-unsanitised-flow` finding.
    taint_finding_lines: HashSet<usize>,
    /// Per-function set of taint-finding lines.  Used by Condition 4 of
    /// [`should_suppress`] alongside [`Self::sanitizer_lines_by_func`] to
    /// distinguish "taint proved safe" from "taint failed to track".
    taint_finding_lines_by_func: HashMap<Option<String>, HashSet<usize>>,
    /// Functions where the SSA engine emitted at least one
    /// `all_validated` event, i.e. every tainted input to *some* sink in
    /// the function passed through a recognised validation/
    /// sanitisation predicate.  Drained from
    /// `take_all_validated_spans`.  This is positive evidence that the
    /// engine reached a sink in this function and proved safety, even
    /// when no `taint-unsanitised-flow` finding fired and no Sanitizer
    /// label is present.  Covers validation, dominator-based pruning,
    /// early-return guards, type-check predicates, and interprocedural
    /// sanitiser wrappers, all of which legitimately clear taint via
    /// SSA branch-narrowing rather than a labelled sanitiser node.
    engine_validated_funcs: HashSet<Option<String>>,
    /// Functions where some Source's defining variable is later
    /// rebound to a literal RHS (carries `TaintMeta.const_text`) in
    /// the same scope, with no Source label on the rebinding node.
    /// This is positive evidence that the engine's SSA renaming
    /// structurally kills the source's taint before any sink can read
    /// it.  It covers `cmd = getenv(); cmd = "echo hello"; system(cmd)`
    /// patterns, where the rebind is what makes the code safe but the
    /// engine has no `Sanitizer` label or `taint-unsanitised-flow`
    /// finding to witness it.
    source_killed_funcs: HashSet<Option<String>>,
    /// Functions that call a same-file helper which itself contains a
    /// labelled Sanitizer node.  This is positive evidence that the
    /// engine's interprocedural analysis cleared the flow through a
    /// user-defined wrapper (e.g. `def sanitize(s): return
    /// shlex.quote(s)`).  The per-function `Sanitizer` check only sees
    /// direct sanitisers in the *caller's* scope.  Without this signal,
    /// every helper-wrapped sanitiser fires as an AST-pattern FP: the
    /// engine cleared the value via Phase 11 inline analysis, but the
    /// sink's enclosing scope has no labelled Sanitizer of its own.
    interproc_sanitizer_callers: HashSet<Option<String>>,
    /// Union of resolved sink-cap bits for cap-specific taint findings at
    /// each line.  Used by [`Self::is_redundant_ast_pattern`] to drop an
    /// AST-pattern finding only when the flow engine already emitted a
    /// specific rule id for the same vulnerability class.  Legacy generic
    /// findings (`taint-unsanitised-flow`, `cfg-unguarded-sink`) are not
    /// canonical enough to subsume language-specific AST rule IDs such as
    /// `py.cmdi.subprocess_shell` or `c.cmdi.system`.
    specific_taint_finding_caps_by_line: HashMap<usize, u32>,
}

impl TaintSuppressionCtx {
    /// Build suppression context from ALL per-body CFG graphs, tree (for
    /// byte→line mapping), and existing taint findings.
    ///
    /// Scans every body's graph (not just top-level) so that Source/Sink
    /// nodes inside function bodies are visible for suppression decisions.
    ///
    /// # Parameters
    ///
    /// * `file_cfg`: every entry of `file_cfg.bodies` is walked node by
    ///   node to collect Source / Sink / Sanitizer lines, literal rebinds,
    ///   and callee names, all keyed by the node's `enclosing_func`.
    /// * `tree`: used only to turn byte offsets into 1-based line numbers
    ///   via `byte_offset_to_point`.
    /// * `taint_diags`: diagnostics already produced for this file.  Ids
    ///   starting with `taint-unsanitised-flow` feed the per-line and
    ///   per-function finding sets; other `taint-` ids carrying non-zero
    ///   `sink_caps` evidence feed the cap-per-line map.
    ///
    /// # Side effects
    ///
    /// Calls `crate::taint::ssa_transfer::take_all_validated_spans()`.
    /// NOTE(review): the `take_` name suggests the span set is emptied by
    /// this call, so a second `build` for the same file would presumably
    /// see no spans; confirm against the helper.
    fn build(file_cfg: &FileCfg, tree: &tree_sitter::Tree, taint_diags: &[Diag]) -> Self {
        let mut source_lines_by_func: HashMap<Option<String>, HashSet<usize>> = HashMap::new();
        let mut sanitizer_lines_by_func: HashMap<Option<String>, HashSet<usize>> = HashMap::new();
        let mut sink_func_at_line: HashMap<usize, Option<String>> = HashMap::new();
        // Per-function (var_name, source_line) pairs for Source nodes whose
        // `defines` is set.  Used below to detect SSA source kills via
        // const reassignment (`cmd = getenv(); cmd = "echo hello"`).
        let mut source_var_defs_by_func: HashMap<Option<String>, Vec<(String, usize)>> =
            HashMap::new();
        // Per-function (var_name, line) pairs for nodes that bind a
        // variable to a literal RHS (carry `TaintMeta.const_text`).
        // Used to match against `source_var_defs_by_func` for kill
        // detection.
        let mut const_def_var_by_func: HashMap<Option<String>, Vec<(String, usize)>> =
            HashMap::new();
        // Set of `enclosing_func` names whose body contains at least
        // one labelled Sanitizer.  These are user-defined sanitiser
        // wrappers callable from other functions in the same file
        // (e.g. `def sanitize(s): return shlex.quote(s)`).
        let mut sanitizer_funcs: HashSet<String> = HashSet::new();
        // Per-function set of bare callee names invoked from this
        // function's body.  Bare = last `.`-separated segment, so
        // `this.sanitize`, `obj.sanitize`, and `sanitize` all collapse
        // to the same key for matching against `sanitizer_funcs`.
        let mut callees_by_func: HashMap<Option<String>, HashSet<String>> = HashMap::new();

        for body in &file_cfg.bodies {
            for idx in body.graph.node_indices() {
                let info = &body.graph[idx];
                // Collapse the node's label list into three presence flags;
                // a single node may carry more than one kind of label.
                let mut has_source = false;
                let mut has_sink = false;
                let mut has_sanitizer = false;
                for label in &info.taint.labels {
                    match label {
                        DataLabel::Source(_) => has_source = true,
                        DataLabel::Sink(_) => has_sink = true,
                        DataLabel::Sanitizer(_) => has_sanitizer = true,
                    }
                }
                // Skip synthetic source nodes emitted by `pre_emit_arg_source_nodes`
                // (`__nyx_src_*` / `__nyx_chainsrc_*`).  These are a CFG-level
                // synthesis that hoists a source-labeled member-expression into
                // its own Source node so taint can see a definition.  Absence of
                // a downstream taint finding through such a synth source does
                // NOT prove safety: it can also mean the engine couldn't
                // propagate the taint (e.g. `&req` with `var req struct{}`
                // where points-to doesn't track the address-of of a stack
                // variable).  Treating synth sources as "real" sources here
                // would silently silence AST-pattern findings on every Go
                // CRUD handler whose Decode destination is an `&req`-style
                // address-of-local.
                let is_synth_source = info.taint.defines.as_deref().is_some_and(|d| {
                    d.starts_with("__nyx_src_") || d.starts_with("__nyx_chainsrc_")
                });
                // 1-based line of the start of the node's classification span.
                let byte = info.classification_span().0;
                let point = byte_offset_to_point(tree, byte);
                let line = point.row + 1;
                if has_source && !is_synth_source {
                    source_lines_by_func
                        .entry(info.ast.enclosing_func.clone())
                        .or_default()
                        .insert(line);
                    if let Some(var) = info.taint.defines.as_deref() {
                        source_var_defs_by_func
                            .entry(info.ast.enclosing_func.clone())
                            .or_default()
                            .push((var.to_string(), line));
                    }
                }
                if has_sanitizer {
                    sanitizer_lines_by_func
                        .entry(info.ast.enclosing_func.clone())
                        .or_default()
                        .insert(line);
                    // Only named functions can be matched as helpers later;
                    // a `None` scope has no name to look up.
                    if let Some(func_name) = info.ast.enclosing_func.as_deref() {
                        sanitizer_funcs.insert(func_name.to_string());
                    }
                }
                if has_sink {
                    // One entry per line: a later sink node on the same line
                    // overwrites the function recorded by an earlier one.
                    sink_func_at_line.insert(line, info.ast.enclosing_func.clone());
                }
                // Const-rebind detection: a node that defines a variable
                // from a literal RHS and carries no Source label is a
                // candidate kill site.  Skip nodes that are themselves
                // Sources (a literal-init source like `cmd := "ls"` is
                // not a kill).
                if !has_source
                    && let (Some(var), Some(_)) = (
                        info.taint.defines.as_deref(),
                        info.taint.const_text.as_ref(),
                    )
                {
                    const_def_var_by_func
                        .entry(info.ast.enclosing_func.clone())
                        .or_default()
                        .push((var.to_string(), line));
                }
                // Per-function callee inventory for interprocedural
                // sanitiser detection.  `bare_method_name` collapses
                // `this.sanitize` / `obj.sanitize` / `sanitize` to the
                // same key so receiver-prefixed Java/Ruby/etc. calls
                // match a bare-named helper definition.  Also include
                // `arg_callees` so `println(... + sanitize(name) +
                // ...)` recognises the inline sanitiser call buried
                // inside the sink's argument expression.
                let bare_inserts: Vec<&str> = info
                    .call
                    .callee
                    .as_deref()
                    .into_iter()
                    .chain(info.arg_callees.iter().filter_map(|c| c.as_deref()))
                    .collect();
                if !bare_inserts.is_empty() {
                    let entry = callees_by_func
                        .entry(info.ast.enclosing_func.clone())
                        .or_default();
                    for callee in bare_inserts {
                        let bare = crate::labels::bare_method_name(callee);
                        if !bare.is_empty() {
                            entry.insert(bare.to_string());
                        }
                    }
                }
            }
        }

        // Source-kill detection: a function is "source-killed" when at
        // least one of its Source-defined variables is re-bound to a
        // literal at a later line in the same scope.  Captures
        // `safe_reassigned`-style fixtures: the SSA engine renames the
        // sink-read SSA value to a clean constant before any sink can
        // observe taint, but neither a `Sanitizer` label nor a
        // `taint-unsanitised-flow` finding fires to witness the kill.
        let mut source_killed_funcs: HashSet<Option<String>> = HashSet::new();
        for (func, src_defs) in &source_var_defs_by_func {
            let Some(kills) = const_def_var_by_func.get(func) else {
                continue;
            };
            for (src_var, src_line) in src_defs {
                // The comparison is by name and line number only: same
                // variable name, rebind on a strictly later line.
                if kills
                    .iter()
                    .any(|(kill_var, kill_line)| kill_var == src_var && kill_line > src_line)
                {
                    source_killed_funcs.insert(func.clone());
                    break;
                }
            }
        }

        // Interprocedural sanitiser caller detection: a function is
        // an "interproc sanitiser caller" when its body invokes any
        // helper whose own body contains a labelled Sanitizer.  This
        // handles wrappers like `def sanitize(s): return
        // shlex.quote(s)`: the engine clears taint via
        // inline analysis, but the caller's scope has no labelled
        // Sanitizer of its own to satisfy Condition 4(b).
        let mut interproc_sanitizer_callers: HashSet<Option<String>> = HashSet::new();
        if !sanitizer_funcs.is_empty() {
            for (func, callees) in &callees_by_func {
                if callees.iter().any(|c| sanitizer_funcs.contains(c)) {
                    interproc_sanitizer_callers.insert(func.clone());
                }
            }
        }

        // Drain the SSA engine's all-validated sink spans, attribute
        // each to its enclosing function via `sink_func_at_line`, and
        // record the function as "engine-validated".  The set was
        // populated by `ssa_events_to_findings` whenever the engine
        // emitted an `SsaTaintEvent { all_validated: true, .. }`,
        // i.e. the engine reached a sink and proved every tainted
        // input passed validation.  This is the broadest form of
        // engine-success evidence, covering predicate validation
        // (`if !allowed[x]`), dominator early-return, type-check
        // (`Atoi` / `typeof`), and interprocedural sanitiser
        // wrappers.
        let mut engine_validated_funcs: HashSet<Option<String>> = HashSet::new();
        for (start, _end) in crate::taint::ssa_transfer::take_all_validated_spans() {
            let line = byte_offset_to_point(tree, start).row + 1;
            // Spans whose start line has no recorded sink are ignored.
            if let Some(func) = sink_func_at_line.get(&line) {
                engine_validated_funcs.insert(func.clone());
            }
        }

        // Lines carrying a generic `taint-unsanitised-flow*` finding.
        let taint_finding_lines: HashSet<usize> = taint_diags
            .iter()
            .filter(|d| d.id.starts_with("taint-unsanitised-flow"))
            .map(|d| d.line)
            .collect();

        // Cap bits per line for cap-specific flow-backed findings only, so a
        // redundant AST pattern at the same line+cap can be dropped in favour
        // of the richer flow.  Do not count legacy generic findings here:
        // `taint-unsanitised-flow` and `cfg-unguarded-sink` carry evidence,
        // but their rule ids are deliberately catch-alls, while AST `cmdi`,
        // `sqli`, etc. IDs are the canonical namespace many tests, SARIF
        // consumers, and dynamic-verification spec derivation rely on.
        let mut specific_taint_finding_caps_by_line: HashMap<usize, u32> = HashMap::new();
        for d in taint_diags {
            if d.id.starts_with("taint-") && !d.id.starts_with("taint-unsanitised-flow") {
                if let Some(caps) = d.evidence.as_ref().map(|e| e.sink_caps) {
                    if caps != 0 {
                        *specific_taint_finding_caps_by_line
                            .entry(d.line)
                            .or_default() |= caps;
                    }
                }
            }
        }

        // Per-function partition of taint findings.  Maps each finding's
        // line to the enclosing function scope by reusing
        // `sink_func_at_line` (the same span/function mapping the Sink-side
        // of taint analysis populated above).  A finding line with no
        // recorded sink falls back to the `None` key.
        let mut taint_finding_lines_by_func: HashMap<Option<String>, HashSet<usize>> =
            HashMap::new();
        for line in &taint_finding_lines {
            let func = sink_func_at_line.get(line).cloned().unwrap_or(None);
            taint_finding_lines_by_func
                .entry(func)
                .or_default()
                .insert(*line);
        }

        Self {
            source_lines_by_func,
            sanitizer_lines_by_func,
            sink_func_at_line,
            taint_finding_lines,
            taint_finding_lines_by_func,
            engine_validated_funcs,
            source_killed_funcs,
            interproc_sanitizer_callers,
            specific_taint_finding_caps_by_line,
        }
    }

    /// Reports whether an AST pattern finding merely repeats a cap-specific
    /// flow finding the taint engine already emitted on the same line.
    ///
    /// A flow finding carries source and path evidence that a bare pattern
    /// match does not, so when both cover the same line and the same cap the
    /// pattern adds nothing.  This is the "taint found it UNSAFE" counterpart
    /// to [`Self::should_suppress`], which handles the "taint found it SAFE"
    /// case.
    ///
    /// The comparison is by cap, not by line alone: a pattern whose cap is
    /// absent from the caps recorded for `line` describes a different sink
    /// and is kept.  Returns `false` when `pattern_id` maps to no cap or when
    /// no cap-specific flow finding was recorded at `line`.
    fn is_redundant_ast_pattern(&self, pattern_id: &str, line: usize) -> bool {
        pattern_category_cap(pattern_id).is_some_and(|cap| {
            self.specific_taint_finding_caps_by_line
                .get(&line)
                .is_some_and(|flow_caps| flow_caps & cap.bits() != 0)
        })
    }

    /// Returns `true` if this AST pattern finding should be suppressed.
    fn should_suppress(&self, pattern_id: &str, line: usize) -> bool {
        // Condition 1: pattern category maps to a Cap taint models
        if pattern_category_cap(pattern_id).is_none() {
            return false;
        }
        // Condition 2: at least one Source exists in the same function scope
        // at an EARLIER line (upstream in control flow). This prevents suppression
        // when the only Source is co-located (dual-label) or downstream from the
        // sink, since taint couldn't have evaluated a flow that doesn't exist.
        let func = match self.sink_func_at_line.get(&line) {
            Some(f) => f,
            None => return false, // No CFG sink at this line, taint had no opportunity to evaluate
        };
        match self.source_lines_by_func.get(func) {
            Some(source_lines) => {
                if !source_lines.iter().any(|&sl| sl < line) {
                    return false;
                }
            }
            None => return false,
        }
        // Condition 3: no taint finding at this line (taint found it safe)
        if self.taint_finding_lines.contains(&line) {
            return false;
        }
        // Condition 4: distinguish "taint proved safe" from "taint failed
        // to track".  Suppress only when there's a structural signal that
        // taint analysis actually evaluated this flow:
        //   (a) the function fired at least one taint-unsanitised-flow
        //       finding (engine ran successfully and reached *some* sink),
        //       OR
        //   (b) the function contains an explicit Sanitizer node (the
        //       canonical mechanism by which a flow is cleared, e.g.
        //       `escapeshellarg` between $_GET and `system`),
        //       OR
        //   (c) the SSA engine emitted at least one `all_validated`
        //       event in this function (engine reached *some* sink and
        //       proved every tainted input was validated, covers
        //       predicate validation, dominator early-return,
        //       type-check predicates, and interprocedural sanitiser
        //       wrappers that don't carry an explicit Sanitizer
        //       label),
        //       OR
        //   (d) the function rebinds a Source's defining variable to
        //       a literal RHS at a later line (engine's SSA renaming
        //       structurally kills taint before any sink reads it ,
        //       covers `cmd = getenv(); cmd = "echo"; system(cmd)`),
        //       OR
        //   (e) the function calls a same-file helper whose body
        //       contains a labelled Sanitizer (interprocedural
        //       sanitiser wrapper, covers `def sanitize(s): return
        //       shlex.quote(s)` patterns where the engine clears
        //       taint via inline analysis but the caller's
        //       scope has no Sanitizer label of its own).
        //
        // When none hold, we can't distinguish silent engine failure
        // from real safety, e.g. Go points-to limitation on `&local`
        // Decode destinations leaves the chain writeback fired but the
        // field-cell propagation dead, suppressing legitimate
        // AST-pattern findings on every Go CRUD handler whose Decode
        // destination is a stack-local address-of.
        let func_has_taint_finding = self
            .taint_finding_lines_by_func
            .get(func)
            .is_some_and(|s| !s.is_empty());
        let func_has_sanitizer = self
            .sanitizer_lines_by_func
            .get(func)
            .is_some_and(|s| !s.is_empty());
        let func_engine_validated = self.engine_validated_funcs.contains(func);
        let func_source_killed = self.source_killed_funcs.contains(func);
        let func_interproc_sanitizer = self.interproc_sanitizer_callers.contains(func);
        if !func_has_taint_finding
            && !func_has_sanitizer
            && !func_engine_validated
            && !func_source_killed
            && !func_interproc_sanitizer
        {
            return false;
        }
        true
    }
}

//  Pass 2 / single‑file: Full rule execution (AST queries + taint)

/// Run every analysis enabled by `cfg` over an in-memory file and collect
/// the resulting diagnostics.
///
/// This is the core **pass 2** entry point.  Callers that already hold the
/// file contents should prefer it over [`run_rules_on_file`] to avoid a
/// second `fs::read`.
///
/// Behaviour by scanner mode:
///
/// * `Full`, `Cfg`, `Taint`: the CFG-based analyses run.  `Full`
///   additionally runs the AST queries (dropping findings that
///   [`TaintSuppressionCtx`] marks as suppressed or redundant) and the auth
///   analyses.
/// * any other mode: AST queries plus auth analyses only.
///
/// When `ParsedSource::try_new` yields no parsed source, only the text-based
/// pattern scan runs, plus a synthetic parse-timeout diagnostic if a timeout
/// was recorded.
///
/// # Errors
///
/// Propagates any error returned by `ParsedSource::try_new`.
pub fn run_rules_on_bytes(
    bytes: &[u8],
    path: &Path,
    cfg: &Config,
    global_summaries: Option<&GlobalSummaries>,
    scan_root: Option<&Path>,
) -> NyxResult<Vec<Diag>> {
    let _span = tracing::debug_span!("run_rules", file = %path.display()).entered();
    maybe_inject_test_panic(path);

    let source = match ParsedSource::try_new(bytes, path)? {
        Some(parsed_source) => parsed_source,
        None => {
            // No tree-sitter parse available: fall back to text patterns and
            // report a parse timeout if that is what prevented parsing.
            let mut fallback = scan_text_based_patterns(bytes, path, cfg);
            if let Some(timeout_ms) = take_last_parse_timeout_ms() {
                fallback.push(parse_timeout_diag(path, timeout_ms));
            }
            return Ok(fallback);
        }
    };

    let full_mode = cfg.scanner.mode == AnalysisMode::Full;
    let cfg_capable = matches!(
        cfg.scanner.mode,
        AnalysisMode::Full | AnalysisMode::Cfg | AnalysisMode::Taint
    );

    let mut diags = Vec::new();

    if !cfg_capable {
        // AST-only path: queries run on the raw source before it is wrapped.
        diags.extend(source.run_ast_queries(cfg));
        let parsed = ParsedFile::from_source(source, cfg);
        diags.extend(parsed.run_auth_analyses(cfg, global_summaries, scan_root));
        parsed.source.finalize_diags(&mut diags, cfg);
        return Ok(diags);
    }

    let parsed = ParsedFile::from_source(source, cfg);
    diags.extend(parsed.run_cfg_analyses(cfg, global_summaries, scan_root));

    if full_mode {
        // Layer B: the suppression context is built from the taint findings
        // gathered so far, then used to filter the AST findings.
        let suppression =
            TaintSuppressionCtx::build(&parsed.file_cfg, &parsed.source.tree, &diags);
        let ast_findings = parsed.source.run_ast_queries(cfg);
        diags.extend(ast_findings.into_iter().filter(|finding| {
            !(suppression.should_suppress(&finding.id, finding.line)
                || suppression.is_redundant_ast_pattern(&finding.id, finding.line))
        }));
        diags.extend(parsed.run_auth_analyses(cfg, global_summaries, scan_root));
    }

    parsed.source.finalize_diags(&mut diags, cfg);
    Ok(diags)
}

/// Reads `path` from disk and hands the contents to
/// [`run_rules_on_bytes`], forwarding every other argument unchanged.
///
/// # Errors
///
/// Returns the read failure (converted into the function's error type) or
/// whatever [`run_rules_on_bytes`] returns.
pub fn run_rules_on_file(
    path: &Path,
    cfg: &Config,
    global_summaries: Option<&GlobalSummaries>,
    scan_root: Option<&Path>,
) -> NyxResult<Vec<Diag>> {
    let contents = std::fs::read(path)?;
    run_rules_on_bytes(contents.as_slice(), path, cfg, global_summaries, scan_root)
}

//  Fused single-pass: extract summaries + run full analysis in one parse/CFG

/// Result of a fused analysis pass: both function summaries and diagnostics.
///
/// Produced by [`analyse_file_fused`].  When the file could not be parsed
/// into a `ParsedSource`, every collection is empty, `cfg_nodes` is `0`, and
/// both optional fields are `None`; only `diags` may carry entries.
pub struct FusedResult {
    /// Function summaries exported for this file via
    /// `export_summaries_with_root`.
    pub summaries: Vec<FuncSummary>,
    /// Diagnostics collected by the pass, after `finalize_diags` has run
    /// (or the text-pattern / parse-timeout diagnostics on the unparsed
    /// fallback path).
    pub diags: Vec<Diag>,
    /// SSA-derived per-parameter summaries keyed by canonical
    /// [`crate::symbol::FuncKey`].  Keys preserve `(lang, namespace,
    /// container, name, arity, disambig, kind)` so two same-name definitions
    /// in the same file never collide.  Empty unless the scanner mode is
    /// `Full`, `Cfg`, or `Taint`.
    pub ssa_summaries: Vec<(crate::symbol::FuncKey, SsaFuncSummary)>,
    /// Node count of the file's CFG graph (`cfg_graph().node_count()`).
    pub cfg_nodes: usize,
    /// Eligible callee bodies for cross-file symex, keyed by
    /// canonical [`crate::symbol::FuncKey`] (same identity model as
    /// `ssa_summaries`).  Empty unless the scanner mode is `Full`, `Cfg`,
    /// or `Taint`.
    pub ssa_bodies: Vec<(
        crate::symbol::FuncKey,
        crate::taint::ssa_transfer::CalleeSsaBody,
    )>,
    /// Per-function auth-check summaries for cross-file helper
    /// lifting.  One entry per analysis unit whose body proves at
    /// least one positional parameter under an ownership / membership
    /// / admin / authorization check; empty for files with no such
    /// helpers.
    pub auth_summaries: Vec<(
        crate::symbol::FuncKey,
        auth_analysis::model::AuthCheckSummary,
    )>,
    /// Per-Python-file router-level dep declarations + `include_router`
    /// edges for cross-file FastAPI router-dep propagation.  `None` for
    /// non-Python files; `Some((module_id, facts))` for Python files
    /// where `module_id` is the file's
    /// [`auth_analysis::router_facts::module_id_for_storage`] key.
    /// Pass 1 collects these into
    /// `GlobalSummaries.router_facts_by_module`; pass 2 resolves them
    /// per-file via `GlobalSummaries::resolve_cross_file_router_deps`.
    pub router_facts: Option<(String, auth_analysis::router_facts::PerFileRouterFacts)>,
    /// Per-file Phase-09 cross-package import map.  `None` when the
    /// file's resolver produced no resolved bindings; otherwise
    /// `Some((namespace, map))` where `namespace` is the file's
    /// scan-root-relative path (matching `FuncKey::namespace`) and
    /// `map` maps each local import binding name (e.g. `escapeHtml`)
    /// to the canonical `FuncKey` of the imported function in its
    /// own package.  Pass 1 collects these into
    /// `GlobalSummaries.cross_package_imports_by_namespace`; pass 2's
    /// `inline_analyse_callee` consults the index when an inlined
    /// callee body's own `cross_package_imports` Arc is empty (the
    /// indexed-mode case where bodies round-trip through SQLite and
    /// the Arc field is `#[serde(skip)]`).
    pub cross_package_imports: Option<(
        String,
        std::sync::Arc<HashMap<String, crate::symbol::FuncKey>>,
    )>,
}

/// Parse the file once, build the CFG once, and produce both function
/// summaries (for cross-file resolution) and full diagnostics (AST analyses +
/// taint + CFG structural analyses).
///
/// When `global_summaries` is `None`, the taint engine runs with local
/// context only (equivalent to pass 1 + partial pass 2).  A second call
/// to `run_taint_only` can refine findings with the full cross-file view
/// without re-parsing or re-building the CFG.
///
/// # Parameters
///
/// * `bytes`: file contents handed to `ParsedSource::try_new`.
/// * `path`: the file's path, used for parsing, tracing, and module-id
///   derivation.
/// * `cfg`: `cfg.scanner.mode` selects which analyses run: `Full`, `Cfg`,
///   and `Taint` run SSA lowering plus the CFG analyses; `Full` and `Ast`
///   run the AST queries and (when enabled) the auth analysis.
/// * `global_summaries`: optional cross-file view forwarded to lowering,
///   the CFG analyses, and the auth analysis.
/// * `scan_root`: optional scan root forwarded to summary export and
///   namespace computation.
///
/// # Errors
///
/// Propagates any error returned by `ParsedSource::try_new`.  When that
/// call yields no parsed source, returns `Ok` with an otherwise-empty
/// [`FusedResult`] whose `diags` hold the text-pattern findings (and a
/// parse-timeout diagnostic if one was recorded).
pub fn analyse_file_fused(
    bytes: &[u8],
    path: &Path,
    cfg: &Config,
    global_summaries: Option<&GlobalSummaries>,
    scan_root: Option<&Path>,
) -> NyxResult<FusedResult> {
    let _span = tracing::debug_span!("analyse_fused", file = %path.display()).entered();
    maybe_inject_test_panic(path);

    let Some(source) = ParsedSource::try_new(bytes, path)? else {
        // Not a recognized tree-sitter language, try text-based patterns,
        // and surface a parse-timeout synthetic diag if that's what caused
        // try_new to return None.
        let mut diags = scan_text_based_patterns(bytes, path, cfg);
        if let Some(timeout_ms) = take_last_parse_timeout_ms() {
            diags.push(parse_timeout_diag(path, timeout_ms));
        }
        return Ok(FusedResult {
            summaries: vec![],
            diags,
            ssa_summaries: vec![],
            cfg_nodes: 0,
            ssa_bodies: vec![],
            auth_summaries: vec![],
            router_facts: None,
            cross_package_imports: None,
        });
    };

    // Summaries and the CFG node count are produced in every scanner mode.
    let parsed = ParsedFile::from_source(source, cfg);
    let cfg_nodes = parsed.cfg_graph().node_count();
    let summaries = parsed.export_summaries_with_root(scan_root);

    let mut out = Vec::new();

    let needs_cfg = matches!(
        cfg.scanner.mode,
        AnalysisMode::Full | AnalysisMode::Cfg | AnalysisMode::Taint
    );

    let (ssa_summaries, ssa_bodies) = if needs_cfg {
        // Lower SSA exactly once and feed both the taint engine and the
        // SSA-artifact extractor.  Pre-fix, both consumers re-lowered the
        // same `FileCfg` independently; `lower_all_functions_from_bodies`
        // accounted for ~20% of `analyse_file_fused` wall-clock on the
        // bench corpus.
        //
        // Reset the path-safe-suppressed span set BEFORE lowering: the
        // per-parameter probes inside the lowering phase publish spans
        // (`record_path_safe_suppressed_span`), and the state-analysis
        // pass downstream relies on those spans surviving until
        // `take_path_safe_suppressed_spans` drains the set inside
        // `run_cfg_analyses_with_lowered`.  The all-validated span set
        // (cap-agnostic, AST-pattern suppression evidence) follows the
        // same lifecycle and is drained inside `TaintSuppressionCtx`.
        crate::taint::ssa_transfer::reset_path_safe_suppressed_spans();
        crate::taint::ssa_transfer::reset_all_validated_spans();
        let (lowered_summaries, lowered_bodies) =
            parsed.lower_ssa_for_fused(global_summaries, scan_root, cfg.module_graph.as_deref());
        out.extend(parsed.run_cfg_analyses_with_lowered(
            cfg,
            global_summaries,
            scan_root,
            &lowered_summaries,
            &lowered_bodies,
        ));
        // The lowered artifacts are consumed here, after the analyses that
        // only borrowed them have finished.
        let eligible_bodies = crate::taint::build_eligible_bodies(&parsed.file_cfg, lowered_bodies);
        let summaries_vec: Vec<_> = lowered_summaries.into_iter().collect();
        (summaries_vec, eligible_bodies)
    } else {
        (vec![], vec![])
    };

    let mut auth_summaries: Vec<(
        crate::symbol::FuncKey,
        auth_analysis::model::AuthCheckSummary,
    )> = Vec::new();

    // Per-file router-dep facts for cross-file FastAPI propagation.
    // Extracted unconditionally for Python files so pass 1 can persist
    // them into `GlobalSummaries.router_facts_by_module` even on Cfg /
    // Taint modes (the auth analysis itself runs only under Full, but
    // the index has to be populated by the time pass 2 launches).
    let router_facts_for_this_file = if parsed.source.lang_slug == "python" {
        auth_analysis::router_facts::module_id_for_storage(parsed.source.path).map(|module_id| {
            let facts = auth_analysis::router_facts::extract_router_facts_for_python(
                &parsed.source.tree,
                parsed.source.bytes,
            );
            (module_id, facts)
        })
    } else {
        None
    };

    if cfg.scanner.mode == AnalysisMode::Full || cfg.scanner.mode == AnalysisMode::Ast {
        let ast_findings = parsed.source.run_ast_queries(cfg);
        // Layer B only applies when taint had the opportunity to evaluate
        if needs_cfg && cfg.scanner.mode == AnalysisMode::Full {
            // `out` holds only the CFG/taint diagnostics at this point, so
            // the suppression context sees exactly the engine's findings.
            let suppression =
                TaintSuppressionCtx::build(&parsed.file_cfg, &parsed.source.tree, &out);
            out.extend(ast_findings.into_iter().filter(|d| {
                !suppression.should_suppress(&d.id, d.line)
                    && !suppression.is_redundant_ast_pattern(&d.id, d.line)
            }));
        } else {
            out.extend(ast_findings);
        }
        // Build the AuthorizationModel exactly once per file when Full
        // mode needs both diagnostics AND per-file summaries; pre-fix
        // the diag path and the summary path each ran their own
        // `extract::extract_authorization_model`, duplicating
        // `collect_top_level_units` + every framework extractor's AST
        // walk.  See `auth_analysis::run_auth_analysis_with_model` for
        // measured savings.
        let auth_rules = auth_analysis::config::build_auth_rules(cfg, parsed.source.lang_slug);
        if auth_rules.enabled {
            // Resolve cross-file router-deps for the current file (Python only).
            // The resolved map lives on `AuthorizationModel.cross_file_router_deps`
            // BEFORE `FlaskExtractor::extract` runs, so the in-extractor merge
            // sees both inline router-deps and the cross-file lift in one pass.
            //
            // NOTE(review): facts are stored under `module_id_for_storage`
            // above but looked up here via `module_id_for_path`; presumably
            // the two helpers are meant to differ. Confirm they line up.
            let cross_file_router_deps = if parsed.source.lang_slug == "python"
                && let Some(gs) = global_summaries
                && let Some(child_module_id) =
                    auth_analysis::router_facts::module_id_for_path(parsed.source.path)
            {
                let resolved = gs.resolve_cross_file_router_deps(&child_module_id);
                if resolved.is_empty() {
                    None
                } else {
                    Some(resolved)
                }
            } else {
                None
            };
            let auth_model = auth_analysis::extract::extract_authorization_model(
                parsed.source.lang_slug,
                cfg.framework_ctx.as_ref(),
                &parsed.source.tree,
                parsed.source.bytes,
                parsed.source.path,
                &auth_rules,
                cross_file_router_deps.as_ref(),
            );
            // Extract summaries from the **base** model (pre var-types,
            // pre-helper-lifting) so the persisted per-file summary
            // carries only the helper's own intrinsic auth checks,
            // matching the legacy `extract_auth_summaries_by_key` path
            // bit-for-bit.
            if cfg.scanner.mode == AnalysisMode::Full {
                auth_summaries = auth_analysis::extract_auth_summaries_from_model(
                    &auth_model,
                    parsed.source.lang_slug,
                    parsed.source.path,
                    scan_root,
                );
            }
            // `auth_model` is moved into the analysis below, which is why
            // the summaries above must be extracted first.
            let var_types = parsed.collect_file_var_types();
            out.extend(auth_analysis::run_auth_analysis_with_model(
                auth_model,
                &parsed.source.tree,
                parsed.source.lang_slug,
                parsed.source.path,
                &auth_rules,
                var_types.as_ref(),
                global_summaries,
                scan_root,
            ));
        }
    }
    parsed.source.finalize_diags(&mut out, cfg);

    // Cross-package import map: only emitted when the file has resolved
    // imports AND at least one of them maps to a function key.
    let cross_package_imports_for_this_file = if parsed.file_cfg.resolved_imports.is_empty() {
        None
    } else {
        let scan_root_str = scan_root.map(|p| p.to_string_lossy());
        let ns = crate::symbol::namespace_with_package(
            &parsed.source.file_path_str,
            scan_root_str.as_deref(),
            cfg.module_graph.as_deref(),
        );
        // NOTE(review): an unrecognised language slug falls back to
        // `Lang::Rust` here; confirm that default is intended.
        let caller_lang = Lang::from_slug(parsed.source.lang_slug).unwrap_or(Lang::Rust);
        let map = crate::taint::build_cross_package_func_keys(
            &parsed.file_cfg.resolved_imports,
            scan_root_str.as_deref(),
            cfg.module_graph.as_deref(),
            caller_lang,
        );
        if map.is_empty() {
            None
        } else {
            Some((ns, std::sync::Arc::new(map)))
        }
    };

    Ok(FusedResult {
        summaries,
        diags: out,
        ssa_summaries,
        cfg_nodes,
        ssa_bodies,
        auth_summaries,
        router_facts: router_facts_for_this_file,
        cross_package_imports: cross_package_imports_for_this_file,
    })
}

//  Text-based pattern scanning (non-tree-sitter files)

/// Scan a file that tree-sitter does not handle using the text-based
/// pattern scanners.
///
/// Dispatch is on the extension reported by `lowercase_ext`.  `.ejs` is the
/// only extension handled today: its findings come from
/// `crate::patterns::ejs::scan_ejs_file` and are then filtered against
/// `cfg.scanner.min_severity`.  Any other extension (or none) yields an
/// empty list.
fn scan_text_based_patterns(bytes: &[u8], path: &Path, cfg: &Config) -> Vec<Diag> {
    if !matches!(lowercase_ext(path), Some("ejs")) {
        return Vec::new();
    }

    // Respect the severity filter: keep a finding only when its severity
    // compares `<=` the configured minimum.
    crate::patterns::ejs::scan_ejs_file(path, bytes)
        .into_iter()
        .filter(|diag| diag.severity <= cfg.scanner.min_severity)
        .collect()
}

#[test]
fn unknown_extension_returns_empty() {
    // A plain-text file scanned with the default configuration must come
    // back clean: no error and no findings.
    let scratch = tempfile::tempdir().unwrap();
    let notes_path = scratch.path().join("notes.txt");
    std::fs::write(&notes_path, "just some text").unwrap();

    let findings = run_rules_on_file(&notes_path, &Config::default(), None, None)
        .expect("function should never error on plain text");

    assert!(findings.is_empty());
}

#[test]
fn binary_file_guard_triggers() {
    // Scanning a file made up solely of NUL bytes must succeed and report
    // nothing.
    let dir = tempfile::tempdir().unwrap();
    let bin = dir.path().join("junk.bin");

    // 2 KiB of zero bytes; the buffer needs no further mutation.
    std::fs::write(&bin, vec![0_u8; 2048]).unwrap();

    let diags = run_rules_on_file(&bin, &Config::default(), None, None).unwrap();
    assert!(diags.is_empty(), "binary files are skipped");
}

#[test]
fn nonprod_path_detection() {
    // Common non-production locations that `is_nonprod_path` must recognise.
    let nonprod = [
        "project/tests/test_main.py",
        "src/__tests__/foo.js",
        "benches/bench.rs",
        "vendor/lib/foo.py",
        "src/build.rs",
        "dist/app.min.js",
        "examples/demo.py",
        "fixtures/data.json",
    ];
    for candidate in nonprod {
        assert!(
            is_nonprod_path(Path::new(candidate)),
            "expected a non-production path: {}",
            candidate
        );
    }

    // Production paths must NOT match.
    let production = ["src/main.rs", "lib/handler.py", "app/views.py"];
    for candidate in production {
        assert!(
            !is_nonprod_path(Path::new(candidate)),
            "expected a production path: {}",
            candidate
        );
    }
}

#[test]
fn test_file_detection_covers_all_supported_languages() {
    // Paths that `is_test_file` must classify as tests, grouped by language.
    let test_files = [
        // JS / TS — the existing surface, kept as a regression guard.
        "src/foo.test.js",
        "src/foo.test.ts",
        "src/foo.spec.tsx",
        "src/foo.test.mjs",
        "src/__tests__/Component.jsx",
        // Python.
        "tests/test_login.py",
        "project/views_test.py",
        "project/tests/conftest.py",
        "project/foo_tests.py",
        // Java (JUnit / TestNG).
        "src/UserTest.java",
        "src/UserTests.java",
        "src/UserIT.java",
        // PHP (PHPUnit).
        "tests/unit/Gis/GisVisualizationTest.php",
        // Ruby (RSpec / Minitest).
        "spec/widget_spec.rb",
        "test/widget_test.rb",
        // Go.
        "pkg/auth/login_test.go",
        // Rust (uncommon but valid).
        "src/parser_test.rs",
        // C / C++.
        "src/auth_test.c",
        "src/auth_test.cpp",
        "tests/test_main.cc",
    ];
    for candidate in test_files {
        assert!(
            is_test_file(Path::new(candidate)),
            "expected a test file: {}",
            candidate
        );
    }

    // Production paths must NOT match.
    let production_files = [
        "src/main.rs",
        "src/UserController.java",
        "app/views.py",
        "pkg/auth/login.go",
        "src/handler.go",
        "src/Foo.php",
        "src/Controllers/Operations.php",
    ];
    for candidate in production_files {
        assert!(
            !is_test_file(Path::new(candidate)),
            "expected a production file: {}",
            candidate
        );
    }
}

#[test]
fn test_suppressible_pattern_covers_cross_language_noise() {
    let suppressible = [
        // JS / TS — pre-existing surface, kept as a regression guard.
        "js.crypto.math_random",
        "ts.crypto.math_random",
        "js.secrets.hardcoded_secret",
        "ts.transport.fetch_http",
        // Cross-language extensions added so weak crypto / hardcoded test
        // tokens / insecure RNG used as fixture seeds do not surface as
        // findings inside test modules.
        "php.crypto.md5",
        "php.crypto.sha1",
        "php.crypto.rand",
        "py.crypto.md5",
        "py.crypto.sha1",
        "rb.crypto.md5",
        "go.crypto.md5",
        "go.crypto.sha1",
        "go.secrets.hardcoded_key",
        "java.crypto.weak_digest",
        "java.crypto.insecure_random",
    ];
    for pattern_id in suppressible {
        assert!(
            is_test_suppressible_pattern(pattern_id),
            "expected pattern to be suppressible in tests: {}",
            pattern_id
        );
    }

    // Other security-relevant patterns must NOT be suppressed in tests:
    // they capture real attack surface that test fixtures themselves can
    // demonstrate (deserialization, command injection, taint flows).
    let always_reported = [
        "php.deser.unserialize",
        "py.deser.pickle_loads",
        "php.cmdi.system",
        "taint-unsanitised-flow",
        "cfg-unguarded-sink",
    ];
    for pattern_id in always_reported {
        assert!(
            !is_test_suppressible_pattern(pattern_id),
            "expected pattern to stay reported in tests: {}",
            pattern_id
        );
    }
}

#[test]
fn vendored_asset_path_detection() {
    let vendored = [
        // Minified bundle filename markers always trigger.
        "src/main/webapp/scripts/jquery-ui.custom.min.js",
        "core/assets/htmx.min.js",
        "public/app.bundle.js",
        "dist/transliteration.umd.min.js",
        "dist/lib.iife.js",
        "css/site.min.css",
        // Path-component triggers: bower_components is unambiguous.
        "bower_components/lodash/lodash.js",
        // `vendor/` (and `vendors/`) with a front-end asset extension.
        "core/assets/vendor/jquery/jquery.js",
        "src/vendors/foo/lib.css",
    ];
    for candidate in vendored {
        assert!(
            is_vendored_asset_path(Path::new(candidate)),
            "expected a vendored asset path: {}",
            candidate
        );
    }

    let not_vendored = [
        // `vendor/` triggers only for front-end asset extensions, so Go
        // module vendoring under `vendor/` keeps being scanned.
        "vendor/github.com/foo/bar/lib.go",
        "vendor/github.com/foo/bar/lib.rs",
        // Hand-authored production paths must NOT match.
        "src/main.js",
        "app/components/Button.tsx",
        "lib/handler.py",
        // Plain `.js` outside vendor/bower with no `.min` suffix stays in
        // scope even when the directory hints at third-party origin; the
        // engine's existing `is_nonprod_path` downgrade still fires for those.
        "webapp/WEB-INF/view/scripts/jquery-ui/jquery-ui-timepicker-addon.js",
    ];
    for candidate in not_vendored {
        assert!(
            !is_vendored_asset_path(Path::new(candidate)),
            "expected a non-vendored path: {}",
            candidate
        );
    }
}

#[test]
fn severity_downgrade_works() {
    // (input, expected result): each level drops by one step and `Low`
    // stays at `Low`.
    let steps = [
        (Severity::High, Severity::Medium),
        (Severity::Medium, Severity::Low),
        (Severity::Low, Severity::Low),
    ];
    for (before, after) in steps {
        assert_eq!(downgrade_severity(before), after);
    }
}

#[test]
fn nonprod_path_downgrades_findings() {
    let workspace = tempfile::tempdir().unwrap();

    // Put the sample under a "tests" directory.
    let tests_dir = workspace.path().join("tests");
    std::fs::create_dir_all(&tests_dir).unwrap();
    let sample = tests_dir.join("test_cmd.py");
    std::fs::write(
        &sample,
        b"import os\ndef test():\n    cmd = os.environ['X']\n    os.system(cmd)\n",
    )
    .unwrap();

    // Default configuration: nothing reported for a file in tests/ may
    // remain at HIGH severity.
    let downgraded = run_rules_on_file(&sample, &Config::default(), None, None).unwrap();
    let still_high: Vec<_> = downgraded
        .iter()
        .filter(|d| matches!(d.severity, Severity::High))
        .collect();
    assert!(
        still_high.is_empty(),
        "Findings in tests/ should be downgraded from HIGH; got {:?}",
        still_high
    );

    // With include_nonprod=true the scan is only required to succeed; the
    // severities of its findings are not inspected here.
    let mut nonprod_cfg = Config::default();
    nonprod_cfg.scanner.include_nonprod = true;
    let _with_nonprod = run_rules_on_file(&sample, &nonprod_cfg, None, None).unwrap();
}

#[test]
fn constant_arg_suppression_works() {
    /// Parses `code` with `lang`, runs `query_str` over the tree, and feeds
    /// capture 0 of the first match to `is_call_all_args_literal` together
    /// with the language `slug`.  Panics if parsing or the query fails, or
    /// if nothing matches.
    fn first_match_all_literal(
        lang: &tree_sitter::Language,
        code: &[u8],
        query_str: &str,
        slug: &'static str,
    ) -> bool {
        use tree_sitter::StreamingIterator;

        let mut parser = tree_sitter::Parser::new();
        parser.set_language(lang).unwrap();
        let tree = parser.parse(code, None).unwrap();
        let query = tree_sitter::Query::new(lang, query_str).unwrap();
        let mut cursor = tree_sitter::QueryCursor::new();
        let mut matches = cursor.matches(&query, tree.root_node(), code);
        let first = matches.next().expect("query should match");
        let cap = first.captures.iter().find(|c| c.index == 0).unwrap();
        is_call_all_args_literal(cap.node, code, slug)
    }

    const PHP_SYSTEM_QUERY: &str = r#"(function_call_expression
            function: (name) @n (#match? @n "^(system)$"))
            @vuln"#;
    const PY_OS_SYSTEM_QUERY: &str = r#"(call
            function: (attribute
                object: (identifier) @pkg (#eq? @pkg "os")
                attribute: (identifier) @fn (#eq? @fn "system")))
            @vuln"#;
    const GO_EXEC_QUERY: &str = r#"(call_expression
            function: (selector_expression
                field: (field_identifier) @m (#eq? @m "Exec")))
            @vuln"#;

    let php = tree_sitter::Language::from(tree_sitter_php::LANGUAGE_PHP);
    let python = tree_sitter::Language::from(tree_sitter_python::LANGUAGE);
    let go = tree_sitter::Language::from(tree_sitter_go::LANGUAGE);

    // PHP: system("echo health-ok") should be suppressed
    assert!(
        first_match_all_literal(
            &php,
            b"<?php\nsystem(\"echo health-ok\");\n",
            PHP_SYSTEM_QUERY,
            "php",
        ),
        "PHP system(\"echo health-ok\") should have all-literal args"
    );

    // Python: os.system("echo health-ok") should be suppressed
    assert!(
        first_match_all_literal(
            &python,
            b"import os\nos.system(\"echo health-ok\")\n",
            PY_OS_SYSTEM_QUERY,
            "python",
        ),
        "Python os.system(\"echo health-ok\") should have all-literal args"
    );

    // Python: os.system(cmd) should NOT be suppressed (variable arg)
    assert!(
        !first_match_all_literal(
            &python,
            b"import os\nos.system(cmd)\n",
            PY_OS_SYSTEM_QUERY,
            "python",
        ),
        "Python os.system(cmd) should NOT have all-literal args"
    );

    // Python: os.system(DEFAULT_CMD) with module-level `DEFAULT_CMD = "ls -la"`
    // should be suppressed via the file-level scalar binding map.
    assert!(
        first_match_all_literal(
            &python,
            b"import os\nDEFAULT_CMD = \"ls -la\"\nos.system(DEFAULT_CMD)\n",
            PY_OS_SYSTEM_QUERY,
            "python",
        ),
        "os.system(DEFAULT_CMD) with module-level scalar should be suppressed"
    );

    // Go: db.Exec(DriverName) with package-level `const DriverName = "postgres"`
    // should be suppressed via the file-level scalar binding map.
    assert!(
        first_match_all_literal(
            &go,
            b"package main\nconst DriverName = \"postgres\"\nfunc f(db Db) { db.Exec(DriverName) }\n",
            GO_EXEC_QUERY,
            "go",
        ),
        "db.Exec(DriverName) with package-level const should be suppressed"
    );
}

/// Compiles `query_str` against the Python grammar, runs it over `tree`
/// with `code` as the source text, and returns the node captured at
/// index 0 of the first match.
///
/// Panics if the query does not compile, if nothing matches, or if the
/// first match has no capture with index 0.  Test-only helper for the
/// Python suppression tests.
#[cfg(test)]
fn first_python_capture<'tree>(
    tree: &'tree tree_sitter::Tree,
    code: &[u8],
    query_str: &str,
) -> tree_sitter::Node<'tree> {
    use tree_sitter::StreamingIterator;

    let python = tree_sitter::Language::from(tree_sitter_python::LANGUAGE);
    let compiled = tree_sitter::Query::new(&python, query_str).expect("query compiles");
    let mut runner = tree_sitter::QueryCursor::new();
    let mut hits = runner.matches(&compiled, tree.root_node(), code);
    let first_hit = hits.next().expect("query should match");
    let wanted = first_hit.captures.iter().find(|capture| capture.index == 0);
    wanted.expect("capture index 0").node
}

/// Compiles `query_str` against the Ruby grammar, runs it over `tree`
/// with `code` as the source text, and returns the node captured at
/// index 0 of the first match.
///
/// Panics if the query does not compile, if nothing matches, or if the
/// first match has no capture with index 0.  Test-only helper for the
/// Ruby suppression tests.
#[cfg(test)]
fn first_ruby_capture<'tree>(
    tree: &'tree tree_sitter::Tree,
    code: &[u8],
    query_str: &str,
) -> tree_sitter::Node<'tree> {
    use tree_sitter::StreamingIterator;

    let ruby = tree_sitter::Language::from(tree_sitter_ruby::LANGUAGE);
    let compiled = tree_sitter::Query::new(&ruby, query_str).expect("query compiles");
    let mut runner = tree_sitter::QueryCursor::new();
    let mut hits = runner.matches(&compiled, tree.root_node(), code);
    let first_hit = hits.next().expect("query should match");
    let wanted = first_hit.captures.iter().find(|capture| capture.index == 0);
    wanted.expect("capture index 0").node
}

/// Compiles `query_str` against the PHP grammar, runs it over `tree` with
/// `code` as the source text, and returns the node captured at index 0 of
/// the first match.
///
/// Panics if the query does not compile, if nothing matches, or if the
/// first match has no capture with index 0.  Test-only helper for the PHP
/// suppression tests.
#[cfg(test)]
fn first_php_capture<'tree>(
    tree: &'tree tree_sitter::Tree,
    code: &[u8],
    query_str: &str,
) -> tree_sitter::Node<'tree> {
    use tree_sitter::StreamingIterator;

    let php = tree_sitter::Language::from(tree_sitter_php::LANGUAGE_PHP);
    let compiled = tree_sitter::Query::new(&php, query_str).expect("query compiles");
    let mut runner = tree_sitter::QueryCursor::new();
    let mut hits = runner.matches(&compiled, tree.root_node(), code);
    let first_hit = hits.next().expect("query should match");
    let wanted = first_hit.captures.iter().find(|capture| capture.index == 0);
    wanted.expect("capture index 0").node
}

#[test]
fn php_include_param_passthrough_recognises_canonical_shapes() {
    let php = tree_sitter::Language::from(tree_sitter_php::LANGUAGE_PHP);
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&php).unwrap();
    let include_query = r#"(include_expression (variable_name)) @vuln"#;

    // Parses one snippet and classifies the first node the query captures.
    let mut is_passthrough = |code: &[u8]| -> bool {
        let tree = parser.parse(code, None).unwrap();
        let captured = first_php_capture(&tree, code, include_query);
        is_php_include_param_passthrough(captured, code)
    };

    // Closure parameter pass-through (composer ClassLoader idiom).
    assert!(
        is_passthrough(b"<?php\nstatic $cb = function ($file) { include $file; };\n"),
        "closure param pass-through should be recognised"
    );

    // Method parameter pass-through.
    assert!(
        is_passthrough(
            b"<?php\nclass C { function f(string $file): void { include $file; } }\n"
        ),
        "method param pass-through should be recognised"
    );

    // Local variable assigned from concat, NOT a pass-through.
    assert!(
        !is_passthrough(
            b"<?php\nclass C { function f(string $base): void { $f = $base . '/x.php'; include $f; } }\n"
        ),
        "concat-built local should NOT be treated as pass-through"
    );

    // Param reassigned before include, NOT a pass-through.
    assert!(
        !is_passthrough(b"<?php\nfunction f($file) { $file = $_GET['x']; include $file; }\n"),
        "reassigned param should NOT be treated as pass-through"
    );

    // Top-level (no enclosing function), NOT a pass-through.
    assert!(
        !is_passthrough(b"<?php\n$file = $_GET['x'];\ninclude $file;\n"),
        "top-level include should NOT be treated as pass-through"
    );
}

#[test]
fn php_unserialize_allowed_classes_recognises_safe_forms() {
    let php = tree_sitter::Language::from(tree_sitter_php::LANGUAGE_PHP);
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&php).unwrap();
    let unserialize_query =
        r#"(function_call_expression function: (name) @n (#eq? @n "unserialize")) @vuln"#;

    // Parses one snippet and classifies the first node the query captures.
    let mut is_restricted = |code: &[u8]| -> bool {
        let tree = parser.parse(code, None).unwrap();
        let captured = first_php_capture(&tree, code, unserialize_query);
        is_php_unserialize_allowed_classes_restricted(captured, code)
    };

    // allowed_classes => false
    assert!(
        is_restricted(b"<?php\n$x = unserialize($d, ['allowed_classes' => false]);\n"),
        "allowed_classes => false should be recognised as safe"
    );

    // allowed_classes => [Foo::class]  (array of class names)
    assert!(
        is_restricted(b"<?php\n$x = unserialize($d, ['allowed_classes' => [Foo::class]]);\n"),
        "allowed_classes => [array] should be recognised as safe"
    );

    // allowed_classes => self::A  (class constant reference)
    assert!(
        is_restricted(
            b"<?php\nclass C { const A = []; function f($d) { return unserialize($d, ['allowed_classes' => self::A]); } }\n"
        ),
        "allowed_classes => self::CONST should be recognised as safe"
    );

    // allowed_classes => true, unsafe default, must NOT be suppressed
    assert!(
        !is_restricted(b"<?php\n$x = unserialize($d, ['allowed_classes' => true]);\n"),
        "allowed_classes => true is the unsafe default, should NOT be suppressed"
    );

    // No second arg, must NOT be suppressed
    assert!(
        !is_restricted(b"<?php\n$x = unserialize($d);\n"),
        "single-arg unserialize should NOT be suppressed"
    );

    // Dynamic options variable, must NOT be suppressed
    assert!(
        !is_restricted(b"<?php\n$x = unserialize($d, $opts);\n"),
        "dynamic options variable should NOT be suppressed"
    );
}

#[test]
fn php_unserialize_magic_method_passthrough_recognises_serializable_contract() {
    let php = tree_sitter::Language::from(tree_sitter_php::LANGUAGE_PHP);
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&php).unwrap();
    let unserialize_query =
        r#"(function_call_expression function: (name) @n (#eq? @n "unserialize")) @vuln"#;

    // Parses one snippet and classifies the first node the query captures.
    let mut is_passthrough = |code: &[u8]| -> bool {
        let tree = parser.parse(code, None).unwrap();
        let captured = first_php_capture(&tree, code, unserialize_query);
        is_php_unserialize_magic_method_passthrough(captured, code)
    };

    // Canonical Serializable::unserialize delegating to __unserialize.
    assert!(
        is_passthrough(
            b"<?php\nclass R {\n  public function unserialize($serialized): void {\n    $this->__unserialize(unserialize($serialized));\n  }\n}\n"
        ),
        "Serializable::unserialize($x) → unserialize($x) should be suppressed"
    );

    // Multi-target list-destructuring assignment shape (Joomla Cli/Input).
    assert!(
        is_passthrough(
            b"<?php\nclass C {\n  public function unserialize($input) {\n    [$this->a, $this->b] = unserialize($input);\n  }\n}\n"
        ),
        "list-destructuring inside Serializable::unserialize should be suppressed"
    );

    // Case-insensitive method name (PHP semantics).
    assert!(
        is_passthrough(
            b"<?php\nclass C { public function UnSerialize($d) { return unserialize($d); } }\n"
        ),
        "method name should match case-insensitively (PHP)"
    );

    // Free function `unserialize` is NOT a magic method, must NOT be suppressed.
    assert!(
        !is_passthrough(b"<?php\nfunction load($d) { return unserialize($d); }\n"),
        "free function should NOT be suppressed"
    );

    // Different method name, NOT a Serializable contract, must NOT be suppressed.
    assert!(
        !is_passthrough(
            b"<?php\nclass C { public function decode($d) { return unserialize($d); } }\n"
        ),
        "method named `decode` should NOT be suppressed"
    );

    // Method named `unserialize` but with TWO params, NOT the magic signature,
    // must NOT be suppressed.
    assert!(
        !is_passthrough(
            b"<?php\nclass C { public function unserialize($d, $opts) { return unserialize($d, $opts); } }\n"
        ),
        "two-param method named unserialize should NOT be suppressed"
    );

    // Magic-method signature but the call argument is NOT the formal param —
    // user is unserializing some other source.  Must NOT be suppressed.
    assert!(
        !is_passthrough(
            b"<?php\nclass C { public function unserialize($input) { return unserialize($_GET['x']); } }\n"
        ),
        "non-pass-through arg inside magic method should NOT be suppressed"
    );

    // Wrapped argument (`unserialize(trim($input))`) is NOT a bare-param
    // pass-through — keep firing.  This shape covers cache/session
    // pass-throughs that the rule should still surface.
    assert!(
        !is_passthrough(
            b"<?php\nclass C { public function unserialize($input) { return unserialize(trim($input)); } }\n"
        ),
        "wrapped argument inside magic method should NOT be suppressed (conservative)"
    );

    // Anonymous function named-like context (defensive — anonymous_function
    // is not a method_declaration).
    assert!(
        !is_passthrough(b"<?php\n$f = function($input) { return unserialize($input); };\n"),
        "closure should NOT be suppressed"
    );
}

#[test]
fn php_unserialize_inside_phpunit_assertion_recognises_roundtrip_shapes() {
    let php = tree_sitter::Language::from(tree_sitter_php::LANGUAGE_PHP);
    let mut parser = tree_sitter::Parser::new();
    parser.set_language(&php).unwrap();
    let unserialize_query =
        r#"(function_call_expression function: (name) @n (#eq? @n "unserialize")) @vuln"#;

    // Parses one snippet and classifies the first node the query captures.
    let mut in_assertion = |code: &[u8]| -> bool {
        let tree = parser.parse(code, None).unwrap();
        let captured = first_php_capture(&tree, code, unserialize_query);
        is_php_unserialize_inside_phpunit_assertion(captured, code)
    };

    // Canonical assertSame with array literal expected.
    assert!(
        in_assertion(
            b"<?php\nclass T { public function t() { $this->assertSame(['a' => 1], unserialize($b)); } }\n"
        ),
        "assertSame(literal array, unserialize($x)) should be suppressed"
    );

    // assertEquals with scalar string expected.
    assert!(
        in_assertion(
            b"<?php\nclass T { public function t() { $this->assertEquals('hello', unserialize($b)); } }\n"
        ),
        "assertEquals(literal string, unserialize($x)) should be suppressed"
    );

    // Static dispatch: static::assertSame(...).
    assert!(
        in_assertion(
            b"<?php\nclass T { public function t() { static::assertSame(['x'], unserialize($b)); } }\n"
        ),
        "static::assertSame should be suppressed"
    );

    // Self dispatch: self::assertEquals(...).
    assert!(
        in_assertion(
            b"<?php\nclass T { public function t() { self::assertEquals(['y'], unserialize($b)); } }\n"
        ),
        "self::assertEquals should be suppressed"
    );

    // Single-arg verb: assertNull(unserialize($x)).  The verb itself
    // bounds the result.
    assert!(
        in_assertion(
            b"<?php\nclass T { public function t() { $this->assertNull(unserialize($b)); } }\n"
        ),
        "assertNull(unserialize($x)) should be suppressed (verb bounds the result)"
    );

    // Single-arg verb: assertIsArray(unserialize($x)).
    assert!(
        in_assertion(
            b"<?php\nclass T { public function t() { $this->assertIsArray(unserialize($b)); } }\n"
        ),
        "assertIsArray(unserialize($x)) should be suppressed"
    );

    // Case-insensitive method name (PHP semantics).
    assert!(
        in_assertion(
            b"<?php\nclass T { public function t() { $this->AssertSame(['z'], unserialize($b)); } }\n"
        ),
        "method name should match case-insensitively"
    );

    // Free function `unserialize` outside any assertion: keep firing.
    assert!(
        !in_assertion(b"<?php\n$x = unserialize($_GET['blob']);\n"),
        "unserialize outside any assertion should NOT be suppressed"
    );

    // assertEquals with a NON-literal first arg ($computed) keeps firing —
    // the result is not statically pinned.
    assert!(
        !in_assertion(
            b"<?php\nclass T { public function t($e) { $this->assertEquals($e, unserialize($b)); } }\n"
        ),
        "assertEquals($computed, unserialize($x)) should NOT be suppressed"
    );

    // Single-arg unrecognised assertion verb keeps firing.
    assert!(
        !in_assertion(
            b"<?php\nclass T { public function t() { $this->assertSomethingCustom(unserialize($b)); } }\n"
        ),
        "1-arg unknown assertion verb should NOT be suppressed"
    );

    // Wrapping in another expression (binary, ternary) breaks the
    // bound — unserialize is no longer the direct argument.  Conservative.
    assert!(
        !in_assertion(
            b"<?php\nclass T { public function t() { $this->assertSame(['x'], unserialize($b) ?: []); } }\n"
        ),
        "wrapped (ternary) unserialize argument should NOT be suppressed"
    );

    // Method call whose name does NOT start with `assert` keeps firing.
    assert!(
        !in_assertion(
            b"<?php\nclass T { public function t() { $this->log(['x'], unserialize($b)); } }\n"
        ),
        "non-assert method should NOT be suppressed"
    );

    // `assertSame` called with unserialize as its only argument (no
    // expected value at all) — defensive.
    assert!(
        !in_assertion(
            b"<?php\nclass T { public function t() { $this->assertSame(unserialize($b)); } }\n"
        ),
        "single-arg `assertSame(unserialize($x))` should NOT be suppressed (no expected)"
    );
}

/// `unittest`-style round-trip recogniser invariants, expressed as a table.
///
/// Each row is `(python source, capture query, expected verdict, failure
/// message)`.  Rows are checked in order; the first mismatch fails the test
/// with that row's message.
#[test]
fn python_deser_inside_unittest_assertion_recognises_roundtrip_shapes() {
    let mut parser = tree_sitter::Parser::new();
    let lang = tree_sitter::Language::from(tree_sitter_python::LANGUAGE);
    parser.set_language(&lang).unwrap();

    // Pickle pattern equivalent: capture the `pickle` identifier under
    // the deser call's `function.object` path.
    let attr_query = r#"(call function: (attribute object: (identifier) @pkg (#eq? @pkg "pickle") attribute: (identifier) @fn (#match? @fn "^loads?$"))) @vuln"#;
    // Free-function shape (`from pickle import loads`): match the bare
    // identifier callee instead of the attribute path.
    let bare_query = r#"(call function: (identifier) @fn (#match? @fn "^loads?$")) @vuln"#;

    let cases: &[(&[u8], &str, bool, &str)] = &[
        // Canonical assertEqual with dict literal expected.
        (
            b"import pickle\nclass T:\n    def t(self, b):\n        self.assertEqual({'a': 1}, pickle.loads(b))\n",
            attr_query,
            true,
            "assertEqual(dict literal, pickle.loads(b)) should be suppressed",
        ),
        // assertEquals with list literal expected.
        (
            b"import pickle\nclass T:\n    def t(self, b):\n        self.assertEquals([1, 2, 3], pickle.loads(b))\n",
            attr_query,
            true,
            "assertEquals(list literal, pickle.loads(b)) should be suppressed",
        ),
        // Reversed ordering: deser first, literal second.
        (
            b"import pickle\nclass T:\n    def t(self, b):\n        self.assertEqual(pickle.loads(b), {'k': 'v'})\n",
            attr_query,
            true,
            "assertEqual(pickle.loads(b), dict literal) should be suppressed",
        ),
        // Unary negative literal.
        (
            b"import pickle\nclass T:\n    def t(self, b):\n        self.assertEqual(-7, pickle.loads(b))\n",
            attr_query,
            true,
            "assertEqual(unary-negative literal, pickle.loads(b)) should be suppressed",
        ),
        // Single-arg verb: assertIsNone.
        (
            b"import pickle\nclass T:\n    def t(self, b):\n        self.assertIsNone(pickle.loads(b))\n",
            attr_query,
            true,
            "assertIsNone(pickle.loads(b)) should be suppressed (verb bounds)",
        ),
        // Single-arg verb: assertTrue.
        (
            b"import pickle\nclass T:\n    def t(self, b):\n        self.assertTrue(pickle.loads(b))\n",
            attr_query,
            true,
            "assertTrue(pickle.loads(b)) should be suppressed (verb bounds)",
        ),
        // assertIsInstance(value, type).
        (
            b"import pickle\nclass T:\n    def t(self, b):\n        self.assertIsInstance(pickle.loads(b), dict)\n",
            attr_query,
            true,
            "assertIsInstance(pickle.loads(b), dict) should be suppressed (type bounds)",
        ),
        // A trailing `msg=` kwarg is informational only; the literal
        // positional argument still satisfies the bound.
        (
            b"import pickle\nclass T:\n    def t(self, b):\n        self.assertEqual([1], pickle.loads(b), msg='preserve')\n",
            attr_query,
            true,
            "msg= kwarg should not break the literal-positional bound",
        ),
        // Free function shape (`from pickle import loads`), captured via
        // the bare-identifier query.
        (
            b"from pickle import loads\nclass T:\n    def t(self, b):\n        self.assertEqual([1], loads(b))\n",
            bare_query,
            true,
            "assertEqual(literal, loads(b)) for `from pickle import loads` should be suppressed",
        ),
        // Production call (no assertion wrap) keeps firing.
        (
            b"import pickle\ndef handler(blob):\n    return pickle.loads(blob)\n",
            attr_query,
            false,
            "production pickle.loads should NOT be suppressed",
        ),
        // Non-literal expected (a parameter) keeps firing.
        (
            b"import pickle\nclass T:\n    def t(self, b, expected):\n        self.assertEqual(expected, pickle.loads(b))\n",
            attr_query,
            false,
            "assertEqual(non-literal, pickle.loads(b)) should NOT be suppressed",
        ),
        // Non-assert verb keeps firing.
        (
            b"import pickle\nclass T:\n    def t(self, b):\n        self.checkEqual([1], pickle.loads(b))\n",
            attr_query,
            false,
            "checkEqual (non-assert verb) should NOT be suppressed",
        ),
        // Wrapped in ternary: bound is broken.
        (
            b"import pickle\nclass T:\n    def t(self, b, c):\n        self.assertEqual([1], pickle.loads(b) if c else [])\n",
            attr_query,
            false,
            "ternary wrapping pickle.loads should NOT be suppressed",
        ),
        // assertCustom (unrecognised single-arg verb) keeps firing.
        (
            b"import pickle\nclass T:\n    def t(self, b):\n        self.assertCustomCheck(pickle.loads(b))\n",
            attr_query,
            false,
            "assertCustomCheck single-arg should NOT be suppressed (verb not in bounding set)",
        ),
        // assertEqual where both args are non-literal keeps firing.
        (
            b"import pickle\nclass T:\n    def t(self, b, e):\n        self.assertEqual(e, pickle.loads(b))\n",
            attr_query,
            false,
            "two non-literal positional args should NOT be suppressed",
        ),
        // f-string expected (interpolation) keeps firing.
        (
            b"import pickle\nclass T:\n    def t(self, b, x):\n        self.assertEqual(f'pre-{x}', pickle.loads(b))\n",
            attr_query,
            false,
            "f-string expected (interpolation) should NOT be suppressed",
        ),
    ];

    for &(source, query, want_suppressed, why) in cases {
        let tree = parser.parse(source, None).unwrap();
        let node = first_python_capture(&tree, source, query);
        let suppressed = is_python_deser_inside_unittest_assertion(node, source);
        assert!(suppressed == want_suppressed, "{}", why);
    }
}

/// Invariants for the plain-`assert` (pytest idiom) round-trip shapes.
///
/// Exercises the same entry point as the unittest-style test, but every
/// snippet places the deserialisation call under an `assert` statement
/// rather than a `self.assert*` method call.  Rows are
/// `(python source, expected verdict, failure message)` and are checked
/// in order.
#[test]
fn python_deser_inside_pytest_assert_recognises_roundtrip_shapes() {
    let mut parser = tree_sitter::Parser::new();
    let lang = tree_sitter::Language::from(tree_sitter_python::LANGUAGE);
    parser.set_language(&lang).unwrap();
    let pickle_query = r#"(call function: (attribute object: (identifier) @pkg (#eq? @pkg "pickle") attribute: (identifier) @fn (#match? @fn "^loads?$"))) @vuln"#;

    let cases: &[(&[u8], bool, &str)] = &[
        // assert deser == LITERAL
        (
            b"import pickle\ndef t(b):\n    assert pickle.loads(b) == [1, 2, 3]\n",
            true,
            "assert deser == [literal] should be suppressed",
        ),
        // assert deser is None
        (
            b"import pickle\ndef t(b):\n    assert pickle.loads(b) is None\n",
            true,
            "assert deser is None should be suppressed",
        ),
        // assert deser in [LITERAL, ...]
        (
            b"import pickle\ndef t(b):\n    assert pickle.loads(b) in [1, 2, 3]\n",
            true,
            "assert deser in [literal] should be suppressed",
        ),
        // assert deser  (truthy bare)
        (
            b"import pickle\ndef t(b):\n    assert pickle.loads(b)\n",
            true,
            "assert deser (truthy bare) should be suppressed",
        ),
        // assert not deser
        (
            b"import pickle\ndef t(b):\n    assert not pickle.loads(b)\n",
            true,
            "assert not deser should be suppressed",
        ),
        // assert isinstance(deser, dict)
        (
            b"import pickle\ndef t(b):\n    assert isinstance(pickle.loads(b), dict)\n",
            true,
            "assert isinstance(deser, dict) should be suppressed",
        ),
        // assert (deser == LITERAL) — paren wrap.
        (
            b"import pickle\ndef t(b):\n    assert (pickle.loads(b) == [1])\n",
            true,
            "assert (deser == literal) with paren wrap should be suppressed",
        ),
        // assert deser == LITERAL, "msg"
        (
            b"import pickle\ndef t(b):\n    assert pickle.loads(b) == 1, 'round trip'\n",
            true,
            "assert deser == literal, msg should be suppressed (msg is named_child(1))",
        ),
        // assert bool(deser)
        (
            b"import pickle\ndef t(b):\n    assert bool(pickle.loads(b))\n",
            true,
            "assert bool(deser) should be suppressed",
        ),
        // assert len(deser) == 3
        (
            b"import pickle\ndef t(b):\n    assert len(pickle.loads(b)) == 3\n",
            true,
            "assert len(deser) == int_literal should be suppressed",
        ),
        // Negatives ------------------------------------------------------
        //
        // assert deser and X — boolean op short-circuits, can run side effect.
        (
            b"import pickle\ndef t(b, x):\n    assert pickle.loads(b) and x\n",
            false,
            "assert deser and X (boolean op) should NOT be suppressed",
        ),
        // assert deser if cond else X — conditional short-circuits.
        (
            b"import pickle\ndef t(b, c):\n    assert (pickle.loads(b) if c else 0)\n",
            false,
            "assert (deser if c else x) should NOT be suppressed",
        ),
        // assert wrapper(deser) == LITERAL — arbitrary user fn breaks bound.
        (
            b"import pickle\ndef t(b):\n    assert wrapper(pickle.loads(b)) == [1]\n",
            false,
            "assert wrapper(deser) == literal should NOT be suppressed",
        ),
        // assert deser == non-literal — bound depends on dynamic var.
        (
            b"import pickle\ndef t(b, e):\n    assert pickle.loads(b) == e\n",
            false,
            "assert deser == non_literal should NOT be suppressed",
        ),
        // assert isinstance(deser, type_var) where the type is dynamic.
        // `t` is an `identifier` and `is_python_type_reference` accepts
        // identifier (assertIsInstance treats user-class identifiers as
        // type references), so this case stays suppressed.  Pinned to
        // document the matching behaviour rather than tighten it.
        (
            b"import pickle\ndef t(b):\n    t = some_type_factory()\n    assert isinstance(pickle.loads(b), t)\n",
            true,
            "assert isinstance(deser, identifier) treats identifier as type ref",
        ),
        // Assignment-then-assert: deser sits in `actual = pickle.loads(b)`,
        // not under the assert.  Must keep firing.
        (
            b"import pickle\ndef t(b):\n    actual = pickle.loads(b)\n    assert actual == [1]\n",
            false,
            "deser bound to a name then asserted should NOT be suppressed (assignment context)",
        ),
    ];

    for &(source, want_suppressed, why) in cases {
        let tree = parser.parse(source, None).unwrap();
        let node = first_python_capture(&tree, source, pickle_query);
        let suppressed = is_python_deser_inside_unittest_assertion(node, source);
        assert!(suppressed == want_suppressed, "{}", why);
    }
}

/// Ruby Layer C5 invariants, table-driven.
///
/// The recogniser is expected to accept Minitest `assert_*`/`refute_*`
/// shapes and RSpec `expect(_).to MATCHER` shapes, and to reject production
/// calls, dynamic expected values, and unrelated wrappers.  Rows are
/// `(ruby source, expected verdict, failure message)`, checked in order.
#[test]
fn ruby_deser_inside_test_assertion_recognises_roundtrip_shapes() {
    let mut parser = tree_sitter::Parser::new();
    let lang = tree_sitter::Language::from(tree_sitter_ruby::LANGUAGE);
    parser.set_language(&lang).unwrap();
    // Capture the `Marshal` constant under the deser call's `receiver` field.
    let marshal_query = r#"(call receiver: (constant) @recv (#eq? @recv "Marshal") method: (identifier) @m (#eq? @m "load")) @vuln"#;

    let cases: &[(&[u8], bool, &str)] = &[
        // Minitest assert_equal LITERAL, deser
        (
            b"class T\n  def t(b)\n    assert_equal [1, 2, 3], Marshal.load(b)\n  end\nend\n",
            true,
            "assert_equal [literal], Marshal.load(b) should be suppressed",
        ),
        // Minitest assert_nil
        (
            b"class T\n  def t(b)\n    assert_nil Marshal.load(b)\n  end\nend\n",
            true,
            "assert_nil Marshal.load(b) should be suppressed",
        ),
        // Minitest single-arg truthy assert
        (
            b"class T\n  def t(b)\n    assert Marshal.load(b)\n  end\nend\n",
            true,
            "assert Marshal.load(b) (truthy) should be suppressed",
        ),
        // Minitest assert_kind_of TYPE, deser
        (
            b"class T\n  def t(b)\n    assert_kind_of Array, Marshal.load(b)\n  end\nend\n",
            true,
            "assert_kind_of TYPE, deser should be suppressed",
        ),
        // Minitest refute_equal
        (
            b"class T\n  def t(b)\n    refute_equal [9, 9], Marshal.load(b)\n  end\nend\n",
            true,
            "refute_equal [literal], deser should be suppressed",
        ),
        // RSpec expect(deser).to eq(LITERAL)
        (
            b"describe X do\n  it 'x' do\n    expect(Marshal.load(b)).to eq([1, 2, 3])\n  end\nend\n",
            true,
            "expect(deser).to eq([literal]) should be suppressed",
        ),
        // RSpec expect(deser).to be_nil
        (
            b"describe X do\n  it 'x' do\n    expect(Marshal.load(b)).to be_nil\n  end\nend\n",
            true,
            "expect(deser).to be_nil should be suppressed",
        ),
        // RSpec expect(deser).to be_a(TYPE)
        (
            b"describe X do\n  it 'x' do\n    expect(Marshal.load(b)).to be_a(Array)\n  end\nend\n",
            true,
            "expect(deser).to be_a(TYPE) should be suppressed",
        ),
        // RSpec not_to be_nil
        (
            b"describe X do\n  it 'x' do\n    expect(Marshal.load(b)).not_to be_nil\n  end\nend\n",
            true,
            "expect(deser).not_to be_nil should be suppressed",
        ),
        // Negatives ------------------------------------------------------
        //
        // Production call (no assertion) keeps firing.
        (
            b"def handler(blob)\n  Marshal.load(blob)\nend\n",
            false,
            "production Marshal.load should NOT be suppressed",
        ),
        // assert_equal with dynamic expected keeps firing.
        (
            b"class T\n  def t(b, expected)\n    assert_equal expected, Marshal.load(b)\n  end\nend\n",
            false,
            "assert_equal non_literal, deser should NOT be suppressed",
        ),
        // RSpec expect(deser).to eq(dynamic) keeps firing.
        (
            b"describe X do\n  it 'x' do\n    expect(Marshal.load(b)).to eq(expected)\n  end\nend\n",
            false,
            "expect(deser).to eq(non_literal) should NOT be suppressed",
        ),
        // Custom unrecognised verb (not in the bounding sets) keeps firing.
        (
            b"class T\n  def t(b)\n    custom_check Marshal.load(b)\n  end\nend\n",
            false,
            "non-assertion-verb wrap should NOT be suppressed",
        ),
        // RSpec .should == LIT (old-style, parses as `binary`, not the
        // expected receiver-method-arguments shape) keeps firing.
        (
            b"describe X do\n  it 'x' do\n    Marshal.load(b).should == [1]\n  end\nend\n",
            false,
            "old-style .should == LIT should NOT be suppressed",
        ),
    ];

    for &(source, want_suppressed, why) in cases {
        let tree = parser.parse(source, None).unwrap();
        let node = first_ruby_capture(&tree, source, marshal_query);
        let suppressed = is_ruby_deser_inside_test_assertion(node, source);
        assert!(suppressed == want_suppressed, "{}", why);
    }
}

/// Table-driven check of `is_php_weak_hash_non_crypto_use` against PHP
/// snippets that call `md5`/`sha1`.
///
/// Rows are `(php source, expected verdict, failure message)`; `true`
/// means the call is expected to be classified as a non-crypto use
/// (suppressed), `false` means the finding must keep firing.
#[test]
fn php_weak_hash_non_crypto_use_recognises_canonical_shapes() {
    let mut parser = tree_sitter::Parser::new();
    let lang = tree_sitter::Language::from(tree_sitter_php::LANGUAGE_PHP);
    parser.set_language(&lang).unwrap();
    let hash_call_query =
        r#"(function_call_expression function: (name) @n (#match? @n "^(md5|sha1)$")) @vuln"#;

    let cases: &[(&[u8], bool, &str)] = &[
        // ETag concat returned from getETag() — return-statement enclosing
        // method name path.
        (
            b"<?php\nclass C { public function getETag(): string { return '\"' . md5($this->data) . '\"'; } }\n",
            true,
            "getETag concat should be suppressed",
        ),
        // Array element value with a string-literal key whose name is non-crypto.
        (
            b"<?php\nfunction f($x) { return ['table_name_hash' => md5($x)]; }\n",
            true,
            "array element with `*_hash` key should be suppressed",
        ),
        // Subscript LHS with a string-literal index `'etag'`.
        (
            b"<?php\nfunction f($x, &$row) { $row['etag'] = md5($x); }\n",
            true,
            "subscript LHS with 'etag' key should be suppressed",
        ),
        // Member-access LHS named `storageId` (camelCase boundary on `Id` suffix).
        (
            b"<?php\nclass C { function f() { $this->storageId = md5($this->id); } }\n",
            true,
            "member-access LHS `storageId` should be suppressed",
        ),
        // Null-coalescing assignment with subscript LHS.
        (
            b"<?php\nfunction f($t, &$tables) { $tables[$t]['hash'] ??= md5($t); }\n",
            true,
            "??= subscript LHS with 'hash' key should be suppressed",
        ),
        // Call result used as an array index.
        (
            b"<?php\nfunction f($a, $x) { return $a[md5($x)]; }\n",
            true,
            "md5 used as subscript index should be suppressed",
        ),
        // Cache-style lookup verb (`$cache->get(sha1(...))`).
        (
            b"<?php\nclass C { public $cache; function f($u) { return $this->cache->get(sha1($u)); } }\n",
            true,
            "method call to lookup-verb `get(sha1(..))` should be suppressed",
        ),
        // `createNamedParameter` wrapper around md5 inside an array element value.
        (
            b"<?php\nclass C { public $q; function f($d) { $this->q->insert('t')->values(['etag' => $this->q->createNamedParameter(md5($d))]); } }\n",
            true,
            "wrapper-call inside array element with `etag` key should be suppressed",
        ),
        // Dynamic-index subscript LHS with a non-crypto receiver name.
        (
            b"<?php\nfunction f($cols) { $columnNamesHashes = []; foreach ($cols as $c) { $columnNamesHashes[$c] = md5($c); } }\n",
            true,
            "subscript LHS with dynamic index — receiver name `*Hashes` should drive suppression",
        ),
        // Crypto consumer — keep firing.  $this->password = md5($pwd).
        (
            b"<?php\nclass C { public $password; function f($p) { $this->password = md5($p); } }\n",
            false,
            "$this->password = md5(...) is crypto storage and must NOT be suppressed",
        ),
        // Compound name with crypto-keyword substring.  $tokenHash = md5(...).
        (
            b"<?php\nfunction f($x) { $tokenHash = md5($x); }\n",
            false,
            "$tokenHash compound name must NOT be suppressed (contains 'token')",
        ),
        // pw_hash compound — must NOT be suppressed.
        (
            b"<?php\nfunction f($p) { $pw_hash = md5($p); }\n",
            false,
            "$pw_hash compound name must NOT be suppressed",
        ),
        // Bare statement / unrecognised consumer — keep firing.
        (
            b"<?php\nfunction f($x) { var_dump(md5($x)); }\n",
            false,
            "var_dump(md5(...)) has no recognisable consumer name and must NOT be suppressed",
        ),
    ];

    for &(source, want_suppressed, why) in cases {
        let tree = parser.parse(source, None).unwrap();
        let node = first_php_capture(&tree, source, hash_call_query);
        let suppressed = is_php_weak_hash_non_crypto_use(node, source);
        assert!(suppressed == want_suppressed, "{}", why);
    }
}

/// Pins the classification `name_is_non_crypto` gives to a fixed list of
/// identifier names.  Rows are `(name, expected result)` and are checked in
/// order; a mismatch reports the offending name.
#[test]
fn name_is_non_crypto_recognises_word_boundary_suffixes() {
    let expectations: &[(&str, bool)] = &[
        // Whole-word and underscore boundaries.
        ("hash", true),
        ("etag", true),
        ("table_name_hash", true),
        ("table_id", true),
        ("cache_key", true),
        // CamelCase boundaries.
        ("storageId", true),
        ("tableHash", true),
        ("sqlMd5", true),
        ("cacheBuster", true),
        // Long stand-alone suffix (≥4) without word boundary.
        ("columnnameshashes", true),
        ("tablefingerprint", true),
        // Non-letter previous char — digit.
        ("v1id", true),
        // Keep firing on crypto-keyword compound names.
        ("password_hash", false),
        ("hashedPassword", false),
        ("tokenHash", false),
        ("signatureHash", false),
        ("pw_hash", false),
        ("digest", false),
        ("hmac", false),
        ("salt", false),
        ("private_key", false),
        // Bare `key`/`keys` and `apiKey` shapes are crypto-credential
        // candidates and must keep firing; specific safe forms like
        // `cache_key`/`cachekey` are still suppressed via their own
        // entries in `SAFE_SUFFIXES`.
        ("key", false),
        ("keys", false),
        ("apiKey", false),
        ("api_key", false),
        ("apiKeyHash", false),
        ("api_key_hash", false),
        ("cache_key", true),
        ("cachekey", true),
        // Words that LOOK like an `id` suffix but lack a word boundary —
        // do NOT classify (no boundary, length-2 suffix).
        ("said", false),
        ("void", false),
        ("rapid", false),
        // Unrecognised generic names.
        ("x", false),
        ("result", false),
        ("output", false),
        ("", false),
        // Non-ASCII before a short suffix should NOT be treated as a word
        // boundary (no false-positive classification on identifiers like
        // `tëid`, whose previous char is a Unicode letter, not punctuation).
        ("tëid", false),
        // Non-ASCII before a long (≥4) suffix still classifies via the
        // length fallback, matching the `columnnameshashes` shape.
        ("tëhash", true),
        // Non-ASCII before a real underscore-prefixed suffix continues to
        // classify via the underscore boundary.
        ("tablë_id", true),
    ];

    for &(name, expected) in expectations {
        assert!(
            name_is_non_crypto(name) == expected,
            "name_is_non_crypto({:?}) should be {}",
            name,
            expected
        );
    }
}

/// Pins which method names `method_is_lookup_verb` accepts and rejects.
/// Accepted names are checked first, then rejected ones, each in list order.
#[test]
fn method_is_lookup_verb_recognises_cache_verbs() {
    let accepted = [
        // Direct verb match.
        "get",
        "set",
        "has",
        "delete",
        "fetch",
        "getItem",
        "setItem",
        // Composite forms — verb prefix + non-crypto suffix.
        "getCacheKey",
        "setCacheKey",
        "buildKey",
        "createId",
        "hasFingerprint",
    ];
    let rejected = [
        // Crypto-comparison helpers — keep firing.
        "hash_equals",
        "verify",
        "password_verify",
        "decrypt",
        "encrypt",
        "sign",
        "invoke",
        "doSomething",
    ];

    for method in accepted {
        assert!(
            method_is_lookup_verb(method),
            "method_is_lookup_verb({:?}) should be true",
            method
        );
    }
    for method in rejected {
        assert!(
            !method_is_lookup_verb(method),
            "method_is_lookup_verb({:?}) should be false",
            method
        );
    }
}

/// Pins the verdict of `sprintf_format_is_safe` for a fixed set of format
/// strings.  Formats expected to be safe are checked first, then the ones
/// expected to be refused, each in list order.
#[test]
fn sprintf_format_safety_classifier() {
    let safe_formats = [
        // Numeric / char / pointer specifiers, bounded by definition.
        "",
        "hello world",
        "%d",
        "%lld%c",
        "fixed=%d/%c",
        "%5d %x %llo",
        "%%literal-percent",
        "%p",
        // Precision-bounded `%s` / `%.*s`, output capped at precision.
        " %.*s",
        "%.5s",
        "[%-.10s]",
    ];
    let unsafe_formats = [
        // Bare `%s` / width-only `%5s`, width is a *minimum*, length is
        // unbounded.  Must NOT be suppressed.
        "%s",
        "hello %s world",
        "%5s",
        "[%-20s]",
        // Unknown / non-standard conversions → conservative refuse.
        "%S",
        "%",
        "%lZ",
    ];

    for fmt in safe_formats {
        assert!(
            sprintf_format_is_safe(fmt),
            "sprintf_format_is_safe({:?}) should be true",
            fmt
        );
    }
    for fmt in unsafe_formats {
        assert!(
            !sprintf_format_is_safe(fmt),
            "sprintf_format_is_safe({:?}) should be false",
            fmt
        );
    }
}

/// Test helper: compiles `query_str` against the tree-sitter C grammar, runs
/// it over `tree`, and returns the node bound to capture index 0 in the
/// first match.
///
/// Panics if the query does not compile, if nothing matches, or if the first
/// match has no capture with index 0.
#[cfg(test)]
fn first_c_capture<'tree>(
    tree: &'tree tree_sitter::Tree,
    code: &[u8],
    query_str: &str,
) -> tree_sitter::Node<'tree> {
    use tree_sitter::StreamingIterator;
    let grammar = tree_sitter::Language::from(tree_sitter_c::LANGUAGE);
    let compiled = tree_sitter::Query::new(&grammar, query_str).expect("query compiles");
    let mut query_cursor = tree_sitter::QueryCursor::new();
    let mut hits = query_cursor.matches(&compiled, tree.root_node(), code);
    let first_hit = hits.next().expect("query should match");
    // Scan the first match's captures for the one registered at index 0.
    for capture in first_hit.captures {
        if capture.index == 0 {
            return capture.node;
        }
    }
    panic!("capture index 0")
}

/// Test helper: compiles `query_str` against the C++ grammar, runs it over
/// `tree`, and returns the node bound to capture index 0 in the first match.
///
/// NOTE(review): tree-sitter appears to number captures in the order they are
/// declared in the query text, so for a query shaped like `(… @n …) @vuln`
/// index 0 would be `@n`, not `@vuln` — confirm this is what callers expect.
///
/// # Panics
///
/// Panics when the query does not compile, when it produces no match, or when
/// the first match holds no capture with index 0.
#[cfg(test)]
fn first_cpp_capture<'tree>(
    tree: &'tree tree_sitter::Tree,
    code: &[u8],
    query_str: &str,
) -> tree_sitter::Node<'tree> {
    use tree_sitter::StreamingIterator;
    let cpp_language = tree_sitter::Language::from(tree_sitter_cpp::LANGUAGE);
    let compiled = tree_sitter::Query::new(&cpp_language, query_str).expect("query compiles");
    let mut query_cursor = tree_sitter::QueryCursor::new();
    let mut hits = query_cursor.matches(&compiled, tree.root_node(), code);
    let first_hit = hits.next().expect("query should match");
    first_hit
        .captures
        .iter()
        .find_map(|capture| (capture.index == 0).then_some(capture.node))
        .expect("capture index 0")
}

/// Pins which cast-target type spellings `cpp_cast_target_type_is_safe`
/// accepts and which it rejects.  Accepted spellings are checked first, then
/// rejected ones.
#[test]
fn cpp_cast_target_type_is_safe_recognises_canonical_shapes() {
    use crate::ast::cpp_cast_target_type_is_safe as is_safe_target;

    let accepted = [
        // Byte-pointer family — C++ explicitly permits byte-level access.
        "char*",
        "char *",
        "const char*",
        "const char *",
        "unsigned char*",
        "const unsigned char*",
        "signed char*",
        "uint8_t*",
        "const uint8_t*",
        "int8_t*",
        "std::byte*",
        "const std::byte*",
        "byte*",
        "wchar_t*",
        // void* — well-defined target.
        "void*",
        "const void*",
        // Integer round-trip — value cast only (pointer depth 0).  Only the
        // pointer<->integer value conversion is well-defined; aliasing
        // *through* a `uintptr_t*` / `intptr_t*` is rejected further down.
        "uintptr_t",
        "std::uintptr_t",
        "intptr_t",
        "std::intptr_t",
        // BSD socket family — POSIX intentionally type-puns these.
        "sockaddr*",
        "struct sockaddr*",
        "sockaddr_in*",
        "sockaddr_in6*",
        "sockaddr_un*",
        "sockaddr_storage*",
        // Multi-token spellings with extra whitespace — the normaliser
        // should collapse it.
        "const   uint8_t *",
        "uint8_t  * const",
        "const  unsigned   char *",
    ];
    for ty in accepted {
        assert!(
            is_safe_target(ty),
            "cast target {ty:?} should be classified as safe"
        );
    }

    let rejected = [
        // Pointer-to-pointer is NOT covered by the [basic.lval]/11 aliasing
        // exemption — accessing a `char*` object through a `char**` is a
        // strict-aliasing violation.  Same for `void**`, `uint8_t**`, etc.
        "char**",
        "uint8_t**",
        "void**",
        "void **",
        // Pointer to an integer round-trip type: only the pointer<->integer
        // **value** cast is well-defined, not aliasing through a
        // pointer-to-uintptr_t.
        "uintptr_t*",
        "intptr_t*",
        "std::uintptr_t*",
        // Non-safe shapes — must NOT be suppressed.
        "MyStruct*",
        "InstanceType*",
        "DBImpl*",
        "C*",
        "CPP*",
        "T*",
        "secp256k1_keypair*",
        "PIP_ADAPTER_ADDRESSES",
        "std::vector<int>*",
        "void(*)(int)",
        "char[10]",
        // A bare (non-pointer) type is only safe for the round-trip
        // integers — `int`, `size_t`, `uint64_t` should NOT match, nor
        // should bare `char` / `uint8_t` without a pointer.
        "int",
        "size_t",
        "uint64_t",
        "char",
        "uint8_t",
    ];
    for ty in rejected {
        assert!(
            !is_safe_target(ty),
            "cast target {ty:?} should NOT be classified as safe"
        );
    }
}

/// Parses small C++ snippets containing a `reinterpret_cast<…>` call and
/// checks the verdict of `is_cpp_cast_target_type_safe` for each target type,
/// plus one check that a different rule id is left alone.
#[test]
fn cpp_reinterpret_cast_layer_e_recognises_byte_pointer_targets() {
    const RULE: &str = "cpp.memory.reinterpret_cast";

    let mut cpp_parser = tree_sitter::Parser::new();
    cpp_parser
        .set_language(&tree_sitter::Language::from(tree_sitter_cpp::LANGUAGE))
        .unwrap();
    let cast_query = r#"(call_expression
                 function: (template_function
                   name: (identifier) @n (#eq? @n "reinterpret_cast")))
               @vuln"#;

    // Parse `snippet`, grab the first query capture, and report whether the
    // cast is considered safe under `rule_id`.
    let mut suppressed = |rule_id: &str, snippet: &[u8]| -> bool {
        let parsed = cpp_parser.parse(snippet, None).unwrap();
        let node = first_cpp_capture(&parsed, snippet, cast_query);
        is_cpp_cast_target_type_safe(rule_id, node, snippet)
    };

    // reinterpret_cast<uint8_t*>(p) — the leveldb / serialization shape.
    assert!(
        suppressed(
            RULE,
            b"void f(int* p) { auto q = reinterpret_cast<uint8_t*>(p); (void)q; }\n",
        ),
        "reinterpret_cast<uint8_t*> must be suppressed (byte-pointer target)"
    );

    // reinterpret_cast<const std::byte*>(p) — qualified scoped name.
    assert!(
        suppressed(
            RULE,
            b"#include <cstddef>\nvoid f(int* p) { auto q = reinterpret_cast<const std::byte*>(p); (void)q; }\n",
        ),
        "reinterpret_cast<const std::byte*> must be suppressed"
    );

    // reinterpret_cast<void*>(0x08000000) — synthetic-address shape.
    assert!(
        suppressed(
            RULE,
            b"void* f() { return reinterpret_cast<void*>(0x08000000); }\n",
        ),
        "reinterpret_cast<void*> must be suppressed (synthetic address)"
    );

    // reinterpret_cast<uintptr_t>(p) — integer round-trip.
    assert!(
        suppressed(
            RULE,
            b"#include <cstdint>\nuintptr_t f(int* p) { return reinterpret_cast<uintptr_t>(p); }\n",
        ),
        "reinterpret_cast<uintptr_t> must be suppressed (integer round-trip)"
    );

    // reinterpret_cast<sockaddr*>(&addr) — POSIX socket-API shape.
    assert!(
        suppressed(
            RULE,
            b"struct sockaddr_in { int x; };\nstruct sockaddr;\nvoid f(struct sockaddr_in* a) { auto* s = reinterpret_cast<sockaddr*>(a); (void)s; }\n",
        ),
        "reinterpret_cast<sockaddr*> must be suppressed (BSD socket pun)"
    );

    // reinterpret_cast<MyStruct*>(buf) — strict-aliasing UB risk, must NOT
    // be suppressed.
    let aliasing_snippet: &[u8] = b"struct MyStruct { int a; };\nMyStruct* f(char* buf) { return reinterpret_cast<MyStruct*>(buf); }\n";
    assert!(
        !suppressed(RULE, aliasing_snippet),
        "reinterpret_cast<MyStruct*> must NOT be suppressed (genuine strict-aliasing risk)"
    );

    // Same snippet under a different rule id: other rules are unaffected.
    assert!(
        !suppressed("cpp.memory.const_cast", aliasing_snippet),
        "Layer E must only fire for cpp.memory.reinterpret_cast"
    );
}

/// Parses small C snippets calling `strcpy` / `strcat` / `sprintf` and checks
/// the verdict of `is_c_buffer_call_literal_safe` for each source / format
/// argument shape, plus one check that an unrelated rule id is left alone.
#[test]
fn c_buffer_call_literal_safe_recognises_canonical_shapes() {
    let mut c_parser = tree_sitter::Parser::new();
    c_parser
        .set_language(&tree_sitter::Language::from(tree_sitter_c::LANGUAGE))
        .unwrap();

    let q_strcpy = r#"(call_expression function: (identifier) @id (#eq? @id "strcpy")) @vuln"#;
    let q_strcat = r#"(call_expression function: (identifier) @id (#eq? @id "strcat")) @vuln"#;
    let q_sprintf = r#"(call_expression function: (identifier) @id (#eq? @id "sprintf")) @vuln"#;

    // Parse `snippet`, grab the first capture of `query`, and report whether
    // the call is considered literal-safe under `rule_id`.
    let mut suppressed = |rule_id: &str, query: &str, snippet: &[u8]| -> bool {
        let parsed = c_parser.parse(snippet, None).unwrap();
        let node = first_c_capture(&parsed, snippet, query);
        is_c_buffer_call_literal_safe(rule_id, node, snippet)
    };

    // strcpy(dst, "literal"), postgres autoprewarm shape.
    assert!(
        suppressed(
            "c.memory.strcpy",
            q_strcpy,
            b"void f(char *d) { strcpy(d, \"pg_prewarm\"); }\n",
        ),
        "strcpy with string-literal source must be suppressed"
    );

    // strcpy(dst, cond ? "a" : "b"), string-literal ternary.
    assert!(
        suppressed(
            "c.memory.strcpy",
            q_strcpy,
            b"void f(char *s, int h) { strcpy(s, (h >= 12) ? \"p.m.\" : \"a.m.\"); }\n",
        ),
        "strcpy with ternary-of-literals source must be suppressed"
    );

    // strcpy(dst, cond ? P_M_STR : A_M_STR), postgres formatting.c
    // shape with #define'd ALL_CAPS string-constant macros.
    assert!(
        suppressed(
            "c.memory.strcpy",
            q_strcpy,
            b"#define P_M_STR \"p.m.\"\n#define A_M_STR \"a.m.\"\nvoid f(char *s, int h) { strcpy(s, (h >= 12) ? P_M_STR : A_M_STR); }\n",
        ),
        "strcpy with ternary-of-ALL_CAPS-macros must be suppressed"
    );

    // strcpy(dst, cond ? var_a : var_b), lowercase variables, NOT a
    // recognisable preprocessor macro shape.  Must NOT suppress.
    assert!(
        !suppressed(
            "c.memory.strcpy",
            q_strcpy,
            b"void f(char *s, int h, char *a, char *b) { strcpy(s, (h >= 12) ? a : b); }\n",
        ),
        "strcpy with ternary-of-lowercase-vars must NOT be suppressed"
    );

    // strcat(dst, "literal"), same principle as strcpy.
    assert!(
        suppressed(
            "c.memory.strcat",
            q_strcat,
            b"void f(char *d) { strcat(d, \" (done)\"); }\n",
        ),
        "strcat with string-literal source must be suppressed"
    );

    // sprintf(dst, "%lld%c", ...), numeric format string.
    assert!(
        suppressed(
            "c.memory.sprintf",
            q_sprintf,
            b"void f(char *cp, long long v, char u) { sprintf(cp, \"%lld%c\", v, u); }\n",
        ),
        "sprintf with numeric-only format must be suppressed"
    );

    // sprintf(str, " %.*s", N, x), precision-bounded `%s`.
    assert!(
        suppressed(
            "c.memory.sprintf",
            q_sprintf,
            b"void f(char *str, int n, const char *x) { sprintf(str, \" %.*s\", n, x); }\n",
        ),
        "sprintf with precision-bounded `%.*s` must be suppressed"
    );

    // strcpy(dst, src) where src is a non-literal, must NOT suppress.
    assert!(
        !suppressed(
            "c.memory.strcpy",
            q_strcpy,
            b"void f(char *d, char **a) { strcpy(d, a[1]); }\n",
        ),
        "strcpy with non-literal source must NOT be suppressed"
    );

    // sprintf with bare `%s`, must NOT suppress.  (`%` is not special in
    // Rust format strings, so the message spells it as a single `%`.)
    assert!(
        !suppressed(
            "c.memory.sprintf",
            q_sprintf,
            b"void f(char *b, const char *u) { sprintf(b, \"%s\", u); }\n",
        ),
        "sprintf with bare `%s` must NOT be suppressed"
    );

    // sprintf whose format is not a single literal (a string literal
    // followed by a PRI* macro) must NOT be suppressed: the engine cannot
    // statically expand the macro.
    assert!(
        !suppressed(
            "c.memory.sprintf",
            q_sprintf,
            b"void f(char *b, long long v) { sprintf(b, \"%\" PRId64, v); }\n",
        ),
        "sprintf with concatenated_string format must NOT be suppressed"
    );

    // Other rule ids should not be affected.
    assert!(
        !suppressed(
            "c.memory.gets",
            q_strcpy,
            b"void f(char *d) { strcpy(d, \"x\"); }\n",
        ),
        "Layer D should only fire for buffer-overflow rule ids"
    );
}

/// Regression test: a Python f-string (a `string` node that contains
/// `interpolation` children) must NOT be reported as literal by
/// `is_literal_node`, while a plain string literal still must be.
///
/// If f-strings counted as literal, Layer A's "all-args-literal → suppress
/// Security finding" shortcut would hide every CVE that injects via
/// `cursor.execute(f"…{x}…")` or `text(f"…{x}…")`.  Motivated by
/// CVE-2025-69662 (geopandas SQLi via `text(f"SELECT … '{geom_name}' …")`)
/// and CVE-2025-24793 (snowflake-connector-python f-string-built
/// CREATE STAGE / DROP).
#[test]
fn is_literal_node_rejects_python_fstring_with_interpolation() {
    let mut py_parser = tree_sitter::Parser::new();
    py_parser
        .set_language(&tree_sitter::Language::from(tree_sitter_python::LANGUAGE))
        .unwrap();

    // Parse a one-line `x = <string>` module, walk to the right-hand side of
    // the assignment, check it is a `string` node, and classify it.
    let mut rhs_is_literal = |src: &[u8]| -> bool {
        let parsed = py_parser.parse(src, None).unwrap();
        let rhs = parsed
            .root_node()
            .child(0)
            .and_then(|stmt| stmt.child(0))
            .expect("assignment node")
            .child_by_field_name("right")
            .expect("RHS of assignment");
        assert_eq!(rhs.kind(), "string");
        is_literal_node(rhs, src)
    };

    // f-string with one interpolation segment, must be non-literal.
    assert!(
        !rhs_is_literal(b"x = f\"SELECT * WHERE y = '{u}'\"\n"),
        "f-string with interpolation must not be classified as literal"
    );

    // Plain string literal, must remain literal.
    assert!(
        rhs_is_literal(b"x = \"plain literal\"\n"),
        "plain string literal must be classified as literal"
    );
}

/// Test helper: compiles `query_str` against the Java grammar, runs it over
/// `tree`, and returns the node bound to capture index 0 in the first match.
///
/// NOTE(review): tree-sitter appears to number captures in the order they are
/// declared in the query text, so for a query shaped like `(… @c …) @vuln`
/// index 0 would be `@c`, not `@vuln` — confirm this is what callers expect.
///
/// # Panics
///
/// Panics when the query does not compile, when it produces no match, or when
/// the first match holds no capture with index 0.
#[cfg(test)]
fn first_java_capture<'tree>(
    tree: &'tree tree_sitter::Tree,
    code: &[u8],
    query_str: &str,
) -> tree_sitter::Node<'tree> {
    use tree_sitter::StreamingIterator;
    let java_language = tree_sitter::Language::from(tree_sitter_java::LANGUAGE);
    let compiled = tree_sitter::Query::new(&java_language, query_str).expect("query compiles");
    let mut query_cursor = tree_sitter::QueryCursor::new();
    let mut hits = query_cursor.matches(&compiled, tree.root_node(), code);
    let first_hit = hits.next().expect("query should match");
    first_hit
        .captures
        .iter()
        .find_map(|capture| (capture.index == 0).then_some(capture.node))
        .expect("capture index 0")
}

/// Parses small Java snippets and checks the verdict of
/// `is_call_all_args_literal` for `method_invocation` and
/// `object_creation_expression` calls with literal, constant, parameter and
/// empty argument lists.
#[test]
fn is_call_all_args_literal_recognises_java_call_kinds() {
    let mut java_parser = tree_sitter::Parser::new();
    java_parser
        .set_language(&tree_sitter::Language::from(tree_sitter_java::LANGUAGE))
        .unwrap();

    let for_name_query = r#"(method_invocation
                 object: (identifier) @c (#eq? @c "Class")
                 name: (identifier) @id (#eq? @id "forName"))
               @vuln"#;
    let ctor_query = r#"(object_creation_expression) @vuln"#;

    // Parse `snippet`, grab the first capture of `query`, and report whether
    // the call's arguments are all considered literal.
    let mut all_literal = |query: &str, snippet: &[u8]| -> bool {
        let parsed = java_parser.parse(snippet, None).unwrap();
        let node = first_java_capture(&parsed, snippet, query);
        is_call_all_args_literal(node, snippet, "java")
    };

    // method_invocation with literal arg, Layer A must suppress.
    assert!(
        all_literal(
            for_name_query,
            b"class T { void f() throws Exception { Class.forName(\"com.foo.Bar\"); } }",
        ),
        "method_invocation with literal arg must trigger Layer A suppression"
    );

    // method_invocation with class-constant arg, Layer A must suppress
    // via the file-level scalar-binding lookup (session 0014/0015).
    assert!(
        all_literal(
            for_name_query,
            b"class T {\n  private static final String D = \"com.foo.Bar\";\n  void f() throws Exception { Class.forName(D); }\n}",
        ),
        "method_invocation with class-const arg must trigger Layer A suppression"
    );

    // method_invocation with parameter arg, Layer A must NOT suppress.
    assert!(
        !all_literal(
            for_name_query,
            b"class T { void f(String s) throws Exception { Class.forName(s); } }",
        ),
        "method_invocation with non-literal arg must NOT trigger Layer A suppression"
    );

    // object_creation_expression with empty args (`new Yaml()` shape).
    // `has_any_arg` stays false so the gate also returns false: empty
    // arg lists do not satisfy "all args are literal" (arg-less calls
    // can still carry side-effect risk via the constructor itself).
    assert!(
        !all_literal(
            ctor_query,
            b"class T { Object f() { return new Object(); } }",
        ),
        "object_creation_expression with empty args must NOT trigger Layer A"
    );

    // object_creation_expression with literal arg, must suppress.
    assert!(
        all_literal(
            ctor_query,
            b"class T { Object f() { return new String(\"literal\"); } }",
        ),
        "object_creation_expression with literal arg must trigger Layer A"
    );
}